GNU Octave 6.2.0
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab.
oct-fftw.cc
1 ////////////////////////////////////////////////////////////////////////
2 //
3 // Copyright (C) 2001-2021 The Octave Project Developers
4 //
5 // See the file COPYRIGHT.md in the top-level directory of this
6 // distribution or <https://octave.org/copyright/>.
7 //
8 // This file is part of Octave.
9 //
10 // Octave is free software: you can redistribute it and/or modify it
11 // under the terms of the GNU General Public License as published by
12 // the Free Software Foundation, either version 3 of the License, or
13 // (at your option) any later version.
14 //
15 // Octave is distributed in the hope that it will be useful, but
16 // WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 // GNU General Public License for more details.
19 //
20 // You should have received a copy of the GNU General Public License
21 // along with Octave; see the file COPYING. If not, see
22 // <https://www.gnu.org/licenses/>.
23 //
24 ////////////////////////////////////////////////////////////////////////
25 
26 #if defined (HAVE_CONFIG_H)
27 # include "config.h"
28 #endif
29 
30 #if defined (HAVE_FFTW3_H)
31 # include <fftw3.h>
32 #endif
33 
34 #include "lo-error.h"
35 #include "oct-fftw.h"
36 #include "oct-locbuf.h"
37 #include "quit.h"
38 #include "singleton-cleanup.h"
39 
40 #if defined (HAVE_FFTW3_THREADS) || defined (HAVE_FFTW3F_THREADS)
41 # include "nproc-wrapper.h"
42 #endif
43 
44 namespace octave
45 {
46 #if defined (HAVE_FFTW)
47 
48  fftw_planner *fftw_planner::instance = nullptr;
49 
50  // Helper class to create and cache FFTW plans for both 1D and
51  // 2D. This implementation defaults to using FFTW_ESTIMATE to create
52  // the plans, which in theory is suboptimal, but provides quite
53  // reasonable performance in practice.
54 
55  // Also note that if FFTW_ESTIMATE is not used then the planner in FFTW3
56  // will destroy the input and output arrays. We must, therefore, create a
57  // temporary input array with the same size and 16-byte alignment as
58  // the original array when using a different planner strategy.
59  // Note that we also use any wisdom that is available, either in a
60  // FFTW3 system wide file or as supplied by the user.
61 
62  // FIXME: If we can ensure 16-byte alignment in Array<T>
63  // (<T> *data), then FFTW3 can use SIMD instructions for further
64  // acceleration.
65 
66  // Note that it is profitable to store the FFTW3 plans, for small FFTs.
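 // Illustrative sketch (editorial addition, not part of this file): the raw
 // FFTW calls that the planner class below caches and automates.  With
 // FFTW_ESTIMATE the planner leaves the caller's buffers untouched, and any
 // imported wisdom can improve the chosen plan.  The helper name example_fft
 // and its use of std::vector are assumptions for illustration only.
 //
 //   #include <fftw3.h>
 //   #include <complex>
 //   #include <vector>
 //
 //   void example_fft (std::vector<std::complex<double>>& x)
 //   {
 //     fftw_import_system_wisdom ();  // reuse any system-wide wisdom
 //     fftw_plan p
 //       = fftw_plan_dft_1d (static_cast<int> (x.size ()),
 //                           reinterpret_cast<fftw_complex *> (x.data ()),
 //                           reinterpret_cast<fftw_complex *> (x.data ()),
 //                           FFTW_FORWARD, FFTW_ESTIMATE);
 //     fftw_execute (p);              // in-place transform of x
 //     fftw_destroy_plan (p);         // the planner below keeps plans cached
 //   }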
67 
68  fftw_planner::fftw_planner (void)
69  : meth (ESTIMATE), rplan (nullptr), rd (0), rs (0), rr (0), rh (0), rn (),
70  rsimd_align (false), nthreads (1)
71  {
72  plan[0] = plan[1] = nullptr;
73  d[0] = d[1] = s[0] = s[1] = r[0] = r[1] = h[0] = h[1] = 0;
74  simd_align[0] = simd_align[1] = false;
75  inplace[0] = inplace[1] = false;
76  n[0] = n[1] = dim_vector ();
77 
78 #if defined (HAVE_FFTW3_THREADS)
79  int init_ret = fftw_init_threads ();
80  if (! init_ret)
81  (*current_liboctave_error_handler) ("Error initializing FFTW threads");
82 
83  // Use number of processors available to the current process
84  // This can be later changed with fftw ("threads", nthreads).
85  nthreads = octave_num_processors_wrapper (OCTAVE_NPROC_CURRENT_OVERRIDABLE);
86  fftw_plan_with_nthreads (nthreads);
87 #endif
88 
89  // If we have a system wide wisdom file, import it.
90  fftw_import_system_wisdom ();
91  }
92 
93  fftw_planner::~fftw_planner (void)
94  {
95  fftw_plan *plan_p;
96 
97  plan_p = reinterpret_cast<fftw_plan *> (&rplan);
98  if (*plan_p)
99  fftw_destroy_plan (*plan_p);
100 
101  plan_p = reinterpret_cast<fftw_plan *> (&plan[0]);
102  if (*plan_p)
103  fftw_destroy_plan (*plan_p);
104 
105  plan_p = reinterpret_cast<fftw_plan *> (&plan[1]);
106  if (*plan_p)
107  fftw_destroy_plan (*plan_p);
108  }
109 
110  bool
111  fftw_planner::instance_ok (void)
112  {
113  bool retval = true;
114 
115  if (! instance)
116  {
117  instance = new fftw_planner ();
118  singleton_cleanup_list::add (cleanup_instance);
119  }
120 
121  return retval;
122  }
123 
124  void
125  fftw_planner::threads (int nt)
126  {
127 #if defined (HAVE_FFTW3_THREADS)
128  if (instance_ok () && nt != threads ())
129  {
130  instance->nthreads = nt;
131  fftw_plan_with_nthreads (nt);
132  // Clear the current plans.
133  instance->rplan = instance->plan[0] = instance->plan[1] = nullptr;
134  }
135 #else
136  octave_unused_parameter (nt);
137 
138  (*current_liboctave_warning_handler)
139  ("unable to change number of threads without FFTW thread support");
140 #endif
141  }
142 
143 #define CHECK_SIMD_ALIGNMENT(x) \
144  (((reinterpret_cast<ptrdiff_t> (x)) & 0xF) == 0)
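 // The macro above treats a pointer as 16-byte aligned when the low four bits
 // of its address are zero.  A minimal illustration (editorial addition):
 //
 //   double buf[8];
 //   bool aligned = CHECK_SIMD_ALIGNMENT (&buf[0]);  // true iff the address
 //                                                   // is a multiple of 16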
145 
146  void *
147  fftw_planner::do_create_plan (int dir, const int rank,
148  const dim_vector& dims,
149  octave_idx_type howmany,
150  octave_idx_type stride,
151  octave_idx_type dist,
152  const Complex *in, Complex *out)
153  {
154  int which = (dir == FFTW_FORWARD) ? 0 : 1;
155  fftw_plan *cur_plan_p = reinterpret_cast<fftw_plan *> (&plan[which]);
156  bool create_new_plan = false;
157  bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out);
158  bool ioinplace = (in == out);
159 
160  // Don't create a new plan if we have a non SIMD plan already but
161  // can do SIMD. This prevents endlessly recreating plans if we
162  // change the alignment.
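 // (That is, an existing unaligned plan is still reused when the new buffers
 // happen to be aligned; a new plan is forced only when an aligned plan would
 // be handed unaligned data.)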
163 
164  if (plan[which] == nullptr || d[which] != dist || s[which] != stride
165  || r[which] != rank || h[which] != howmany
166  || ioinplace != inplace[which]
167  || ((ioalign != simd_align[which]) ? ! ioalign : false))
168  create_new_plan = true;
169  else
170  {
171  // We still might not have the same shape of array.
172 
173  for (int i = 0; i < rank; i++)
174  if (dims(i) != n[which](i))
175  {
176  create_new_plan = true;
177  break;
178  }
179  }
180 
181  if (create_new_plan)
182  {
183  d[which] = dist;
184  s[which] = stride;
185  r[which] = rank;
186  h[which] = howmany;
187  simd_align[which] = ioalign;
188  inplace[which] = ioinplace;
189  n[which] = dims;
190 
191  // Note reversal of dimensions for column major storage in FFTW.
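 // (Octave stores arrays column-major while FFTW expects row-major
 // dimensions, so, for example, an Octave array of size 3x4x5 is described
 // to FFTW as {5, 4, 3}.)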
192  octave_idx_type nn = 1;
193  OCTAVE_LOCAL_BUFFER (int, tmp, rank);
194 
195  for (int i = 0, j = rank-1; i < rank; i++, j--)
196  {
197  tmp[i] = dims(j);
198  nn *= dims(j);
199  }
200 
201  int plan_flags = 0;
202  bool plan_destroys_in = true;
203 
204  switch (meth)
205  {
206  case UNKNOWN:
207  case ESTIMATE:
208  plan_flags |= FFTW_ESTIMATE;
209  plan_destroys_in = false;
210  break;
211  case MEASURE:
212  plan_flags |= FFTW_MEASURE;
213  break;
214  case PATIENT:
215  plan_flags |= FFTW_PATIENT;
216  break;
217  case EXHAUSTIVE:
218  plan_flags |= FFTW_EXHAUSTIVE;
219  break;
220  case HYBRID:
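 // HYBRID: take the time to measure plans for transforms of up to 8192
 // points; fall back to the cheap estimate (which leaves the input intact)
 // for larger ones.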
221  if (nn < 8193)
222  plan_flags |= FFTW_MEASURE;
223  else
224  {
225  plan_flags |= FFTW_ESTIMATE;
226  plan_destroys_in = false;
227  }
228  break;
229  }
230 
231  if (ioalign)
232  plan_flags &= ~FFTW_UNALIGNED;
233  else
234  plan_flags |= FFTW_UNALIGNED;
235 
236  if (*cur_plan_p)
237  fftw_destroy_plan (*cur_plan_p);
238 
239  if (plan_destroys_in)
240  {
241  // Create matrix with the same size and 16-byte alignment as input
242  OCTAVE_LOCAL_BUFFER (Complex, itmp, nn * howmany + 32);
243  itmp = reinterpret_cast<Complex *>
244  (((reinterpret_cast<ptrdiff_t> (itmp) + 15) & ~ 0xF) +
245  ((reinterpret_cast<ptrdiff_t> (in)) & 0xF));
246 
247  *cur_plan_p
248  = fftw_plan_many_dft (rank, tmp, howmany,
249  reinterpret_cast<fftw_complex *> (itmp),
250  nullptr, stride, dist,
251  reinterpret_cast<fftw_complex *> (out),
252  nullptr, stride, dist, dir, plan_flags);
253  }
254  else
255  {
256  *cur_plan_p
257  = fftw_plan_many_dft (rank, tmp, howmany,
258  reinterpret_cast<fftw_complex *> (const_cast<Complex *> (in)),
259  nullptr, stride, dist,
260  reinterpret_cast<fftw_complex *> (out),
261  nullptr, stride, dist, dir, plan_flags);
262  }
263 
264  if (*cur_plan_p == nullptr)
265  (*current_liboctave_error_handler) ("Error creating fftw plan");
266  }
267 
268  return *cur_plan_p;
269  }
270 
271  void *
272  fftw_planner::do_create_plan (const int rank, const dim_vector& dims,
273  octave_idx_type howmany,
274  octave_idx_type stride,
275  octave_idx_type dist,
276  const double *in, Complex *out)
277  {
278  fftw_plan *cur_plan_p = reinterpret_cast<fftw_plan *> (&rplan);
279  bool create_new_plan = false;
280  bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out);
281 
282  // Don't create a new plan if we have a non SIMD plan already but
283  // can do SIMD. This prevents endlessly recreating plans if we
284  // change the alignment.
285 
286  if (rplan == nullptr || rd != dist || rs != stride || rr != rank
287  || rh != howmany || ((ioalign != rsimd_align) ? ! ioalign : false))
288  create_new_plan = true;
289  else
290  {
291  // We still might not have the same shape of array.
292 
293  for (int i = 0; i < rank; i++)
294  if (dims(i) != rn(i))
295  {
296  create_new_plan = true;
297  break;
298  }
299  }
300 
301  if (create_new_plan)
302  {
303  rd = dist;
304  rs = stride;
305  rr = rank;
306  rh = howmany;
307  rsimd_align = ioalign;
308  rn = dims;
309 
310  // Note reversal of dimensions for column major storage in FFTW.
311  octave_idx_type nn = 1;
312  OCTAVE_LOCAL_BUFFER (int, tmp, rank);
313 
314  for (int i = 0, j = rank-1; i < rank; i++, j--)
315  {
316  tmp[i] = dims(j);
317  nn *= dims(j);
318  }
319 
320  int plan_flags = 0;
321  bool plan_destroys_in = true;
322 
323  switch (meth)
324  {
325  case UNKNOWN:
326  case ESTIMATE:
327  plan_flags |= FFTW_ESTIMATE;
328  plan_destroys_in = false;
329  break;
330  case MEASURE:
331  plan_flags |= FFTW_MEASURE;
332  break;
333  case PATIENT:
334  plan_flags |= FFTW_PATIENT;
335  break;
336  case EXHAUSTIVE:
337  plan_flags |= FFTW_EXHAUSTIVE;
338  break;
339  case HYBRID:
340  if (nn < 8193)
341  plan_flags |= FFTW_MEASURE;
342  else
343  {
344  plan_flags |= FFTW_ESTIMATE;
345  plan_destroys_in = false;
346  }
347  break;
348  }
349 
350  if (ioalign)
351  plan_flags &= ~FFTW_UNALIGNED;
352  else
353  plan_flags |= FFTW_UNALIGNED;
354 
355  if (*cur_plan_p)
356  fftw_destroy_plan (*cur_plan_p);
357 
358  if (plan_destroys_in)
359  {
360  // Create matrix with the same size and 16-byte alignment as input
361  OCTAVE_LOCAL_BUFFER (double, itmp, nn + 32);
362  itmp = reinterpret_cast<double *>
363  (((reinterpret_cast<ptrdiff_t> (itmp) + 15) & ~ 0xF) +
364  ((reinterpret_cast<ptrdiff_t> (in)) & 0xF));
365 
366  *cur_plan_p
367  = fftw_plan_many_dft_r2c (rank, tmp, howmany, itmp,
368  nullptr, stride, dist,
369  reinterpret_cast<fftw_complex *> (out),
370  nullptr, stride, dist, plan_flags);
371  }
372  else
373  {
374  *cur_plan_p
375  = fftw_plan_many_dft_r2c (rank, tmp, howmany,
376  (const_cast<double *> (in)),
377  nullptr, stride, dist,
378  reinterpret_cast<fftw_complex *> (out),
379  nullptr, stride, dist, plan_flags);
380  }
381 
382  if (*cur_plan_p == nullptr)
383  (*current_liboctave_error_handler) ("Error creating fftw plan");
384  }
385 
386  return *cur_plan_p;
387  }
388 
389  fftw_planner::FftwMethod
390  fftw_planner::do_method (void)
391  {
392  return meth;
393  }
394 
395  fftw_planner::FftwMethod
396  fftw_planner::do_method (FftwMethod _meth)
397  {
398  FftwMethod ret = meth;
399  if (_meth == ESTIMATE || _meth == MEASURE
400  || _meth == PATIENT || _meth == EXHAUSTIVE
401  || _meth == HYBRID)
402  {
403  if (meth != _meth)
404  {
405  meth = _meth;
406  if (rplan)
407  fftw_destroy_plan (reinterpret_cast<fftw_plan> (rplan));
408  if (plan[0])
409  fftw_destroy_plan (reinterpret_cast<fftw_plan> (plan[0]));
410  if (plan[1])
411  fftw_destroy_plan (reinterpret_cast<fftw_plan> (plan[1]));
412  rplan = plan[0] = plan[1] = nullptr;
413  }
414  }
415  else
416  ret = UNKNOWN;
417  return ret;
418  }
419 
420  float_fftw_planner *float_fftw_planner::instance = nullptr;
421 
422  float_fftw_planner::float_fftw_planner (void)
423  : meth (ESTIMATE), rplan (nullptr), rd (0), rs (0), rr (0), rh (0), rn (),
424  rsimd_align (false), nthreads (1)
425  {
426  plan[0] = plan[1] = nullptr;
427  d[0] = d[1] = s[0] = s[1] = r[0] = r[1] = h[0] = h[1] = 0;
428  simd_align[0] = simd_align[1] = false;
429  inplace[0] = inplace[1] = false;
430  n[0] = n[1] = dim_vector ();
431 
432 #if defined (HAVE_FFTW3F_THREADS)
433  int init_ret = fftwf_init_threads ();
434  if (! init_ret)
435  (*current_liboctave_error_handler) ("Error initializing FFTW3F threads");
436 
437  // Use number of processors available to the current process
438  // This can be later changed with fftw ("threads", nthreads).
439  nthreads = octave_num_processors_wrapper (OCTAVE_NPROC_CURRENT_OVERRIDABLE);
440  fftwf_plan_with_nthreads (nthreads);
441 #endif
442 
443  // If we have a system wide wisdom file, import it.
444  fftwf_import_system_wisdom ();
445  }
446 
447  float_fftw_planner::~float_fftw_planner (void)
448  {
449  fftwf_plan *plan_p;
450 
451  plan_p = reinterpret_cast<fftwf_plan *> (&rplan);
452  if (*plan_p)
453  fftwf_destroy_plan (*plan_p);
454 
455  plan_p = reinterpret_cast<fftwf_plan *> (&plan[0]);
456  if (*plan_p)
457  fftwf_destroy_plan (*plan_p);
458 
459  plan_p = reinterpret_cast<fftwf_plan *> (&plan[1]);
460  if (*plan_p)
461  fftwf_destroy_plan (*plan_p);
462  }
463 
464  bool
465  float_fftw_planner::instance_ok (void)
466  {
467  bool retval = true;
468 
469  if (! instance)
470  {
471  instance = new float_fftw_planner ();
472  singleton_cleanup_list::add (cleanup_instance);
473  }
474 
475  return retval;
476  }
477 
478  void
479  float_fftw_planner::threads (int nt)
480  {
481 #if defined (HAVE_FFTW3F_THREADS)
482  if (instance_ok () && nt != threads ())
483  {
484  instance->nthreads = nt;
485  fftwf_plan_with_nthreads (nt);
486  // Clear the current plans.
487  instance->rplan = instance->plan[0] = instance->plan[1] = nullptr;
488  }
489 #else
490  octave_unused_parameter (nt);
491 
492  (*current_liboctave_warning_handler)
493  ("unable to change number of threads without FFTW thread support");
494 #endif
495  }
496 
497  void *
498  float_fftw_planner::do_create_plan (int dir, const int rank,
499  const dim_vector& dims,
500  octave_idx_type howmany,
501  octave_idx_type stride,
502  octave_idx_type dist,
503  const FloatComplex *in,
504  FloatComplex *out)
505  {
506  int which = (dir == FFTW_FORWARD) ? 0 : 1;
507  fftwf_plan *cur_plan_p = reinterpret_cast<fftwf_plan *> (&plan[which]);
508  bool create_new_plan = false;
509  bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out);
510  bool ioinplace = (in == out);
511 
512  // Don't create a new plan if we have a non SIMD plan already but
513  // can do SIMD. This prevents endlessly recreating plans if we
514  // change the alignment.
515 
516  if (plan[which] == nullptr || d[which] != dist || s[which] != stride
517  || r[which] != rank || h[which] != howmany
518  || ioinplace != inplace[which]
519  || ((ioalign != simd_align[which]) ? ! ioalign : false))
520  create_new_plan = true;
521  else
522  {
523  // We still might not have the same shape of array.
524 
525  for (int i = 0; i < rank; i++)
526  if (dims(i) != n[which](i))
527  {
528  create_new_plan = true;
529  break;
530  }
531  }
532 
533  if (create_new_plan)
534  {
535  d[which] = dist;
536  s[which] = stride;
537  r[which] = rank;
538  h[which] = howmany;
539  simd_align[which] = ioalign;
540  inplace[which] = ioinplace;
541  n[which] = dims;
542 
543  // Note reversal of dimensions for column major storage in FFTW.
544  octave_idx_type nn = 1;
545  OCTAVE_LOCAL_BUFFER (int, tmp, rank);
546 
547  for (int i = 0, j = rank-1; i < rank; i++, j--)
548  {
549  tmp[i] = dims(j);
550  nn *= dims(j);
551  }
552 
553  int plan_flags = 0;
554  bool plan_destroys_in = true;
555 
556  switch (meth)
557  {
558  case UNKNOWN:
559  case ESTIMATE:
560  plan_flags |= FFTW_ESTIMATE;
561  plan_destroys_in = false;
562  break;
563  case MEASURE:
564  plan_flags |= FFTW_MEASURE;
565  break;
566  case PATIENT:
567  plan_flags |= FFTW_PATIENT;
568  break;
569  case EXHAUSTIVE:
570  plan_flags |= FFTW_EXHAUSTIVE;
571  break;
572  case HYBRID:
573  if (nn < 8193)
574  plan_flags |= FFTW_MEASURE;
575  else
576  {
577  plan_flags |= FFTW_ESTIMATE;
578  plan_destroys_in = false;
579  }
580  break;
581  }
582 
583  if (ioalign)
584  plan_flags &= ~FFTW_UNALIGNED;
585  else
586  plan_flags |= FFTW_UNALIGNED;
587 
588  if (*cur_plan_p)
589  fftwf_destroy_plan (*cur_plan_p);
590 
591  if (plan_destroys_in)
592  {
593  // Create matrix with the same size and 16-byte alignment as input
594  OCTAVE_LOCAL_BUFFER (FloatComplex, itmp, nn * howmany + 32);
595  itmp = reinterpret_cast<FloatComplex *>
596  (((reinterpret_cast<ptrdiff_t> (itmp) + 15) & ~ 0xF) +
597  ((reinterpret_cast<ptrdiff_t> (in)) & 0xF));
598 
599  *cur_plan_p
600  = fftwf_plan_many_dft (rank, tmp, howmany,
601  reinterpret_cast<fftwf_complex *> (itmp),
602  nullptr, stride, dist,
603  reinterpret_cast<fftwf_complex *> (out),
604  nullptr, stride, dist, dir, plan_flags);
605  }
606  else
607  {
608  *cur_plan_p
609  = fftwf_plan_many_dft (rank, tmp, howmany,
610  reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *> (in)),
611  nullptr, stride, dist,
612  reinterpret_cast<fftwf_complex *> (out),
613  nullptr, stride, dist, dir, plan_flags);
614  }
615 
616  if (*cur_plan_p == nullptr)
617  (*current_liboctave_error_handler) ("Error creating fftw plan");
618  }
619 
620  return *cur_plan_p;
621  }
622 
623  void *
624  float_fftw_planner::do_create_plan (const int rank, const dim_vector& dims,
625  octave_idx_type howmany,
626  octave_idx_type stride,
627  octave_idx_type dist,
628  const float *in, FloatComplex *out)
629  {
630  fftwf_plan *cur_plan_p = reinterpret_cast<fftwf_plan *> (&rplan);
631  bool create_new_plan = false;
632  bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out);
633 
634  // Don't create a new plan if we have a non SIMD plan already but
635  // can do SIMD. This prevents endlessly recreating plans if we
636  // change the alignment.
637 
638  if (rplan == nullptr || rd != dist || rs != stride || rr != rank
639  || rh != howmany || ((ioalign != rsimd_align) ? ! ioalign : false))
640  create_new_plan = true;
641  else
642  {
643  // We still might not have the same shape of array.
644 
645  for (int i = 0; i < rank; i++)
646  if (dims(i) != rn(i))
647  {
648  create_new_plan = true;
649  break;
650  }
651  }
652 
653  if (create_new_plan)
654  {
655  rd = dist;
656  rs = stride;
657  rr = rank;
658  rh = howmany;
659  rsimd_align = ioalign;
660  rn = dims;
661 
662  // Note reversal of dimensions for column major storage in FFTW.
663  octave_idx_type nn = 1;
664  OCTAVE_LOCAL_BUFFER (int, tmp, rank);
665 
666  for (int i = 0, j = rank-1; i < rank; i++, j--)
667  {
668  tmp[i] = dims(j);
669  nn *= dims(j);
670  }
671 
672  int plan_flags = 0;
673  bool plan_destroys_in = true;
674 
675  switch (meth)
676  {
677  case UNKNOWN:
678  case ESTIMATE:
679  plan_flags |= FFTW_ESTIMATE;
680  plan_destroys_in = false;
681  break;
682  case MEASURE:
683  plan_flags |= FFTW_MEASURE;
684  break;
685  case PATIENT:
686  plan_flags |= FFTW_PATIENT;
687  break;
688  case EXHAUSTIVE:
689  plan_flags |= FFTW_EXHAUSTIVE;
690  break;
691  case HYBRID:
692  if (nn < 8193)
693  plan_flags |= FFTW_MEASURE;
694  else
695  {
696  plan_flags |= FFTW_ESTIMATE;
697  plan_destroys_in = false;
698  }
699  break;
700  }
701 
702  if (ioalign)
703  plan_flags &= ~FFTW_UNALIGNED;
704  else
705  plan_flags |= FFTW_UNALIGNED;
706 
707  if (*cur_plan_p)
708  fftwf_destroy_plan (*cur_plan_p);
709 
710  if (plan_destroys_in)
711  {
712  // Create matrix with the same size and 16-byte alignment as input
713  OCTAVE_LOCAL_BUFFER (float, itmp, nn + 32);
714  itmp = reinterpret_cast<float *>
715  (((reinterpret_cast<ptrdiff_t> (itmp) + 15) & ~ 0xF) +
716  ((reinterpret_cast<ptrdiff_t> (in)) & 0xF));
717 
718  *cur_plan_p
719  = fftwf_plan_many_dft_r2c (rank, tmp, howmany, itmp,
720  nullptr, stride, dist,
721  reinterpret_cast<fftwf_complex *> (out),
722  nullptr, stride, dist, plan_flags);
723  }
724  else
725  {
726  *cur_plan_p
727  = fftwf_plan_many_dft_r2c (rank, tmp, howmany,
728  (const_cast<float *> (in)),
729  nullptr, stride, dist,
730  reinterpret_cast<fftwf_complex *> (out),
731  nullptr, stride, dist, plan_flags);
732  }
733 
734  if (*cur_plan_p == nullptr)
735  (*current_liboctave_error_handler) ("Error creating fftw plan");
736  }
737 
738  return *cur_plan_p;
739  }
740 
741  float_fftw_planner::FftwMethod
742  float_fftw_planner::do_method (void)
743  {
744  return meth;
745  }
746 
747  float_fftw_planner::FftwMethod
748  float_fftw_planner::do_method (FftwMethod _meth)
749  {
750  FftwMethod ret = meth;
751  if (_meth == ESTIMATE || _meth == MEASURE
752  || _meth == PATIENT || _meth == EXHAUSTIVE
753  || _meth == HYBRID)
754  {
755  if (meth != _meth)
756  {
757  meth = _meth;
758  if (rplan)
759  fftwf_destroy_plan (reinterpret_cast<fftwf_plan> (rplan));
760  if (plan[0])
761  fftwf_destroy_plan (reinterpret_cast<fftwf_plan> (plan[0]));
762  if (plan[1])
763  fftwf_destroy_plan (reinterpret_cast<fftwf_plan> (plan[1]));
764  rplan = plan[0] = plan[1] = nullptr;
765  }
766  }
767  else
768  ret = UNKNOWN;
769  return ret;
770  }
771 
772  template <typename T>
773  static inline void
774  convert_packcomplex_1d (T *out, size_t nr, size_t nc,
775  octave_idx_type stride, octave_idx_type dist)
776  {
777  octave_quit ();
778 
779  // Fill in the missing data.
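 // (FFTW's r2c transform computes only elements 0 .. nc/2 of each row; the
 // remaining elements follow from conjugate symmetry, X[k] = conj (X[nc-k]).)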
780 
781  for (size_t i = 0; i < nr; i++)
782  for (size_t j = nc/2+1; j < nc; j++)
783  out[j*stride + i*dist] = conj (out[(nc - j)*stride + i*dist]);
784 
785  octave_quit ();
786  }
787 
788  template <typename T>
789  static inline void
790  convert_packcomplex_Nd (T *out, const dim_vector& dv)
791  {
792  size_t nc = dv(0);
793  size_t nr = dv(1);
794  size_t np = (dv.ndims () > 2 ? dv.numel () / nc / nr : 1);
795  size_t nrp = nr * np;
796  T *ptr1, *ptr2;
797 
798  octave_quit ();
799 
800  // Create space for the missing elements.
801 
802  for (size_t i = 0; i < nrp; i++)
803  {
804  ptr1 = out + i * (nc/2 + 1) + nrp*((nc-1)/2);
805  ptr2 = out + i * nc;
806  for (size_t j = 0; j < nc/2+1; j++)
807  *ptr2++ = *ptr1++;
808  }
809 
810  octave_quit ();
811 
812  // Fill in the missing data for the rank = 2 case directly for speed.
813 
814  for (size_t i = 0; i < np; i++)
815  {
816  for (size_t j = 1; j < nr; j++)
817  for (size_t k = nc/2+1; k < nc; k++)
818  out[k + (j + i*nr)*nc] = conj (out[nc - k + ((i+1)*nr - j)*nc]);
819 
820  for (size_t j = nc/2+1; j < nc; j++)
821  out[j + i*nr*nc] = conj (out[(i*nr+1)*nc - j]);
822  }
823 
824  octave_quit ();
825 
826  // Now do the permutations needed for rank > 2 cases.
827 
828  size_t jstart = dv(0) * dv(1);
829  size_t kstep = dv(0);
830  size_t nel = dv.numel ();
831 
832  for (int inner = 2; inner < dv.ndims (); inner++)
833  {
834  size_t jmax = jstart * dv(inner);
835  for (size_t i = 0; i < nel; i+=jmax)
836  for (size_t j = jstart, jj = jmax-jstart; j < jj;
837  j+=jstart, jj-=jstart)
838  for (size_t k = 0; k < jstart; k+= kstep)
839  for (size_t l = nc/2+1; l < nc; l++)
840  {
841  T tmp = out[i + j + k + l];
842  out[i + j + k + l] = out[i + jj + k + l];
843  out[i + jj + k + l] = tmp;
844  }
845  jstart = jmax;
846  }
847 
848  octave_quit ();
849  }
850 
851  int
852  fftw::fft (const double *in, Complex *out, size_t npts,
853  size_t nsamples, octave_idx_type stride, octave_idx_type dist)
854  {
855  dist = (dist < 0 ? npts : dist);
856 
857  dim_vector dv (npts, 1);
858  void *vplan = fftw_planner::create_plan (1, dv, nsamples,
859  stride, dist, in, out);
860  fftw_plan plan = reinterpret_cast<fftw_plan> (vplan);
861 
862  fftw_execute_dft_r2c (plan, (const_cast<double *>(in)),
863  reinterpret_cast<fftw_complex *> (out));
864 
865  // Need to create other half of the transform.
866 
867  convert_packcomplex_1d (out, nsamples, npts, stride, dist);
868 
869  return 0;
870  }
871 
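 // Illustrative use of the wrapper above (editorial addition, not part of
 // this file).  The buffers data/spec and the sizes N and M are hypothetical;
 // each of the M columns of an N-point real signal is transformed and the
 // full complex spectrum is returned column by column:
 //
 //   std::vector<double> data (N * M);   // real input, column-major
 //   std::vector<Complex> spec (N * M);  // receives the full spectrum
 //   octave::fftw::fft (data.data (), spec.data (), N, M);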
872  int
873  fftw::fft (const Complex *in, Complex *out, size_t npts,
874  size_t nsamples, octave_idx_type stride, octave_idx_type dist)
875  {
876  dist = (dist < 0 ? npts : dist);
877 
878  dim_vector dv (npts, 1);
879  void *vplan = fftw_planner::create_plan (FFTW_FORWARD, 1, dv,
880  nsamples, stride,
881  dist, in, out);
882  fftw_plan plan = reinterpret_cast<fftw_plan> (vplan);
883 
884  fftw_execute_dft (plan,
885  reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)),
886  reinterpret_cast<fftw_complex *> (out));
887 
888  return 0;
889  }
890 
891  int
892  fftw::ifft (const Complex *in, Complex *out, size_t npts,
893  size_t nsamples, octave_idx_type stride,
894  octave_idx_type dist)
895  {
896  dist = (dist < 0 ? npts : dist);
897 
898  dim_vector dv (npts, 1);
899  void *vplan = fftw_planner::create_plan (FFTW_BACKWARD, 1, dv, nsamples,
900  stride, dist, in, out);
901  fftw_plan plan = reinterpret_cast<fftw_plan> (vplan);
902 
903  fftw_execute_dft (plan,
904  reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)),
905  reinterpret_cast<fftw_complex *> (out));
906 
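 // FFTW computes unnormalized transforms, so scale the inverse by 1/npts.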
907  const Complex scale = npts;
908  for (size_t j = 0; j < nsamples; j++)
909  for (size_t i = 0; i < npts; i++)
910  out[i*stride + j*dist] /= scale;
911 
912  return 0;
913  }
914 
915  int
916  fftw::fftNd (const double *in, Complex *out, const int rank,
917  const dim_vector& dv)
918  {
919  octave_idx_type dist = 1;
920  for (int i = 0; i < rank; i++)
921  dist *= dv(i);
922 
923  // Fool with the position of the start of the output matrix, so that
924  // creating other half of the matrix won't cause cache problems.
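 // (The r2c result needs only dv(0)/2 + 1 of every dv(0) output elements, so
 // it is written starting at out + offset; convert_packcomplex_Nd then
 // expands it in place toward the start of the buffer.)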
925 
926  octave_idx_type offset = (dv.numel () / dv(0)) * ((dv(0) - 1) / 2);
927 
928  void *vplan = fftw_planner::create_plan (rank, dv, 1, 1, dist,
929  in, out + offset);
930  fftw_plan plan = reinterpret_cast<fftw_plan> (vplan);
931 
932  fftw_execute_dft_r2c (plan, (const_cast<double *>(in)),
933  reinterpret_cast<fftw_complex *> (out + offset));
934 
935  // Need to create other half of the transform.
936 
937  convert_packcomplex_Nd (out, dv);
938 
939  return 0;
940  }
941 
942  int
943  fftw::fftNd (const Complex *in, Complex *out, const int rank,
944  const dim_vector& dv)
945  {
946  octave_idx_type dist = 1;
947  for (int i = 0; i < rank; i++)
948  dist *= dv(i);
949 
950  void *vplan = fftw_planner::create_plan (FFTW_FORWARD, rank, dv,
951  1, 1, dist, in, out);
952  fftw_plan plan = reinterpret_cast<fftw_plan> (vplan);
953 
954  fftw_execute_dft (plan,
955  reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)),
956  reinterpret_cast<fftw_complex *> (out));
957 
958  return 0;
959  }
960 
961  int
962  fftw::ifftNd (const Complex *in, Complex *out, const int rank,
963  const dim_vector& dv)
964  {
965  octave_idx_type dist = 1;
966  for (int i = 0; i < rank; i++)
967  dist *= dv(i);
968 
969  void *vplan = fftw_planner::create_plan (FFTW_BACKWARD, rank, dv,
970  1, 1, dist, in, out);
971  fftw_plan plan = reinterpret_cast<fftw_plan> (vplan);
972 
973  fftw_execute_dft (plan,
974  reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)),
975  reinterpret_cast<fftw_complex *> (out));
976 
977  const size_t npts = dv.numel ();
978  const Complex scale = npts;
979  for (size_t i = 0; i < npts; i++)
980  out[i] /= scale;
981 
982  return 0;
983  }
984 
985  int
986  fftw::fft (const float *in, FloatComplex *out, size_t npts,
987  size_t nsamples, octave_idx_type stride, octave_idx_type dist)
988  {
989  dist = (dist < 0 ? npts : dist);
990 
991  dim_vector dv (npts, 1);
992  void *vplan = float_fftw_planner::create_plan (1, dv, nsamples, stride,
993  dist, in, out);
994  fftwf_plan plan = reinterpret_cast<fftwf_plan> (vplan);
995 
996  fftwf_execute_dft_r2c (plan, (const_cast<float *>(in)),
997  reinterpret_cast<fftwf_complex *> (out));
998 
999  // Need to create other half of the transform.
1000 
1001  convert_packcomplex_1d (out, nsamples, npts, stride, dist);
1002 
1003  return 0;
1004  }
1005 
1006  int
1007  fftw::fft (const FloatComplex *in, FloatComplex *out, size_t npts,
1008  size_t nsamples, octave_idx_type stride, octave_idx_type dist)
1009  {
1010  dist = (dist < 0 ? npts : dist);
1011 
1012  dim_vector dv (npts, 1);
1013  void *vplan = float_fftw_planner::create_plan (FFTW_FORWARD, 1, dv,
1014  nsamples, stride, dist,
1015  in, out);
1016  fftwf_plan plan = reinterpret_cast<fftwf_plan> (vplan);
1017 
1018  fftwf_execute_dft (plan,
1019  reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)),
1020  reinterpret_cast<fftwf_complex *> (out));
1021 
1022  return 0;
1023  }
1024 
1025  int
1026  fftw::ifft (const FloatComplex *in, FloatComplex *out, size_t npts,
1027  size_t nsamples, octave_idx_type stride,
1028  octave_idx_type dist)
1029  {
1030  dist = (dist < 0 ? npts : dist);
1031 
1032  dim_vector dv (npts, 1);
1033  void *vplan = float_fftw_planner::create_plan (FFTW_BACKWARD, 1, dv,
1034  nsamples, stride, dist,
1035  in, out);
1036  fftwf_plan plan = reinterpret_cast<fftwf_plan> (vplan);
1037 
1038  fftwf_execute_dft (plan,
1039  reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)),
1040  reinterpret_cast<fftwf_complex *> (out));
1041 
1042  const FloatComplex scale = npts;
1043  for (size_t j = 0; j < nsamples; j++)
1044  for (size_t i = 0; i < npts; i++)
1045  out[i*stride + j*dist] /= scale;
1046 
1047  return 0;
1048  }
1049 
1050  int
1051  fftw::fftNd (const float *in, FloatComplex *out, const int rank,
1052  const dim_vector& dv)
1053  {
1054  octave_idx_type dist = 1;
1055  for (int i = 0; i < rank; i++)
1056  dist *= dv(i);
1057 
1058  // Fool with the position of the start of the output matrix, so that
1059  // creating other half of the matrix won't cause cache problems.
1060 
1061  octave_idx_type offset = (dv.numel () / dv(0)) * ((dv(0) - 1) / 2);
1062 
1063  void *vplan = float_fftw_planner::create_plan (rank, dv, 1, 1, dist,
1064  in, out + offset);
1065  fftwf_plan plan = reinterpret_cast<fftwf_plan> (vplan);
1066 
1067  fftwf_execute_dft_r2c (plan, (const_cast<float *>(in)),
1068  reinterpret_cast<fftwf_complex *> (out + offset));
1069 
1070  // Need to create other half of the transform.
1071 
1072  convert_packcomplex_Nd (out, dv);
1073 
1074  return 0;
1075  }
1076 
1077  int
1078  fftw::fftNd (const FloatComplex *in, FloatComplex *out, const int rank,
1079  const dim_vector& dv)
1080  {
1081  octave_idx_type dist = 1;
1082  for (int i = 0; i < rank; i++)
1083  dist *= dv(i);
1084 
1085  void *vplan = float_fftw_planner::create_plan (FFTW_FORWARD, rank, dv,
1086  1, 1, dist, in, out);
1087  fftwf_plan plan = reinterpret_cast<fftwf_plan> (vplan);
1088 
1089  fftwf_execute_dft (plan,
1090  reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)),
1091  reinterpret_cast<fftwf_complex *> (out));
1092 
1093  return 0;
1094  }
1095 
1096  int
1097  fftw::ifftNd (const FloatComplex *in, FloatComplex *out, const int rank,
1098  const dim_vector& dv)
1099  {
1100  octave_idx_type dist = 1;
1101  for (int i = 0; i < rank; i++)
1102  dist *= dv(i);
1103 
1104  void *vplan = float_fftw_planner::create_plan (FFTW_BACKWARD, rank, dv,
1105  1, 1, dist, in, out);
1106  fftwf_plan plan = reinterpret_cast<fftwf_plan> (vplan);
1107 
1108  fftwf_execute_dft (plan,
1109  reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)),
1110  reinterpret_cast<fftwf_complex *> (out));
1111 
1112  const size_t npts = dv.numel ();
1113  const FloatComplex scale = npts;
1114  for (size_t i = 0; i < npts; i++)
1115  out[i] /= scale;
1116 
1117  return 0;
1118  }
1119 
1120 #endif
1121 
1122  std::string
1123  fftw_version (void)
1124  {
1125 #if defined (HAVE_FFTW)
1126  return ::fftw_version;
1127 #else
1128  return "none";
1129 #endif
1130  }
1131 
1132  std::string
1133  fftwf_version (void)
1134  {
1135 #if defined (HAVE_FFTW)
1136  return ::fftwf_version;
1137 #else
1138  return "none";
1139 #endif
1140  }
1141 }