oct-fftw.cc
1 ////////////////////////////////////////////////////////////////////////
2 //
3 // Copyright (C) 2001-2023 The Octave Project Developers
4 //
5 // See the file COPYRIGHT.md in the top-level directory of this
6 // distribution or <https://octave.org/copyright/>.
7 //
8 // This file is part of Octave.
9 //
10 // Octave is free software: you can redistribute it and/or modify it
11 // under the terms of the GNU General Public License as published by
12 // the Free Software Foundation, either version 3 of the License, or
13 // (at your option) any later version.
14 //
15 // Octave is distributed in the hope that it will be useful, but
16 // WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 // GNU General Public License for more details.
19 //
20 // You should have received a copy of the GNU General Public License
21 // along with Octave; see the file COPYING. If not, see
22 // <https://www.gnu.org/licenses/>.
23 //
24 ////////////////////////////////////////////////////////////////////////
25 
26 #if defined (HAVE_CONFIG_H)
27 # include "config.h"
28 #endif
29 
30 #if defined (HAVE_FFTW3_H)
31 # include <fftw3.h>
32 #endif
33 
34 #include "lo-error.h"
35 #include "oct-fftw.h"
36 #include "oct-locbuf.h"
37 #include "quit.h"
38 #include "singleton-cleanup.h"
39 
40 #if defined (HAVE_FFTW3_THREADS) || defined (HAVE_FFTW3F_THREADS)
41 # include "nproc-wrapper.h"
42 #endif
43 
44 OCTAVE_BEGIN_NAMESPACE(octave)
45 
46 #if defined (HAVE_FFTW)
47 
48 fftw_planner *fftw_planner::s_instance = nullptr;
49 
50 // Helper class to create and cache FFTW plans for both 1D and
51 // 2D. This implementation defaults to using FFTW_ESTIMATE to create
52 // the plans, which in theory is suboptimal, but provides quite
53 // reasonable performance in practice.
54 
55 // Also note that if FFTW_ESTIMATE is not used then the planner in FFTW3
56 // will destroy the input and output arrays. We must, therefore, create a
57 // temporary input array with the same size and 16-byte alignment as
58 // the original array when using a different planner strategy.
59 // Note that we also use any wisdom that is available, either in a
60 // FFTW3 system wide file or as supplied by the user.
61 
62 // FIXME: if we can ensure 16-byte alignment in Array<T>
63 // (T *data), then FFTW3 can use SIMD instructions for further
64 // acceleration.
65 
66 // Note that it is profitable to store the FFTW3 plans for small FFTs.
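// A typical call sequence, as used by fftw::fft and fftw::ifft further
// down in this file, looks roughly like:
//
//   void *p = fftw_planner::create_plan (FFTW_FORWARD, 1, dv, nsamples,
//                                        stride, dist, in, out);
//   fftw_execute_dft (reinterpret_cast<fftw_plan> (p), ...);
//
// create_plan returns the cached plan when the geometry, alignment, and
// in-place-ness match the previous call, and rebuilds it otherwise.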
67 
68 fftw_planner::fftw_planner (void)
69  : m_meth (ESTIMATE), m_rplan (nullptr), m_rd (0), m_rs (0), m_rr (0),
70  m_rh (0), m_rn (), m_rsimd_align (false), m_nthreads (1)
71 {
72  m_plan[0] = m_plan[1] = nullptr;
73  m_d[0] = m_d[1] = m_s[0] = m_s[1] = m_r[0] = m_r[1] = m_h[0] = m_h[1] = 0;
74  m_simd_align[0] = m_simd_align[1] = false;
75  m_inplace[0] = m_inplace[1] = false;
76  m_n[0] = m_n[1] = dim_vector ();
77 
78 #if defined (HAVE_FFTW3_THREADS)
79  int init_ret = fftw_init_threads ();
80  if (! init_ret)
81  (*current_liboctave_error_handler) ("Error initializing FFTW threads");
82 
83  // Check number of processors available to the current process
84  m_nthreads =
85  octave_num_processors_wrapper (OCTAVE_NPROC_CURRENT_OVERRIDABLE);
86 
87  // Limit number of threads to 3 by default
88  // See: https://octave.discourse.group/t/3121
89  // This can be later changed with fftw ("threads", nthreads).
90  if (m_nthreads > 3)
91  m_nthreads = 3;
92 
93  fftw_plan_with_nthreads (m_nthreads);
94 #endif
95 
96  // If we have a system wide wisdom file, import it.
97  fftw_import_system_wisdom ();
98 }
99 
100 fftw_planner::~fftw_planner (void)
101 {
102  fftw_plan *plan_p;
103 
104  plan_p = reinterpret_cast<fftw_plan *> (&m_rplan);
105  if (*plan_p)
106  fftw_destroy_plan (*plan_p);
107 
108  plan_p = reinterpret_cast<fftw_plan *> (&m_plan[0]);
109  if (*plan_p)
110  fftw_destroy_plan (*plan_p);
111 
112  plan_p = reinterpret_cast<fftw_plan *> (&m_plan[1]);
113  if (*plan_p)
114  fftw_destroy_plan (*plan_p);
115 }
116 
117 bool
118 fftw_planner::instance_ok (void)
119 {
120  bool retval = true;
121 
122  if (! s_instance)
123  {
124  s_instance = new fftw_planner ();
125  singleton_cleanup_list::add (cleanup_instance);
126  }
127 
128  return retval;
129 }
130 
131 void
132 fftw_planner::threads (int nt)
133 {
134 #if defined (HAVE_FFTW3_THREADS)
135  if (instance_ok () && nt != threads ())
136  {
137  s_instance->m_nthreads = nt;
138  fftw_plan_with_nthreads (nt);
139  // Clear the current plans.
140  s_instance->m_rplan = nullptr;
141  s_instance->m_plan[0] = s_instance->m_plan[1] = nullptr;
142  }
143 #else
144  octave_unused_parameter (nt);
145 
146  (*current_liboctave_warning_handler)
147  ("unable to change number of threads without FFTW thread support");
148 #endif
149 }
150 
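// An address is 16-byte aligned when its low four bits are zero; the
// 0xF mask below checks exactly that.  Aligned input and output buffers
// let FFTW use its SIMD codelets (see the FIXME above).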
151 #define CHECK_SIMD_ALIGNMENT(x) \
152  (((reinterpret_cast<std::ptrdiff_t> (x)) & 0xF) == 0)
153 
154 void *
155 fftw_planner::do_create_plan (int dir, const int rank,
156  const dim_vector& dims,
157  octave_idx_type howmany,
158  octave_idx_type stride,
159  octave_idx_type dist,
160  const Complex *in, Complex *out)
161 {
162  int which = (dir == FFTW_FORWARD) ? 0 : 1;
163  fftw_plan *cur_plan_p = reinterpret_cast<fftw_plan *> (&m_plan[which]);
164  bool create_new_plan = false;
165  bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out);
166  bool ioinplace = (in == out);
167 
168  // Don't create a new plan if we have a non SIMD plan already but
169  // can do SIMD. This prevents endlessly recreating plans if we
170  // change the alignment.
171 
172  if (m_plan[which] == nullptr || m_d[which] != dist || m_s[which] != stride
173  || m_r[which] != rank || m_h[which] != howmany
174  || ioinplace != m_inplace[which]
175  || ((ioalign != m_simd_align[which]) ? ! ioalign : false))
176  create_new_plan = true;
177  else
178  {
179  // We still might not have the same shape of array.
180 
181  for (int i = 0; i < rank; i++)
182  if (dims(i) != m_n[which](i))
183  {
184  create_new_plan = true;
185  break;
186  }
187  }
188 
189  if (create_new_plan)
190  {
191  m_d[which] = dist;
192  m_s[which] = stride;
193  m_r[which] = rank;
194  m_h[which] = howmany;
195  m_simd_align[which] = ioalign;
196  m_inplace[which] = ioinplace;
197  m_n[which] = dims;
198 
199  // Note reversal of dimensions for column major storage in FFTW.
200  octave_idx_type nn = 1;
201  OCTAVE_LOCAL_BUFFER (int, tmp, rank);
202 
203  for (int i = 0, j = rank-1; i < rank; i++, j--)
204  {
205  tmp[i] = dims(j);
206  nn *= dims(j);
207  }
208 
209  int plan_flags = 0;
210  bool plan_destroys_in = true;
211 
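  // ESTIMATE (the default) plans without touching the arrays, so "in"
  // can be handed to the planner directly.  MEASURE, PATIENT, and
  // EXHAUSTIVE run trial transforms that clobber their arrays, hence
  // plan_destroys_in and the aligned scratch copy below.  HYBRID
  // measures transforms of up to 8192 elements and falls back to
  // ESTIMATE for larger ones.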
212  switch (m_meth)
213  {
214  case UNKNOWN:
215  case ESTIMATE:
216  plan_flags |= FFTW_ESTIMATE;
217  plan_destroys_in = false;
218  break;
219  case MEASURE:
220  plan_flags |= FFTW_MEASURE;
221  break;
222  case PATIENT:
223  plan_flags |= FFTW_PATIENT;
224  break;
225  case EXHAUSTIVE:
226  plan_flags |= FFTW_EXHAUSTIVE;
227  break;
228  case HYBRID:
229  if (nn < 8193)
230  plan_flags |= FFTW_MEASURE;
231  else
232  {
233  plan_flags |= FFTW_ESTIMATE;
234  plan_destroys_in = false;
235  }
236  break;
237  }
238 
239  if (ioalign)
240  plan_flags &= ~FFTW_UNALIGNED;
241  else
242  plan_flags |= FFTW_UNALIGNED;
243 
244  if (*cur_plan_p)
245  fftw_destroy_plan (*cur_plan_p);
246 
247  if (plan_destroys_in)
248  {
249  // Create matrix with the same size and 16-byte alignment as input
250  OCTAVE_LOCAL_BUFFER (Complex, itmp, nn * howmany + 32);
251  itmp = reinterpret_cast<Complex *>
252  (((reinterpret_cast<std::ptrdiff_t> (itmp) + 15) & ~ 0xF) +
253  ((reinterpret_cast<std::ptrdiff_t> (in)) & 0xF));
254 
255  *cur_plan_p
256  = fftw_plan_many_dft (rank, tmp, howmany,
257  reinterpret_cast<fftw_complex *> (itmp),
258  nullptr, stride, dist,
259  reinterpret_cast<fftw_complex *> (out),
260  nullptr, stride, dist, dir, plan_flags);
261  }
262  else
263  {
264  *cur_plan_p
265  = fftw_plan_many_dft (rank, tmp, howmany,
266  reinterpret_cast<fftw_complex *> (const_cast<Complex *> (in)),
267  nullptr, stride, dist,
268  reinterpret_cast<fftw_complex *> (out),
269  nullptr, stride, dist, dir, plan_flags);
270  }
271 
272  if (*cur_plan_p == nullptr)
273  (*current_liboctave_error_handler) ("Error creating FFTW plan");
274  }
275 
276  return *cur_plan_p;
277 }
278 
279 void *
280 fftw_planner::do_create_plan (const int rank, const dim_vector& dims,
281  octave_idx_type howmany,
282  octave_idx_type stride,
283  octave_idx_type dist,
284  const double *in, Complex *out)
285 {
286  fftw_plan *cur_plan_p = reinterpret_cast<fftw_plan *> (&m_rplan);
287  bool create_new_plan = false;
288  bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out);
289 
290  // Don't create a new plan if we have a non SIMD plan already but
291  // can do SIMD. This prevents endlessly recreating plans if we
292  // change the alignment.
293 
294  if (m_rplan == nullptr || m_rd != dist || m_rs != stride || m_rr != rank
295  || m_rh != howmany || ((ioalign != m_rsimd_align) ? ! ioalign : false))
296  create_new_plan = true;
297  else
298  {
299  // We still might not have the same shape of array.
300 
301  for (int i = 0; i < rank; i++)
302  if (dims(i) != m_rn(i))
303  {
304  create_new_plan = true;
305  break;
306  }
307  }
308 
309  if (create_new_plan)
310  {
311  m_rd = dist;
312  m_rs = stride;
313  m_rr = rank;
314  m_rh = howmany;
315  m_rsimd_align = ioalign;
316  m_rn = dims;
317 
318  // Note reversal of dimensions for column major storage in FFTW.
319  octave_idx_type nn = 1;
320  OCTAVE_LOCAL_BUFFER (int, tmp, rank);
321 
322  for (int i = 0, j = rank-1; i < rank; i++, j--)
323  {
324  tmp[i] = dims(j);
325  nn *= dims(j);
326  }
327 
328  int plan_flags = 0;
329  bool plan_destroys_in = true;
330 
331  switch (m_meth)
332  {
333  case UNKNOWN:
334  case ESTIMATE:
335  plan_flags |= FFTW_ESTIMATE;
336  plan_destroys_in = false;
337  break;
338  case MEASURE:
339  plan_flags |= FFTW_MEASURE;
340  break;
341  case PATIENT:
342  plan_flags |= FFTW_PATIENT;
343  break;
344  case EXHAUSTIVE:
345  plan_flags |= FFTW_EXHAUSTIVE;
346  break;
347  case HYBRID:
348  if (nn < 8193)
349  plan_flags |= FFTW_MEASURE;
350  else
351  {
352  plan_flags |= FFTW_ESTIMATE;
353  plan_destroys_in = false;
354  }
355  break;
356  }
357 
358  if (ioalign)
359  plan_flags &= ~FFTW_UNALIGNED;
360  else
361  plan_flags |= FFTW_UNALIGNED;
362 
363  if (*cur_plan_p)
364  fftw_destroy_plan (*cur_plan_p);
365 
366  if (plan_destroys_in)
367  {
368  // Create matrix with the same size and 16-byte alignment as input
369  OCTAVE_LOCAL_BUFFER (double, itmp, nn + 32);
370  itmp = reinterpret_cast<double *>
371  (((reinterpret_cast<std::ptrdiff_t> (itmp) + 15) & ~ 0xF) +
372  ((reinterpret_cast<std::ptrdiff_t> (in)) & 0xF));
373 
374  *cur_plan_p
375  = fftw_plan_many_dft_r2c (rank, tmp, howmany, itmp,
376  nullptr, stride, dist,
377  reinterpret_cast<fftw_complex *> (out),
378  nullptr, stride, dist, plan_flags);
379  }
380  else
381  {
382  *cur_plan_p
383  = fftw_plan_many_dft_r2c (rank, tmp, howmany,
384  (const_cast<double *> (in)),
385  nullptr, stride, dist,
386  reinterpret_cast<fftw_complex *> (out),
387  nullptr, stride, dist, plan_flags);
388  }
389 
390  if (*cur_plan_p == nullptr)
391  (*current_liboctave_error_handler) ("Error creating FFTW plan");
392  }
393 
394  return *cur_plan_p;
395 }
396 
397 fftw_planner::FftwMethod
398 fftw_planner::do_method (void)
399 {
400  return m_meth;
401 }
402 
403 fftw_planner::FftwMethod
404 fftw_planner::do_method (FftwMethod _meth)
405 {
406  FftwMethod ret = m_meth;
407  if (_meth == ESTIMATE || _meth == MEASURE
408  || _meth == PATIENT || _meth == EXHAUSTIVE
409  || _meth == HYBRID)
410  {
411  if (m_meth != _meth)
412  {
413  m_meth = _meth;
414  if (m_rplan)
415  fftw_destroy_plan (reinterpret_cast<fftw_plan> (m_rplan));
416  if (m_plan[0])
417  fftw_destroy_plan (reinterpret_cast<fftw_plan> (m_plan[0]));
418  if (m_plan[1])
419  fftw_destroy_plan (reinterpret_cast<fftw_plan> (m_plan[1]));
420  m_rplan = m_plan[0] = m_plan[1] = nullptr;
421  }
422  }
423  else
424  ret = UNKNOWN;
425  return ret;
426 }
427 
428 float_fftw_planner *float_fftw_planner::s_instance = nullptr;
429 
430 float_fftw_planner::float_fftw_planner (void)
431  : m_meth (ESTIMATE), m_rplan (nullptr), m_rd (0), m_rs (0), m_rr (0),
432  m_rh (0), m_rn (), m_rsimd_align (false), m_nthreads (1)
433 {
434  m_plan[0] = m_plan[1] = nullptr;
435  m_d[0] = m_d[1] = m_s[0] = m_s[1] = m_r[0] = m_r[1] = m_h[0] = m_h[1] = 0;
436  m_simd_align[0] = m_simd_align[1] = false;
437  m_inplace[0] = m_inplace[1] = false;
438  m_n[0] = m_n[1] = dim_vector ();
439 
440 #if defined (HAVE_FFTW3F_THREADS)
441  int init_ret = fftwf_init_threads ();
442  if (! init_ret)
443  (*current_liboctave_error_handler) ("Error initializing FFTW3F threads");
444 
445  // Use number of processors available to the current process
446  // This can be later changed with fftw ("threads", nthreads).
447  m_nthreads =
448  octave_num_processors_wrapper (OCTAVE_NPROC_CURRENT_OVERRIDABLE);
449 
450  fftwf_plan_with_nthreads (m_nthreads);
451 #endif
452 
453  // If we have a system wide wisdom file, import it.
454  fftwf_import_system_wisdom ();
455 }
456 
457 float_fftw_planner::~float_fftw_planner (void)
458 {
459  fftwf_plan *plan_p;
460 
461  plan_p = reinterpret_cast<fftwf_plan *> (&m_rplan);
462  if (*plan_p)
463  fftwf_destroy_plan (*plan_p);
464 
465  plan_p = reinterpret_cast<fftwf_plan *> (&m_plan[0]);
466  if (*plan_p)
467  fftwf_destroy_plan (*plan_p);
468 
469  plan_p = reinterpret_cast<fftwf_plan *> (&m_plan[1]);
470  if (*plan_p)
471  fftwf_destroy_plan (*plan_p);
472 }
473 
474 bool
475 float_fftw_planner::instance_ok (void)
476 {
477  bool retval = true;
478 
479  if (! s_instance)
480  {
481  s_instance = new float_fftw_planner ();
482  singleton_cleanup_list::add (cleanup_instance);
483  }
484 
485  return retval;
486 }
487 
488 void
489 float_fftw_planner::threads (int nt)
490 {
491 #if defined (HAVE_FFTW3F_THREADS)
492  if (instance_ok () && nt != threads ())
493  {
494  s_instance->m_nthreads = nt;
495  fftwf_plan_with_nthreads (nt);
496  // Clear the current plans.
497  s_instance->m_rplan = nullptr;
498  s_instance->m_plan[0] = s_instance->m_plan[1] = nullptr;
499  }
500 #else
501  octave_unused_parameter (nt);
502 
503  (*current_liboctave_warning_handler)
504  ("unable to change number of threads without FFTW thread support");
505 #endif
506 }
507 
508 void *
509 float_fftw_planner::do_create_plan (int dir, const int rank,
510  const dim_vector& dims,
511  octave_idx_type howmany,
512  octave_idx_type stride,
513  octave_idx_type dist,
514  const FloatComplex *in,
515  FloatComplex *out)
516 {
517  int which = (dir == FFTW_FORWARD) ? 0 : 1;
518  fftwf_plan *cur_plan_p = reinterpret_cast<fftwf_plan *> (&m_plan[which]);
519  bool create_new_plan = false;
520  bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out);
521  bool ioinplace = (in == out);
522 
523  // Don't create a new plan if we have a non SIMD plan already but
524  // can do SIMD. This prevents endlessly recreating plans if we
525  // change the alignment.
526 
527  if (m_plan[which] == nullptr || m_d[which] != dist || m_s[which] != stride
528  || m_r[which] != rank || m_h[which] != howmany
529  || ioinplace != m_inplace[which]
530  || ((ioalign != m_simd_align[which]) ? ! ioalign : false))
531  create_new_plan = true;
532  else
533  {
534  // We still might not have the same shape of array.
535 
536  for (int i = 0; i < rank; i++)
537  if (dims(i) != m_n[which](i))
538  {
539  create_new_plan = true;
540  break;
541  }
542  }
543 
544  if (create_new_plan)
545  {
546  m_d[which] = dist;
547  m_s[which] = stride;
548  m_r[which] = rank;
549  m_h[which] = howmany;
550  m_simd_align[which] = ioalign;
551  m_inplace[which] = ioinplace;
552  m_n[which] = dims;
553 
554  // Note reversal of dimensions for column major storage in FFTW.
555  octave_idx_type nn = 1;
556  OCTAVE_LOCAL_BUFFER (int, tmp, rank);
557 
558  for (int i = 0, j = rank-1; i < rank; i++, j--)
559  {
560  tmp[i] = dims(j);
561  nn *= dims(j);
562  }
563 
564  int plan_flags = 0;
565  bool plan_destroys_in = true;
566 
567  switch (m_meth)
568  {
569  case UNKNOWN:
570  case ESTIMATE:
571  plan_flags |= FFTW_ESTIMATE;
572  plan_destroys_in = false;
573  break;
574  case MEASURE:
575  plan_flags |= FFTW_MEASURE;
576  break;
577  case PATIENT:
578  plan_flags |= FFTW_PATIENT;
579  break;
580  case EXHAUSTIVE:
581  plan_flags |= FFTW_EXHAUSTIVE;
582  break;
583  case HYBRID:
584  if (nn < 8193)
585  plan_flags |= FFTW_MEASURE;
586  else
587  {
588  plan_flags |= FFTW_ESTIMATE;
589  plan_destroys_in = false;
590  }
591  break;
592  }
593 
594  if (ioalign)
595  plan_flags &= ~FFTW_UNALIGNED;
596  else
597  plan_flags |= FFTW_UNALIGNED;
598 
599  if (*cur_plan_p)
600  fftwf_destroy_plan (*cur_plan_p);
601 
602  if (plan_destroys_in)
603  {
604  // Create matrix with the same size and 16-byte alignment as input
605  OCTAVE_LOCAL_BUFFER (FloatComplex, itmp, nn * howmany + 32);
606  itmp = reinterpret_cast<FloatComplex *>
607  (((reinterpret_cast<std::ptrdiff_t> (itmp) + 15) & ~ 0xF) +
608  ((reinterpret_cast<std::ptrdiff_t> (in)) & 0xF));
609 
610  *cur_plan_p
611  = fftwf_plan_many_dft (rank, tmp, howmany,
612  reinterpret_cast<fftwf_complex *> (itmp),
613  nullptr, stride, dist,
614  reinterpret_cast<fftwf_complex *> (out),
615  nullptr, stride, dist, dir, plan_flags);
616  }
617  else
618  {
619  *cur_plan_p
620  = fftwf_plan_many_dft (rank, tmp, howmany,
621  reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *> (in)),
622  nullptr, stride, dist,
623  reinterpret_cast<fftwf_complex *> (out),
624  nullptr, stride, dist, dir, plan_flags);
625  }
626 
627  if (*cur_plan_p == nullptr)
628  (*current_liboctave_error_handler) ("Error creating FFTW plan");
629  }
630 
631  return *cur_plan_p;
632 }
633 
634 void *
635 float_fftw_planner::do_create_plan (const int rank, const dim_vector& dims,
636  octave_idx_type howmany,
637  octave_idx_type stride,
638  octave_idx_type dist,
639  const float *in, FloatComplex *out)
640 {
641  fftwf_plan *cur_plan_p = reinterpret_cast<fftwf_plan *> (&m_rplan);
642  bool create_new_plan = false;
643  bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out);
644 
645  // Don't create a new plan if we have a non SIMD plan already but
646  // can do SIMD. This prevents endlessly recreating plans if we
647  // change the alignment.
648 
649  if (m_rplan == nullptr || m_rd != dist || m_rs != stride || m_rr != rank
650  || m_rh != howmany || ((ioalign != m_rsimd_align) ? ! ioalign : false))
651  create_new_plan = true;
652  else
653  {
654  // We still might not have the same shape of array.
655 
656  for (int i = 0; i < rank; i++)
657  if (dims(i) != m_rn(i))
658  {
659  create_new_plan = true;
660  break;
661  }
662  }
663 
664  if (create_new_plan)
665  {
666  m_rd = dist;
667  m_rs = stride;
668  m_rr = rank;
669  m_rh = howmany;
670  m_rsimd_align = ioalign;
671  m_rn = dims;
672 
673  // Note reversal of dimensions for column major storage in FFTW.
674  octave_idx_type nn = 1;
675  OCTAVE_LOCAL_BUFFER (int, tmp, rank);
676 
677  for (int i = 0, j = rank-1; i < rank; i++, j--)
678  {
679  tmp[i] = dims(j);
680  nn *= dims(j);
681  }
682 
683  int plan_flags = 0;
684  bool plan_destroys_in = true;
685 
686  switch (m_meth)
687  {
688  case UNKNOWN:
689  case ESTIMATE:
690  plan_flags |= FFTW_ESTIMATE;
691  plan_destroys_in = false;
692  break;
693  case MEASURE:
694  plan_flags |= FFTW_MEASURE;
695  break;
696  case PATIENT:
697  plan_flags |= FFTW_PATIENT;
698  break;
699  case EXHAUSTIVE:
700  plan_flags |= FFTW_EXHAUSTIVE;
701  break;
702  case HYBRID:
703  if (nn < 8193)
704  plan_flags |= FFTW_MEASURE;
705  else
706  {
707  plan_flags |= FFTW_ESTIMATE;
708  plan_destroys_in = false;
709  }
710  break;
711  }
712 
713  if (ioalign)
714  plan_flags &= ~FFTW_UNALIGNED;
715  else
716  plan_flags |= FFTW_UNALIGNED;
717 
718  if (*cur_plan_p)
719  fftwf_destroy_plan (*cur_plan_p);
720 
721  if (plan_destroys_in)
722  {
723  // Create matrix with the same size and 16-byte alignment as input
724  OCTAVE_LOCAL_BUFFER (float, itmp, nn + 32);
725  itmp = reinterpret_cast<float *>
726  (((reinterpret_cast<std::ptrdiff_t> (itmp) + 15) & ~ 0xF) +
727  ((reinterpret_cast<std::ptrdiff_t> (in)) & 0xF));
728 
729  *cur_plan_p
730  = fftwf_plan_many_dft_r2c (rank, tmp, howmany, itmp,
731  nullptr, stride, dist,
732  reinterpret_cast<fftwf_complex *> (out),
733  nullptr, stride, dist, plan_flags);
734  }
735  else
736  {
737  *cur_plan_p
738  = fftwf_plan_many_dft_r2c (rank, tmp, howmany,
739  (const_cast<float *> (in)),
740  nullptr, stride, dist,
741  reinterpret_cast<fftwf_complex *> (out),
742  nullptr, stride, dist, plan_flags);
743  }
744 
745  if (*cur_plan_p == nullptr)
746  (*current_liboctave_error_handler) ("Error creating FFTW plan");
747  }
748 
749  return *cur_plan_p;
750 }
751 
752 float_fftw_planner::FftwMethod
753 float_fftw_planner::do_method (void)
754 {
755  return m_meth;
756 }
757 
758 float_fftw_planner::FftwMethod
759 float_fftw_planner::do_method (FftwMethod _meth)
760 {
761  FftwMethod ret = m_meth;
762  if (_meth == ESTIMATE || _meth == MEASURE
763  || _meth == PATIENT || _meth == EXHAUSTIVE
764  || _meth == HYBRID)
765  {
766  if (m_meth != _meth)
767  {
768  m_meth = _meth;
769  if (m_rplan)
770  fftwf_destroy_plan (reinterpret_cast<fftwf_plan> (m_rplan));
771  if (m_plan[0])
772  fftwf_destroy_plan (reinterpret_cast<fftwf_plan> (m_plan[0]));
773  if (m_plan[1])
774  fftwf_destroy_plan (reinterpret_cast<fftwf_plan> (m_plan[1]));
775  m_rplan = m_plan[0] = m_plan[1] = nullptr;
776  }
777  }
778  else
779  ret = UNKNOWN;
780  return ret;
781 }
782 
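// FFTW's r2c transforms return only the non-redundant half of the
// spectrum (elements 0 .. nc/2).  The helpers below reconstruct the
// remaining elements from conjugate symmetry, out[j] = conj (out[nc-j]);
// for example, with nc = 4 the missing element 3 is conj (element 1).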
783 template <typename T>
784 static inline void
785 convert_packcomplex_1d (T *out, std::size_t nr, std::size_t nc,
786  octave_idx_type stride, octave_idx_type dist)
787 {
788  octave_quit ();
789 
790  // Fill in the missing data.
791 
792  for (std::size_t i = 0; i < nr; i++)
793  for (std::size_t j = nc/2+1; j < nc; j++)
794  out[j*stride + i*dist] = conj (out[(nc - j)*stride + i*dist]);
795 
796  octave_quit ();
797 }
798 
799 template <typename T>
800 static inline void
801 convert_packcomplex_Nd (T *out, const dim_vector& dv)
802 {
803  std::size_t nc = dv(0);
804  std::size_t nr = dv(1);
805  std::size_t np = (dv.ndims () > 2 ? dv.numel () / nc / nr : 1);
806  std::size_t nrp = nr * np;
807  T *ptr1, *ptr2;
808 
809  octave_quit ();
810 
811  // Create space for the missing elements.
812 
813  for (std::size_t i = 0; i < nrp; i++)
814  {
815  ptr1 = out + i * (nc/2 + 1) + nrp*((nc-1)/2);
816  ptr2 = out + i * nc;
817  for (std::size_t j = 0; j < nc/2+1; j++)
818  *ptr2++ = *ptr1++;
819  }
820 
821  octave_quit ();
822 
823  // Fill in the missing data for the rank = 2 case directly for speed.
824 
825  for (std::size_t i = 0; i < np; i++)
826  {
827  for (std::size_t j = 1; j < nr; j++)
828  for (std::size_t k = nc/2+1; k < nc; k++)
829  out[k + (j + i*nr)*nc] = conj (out[nc - k + ((i+1)*nr - j)*nc]);
830 
831  for (std::size_t j = nc/2+1; j < nc; j++)
832  out[j + i*nr*nc] = conj (out[(i*nr+1)*nc - j]);
833  }
834 
835  octave_quit ();
836 
837  // Now do the permutations needed for rank > 2 cases.
838 
839  std::size_t jstart = dv(0) * dv(1);
840  std::size_t kstep = dv(0);
841  std::size_t nel = dv.numel ();
842 
843  for (int inner = 2; inner < dv.ndims (); inner++)
844  {
845  std::size_t jmax = jstart * dv(inner);
846  for (std::size_t i = 0; i < nel; i+=jmax)
847  for (std::size_t j = jstart, jj = jmax-jstart; j < jj;
848  j+=jstart, jj-=jstart)
849  for (std::size_t k = 0; k < jstart; k+= kstep)
850  for (std::size_t l = nc/2+1; l < nc; l++)
851  {
852  T tmp = out[i+ j + k + l];
853  out[i + j + k + l] = out[i + jj + k + l];
854  out[i + jj + k + l] = tmp;
855  }
856  jstart = jmax;
857  }
858 
859  octave_quit ();
860 }
861 
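// A minimal usage sketch for the double-precision entry points below
// (x, xf, and xt are hypothetical caller-side buffers):
//
//   fftw::fft (x, xf, n);    // spectrum of a real signal of length n
//   fftw::ifft (xf, xt, n);  // inverse; the 1/n scaling is applied here
//
// npts is the transform length; nsamples, stride, and dist describe how
// several transforms are laid out in memory, with dist < 0 meaning
// contiguous transforms of length npts.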
862 int
863 fftw::fft (const double *in, Complex *out, std::size_t npts,
864  std::size_t nsamples, octave_idx_type stride,
865  octave_idx_type dist)
866 {
867  dist = (dist < 0 ? npts : dist);
868 
869  dim_vector dv (npts, 1);
870  void *vplan = fftw_planner::create_plan (1, dv, nsamples,
871  stride, dist, in, out);
872  fftw_plan m_plan = reinterpret_cast<fftw_plan> (vplan);
873 
874  fftw_execute_dft_r2c (m_plan, (const_cast<double *>(in)),
875  reinterpret_cast<fftw_complex *> (out));
876 
877  // Need to create other half of the transform.
878 
879  convert_packcomplex_1d (out, nsamples, npts, stride, dist);
880 
881  return 0;
882 }
883 
884 int
885 fftw::fft (const Complex *in, Complex *out, std::size_t npts,
886  std::size_t nsamples, octave_idx_type stride,
887  octave_idx_type dist)
888 {
889  dist = (dist < 0 ? npts : dist);
890 
891  dim_vector dv (npts, 1);
892  void *vplan = fftw_planner::create_plan (FFTW_FORWARD, 1, dv,
893  nsamples, stride,
894  dist, in, out);
895  fftw_plan m_plan = reinterpret_cast<fftw_plan> (vplan);
896 
897  fftw_execute_dft (m_plan,
898  reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)),
899  reinterpret_cast<fftw_complex *> (out));
900 
901  return 0;
902 }
903 
904 int
905 fftw::ifft (const Complex *in, Complex *out, std::size_t npts,
906  std::size_t nsamples, octave_idx_type stride,
907  octave_idx_type dist)
908 {
909  dist = (dist < 0 ? npts : dist);
910 
911  dim_vector dv (npts, 1);
912  void *vplan = fftw_planner::create_plan (FFTW_BACKWARD, 1, dv, nsamples,
913  stride, dist, in, out);
914  fftw_plan m_plan = reinterpret_cast<fftw_plan> (vplan);
915 
916  fftw_execute_dft (m_plan,
917  reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)),
918  reinterpret_cast<fftw_complex *> (out));
919 
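  // FFTW transforms are unnormalized, so scale the inverse by 1/npts.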
920  const Complex scale = npts;
921  for (std::size_t j = 0; j < nsamples; j++)
922  for (std::size_t i = 0; i < npts; i++)
923  out[i*stride + j*dist] /= scale;
924 
925  return 0;
926 }
927 
928 int
929 fftw::fftNd (const double *in, Complex *out, const int rank,
930  const dim_vector& dv)
931 {
932  octave_idx_type dist = 1;
933  for (int i = 0; i < rank; i++)
934  dist *= dv(i);
935 
936  // Fool with the position of the start of the output matrix, so that
937  // creating other half of the matrix won't cause cache problems.
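  // The compact r2c result occupies only (dv(0)/2 + 1) of every dv(0)
  // output elements; starting it "offset" elements into the buffer also
  // keeps it ahead of where convert_packcomplex_Nd writes while unpacking.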
938 
939  octave_idx_type offset = (dv.numel () / dv(0)) * ((dv(0) - 1) / 2);
940 
941  void *vplan = fftw_planner::create_plan (rank, dv, 1, 1, dist,
942  in, out + offset);
943  fftw_plan m_plan = reinterpret_cast<fftw_plan> (vplan);
944 
945  fftw_execute_dft_r2c (m_plan, (const_cast<double *>(in)),
946  reinterpret_cast<fftw_complex *> (out+ offset));
947 
948  // Need to create other half of the transform.
949 
950  convert_packcomplex_Nd (out, dv);
951 
952  return 0;
953 }
954 
955 int
956 fftw::fftNd (const Complex *in, Complex *out, const int rank,
957  const dim_vector& dv)
958 {
959  octave_idx_type dist = 1;
960  for (int i = 0; i < rank; i++)
961  dist *= dv(i);
962 
963  void *vplan = fftw_planner::create_plan (FFTW_FORWARD, rank, dv,
964  1, 1, dist, in, out);
965  fftw_plan m_plan = reinterpret_cast<fftw_plan> (vplan);
966 
967  fftw_execute_dft (m_plan,
968  reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)),
969  reinterpret_cast<fftw_complex *> (out));
970 
971  return 0;
972 }
973 
974 int
975 fftw::ifftNd (const Complex *in, Complex *out, const int rank,
976  const dim_vector& dv)
977 {
978  octave_idx_type dist = 1;
979  for (int i = 0; i < rank; i++)
980  dist *= dv(i);
981 
982  void *vplan = fftw_planner::create_plan (FFTW_BACKWARD, rank, dv,
983  1, 1, dist, in, out);
984  fftw_plan m_plan = reinterpret_cast<fftw_plan> (vplan);
985 
986  fftw_execute_dft (m_plan,
987  reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)),
988  reinterpret_cast<fftw_complex *> (out));
989 
990  const std::size_t npts = dv.numel ();
991  const Complex scale = npts;
992  for (std::size_t i = 0; i < npts; i++)
993  out[i] /= scale;
994 
995  return 0;
996 }
997 
998 int
999 fftw::fft (const float *in, FloatComplex *out, std::size_t npts,
1000  std::size_t nsamples, octave_idx_type stride,
1001  octave_idx_type dist)
1002 {
1003  dist = (dist < 0 ? npts : dist);
1004 
1005  dim_vector dv (npts, 1);
1006  void *vplan = float_fftw_planner::create_plan (1, dv, nsamples, stride,
1007  dist, in, out);
1008  fftwf_plan m_plan = reinterpret_cast<fftwf_plan> (vplan);
1009 
1010  fftwf_execute_dft_r2c (m_plan, (const_cast<float *>(in)),
1011  reinterpret_cast<fftwf_complex *> (out));
1012 
1013  // Need to create other half of the transform.
1014 
1015  convert_packcomplex_1d (out, nsamples, npts, stride, dist);
1016 
1017  return 0;
1018 }
1019 
1020 int
1021 fftw::fft (const FloatComplex *in, FloatComplex *out, std::size_t npts,
1022  std::size_t nsamples, octave_idx_type stride,
1023  octave_idx_type dist)
1024 {
1025  dist = (dist < 0 ? npts : dist);
1026 
1027  dim_vector dv (npts, 1);
1028  void *vplan = float_fftw_planner::create_plan (FFTW_FORWARD, 1, dv,
1029  nsamples, stride, dist,
1030  in, out);
1031  fftwf_plan m_plan = reinterpret_cast<fftwf_plan> (vplan);
1032 
1033  fftwf_execute_dft (m_plan,
1034  reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)),
1035  reinterpret_cast<fftwf_complex *> (out));
1036 
1037  return 0;
1038 }
1039 
1040 int
1041 fftw::ifft (const FloatComplex *in, FloatComplex *out, std::size_t npts,
1042  std::size_t nsamples, octave_idx_type stride,
1043  octave_idx_type dist)
1044 {
1045  dist = (dist < 0 ? npts : dist);
1046 
1047  dim_vector dv (npts, 1);
1048  void *vplan = float_fftw_planner::create_plan (FFTW_BACKWARD, 1, dv,
1049  nsamples, stride, dist,
1050  in, out);
1051  fftwf_plan m_plan = reinterpret_cast<fftwf_plan> (vplan);
1052 
1053  fftwf_execute_dft (m_plan,
1054  reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)),
1055  reinterpret_cast<fftwf_complex *> (out));
1056 
1057  const FloatComplex scale = npts;
1058  for (std::size_t j = 0; j < nsamples; j++)
1059  for (std::size_t i = 0; i < npts; i++)
1060  out[i*stride + j*dist] /= scale;
1061 
1062  return 0;
1063 }
1064 
1065 int
1066 fftw::fftNd (const float *in, FloatComplex *out, const int rank,
1067  const dim_vector& dv)
1068 {
1069  octave_idx_type dist = 1;
1070  for (int i = 0; i < rank; i++)
1071  dist *= dv(i);
1072 
1073  // Fool with the position of the start of the output matrix, so that
1074  // creating other half of the matrix won't cause cache problems.
1075 
1076  octave_idx_type offset = (dv.numel () / dv(0)) * ((dv(0) - 1) / 2);
1077 
1078  void *vplan = float_fftw_planner::create_plan (rank, dv, 1, 1, dist,
1079  in, out + offset);
1080  fftwf_plan m_plan = reinterpret_cast<fftwf_plan> (vplan);
1081 
1082  fftwf_execute_dft_r2c (m_plan, (const_cast<float *>(in)),
1083  reinterpret_cast<fftwf_complex *> (out+ offset));
1084 
1085  // Need to create other half of the transform.
1086 
1087  convert_packcomplex_Nd (out, dv);
1088 
1089  return 0;
1090 }
1091 
1092 int
1093 fftw::fftNd (const FloatComplex *in, FloatComplex *out, const int rank,
1094  const dim_vector& dv)
1095 {
1096  octave_idx_type dist = 1;
1097  for (int i = 0; i < rank; i++)
1098  dist *= dv(i);
1099 
1100  void *vplan = float_fftw_planner::create_plan (FFTW_FORWARD, rank, dv,
1101  1, 1, dist, in, out);
1102  fftwf_plan m_plan = reinterpret_cast<fftwf_plan> (vplan);
1103 
1104  fftwf_execute_dft (m_plan,
1105  reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)),
1106  reinterpret_cast<fftwf_complex *> (out));
1107 
1108  return 0;
1109 }
1110 
1111 int
1112 fftw::ifftNd (const FloatComplex *in, FloatComplex *out, const int rank,
1113  const dim_vector& dv)
1114 {
1115  octave_idx_type dist = 1;
1116  for (int i = 0; i < rank; i++)
1117  dist *= dv(i);
1118 
1119  void *vplan = float_fftw_planner::create_plan (FFTW_BACKWARD, rank, dv,
1120  1, 1, dist, in, out);
1121  fftwf_plan m_plan = reinterpret_cast<fftwf_plan> (vplan);
1122 
1123  fftwf_execute_dft (m_plan,
1124  reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)),
1125  reinterpret_cast<fftwf_complex *> (out));
1126 
1127  const std::size_t npts = dv.numel ();
1128  const FloatComplex scale = npts;
1129  for (std::size_t i = 0; i < npts; i++)
1130  out[i] /= scale;
1131 
1132  return 0;
1133 }
1134 
1135 #endif
1136 
1137 std::string
1138 fftw_version (void)
1139 {
1140 #if defined (HAVE_FFTW)
1141  return ::fftw_version;
1142 #else
1143  return "none";
1144 #endif
1145 }
1146 
1147 std::string
1148 fftwf_version (void)
1149 {
1150 #if defined (HAVE_FFTW)
1151  return ::fftwf_version;
1152 #else
1153  return "none";
1154 #endif
1155 }
1156 
1157 OCTAVE_END_NAMESPACE(octave)