GNU Octave  9.1.0
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab
oct-fftw.cc
Go to the documentation of this file.
1 ////////////////////////////////////////////////////////////////////////
2 //
3 // Copyright (C) 2001-2024 The Octave Project Developers
4 //
5 // See the file COPYRIGHT.md in the top-level directory of this
6 // distribution or <https://octave.org/copyright/>.
7 //
8 // This file is part of Octave.
9 //
10 // Octave is free software: you can redistribute it and/or modify it
11 // under the terms of the GNU General Public License as published by
12 // the Free Software Foundation, either version 3 of the License, or
13 // (at your option) any later version.
14 //
15 // Octave is distributed in the hope that it will be useful, but
16 // WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 // GNU General Public License for more details.
19 //
20 // You should have received a copy of the GNU General Public License
21 // along with Octave; see the file COPYING. If not, see
22 // <https://www.gnu.org/licenses/>.
23 //
24 ////////////////////////////////////////////////////////////////////////
25 
26 #if defined (HAVE_CONFIG_H)
27 # include "config.h"
28 #endif
29 
30 #if defined (HAVE_FFTW3_H)
31 # include <fftw3.h>
32 #endif
33 
34 #include "lo-error.h"
35 #include "oct-fftw.h"
36 #include "oct-locbuf.h"
37 #include "quit.h"
38 #include "singleton-cleanup.h"
39 
40 #if defined (HAVE_FFTW3_THREADS) || defined (HAVE_FFTW3F_THREADS)
41 # include "nproc-wrapper.h"
42 #endif
43 
45 
46 #if defined (HAVE_FFTW)
47 
48 fftw_planner *fftw_planner::s_instance = nullptr;
49 
50 // Helper class to create and cache FFTW plans for both 1D and
51 // 2D. This implementation defaults to using FFTW_ESTIMATE to create
52 // the plans, which in theory is suboptimal, but provides quite
53 // reasonable performance in practice.
54 
55 // Also note that if FFTW_ESTIMATE is not used then the planner in FFTW3
56 // will destroy the input and output arrays. We must, therefore, create a
57 // temporary input array with the same size and 16-byte alignment as
58 // the original array when using a different planner strategy.
59 // Note that we also use any wisdom that is available, either in a
60 // FFTW3 system wide file or as supplied by the user.
61 
62 // FIXME: if we can ensure 16 byte alignment in Array<T>
63 // (<T> *data) the FFTW3 can use SIMD instructions for further
64 // acceleration.
65 
66 // Note that it is profitable to store the FFTW3 plans, for small FFTs.
67 
69  : m_meth (ESTIMATE), m_rplan (nullptr), m_rd (0), m_rs (0), m_rr (0),
70  m_rh (0), m_rn (), m_rsimd_align (false), m_nthreads (1)
71 {
72  m_plan[0] = m_plan[1] = nullptr;
73  m_d[0] = m_d[1] = m_s[0] = m_s[1] = m_r[0] = m_r[1] = m_h[0] = m_h[1] = 0;
74  m_simd_align[0] = m_simd_align[1] = false;
75  m_inplace[0] = m_inplace[1] = false;
76  m_n[0] = m_n[1] = dim_vector ();
77 
78 #if defined (HAVE_FFTW3_THREADS)
79  int init_ret = fftw_init_threads ();
80  if (! init_ret)
81  (*current_liboctave_error_handler) ("Error initializing FFTW threads");
82 
83  // Check number of processors available to the current process
84  m_nthreads =
86 
87  // Limit number of threads to 3 by default
88  // See: https://octave.discourse.group/t/3121
89  // This can be later changed with fftw ("threads", nthreads).
90  if (m_nthreads > 3)
91  m_nthreads = 3;
92 
93  fftw_plan_with_nthreads (m_nthreads);
94 #endif
95 
96  // If we have a system wide wisdom file, import it.
97  fftw_import_system_wisdom ();
98 }
99 
101 {
102  fftw_plan *plan_p;
103 
104  plan_p = reinterpret_cast<fftw_plan *> (&m_rplan);
105  if (*plan_p)
106  fftw_destroy_plan (*plan_p);
107 
108  plan_p = reinterpret_cast<fftw_plan *> (&m_plan[0]);
109  if (*plan_p)
110  fftw_destroy_plan (*plan_p);
111 
112  plan_p = reinterpret_cast<fftw_plan *> (&m_plan[1]);
113  if (*plan_p)
114  fftw_destroy_plan (*plan_p);
115 }
116 
117 bool
119 {
120  bool retval = true;
121 
122  if (! s_instance)
123  {
124  s_instance = new fftw_planner ();
125  singleton_cleanup_list::add (cleanup_instance);
126  }
127 
128  return retval;
129 }
130 
131 void
133 {
134 #if defined (HAVE_FFTW3_THREADS)
135  if (instance_ok () && nt != threads ())
136  {
137  s_instance->m_nthreads = nt;
138  fftw_plan_with_nthreads (nt);
139  // Clear the current plans.
140  s_instance->m_rplan = nullptr;
141  s_instance->m_plan[0] = s_instance->m_plan[1] = nullptr;
142  }
143 #else
144  octave_unused_parameter (nt);
145 
146  (*current_liboctave_warning_handler)
147  ("unable to change number of threads without FFTW thread support");
148 #endif
149 }
150 
151 #define CHECK_SIMD_ALIGNMENT(x) \
152  (((reinterpret_cast<std::ptrdiff_t> (x)) & 0xF) == 0)
153 
154 void *
155 fftw_planner::do_create_plan (int dir, const int rank,
156  const dim_vector& dims,
157  octave_idx_type howmany,
158  octave_idx_type stride,
159  octave_idx_type dist,
160  const Complex *in, Complex *out)
161 {
162  int which = (dir == FFTW_FORWARD) ? 0 : 1;
163  fftw_plan *cur_plan_p = reinterpret_cast<fftw_plan *> (&m_plan[which]);
164  bool create_new_plan = false;
165  bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out);
166  bool ioinplace = (in == out);
167 
168  // Don't create a new plan if we have a non SIMD plan already but
169  // can do SIMD. This prevents endlessly recreating plans if we
170  // change the alignment.
171 
172  if (m_plan[which] == nullptr || m_d[which] != dist || m_s[which] != stride
173  || m_r[which] != rank || m_h[which] != howmany
174  || ioinplace != m_inplace[which]
175  || ((ioalign != m_simd_align[which]) ? ! ioalign : false))
176  create_new_plan = true;
177  else
178  {
179  // We still might not have the same shape of array.
180 
181  for (int i = 0; i < rank; i++)
182  if (dims(i) != m_n[which](i))
183  {
184  create_new_plan = true;
185  break;
186  }
187  }
188 
189  if (create_new_plan)
190  {
191  m_d[which] = dist;
192  m_s[which] = stride;
193  m_r[which] = rank;
194  m_h[which] = howmany;
195  m_simd_align[which] = ioalign;
196  m_inplace[which] = ioinplace;
197  m_n[which] = dims;
198 
199  // Note reversal of dimensions for column major storage in FFTW.
200  octave_idx_type nn = 1;
201  OCTAVE_LOCAL_BUFFER (int, tmp, rank);
202 
203  for (int i = 0, j = rank-1; i < rank; i++, j--)
204  {
205  tmp[i] = dims(j);
206  nn *= dims(j);
207  }
208 
209  int plan_flags = 0;
210  bool plan_destroys_in = true;
211 
212  switch (m_meth)
213  {
214  case UNKNOWN:
215  case ESTIMATE:
216  plan_flags |= FFTW_ESTIMATE;
217  plan_destroys_in = false;
218  break;
219  case MEASURE:
220  plan_flags |= FFTW_MEASURE;
221  break;
222  case PATIENT:
223  plan_flags |= FFTW_PATIENT;
224  break;
225  case EXHAUSTIVE:
226  plan_flags |= FFTW_EXHAUSTIVE;
227  break;
228  case HYBRID:
229  if (nn < 8193)
230  plan_flags |= FFTW_MEASURE;
231  else
232  {
233  plan_flags |= FFTW_ESTIMATE;
234  plan_destroys_in = false;
235  }
236  break;
237  }
238 
239  if (ioalign)
240  plan_flags &= ~FFTW_UNALIGNED;
241  else
242  plan_flags |= FFTW_UNALIGNED;
243 
244  if (*cur_plan_p)
245  fftw_destroy_plan (*cur_plan_p);
246 
248  itmp = const_cast<Complex *> (in);
249  Complex *otmp = out;
250 
251  if (plan_destroys_in)
252  {
253  // Create matrix with the same size and 16-byte alignment as input
254  OCTAVE_SCOPED_BUFFER (Complex, itmp, nn * howmany + 32);
255  itmp = reinterpret_cast<Complex *>
256  (((reinterpret_cast<std::ptrdiff_t> (itmp) + 15) & ~ 0xF)
257  + ((reinterpret_cast<std::ptrdiff_t> (in)) & 0xF));
258 
259  if (in == out)
260  otmp = itmp;
261  }
262 
263  *cur_plan_p
264  = fftw_plan_many_dft (rank, tmp, howmany,
265  reinterpret_cast<fftw_complex *> (itmp),
266  nullptr, stride, dist,
267  reinterpret_cast<fftw_complex *> (otmp),
268  nullptr, stride, dist, dir, plan_flags);
269 
270  if (*cur_plan_p == nullptr)
271  (*current_liboctave_error_handler) ("Error creating FFTW plan");
272  }
273 
274  return *cur_plan_p;
275 }
276 
277 void *
278 fftw_planner::do_create_plan (const int rank, const dim_vector& dims,
279  octave_idx_type howmany,
280  octave_idx_type stride,
281  octave_idx_type dist,
282  const double *in, Complex *out)
283 {
284  fftw_plan *cur_plan_p = reinterpret_cast<fftw_plan *> (&m_rplan);
285  bool create_new_plan = false;
286  bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out);
287 
288  // Don't create a new plan if we have a non SIMD plan already but
289  // can do SIMD. This prevents endlessly recreating plans if we
290  // change the alignment.
291 
292  if (m_rplan == nullptr || m_rd != dist || m_rs != stride || m_rr != rank
293  || m_rh != howmany || ((ioalign != m_rsimd_align) ? ! ioalign : false))
294  create_new_plan = true;
295  else
296  {
297  // We still might not have the same shape of array.
298 
299  for (int i = 0; i < rank; i++)
300  if (dims(i) != m_rn(i))
301  {
302  create_new_plan = true;
303  break;
304  }
305  }
306 
307  if (create_new_plan)
308  {
309  m_rd = dist;
310  m_rs = stride;
311  m_rr = rank;
312  m_rh = howmany;
313  m_rsimd_align = ioalign;
314  m_rn = dims;
315 
316  // Note reversal of dimensions for column major storage in FFTW.
317  octave_idx_type nn = 1;
318  OCTAVE_LOCAL_BUFFER (int, tmp, rank);
319 
320  for (int i = 0, j = rank-1; i < rank; i++, j--)
321  {
322  tmp[i] = dims(j);
323  nn *= dims(j);
324  }
325 
326  int plan_flags = 0;
327  bool plan_destroys_in = true;
328 
329  switch (m_meth)
330  {
331  case UNKNOWN:
332  case ESTIMATE:
333  plan_flags |= FFTW_ESTIMATE;
334  plan_destroys_in = false;
335  break;
336  case MEASURE:
337  plan_flags |= FFTW_MEASURE;
338  break;
339  case PATIENT:
340  plan_flags |= FFTW_PATIENT;
341  break;
342  case EXHAUSTIVE:
343  plan_flags |= FFTW_EXHAUSTIVE;
344  break;
345  case HYBRID:
346  if (nn < 8193)
347  plan_flags |= FFTW_MEASURE;
348  else
349  {
350  plan_flags |= FFTW_ESTIMATE;
351  plan_destroys_in = false;
352  }
353  break;
354  }
355 
356  if (ioalign)
357  plan_flags &= ~FFTW_UNALIGNED;
358  else
359  plan_flags |= FFTW_UNALIGNED;
360 
361  if (*cur_plan_p)
362  fftw_destroy_plan (*cur_plan_p);
363 
364  OCTAVE_SCOPED_BUFFER_ANCHOR (double, itmp);
365  itmp = const_cast<double *> (in);
366  Complex *otmp = out;
367 
368  if (plan_destroys_in)
369  {
370  // Create matrix with the same size and 16-byte alignment as input
371  octave_idx_type in_place = reinterpret_cast<double *> (out) == in;
372  OCTAVE_SCOPED_BUFFER (double, itmp,
373  nn * howmany * (in_place + 1) + 32);
374  itmp = reinterpret_cast<double *>
375  (((reinterpret_cast<std::ptrdiff_t> (itmp) + 15) & ~ 0xF)
376  + ((reinterpret_cast<std::ptrdiff_t> (in)) & 0xF));
377 
378  if (in_place)
379  otmp = reinterpret_cast<Complex *> (itmp);
380  }
381 
382  *cur_plan_p
383  = fftw_plan_many_dft_r2c (rank, tmp, howmany, itmp,
384  nullptr, stride, dist,
385  reinterpret_cast<fftw_complex *> (otmp),
386  nullptr, stride, dist, plan_flags);
387 
388  if (*cur_plan_p == nullptr)
389  (*current_liboctave_error_handler) ("Error creating FFTW plan");
390  }
391 
392  return *cur_plan_p;
393 }
394 
396 fftw_planner::do_method ()
397 {
398  return m_meth;
399 }
400 
402 fftw_planner::do_method (FftwMethod _meth)
403 {
404  FftwMethod ret = m_meth;
405  if (_meth == ESTIMATE || _meth == MEASURE
406  || _meth == PATIENT || _meth == EXHAUSTIVE
407  || _meth == HYBRID)
408  {
409  if (m_meth != _meth)
410  {
411  m_meth = _meth;
412  if (m_rplan)
413  fftw_destroy_plan (reinterpret_cast<fftw_plan> (m_rplan));
414  if (m_plan[0])
415  fftw_destroy_plan (reinterpret_cast<fftw_plan> (m_plan[0]));
416  if (m_plan[1])
417  fftw_destroy_plan (reinterpret_cast<fftw_plan> (m_plan[1]));
418  m_rplan = m_plan[0] = m_plan[1] = nullptr;
419  }
420  }
421  else
422  ret = UNKNOWN;
423  return ret;
424 }
425 
426 float_fftw_planner *float_fftw_planner::s_instance = nullptr;
427 
429  : m_meth (ESTIMATE), m_rplan (nullptr), m_rd (0), m_rs (0), m_rr (0),
430  m_rh (0), m_rn (), m_rsimd_align (false), m_nthreads (1)
431 {
432  m_plan[0] = m_plan[1] = nullptr;
433  m_d[0] = m_d[1] = m_s[0] = m_s[1] = m_r[0] = m_r[1] = m_h[0] = m_h[1] = 0;
434  m_simd_align[0] = m_simd_align[1] = false;
435  m_inplace[0] = m_inplace[1] = false;
436  m_n[0] = m_n[1] = dim_vector ();
437 
438 #if defined (HAVE_FFTW3F_THREADS)
439  int init_ret = fftwf_init_threads ();
440  if (! init_ret)
441  (*current_liboctave_error_handler) ("Error initializing FFTW3F threads");
442 
443  // Use number of processors available to the current process
444  // This can be later changed with fftw ("threads", nthreads).
445  m_nthreads =
447 
448  fftwf_plan_with_nthreads (m_nthreads);
449 #endif
450 
451  // If we have a system wide wisdom file, import it.
452  fftwf_import_system_wisdom ();
453 }
454 
456 {
457  fftwf_plan *plan_p;
458 
459  plan_p = reinterpret_cast<fftwf_plan *> (&m_rplan);
460  if (*plan_p)
461  fftwf_destroy_plan (*plan_p);
462 
463  plan_p = reinterpret_cast<fftwf_plan *> (&m_plan[0]);
464  if (*plan_p)
465  fftwf_destroy_plan (*plan_p);
466 
467  plan_p = reinterpret_cast<fftwf_plan *> (&m_plan[1]);
468  if (*plan_p)
469  fftwf_destroy_plan (*plan_p);
470 }
471 
472 bool
474 {
475  bool retval = true;
476 
477  if (! s_instance)
478  {
479  s_instance = new float_fftw_planner ();
480  singleton_cleanup_list::add (cleanup_instance);
481  }
482 
483  return retval;
484 }
485 
486 void
488 {
489 #if defined (HAVE_FFTW3F_THREADS)
490  if (instance_ok () && nt != threads ())
491  {
492  s_instance->m_nthreads = nt;
493  fftwf_plan_with_nthreads (nt);
494  // Clear the current plans.
495  s_instance->m_rplan = nullptr;
496  s_instance->m_plan[0] = s_instance->m_plan[1] = nullptr;
497  }
498 #else
499  octave_unused_parameter (nt);
500 
501  (*current_liboctave_warning_handler)
502  ("unable to change number of threads without FFTW thread support");
503 #endif
504 }
505 
506 void *
507 float_fftw_planner::do_create_plan (int dir, const int rank,
508  const dim_vector& dims,
509  octave_idx_type howmany,
510  octave_idx_type stride,
511  octave_idx_type dist,
512  const FloatComplex *in,
513  FloatComplex *out)
514 {
515  int which = (dir == FFTW_FORWARD) ? 0 : 1;
516  fftwf_plan *cur_plan_p = reinterpret_cast<fftwf_plan *> (&m_plan[which]);
517  bool create_new_plan = false;
518  bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out);
519  bool ioinplace = (in == out);
520 
521  // Don't create a new plan if we have a non SIMD plan already but
522  // can do SIMD. This prevents endlessly recreating plans if we
523  // change the alignment.
524 
525  if (m_plan[which] == nullptr || m_d[which] != dist || m_s[which] != stride
526  || m_r[which] != rank || m_h[which] != howmany
527  || ioinplace != m_inplace[which]
528  || ((ioalign != m_simd_align[which]) ? ! ioalign : false))
529  create_new_plan = true;
530  else
531  {
532  // We still might not have the same shape of array.
533 
534  for (int i = 0; i < rank; i++)
535  if (dims(i) != m_n[which](i))
536  {
537  create_new_plan = true;
538  break;
539  }
540  }
541 
542  if (create_new_plan)
543  {
544  m_d[which] = dist;
545  m_s[which] = stride;
546  m_r[which] = rank;
547  m_h[which] = howmany;
548  m_simd_align[which] = ioalign;
549  m_inplace[which] = ioinplace;
550  m_n[which] = dims;
551 
552  // Note reversal of dimensions for column major storage in FFTW.
553  octave_idx_type nn = 1;
554  OCTAVE_LOCAL_BUFFER (int, tmp, rank);
555 
556  for (int i = 0, j = rank-1; i < rank; i++, j--)
557  {
558  tmp[i] = dims(j);
559  nn *= dims(j);
560  }
561 
562  int plan_flags = 0;
563  bool plan_destroys_in = true;
564 
565  switch (m_meth)
566  {
567  case UNKNOWN:
568  case ESTIMATE:
569  plan_flags |= FFTW_ESTIMATE;
570  plan_destroys_in = false;
571  break;
572  case MEASURE:
573  plan_flags |= FFTW_MEASURE;
574  break;
575  case PATIENT:
576  plan_flags |= FFTW_PATIENT;
577  break;
578  case EXHAUSTIVE:
579  plan_flags |= FFTW_EXHAUSTIVE;
580  break;
581  case HYBRID:
582  if (nn < 8193)
583  plan_flags |= FFTW_MEASURE;
584  else
585  {
586  plan_flags |= FFTW_ESTIMATE;
587  plan_destroys_in = false;
588  }
589  break;
590  }
591 
592  if (ioalign)
593  plan_flags &= ~FFTW_UNALIGNED;
594  else
595  plan_flags |= FFTW_UNALIGNED;
596 
597  if (*cur_plan_p)
598  fftwf_destroy_plan (*cur_plan_p);
599 
601  itmp = const_cast<FloatComplex *> (in);
602  FloatComplex *otmp = out;
603 
604  if (plan_destroys_in)
605  {
606  // Create matrix with the same size and 16-byte alignment as input
607  OCTAVE_SCOPED_BUFFER (FloatComplex, itmp, nn * howmany + 32);
608  itmp = reinterpret_cast<FloatComplex *>
609  (((reinterpret_cast<std::ptrdiff_t> (itmp) + 15) & ~ 0xF)
610  + ((reinterpret_cast<std::ptrdiff_t> (in)) & 0xF));
611 
612  if (out == in)
613  otmp = itmp;
614  }
615 
616  *cur_plan_p
617  = fftwf_plan_many_dft (rank, tmp, howmany,
618  reinterpret_cast<fftwf_complex *> (itmp),
619  nullptr, stride, dist,
620  reinterpret_cast<fftwf_complex *> (otmp),
621  nullptr, stride, dist, dir, plan_flags);
622 
623  if (*cur_plan_p == nullptr)
624  (*current_liboctave_error_handler) ("Error creating FFTW plan");
625  }
626 
627  return *cur_plan_p;
628 }
629 
630 void *
631 float_fftw_planner::do_create_plan (const int rank, const dim_vector& dims,
632  octave_idx_type howmany,
633  octave_idx_type stride,
634  octave_idx_type dist,
635  const float *in, FloatComplex *out)
636 {
637  fftwf_plan *cur_plan_p = reinterpret_cast<fftwf_plan *> (&m_rplan);
638  bool create_new_plan = false;
639  bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out);
640 
641  // Don't create a new plan if we have a non SIMD plan already but
642  // can do SIMD. This prevents endlessly recreating plans if we
643  // change the alignment.
644 
645  if (m_rplan == nullptr || m_rd != dist || m_rs != stride || m_rr != rank
646  || m_rh != howmany || ((ioalign != m_rsimd_align) ? ! ioalign : false))
647  create_new_plan = true;
648  else
649  {
650  // We still might not have the same shape of array.
651 
652  for (int i = 0; i < rank; i++)
653  if (dims(i) != m_rn(i))
654  {
655  create_new_plan = true;
656  break;
657  }
658  }
659 
660  if (create_new_plan)
661  {
662  m_rd = dist;
663  m_rs = stride;
664  m_rr = rank;
665  m_rh = howmany;
666  m_rsimd_align = ioalign;
667  m_rn = dims;
668 
669  // Note reversal of dimensions for column major storage in FFTW.
670  octave_idx_type nn = 1;
671  OCTAVE_LOCAL_BUFFER (int, tmp, rank);
672 
673  for (int i = 0, j = rank-1; i < rank; i++, j--)
674  {
675  tmp[i] = dims(j);
676  nn *= dims(j);
677  }
678 
679  int plan_flags = 0;
680  bool plan_destroys_in = true;
681 
682  switch (m_meth)
683  {
684  case UNKNOWN:
685  case ESTIMATE:
686  plan_flags |= FFTW_ESTIMATE;
687  plan_destroys_in = false;
688  break;
689  case MEASURE:
690  plan_flags |= FFTW_MEASURE;
691  break;
692  case PATIENT:
693  plan_flags |= FFTW_PATIENT;
694  break;
695  case EXHAUSTIVE:
696  plan_flags |= FFTW_EXHAUSTIVE;
697  break;
698  case HYBRID:
699  if (nn < 8193)
700  plan_flags |= FFTW_MEASURE;
701  else
702  {
703  plan_flags |= FFTW_ESTIMATE;
704  plan_destroys_in = false;
705  }
706  break;
707  }
708 
709  if (ioalign)
710  plan_flags &= ~FFTW_UNALIGNED;
711  else
712  plan_flags |= FFTW_UNALIGNED;
713 
714  if (*cur_plan_p)
715  fftwf_destroy_plan (*cur_plan_p);
716 
717  OCTAVE_SCOPED_BUFFER_ANCHOR (float, itmp);
718  itmp = const_cast<float *> (in);
719  FloatComplex *otmp = out;
720 
721  if (plan_destroys_in)
722  {
723  // Create matrix with the same size and 16-byte alignment as input
724  octave_idx_type in_place = reinterpret_cast<float *> (out) == in;
725  OCTAVE_SCOPED_BUFFER (float, itmp,
726  nn * howmany * (in_place + 1) + 32);
727  itmp = reinterpret_cast<float *>
728  (((reinterpret_cast<std::ptrdiff_t> (itmp) + 15) & ~ 0xF)
729  + ((reinterpret_cast<std::ptrdiff_t> (in)) & 0xF));
730 
731  if (in_place)
732  otmp = reinterpret_cast<FloatComplex *> (itmp);
733  }
734 
735  *cur_plan_p
736  = fftwf_plan_many_dft_r2c (rank, tmp, howmany, itmp,
737  nullptr, stride, dist,
738  reinterpret_cast<fftwf_complex *> (otmp),
739  nullptr, stride, dist, plan_flags);
740 
741  if (*cur_plan_p == nullptr)
742  (*current_liboctave_error_handler) ("Error creating FFTW plan");
743  }
744 
745  return *cur_plan_p;
746 }
747 
749 float_fftw_planner::do_method ()
750 {
751  return m_meth;
752 }
753 
755 float_fftw_planner::do_method (FftwMethod _meth)
756 {
757  FftwMethod ret = m_meth;
758  if (_meth == ESTIMATE || _meth == MEASURE
759  || _meth == PATIENT || _meth == EXHAUSTIVE
760  || _meth == HYBRID)
761  {
762  if (m_meth != _meth)
763  {
764  m_meth = _meth;
765  if (m_rplan)
766  fftwf_destroy_plan (reinterpret_cast<fftwf_plan> (m_rplan));
767  if (m_plan[0])
768  fftwf_destroy_plan (reinterpret_cast<fftwf_plan> (m_plan[0]));
769  if (m_plan[1])
770  fftwf_destroy_plan (reinterpret_cast<fftwf_plan> (m_plan[1]));
771  m_rplan = m_plan[0] = m_plan[1] = nullptr;
772  }
773  }
774  else
775  ret = UNKNOWN;
776  return ret;
777 }
778 
779 template <typename T>
780 static inline void
781 convert_packcomplex_1d (T *out, std::size_t nr, std::size_t nc,
782  octave_idx_type stride, octave_idx_type dist)
783 {
784  octave_quit ();
785 
786  // Fill in the missing data.
787 
788  for (std::size_t i = 0; i < nr; i++)
789  for (std::size_t j = nc/2+1; j < nc; j++)
790  out[j*stride + i*dist] = conj (out[(nc - j)*stride + i*dist]);
791 
792  octave_quit ();
793 }
794 
795 template <typename T>
796 static inline void
797 convert_packcomplex_Nd (T *out, const dim_vector& dv)
798 {
799  std::size_t nc = dv(0);
800  std::size_t nr = dv(1);
801  std::size_t np = (dv.ndims () > 2 ? dv.numel () / nc / nr : 1);
802  std::size_t nrp = nr * np;
803  T *ptr1, *ptr2;
804 
805  octave_quit ();
806 
807  // Create space for the missing elements.
808 
809  for (std::size_t i = 0; i < nrp; i++)
810  {
811  ptr1 = out + i * (nc/2 + 1) + nrp*((nc-1)/2);
812  ptr2 = out + i * nc;
813  for (std::size_t j = 0; j < nc/2+1; j++)
814  *ptr2++ = *ptr1++;
815  }
816 
817  octave_quit ();
818 
819  // Fill in the missing data for the rank = 2 case directly for speed.
820 
821  for (std::size_t i = 0; i < np; i++)
822  {
823  for (std::size_t j = 1; j < nr; j++)
824  for (std::size_t k = nc/2+1; k < nc; k++)
825  out[k + (j + i*nr)*nc] = conj (out[nc - k + ((i+1)*nr - j)*nc]);
826 
827  for (std::size_t j = nc/2+1; j < nc; j++)
828  out[j + i*nr*nc] = conj (out[(i*nr+1)*nc - j]);
829  }
830 
831  octave_quit ();
832 
833  // Now do the permutations needed for rank > 2 cases.
834 
835  std::size_t jstart = dv(0) * dv(1);
836  std::size_t kstep = dv(0);
837  std::size_t nel = dv.numel ();
838 
839  for (int inner = 2; inner < dv.ndims (); inner++)
840  {
841  std::size_t jmax = jstart * dv(inner);
842  for (std::size_t i = 0; i < nel; i+=jmax)
843  for (std::size_t j = jstart, jj = jmax-jstart; j < jj;
844  j+=jstart, jj-=jstart)
845  for (std::size_t k = 0; k < jstart; k+= kstep)
846  for (std::size_t l = nc/2+1; l < nc; l++)
847  {
848  T tmp = out[i+ j + k + l];
849  out[i + j + k + l] = out[i + jj + k + l];
850  out[i + jj + k + l] = tmp;
851  }
852  jstart = jmax;
853  }
854 
855  octave_quit ();
856 }
857 
858 int
859 fftw::fft (const double *in, Complex *out, std::size_t npts,
860  std::size_t nsamples, octave_idx_type stride,
861  octave_idx_type dist)
862 {
863  dist = (dist < 0 ? npts : dist);
864 
865  dim_vector dv (npts, 1);
866  void *vplan = fftw_planner::create_plan (1, dv, nsamples,
867  stride, dist, in, out);
868  fftw_plan m_plan = reinterpret_cast<fftw_plan> (vplan);
869 
870  fftw_execute_dft_r2c (m_plan, (const_cast<double *> (in)),
871  reinterpret_cast<fftw_complex *> (out));
872 
873  // Need to create other half of the transform.
874 
875  convert_packcomplex_1d (out, nsamples, npts, stride, dist);
876 
877  return 0;
878 }
879 
880 int
881 fftw::fft (const Complex *in, Complex *out, std::size_t npts,
882  std::size_t nsamples, octave_idx_type stride,
883  octave_idx_type dist)
884 {
885  dist = (dist < 0 ? npts : dist);
886 
887  dim_vector dv (npts, 1);
888  void *vplan = fftw_planner::create_plan (FFTW_FORWARD, 1, dv,
889  nsamples, stride,
890  dist, in, out);
891  fftw_plan m_plan = reinterpret_cast<fftw_plan> (vplan);
892 
893  fftw_execute_dft (m_plan,
894  reinterpret_cast<fftw_complex *> (const_cast<Complex *> (in)),
895  reinterpret_cast<fftw_complex *> (out));
896 
897  return 0;
898 }
899 
900 int
901 fftw::ifft (const Complex *in, Complex *out, std::size_t npts,
902  std::size_t nsamples, octave_idx_type stride,
903  octave_idx_type dist)
904 {
905  dist = (dist < 0 ? npts : dist);
906 
907  dim_vector dv (npts, 1);
908  void *vplan = fftw_planner::create_plan (FFTW_BACKWARD, 1, dv, nsamples,
909  stride, dist, in, out);
910  fftw_plan m_plan = reinterpret_cast<fftw_plan> (vplan);
911 
912  fftw_execute_dft (m_plan,
913  reinterpret_cast<fftw_complex *> (const_cast<Complex *> (in)),
914  reinterpret_cast<fftw_complex *> (out));
915 
916  const Complex scale = npts;
917  for (std::size_t j = 0; j < nsamples; j++)
918  for (std::size_t i = 0; i < npts; i++)
919  out[i*stride + j*dist] /= scale;
920 
921  return 0;
922 }
923 
924 int
925 fftw::fftNd (const double *in, Complex *out, const int rank,
926  const dim_vector& dv)
927 {
928  octave_idx_type dist = 1;
929  for (int i = 0; i < rank; i++)
930  dist *= dv(i);
931 
932  // Fool with the position of the start of the output matrix, so that
933  // creating other half of the matrix won't cause cache problems.
934 
935  octave_idx_type offset = (dv.numel () / dv(0)) * ((dv(0) - 1) / 2);
936 
937  void *vplan = fftw_planner::create_plan (rank, dv, 1, 1, dist,
938  in, out + offset);
939  fftw_plan m_plan = reinterpret_cast<fftw_plan> (vplan);
940 
941  fftw_execute_dft_r2c (m_plan, (const_cast<double *> (in)),
942  reinterpret_cast<fftw_complex *> (out+ offset));
943 
944  // Need to create other half of the transform.
945 
946  convert_packcomplex_Nd (out, dv);
947 
948  return 0;
949 }
950 
951 int
952 fftw::fftNd (const Complex *in, Complex *out, const int rank,
953  const dim_vector& dv)
954 {
955  octave_idx_type dist = 1;
956  for (int i = 0; i < rank; i++)
957  dist *= dv(i);
958 
959  void *vplan = fftw_planner::create_plan (FFTW_FORWARD, rank, dv,
960  1, 1, dist, in, out);
961  fftw_plan m_plan = reinterpret_cast<fftw_plan> (vplan);
962 
963  fftw_execute_dft (m_plan,
964  reinterpret_cast<fftw_complex *> (const_cast<Complex *> (in)),
965  reinterpret_cast<fftw_complex *> (out));
966 
967  return 0;
968 }
969 
970 int
971 fftw::ifftNd (const Complex *in, Complex *out, const int rank,
972  const dim_vector& dv)
973 {
974  octave_idx_type dist = 1;
975  for (int i = 0; i < rank; i++)
976  dist *= dv(i);
977 
978  void *vplan = fftw_planner::create_plan (FFTW_BACKWARD, rank, dv,
979  1, 1, dist, in, out);
980  fftw_plan m_plan = reinterpret_cast<fftw_plan> (vplan);
981 
982  fftw_execute_dft (m_plan,
983  reinterpret_cast<fftw_complex *> (const_cast<Complex *> (in)),
984  reinterpret_cast<fftw_complex *> (out));
985 
986  const std::size_t npts = dv.numel ();
987  const Complex scale = npts;
988  for (std::size_t i = 0; i < npts; i++)
989  out[i] /= scale;
990 
991  return 0;
992 }
993 
994 int
995 fftw::fft (const float *in, FloatComplex *out, std::size_t npts,
996  std::size_t nsamples, octave_idx_type stride,
997  octave_idx_type dist)
998 {
999  dist = (dist < 0 ? npts : dist);
1000 
1001  dim_vector dv (npts, 1);
1002  void *vplan = float_fftw_planner::create_plan (1, dv, nsamples, stride,
1003  dist, in, out);
1004  fftwf_plan m_plan = reinterpret_cast<fftwf_plan> (vplan);
1005 
1006  fftwf_execute_dft_r2c (m_plan, (const_cast<float *> (in)),
1007  reinterpret_cast<fftwf_complex *> (out));
1008 
1009  // Need to create other half of the transform.
1010 
1011  convert_packcomplex_1d (out, nsamples, npts, stride, dist);
1012 
1013  return 0;
1014 }
1015 
1016 int
1017 fftw::fft (const FloatComplex *in, FloatComplex *out, std::size_t npts,
1018  std::size_t nsamples, octave_idx_type stride,
1019  octave_idx_type dist)
1020 {
1021  dist = (dist < 0 ? npts : dist);
1022 
1023  dim_vector dv (npts, 1);
1024  void *vplan = float_fftw_planner::create_plan (FFTW_FORWARD, 1, dv,
1025  nsamples, stride, dist,
1026  in, out);
1027  fftwf_plan m_plan = reinterpret_cast<fftwf_plan> (vplan);
1028 
1029  fftwf_execute_dft (m_plan,
1030  reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *> (in)),
1031  reinterpret_cast<fftwf_complex *> (out));
1032 
1033  return 0;
1034 }
1035 
1036 int
1037 fftw::ifft (const FloatComplex *in, FloatComplex *out, std::size_t npts,
1038  std::size_t nsamples, octave_idx_type stride,
1039  octave_idx_type dist)
1040 {
1041  dist = (dist < 0 ? npts : dist);
1042 
1043  dim_vector dv (npts, 1);
1044  void *vplan = float_fftw_planner::create_plan (FFTW_BACKWARD, 1, dv,
1045  nsamples, stride, dist,
1046  in, out);
1047  fftwf_plan m_plan = reinterpret_cast<fftwf_plan> (vplan);
1048 
1049  fftwf_execute_dft (m_plan,
1050  reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *> (in)),
1051  reinterpret_cast<fftwf_complex *> (out));
1052 
1053  const FloatComplex scale = npts;
1054  for (std::size_t j = 0; j < nsamples; j++)
1055  for (std::size_t i = 0; i < npts; i++)
1056  out[i*stride + j*dist] /= scale;
1057 
1058  return 0;
1059 }
1060 
1061 int
1062 fftw::fftNd (const float *in, FloatComplex *out, const int rank,
1063  const dim_vector& dv)
1064 {
1065  octave_idx_type dist = 1;
1066  for (int i = 0; i < rank; i++)
1067  dist *= dv(i);
1068 
1069  // Fool with the position of the start of the output matrix, so that
1070  // creating other half of the matrix won't cause cache problems.
1071 
1072  octave_idx_type offset = (dv.numel () / dv(0)) * ((dv(0) - 1) / 2);
1073 
1074  void *vplan = float_fftw_planner::create_plan (rank, dv, 1, 1, dist,
1075  in, out + offset);
1076  fftwf_plan m_plan = reinterpret_cast<fftwf_plan> (vplan);
1077 
1078  fftwf_execute_dft_r2c (m_plan, (const_cast<float *> (in)),
1079  reinterpret_cast<fftwf_complex *> (out+ offset));
1080 
1081  // Need to create other half of the transform.
1082 
1083  convert_packcomplex_Nd (out, dv);
1084 
1085  return 0;
1086 }
1087 
1088 int
1089 fftw::fftNd (const FloatComplex *in, FloatComplex *out, const int rank,
1090  const dim_vector& dv)
1091 {
1092  octave_idx_type dist = 1;
1093  for (int i = 0; i < rank; i++)
1094  dist *= dv(i);
1095 
1096  void *vplan = float_fftw_planner::create_plan (FFTW_FORWARD, rank, dv,
1097  1, 1, dist, in, out);
1098  fftwf_plan m_plan = reinterpret_cast<fftwf_plan> (vplan);
1099 
1100  fftwf_execute_dft (m_plan,
1101  reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *> (in)),
1102  reinterpret_cast<fftwf_complex *> (out));
1103 
1104  return 0;
1105 }
1106 
1107 int
1108 fftw::ifftNd (const FloatComplex *in, FloatComplex *out, const int rank,
1109  const dim_vector& dv)
1110 {
1111  octave_idx_type dist = 1;
1112  for (int i = 0; i < rank; i++)
1113  dist *= dv(i);
1114 
1115  void *vplan = float_fftw_planner::create_plan (FFTW_BACKWARD, rank, dv,
1116  1, 1, dist, in, out);
1117  fftwf_plan m_plan = reinterpret_cast<fftwf_plan> (vplan);
1118 
1119  fftwf_execute_dft (m_plan,
1120  reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *> (in)),
1121  reinterpret_cast<fftwf_complex *> (out));
1122 
1123  const std::size_t npts = dv.numel ();
1124  const FloatComplex scale = npts;
1125  for (std::size_t i = 0; i < npts; i++)
1126  out[i] /= scale;
1127 
1128  return 0;
1129 }
1130 
1131 #endif
1132 
1133 std::string
1135 {
1136 #if defined (HAVE_FFTW)
1138 #else
1139  return "none";
1140 #endif
1141 }
1142 
1143 std::string
1145 {
1146 #if defined (HAVE_FFTW)
1148 #else
1149  return "none";
1150 #endif
1151 }
1152 
1153 OCTAVE_END_NAMESPACE(octave)
ComplexColumnVector conj(const ComplexColumnVector &a)
Definition: CColVector.cc:217
Vector representing the dimensions (size) of an Array.
Definition: dim-vector.h:94
octave_idx_type numel(int n=0) const
Number of elements that a matrix with this dimensions would have.
Definition: dim-vector.h:335
octave_idx_type ndims() const
Number of dimensions.
Definition: dim-vector.h:257
static bool instance_ok()
Definition: oct-fftw.cc:118
static int threads()
Definition: oct-fftw.h:105
static void * create_plan(int dir, const int rank, const dim_vector &dims, octave_idx_type howmany, octave_idx_type stride, octave_idx_type dist, const Complex *in, Complex *out)
Definition: oct-fftw.h:67
static int ifftNd(const Complex *, Complex *, const int, const dim_vector &)
Definition: oct-fftw.cc:971
static int fft(const double *in, Complex *out, std::size_t npts, std::size_t nsamples=1, octave_idx_type stride=1, octave_idx_type dist=-1)
Definition: oct-fftw.cc:859
static int ifft(const Complex *in, Complex *out, std::size_t npts, std::size_t nsamples=1, octave_idx_type stride=1, octave_idx_type dist=-1)
Definition: oct-fftw.cc:901
static int fftNd(const double *, Complex *, const int, const dim_vector &)
Definition: oct-fftw.cc:925
static void * create_plan(int dir, const int rank, const dim_vector &dims, octave_idx_type howmany, octave_idx_type stride, octave_idx_type dist, const FloatComplex *in, FloatComplex *out)
Definition: oct-fftw.h:209
static bool instance_ok()
Definition: oct-fftw.cc:473
static int threads()
Definition: oct-fftw.h:247
static void add(fptr f)
OCTAVE_BEGIN_NAMESPACE(octave) static octave_value daspk_fcn
void scale(Matrix &m, double x, double y, double z)
Definition: graphics.cc:5500
unsigned long int octave_num_processors_wrapper(enum octave_nproc_query octave_query)
Definition: nproc-wrapper.c:40
@ OCTAVE_NPROC_CURRENT_OVERRIDABLE
Definition: nproc-wrapper.h:37
std::complex< double > Complex
Definition: oct-cmplx.h:33
std::complex< float > FloatComplex
Definition: oct-cmplx.h:34
std::string fftwf_version()
Definition: oct-fftw.cc:1144
std::string fftw_version()
Definition: oct-fftw.cc:1134
#define CHECK_SIMD_ALIGNMENT(x)
Definition: oct-fftw.cc:151
#define OCTAVE_LOCAL_BUFFER(T, buf, size)
Definition: oct-locbuf.h:44
#define OCTAVE_SCOPED_BUFFER_ANCHOR(T, buf)
Definition: oct-locbuf.h:57
#define OCTAVE_SCOPED_BUFFER(T, buf, size)
Definition: oct-locbuf.h:69