GNU Octave 7.1.0
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab
oct-fftw.cc
Go to the documentation of this file.
1////////////////////////////////////////////////////////////////////////
2//
3// Copyright (C) 2001-2022 The Octave Project Developers
4//
5// See the file COPYRIGHT.md in the top-level directory of this
6// distribution or <https://octave.org/copyright/>.
7//
8// This file is part of Octave.
9//
10// Octave is free software: you can redistribute it and/or modify it
11// under the terms of the GNU General Public License as published by
12// the Free Software Foundation, either version 3 of the License, or
13// (at your option) any later version.
14//
15// Octave is distributed in the hope that it will be useful, but
16// WITHOUT ANY WARRANTY; without even the implied warranty of
17// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18// GNU General Public License for more details.
19//
20// You should have received a copy of the GNU General Public License
21// along with Octave; see the file COPYING. If not, see
22// <https://www.gnu.org/licenses/>.
23//
24////////////////////////////////////////////////////////////////////////
25
26#if defined (HAVE_CONFIG_H)
27# include "config.h"
28#endif
29
30#if defined (HAVE_FFTW3_H)
31# include <fftw3.h>
32#endif
33
34#include "lo-error.h"
35#include "oct-fftw.h"
36#include "oct-locbuf.h"
37#include "quit.h"
38#include "singleton-cleanup.h"
39
40#if defined (HAVE_FFTW3_THREADS) || defined (HAVE_FFTW3F_THREADS)
41# include "nproc-wrapper.h"
42#endif
43
44namespace octave
45{
46#if defined (HAVE_FFTW)
47
  // Singleton instance; created lazily by instance_ok () and registered
  // with singleton_cleanup_list for destruction at interpreter shutdown.
  fftw_planner *fftw_planner::s_instance = nullptr;
49
50 // Helper class to create and cache FFTW plans for both 1D and
51 // 2D. This implementation defaults to using FFTW_ESTIMATE to create
52 // the plans, which in theory is suboptimal, but provides quite
53 // reasonable performance in practice.
54
55 // Also note that if FFTW_ESTIMATE is not used then the planner in FFTW3
56 // will destroy the input and output arrays. We must, therefore, create a
57 // temporary input array with the same size and 16-byte alignment as
58 // the original array when using a different planner strategy.
59 // Note that we also use any wisdom that is available, either in a
60 // FFTW3 system wide file or as supplied by the user.
61
62 // FIXME: if we can ensure 16 byte alignment in Array<T>
63 // (<T> *data) the FFTW3 can use SIMD instructions for further
64 // acceleration.
65
66 // Note that it is profitable to store the FFTW3 plans, for small FFTs.
67
69 : m_meth (ESTIMATE), m_rplan (nullptr), m_rd (0), m_rs (0), m_rr (0),
70 m_rh (0), m_rn (), m_rsimd_align (false), m_nthreads (1)
71 {
72 m_plan[0] = m_plan[1] = nullptr;
73 m_d[0] = m_d[1] = m_s[0] = m_s[1] = m_r[0] = m_r[1] = m_h[0] = m_h[1] = 0;
74 m_simd_align[0] = m_simd_align[1] = false;
75 m_inplace[0] = m_inplace[1] = false;
76 m_n[0] = m_n[1] = dim_vector ();
77
78#if defined (HAVE_FFTW3_THREADS)
79 int init_ret = fftw_init_threads ();
80 if (! init_ret)
81 (*current_liboctave_error_handler) ("Error initializing FFTW threads");
82
83 // Use number of processors available to the current process
84 // This can be later changed with fftw ("threads", nthreads).
87
88 fftw_plan_with_nthreads (m_nthreads);
89#endif
90
91 // If we have a system wide wisdom file, import it.
92 fftw_import_system_wisdom ();
93 }
94
96 {
97 fftw_plan *plan_p;
98
99 plan_p = reinterpret_cast<fftw_plan *> (&m_rplan);
100 if (*plan_p)
101 fftw_destroy_plan (*plan_p);
102
103 plan_p = reinterpret_cast<fftw_plan *> (&m_plan[0]);
104 if (*plan_p)
105 fftw_destroy_plan (*plan_p);
106
107 plan_p = reinterpret_cast<fftw_plan *> (&m_plan[1]);
108 if (*plan_p)
109 fftw_destroy_plan (*plan_p);
110 }
111
112 bool
114 {
115 bool retval = true;
116
117 if (! s_instance)
118 {
119 s_instance = new fftw_planner ();
121 }
122
123 return retval;
124 }
125
126 void
128 {
129#if defined (HAVE_FFTW3_THREADS)
130 if (instance_ok () && nt != threads ())
131 {
133 fftw_plan_with_nthreads (nt);
134 // Clear the current plans.
135 s_instance->m_rplan = nullptr;
136 s_instance->m_plan[0] = s_instance->m_plan[1] = nullptr;
137 }
138#else
139 octave_unused_parameter (nt);
140
141 (*current_liboctave_warning_handler)
142 ("unable to change number of threads without FFTW thread support");
143#endif
144 }
145
// True when pointer X is 16-byte aligned (low four address bits zero).
#define CHECK_SIMD_ALIGNMENT(x)                         \
  (((reinterpret_cast<std::ptrdiff_t> (x)) & 0xF) == 0)
148
149 void *
150 fftw_planner::do_create_plan (int dir, const int rank,
151 const dim_vector& dims,
152 octave_idx_type howmany,
153 octave_idx_type stride,
154 octave_idx_type dist,
155 const Complex *in, Complex *out)
156 {
157 int which = (dir == FFTW_FORWARD) ? 0 : 1;
158 fftw_plan *cur_plan_p = reinterpret_cast<fftw_plan *> (&m_plan[which]);
159 bool create_new_plan = false;
160 bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out);
161 bool ioinplace = (in == out);
162
163 // Don't create a new plan if we have a non SIMD plan already but
164 // can do SIMD. This prevents endlessly recreating plans if we
165 // change the alignment.
166
167 if (m_plan[which] == nullptr || m_d[which] != dist || m_s[which] != stride
168 || m_r[which] != rank || m_h[which] != howmany
169 || ioinplace != m_inplace[which]
170 || ((ioalign != m_simd_align[which]) ? ! ioalign : false))
171 create_new_plan = true;
172 else
173 {
174 // We still might not have the same shape of array.
175
176 for (int i = 0; i < rank; i++)
177 if (dims(i) != m_n[which](i))
178 {
179 create_new_plan = true;
180 break;
181 }
182 }
183
184 if (create_new_plan)
185 {
186 m_d[which] = dist;
187 m_s[which] = stride;
188 m_r[which] = rank;
189 m_h[which] = howmany;
190 m_simd_align[which] = ioalign;
191 m_inplace[which] = ioinplace;
192 m_n[which] = dims;
193
194 // Note reversal of dimensions for column major storage in FFTW.
196 OCTAVE_LOCAL_BUFFER (int, tmp, rank);
197
198 for (int i = 0, j = rank-1; i < rank; i++, j--)
199 {
200 tmp[i] = dims(j);
201 nn *= dims(j);
202 }
203
204 int plan_flags = 0;
205 bool plan_destroys_in = true;
206
207 switch (m_meth)
208 {
209 case UNKNOWN:
210 case ESTIMATE:
211 plan_flags |= FFTW_ESTIMATE;
212 plan_destroys_in = false;
213 break;
214 case MEASURE:
215 plan_flags |= FFTW_MEASURE;
216 break;
217 case PATIENT:
218 plan_flags |= FFTW_PATIENT;
219 break;
220 case EXHAUSTIVE:
221 plan_flags |= FFTW_EXHAUSTIVE;
222 break;
223 case HYBRID:
224 if (nn < 8193)
225 plan_flags |= FFTW_MEASURE;
226 else
227 {
228 plan_flags |= FFTW_ESTIMATE;
229 plan_destroys_in = false;
230 }
231 break;
232 }
233
234 if (ioalign)
235 plan_flags &= ~FFTW_UNALIGNED;
236 else
237 plan_flags |= FFTW_UNALIGNED;
238
239 if (*cur_plan_p)
240 fftw_destroy_plan (*cur_plan_p);
241
242 if (plan_destroys_in)
243 {
244 // Create matrix with the same size and 16-byte alignment as input
245 OCTAVE_LOCAL_BUFFER (Complex, itmp, nn * howmany + 32);
246 itmp = reinterpret_cast<Complex *>
247 (((reinterpret_cast<std::ptrdiff_t> (itmp) + 15) & ~ 0xF) +
248 ((reinterpret_cast<std::ptrdiff_t> (in)) & 0xF));
249
250 *cur_plan_p
251 = fftw_plan_many_dft (rank, tmp, howmany,
252 reinterpret_cast<fftw_complex *> (itmp),
253 nullptr, stride, dist,
254 reinterpret_cast<fftw_complex *> (out),
255 nullptr, stride, dist, dir, plan_flags);
256 }
257 else
258 {
259 *cur_plan_p
260 = fftw_plan_many_dft (rank, tmp, howmany,
261 reinterpret_cast<fftw_complex *> (const_cast<Complex *> (in)),
262 nullptr, stride, dist,
263 reinterpret_cast<fftw_complex *> (out),
264 nullptr, stride, dist, dir, plan_flags);
265 }
266
267 if (*cur_plan_p == nullptr)
268 (*current_liboctave_error_handler) ("Error creating FFTW plan");
269 }
270
271 return *cur_plan_p;
272 }
273
274 void *
275 fftw_planner::do_create_plan (const int rank, const dim_vector& dims,
276 octave_idx_type howmany,
277 octave_idx_type stride,
278 octave_idx_type dist,
279 const double *in, Complex *out)
280 {
281 fftw_plan *cur_plan_p = reinterpret_cast<fftw_plan *> (&m_rplan);
282 bool create_new_plan = false;
283 bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out);
284
285 // Don't create a new plan if we have a non SIMD plan already but
286 // can do SIMD. This prevents endlessly recreating plans if we
287 // change the alignment.
288
289 if (m_rplan == nullptr || m_rd != dist || m_rs != stride || m_rr != rank
290 || m_rh != howmany || ((ioalign != m_rsimd_align) ? ! ioalign : false))
291 create_new_plan = true;
292 else
293 {
294 // We still might not have the same shape of array.
295
296 for (int i = 0; i < rank; i++)
297 if (dims(i) != m_rn(i))
298 {
299 create_new_plan = true;
300 break;
301 }
302 }
303
304 if (create_new_plan)
305 {
306 m_rd = dist;
307 m_rs = stride;
308 m_rr = rank;
309 m_rh = howmany;
310 m_rsimd_align = ioalign;
311 m_rn = dims;
312
313 // Note reversal of dimensions for column major storage in FFTW.
315 OCTAVE_LOCAL_BUFFER (int, tmp, rank);
316
317 for (int i = 0, j = rank-1; i < rank; i++, j--)
318 {
319 tmp[i] = dims(j);
320 nn *= dims(j);
321 }
322
323 int plan_flags = 0;
324 bool plan_destroys_in = true;
325
326 switch (m_meth)
327 {
328 case UNKNOWN:
329 case ESTIMATE:
330 plan_flags |= FFTW_ESTIMATE;
331 plan_destroys_in = false;
332 break;
333 case MEASURE:
334 plan_flags |= FFTW_MEASURE;
335 break;
336 case PATIENT:
337 plan_flags |= FFTW_PATIENT;
338 break;
339 case EXHAUSTIVE:
340 plan_flags |= FFTW_EXHAUSTIVE;
341 break;
342 case HYBRID:
343 if (nn < 8193)
344 plan_flags |= FFTW_MEASURE;
345 else
346 {
347 plan_flags |= FFTW_ESTIMATE;
348 plan_destroys_in = false;
349 }
350 break;
351 }
352
353 if (ioalign)
354 plan_flags &= ~FFTW_UNALIGNED;
355 else
356 plan_flags |= FFTW_UNALIGNED;
357
358 if (*cur_plan_p)
359 fftw_destroy_plan (*cur_plan_p);
360
361 if (plan_destroys_in)
362 {
363 // Create matrix with the same size and 16-byte alignment as input
364 OCTAVE_LOCAL_BUFFER (double, itmp, nn + 32);
365 itmp = reinterpret_cast<double *>
366 (((reinterpret_cast<std::ptrdiff_t> (itmp) + 15) & ~ 0xF) +
367 ((reinterpret_cast<std::ptrdiff_t> (in)) & 0xF));
368
369 *cur_plan_p
370 = fftw_plan_many_dft_r2c (rank, tmp, howmany, itmp,
371 nullptr, stride, dist,
372 reinterpret_cast<fftw_complex *> (out),
373 nullptr, stride, dist, plan_flags);
374 }
375 else
376 {
377 *cur_plan_p
378 = fftw_plan_many_dft_r2c (rank, tmp, howmany,
379 (const_cast<double *> (in)),
380 nullptr, stride, dist,
381 reinterpret_cast<fftw_complex *> (out),
382 nullptr, stride, dist, plan_flags);
383 }
384
385 if (*cur_plan_p == nullptr)
386 (*current_liboctave_error_handler) ("Error creating FFTW plan");
387 }
388
389 return *cur_plan_p;
390 }
391
394 {
395 return m_meth;
396 }
397
400 {
401 FftwMethod ret = m_meth;
402 if (_meth == ESTIMATE || _meth == MEASURE
403 || _meth == PATIENT || _meth == EXHAUSTIVE
404 || _meth == HYBRID)
405 {
406 if (m_meth != _meth)
407 {
408 m_meth = _meth;
409 if (m_rplan)
410 fftw_destroy_plan (reinterpret_cast<fftw_plan> (m_rplan));
411 if (m_plan[0])
412 fftw_destroy_plan (reinterpret_cast<fftw_plan> (m_plan[0]));
413 if (m_plan[1])
414 fftw_destroy_plan (reinterpret_cast<fftw_plan> (m_plan[1]));
415 m_rplan = m_plan[0] = m_plan[1] = nullptr;
416 }
417 }
418 else
419 ret = UNKNOWN;
420 return ret;
421 }
422
424
426 : m_meth (ESTIMATE), m_rplan (nullptr), m_rd (0), m_rs (0), m_rr (0),
427 m_rh (0), m_rn (), m_rsimd_align (false), m_nthreads (1)
428 {
429 m_plan[0] = m_plan[1] = nullptr;
430 m_d[0] = m_d[1] = m_s[0] = m_s[1] = m_r[0] = m_r[1] = m_h[0] = m_h[1] = 0;
431 m_simd_align[0] = m_simd_align[1] = false;
432 m_inplace[0] = m_inplace[1] = false;
433 m_n[0] = m_n[1] = dim_vector ();
434
435#if defined (HAVE_FFTW3F_THREADS)
436 int init_ret = fftwf_init_threads ();
437 if (! init_ret)
438 (*current_liboctave_error_handler) ("Error initializing FFTW3F threads");
439
440 // Use number of processors available to the current process
441 // This can be later changed with fftw ("threads", nthreads).
442 m_nthreads =
444
445 fftwf_plan_with_nthreads (m_nthreads);
446#endif
447
448 // If we have a system wide wisdom file, import it.
449 fftwf_import_system_wisdom ();
450 }
451
453 {
454 fftwf_plan *plan_p;
455
456 plan_p = reinterpret_cast<fftwf_plan *> (&m_rplan);
457 if (*plan_p)
458 fftwf_destroy_plan (*plan_p);
459
460 plan_p = reinterpret_cast<fftwf_plan *> (&m_plan[0]);
461 if (*plan_p)
462 fftwf_destroy_plan (*plan_p);
463
464 plan_p = reinterpret_cast<fftwf_plan *> (&m_plan[1]);
465 if (*plan_p)
466 fftwf_destroy_plan (*plan_p);
467 }
468
469 bool
471 {
472 bool retval = true;
473
474 if (! s_instance)
475 {
478 }
479
480 return retval;
481 }
482
483 void
485 {
486#if defined (HAVE_FFTW3F_THREADS)
487 if (instance_ok () && nt != threads ())
488 {
490 fftwf_plan_with_nthreads (nt);
491 // Clear the current plans.
492 s_instance->m_rplan = nullptr;
493 s_instance->m_plan[0] = s_instance->m_plan[1] = nullptr;
494 }
495#else
496 octave_unused_parameter (nt);
497
498 (*current_liboctave_warning_handler)
499 ("unable to change number of threads without FFTW thread support");
500#endif
501 }
502
503 void *
504 float_fftw_planner::do_create_plan (int dir, const int rank,
505 const dim_vector& dims,
506 octave_idx_type howmany,
507 octave_idx_type stride,
508 octave_idx_type dist,
509 const FloatComplex *in,
510 FloatComplex *out)
511 {
512 int which = (dir == FFTW_FORWARD) ? 0 : 1;
513 fftwf_plan *cur_plan_p = reinterpret_cast<fftwf_plan *> (&m_plan[which]);
514 bool create_new_plan = false;
515 bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out);
516 bool ioinplace = (in == out);
517
518 // Don't create a new plan if we have a non SIMD plan already but
519 // can do SIMD. This prevents endlessly recreating plans if we
520 // change the alignment.
521
522 if (m_plan[which] == nullptr || m_d[which] != dist || m_s[which] != stride
523 || m_r[which] != rank || m_h[which] != howmany
524 || ioinplace != m_inplace[which]
525 || ((ioalign != m_simd_align[which]) ? ! ioalign : false))
526 create_new_plan = true;
527 else
528 {
529 // We still might not have the same shape of array.
530
531 for (int i = 0; i < rank; i++)
532 if (dims(i) != m_n[which](i))
533 {
534 create_new_plan = true;
535 break;
536 }
537 }
538
539 if (create_new_plan)
540 {
541 m_d[which] = dist;
542 m_s[which] = stride;
543 m_r[which] = rank;
544 m_h[which] = howmany;
545 m_simd_align[which] = ioalign;
546 m_inplace[which] = ioinplace;
547 m_n[which] = dims;
548
549 // Note reversal of dimensions for column major storage in FFTW.
551 OCTAVE_LOCAL_BUFFER (int, tmp, rank);
552
553 for (int i = 0, j = rank-1; i < rank; i++, j--)
554 {
555 tmp[i] = dims(j);
556 nn *= dims(j);
557 }
558
559 int plan_flags = 0;
560 bool plan_destroys_in = true;
561
562 switch (m_meth)
563 {
564 case UNKNOWN:
565 case ESTIMATE:
566 plan_flags |= FFTW_ESTIMATE;
567 plan_destroys_in = false;
568 break;
569 case MEASURE:
570 plan_flags |= FFTW_MEASURE;
571 break;
572 case PATIENT:
573 plan_flags |= FFTW_PATIENT;
574 break;
575 case EXHAUSTIVE:
576 plan_flags |= FFTW_EXHAUSTIVE;
577 break;
578 case HYBRID:
579 if (nn < 8193)
580 plan_flags |= FFTW_MEASURE;
581 else
582 {
583 plan_flags |= FFTW_ESTIMATE;
584 plan_destroys_in = false;
585 }
586 break;
587 }
588
589 if (ioalign)
590 plan_flags &= ~FFTW_UNALIGNED;
591 else
592 plan_flags |= FFTW_UNALIGNED;
593
594 if (*cur_plan_p)
595 fftwf_destroy_plan (*cur_plan_p);
596
597 if (plan_destroys_in)
598 {
599 // Create matrix with the same size and 16-byte alignment as input
600 OCTAVE_LOCAL_BUFFER (FloatComplex, itmp, nn * howmany + 32);
601 itmp = reinterpret_cast<FloatComplex *>
602 (((reinterpret_cast<std::ptrdiff_t> (itmp) + 15) & ~ 0xF) +
603 ((reinterpret_cast<std::ptrdiff_t> (in)) & 0xF));
604
605 *cur_plan_p
606 = fftwf_plan_many_dft (rank, tmp, howmany,
607 reinterpret_cast<fftwf_complex *> (itmp),
608 nullptr, stride, dist,
609 reinterpret_cast<fftwf_complex *> (out),
610 nullptr, stride, dist, dir, plan_flags);
611 }
612 else
613 {
614 *cur_plan_p
615 = fftwf_plan_many_dft (rank, tmp, howmany,
616 reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *> (in)),
617 nullptr, stride, dist,
618 reinterpret_cast<fftwf_complex *> (out),
619 nullptr, stride, dist, dir, plan_flags);
620 }
621
622 if (*cur_plan_p == nullptr)
623 (*current_liboctave_error_handler) ("Error creating FFTW plan");
624 }
625
626 return *cur_plan_p;
627 }
628
629 void *
630 float_fftw_planner::do_create_plan (const int rank, const dim_vector& dims,
631 octave_idx_type howmany,
632 octave_idx_type stride,
633 octave_idx_type dist,
634 const float *in, FloatComplex *out)
635 {
636 fftwf_plan *cur_plan_p = reinterpret_cast<fftwf_plan *> (&m_rplan);
637 bool create_new_plan = false;
638 bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out);
639
640 // Don't create a new plan if we have a non SIMD plan already but
641 // can do SIMD. This prevents endlessly recreating plans if we
642 // change the alignment.
643
644 if (m_rplan == nullptr || m_rd != dist || m_rs != stride || m_rr != rank
645 || m_rh != howmany || ((ioalign != m_rsimd_align) ? ! ioalign : false))
646 create_new_plan = true;
647 else
648 {
649 // We still might not have the same shape of array.
650
651 for (int i = 0; i < rank; i++)
652 if (dims(i) != m_rn(i))
653 {
654 create_new_plan = true;
655 break;
656 }
657 }
658
659 if (create_new_plan)
660 {
661 m_rd = dist;
662 m_rs = stride;
663 m_rr = rank;
664 m_rh = howmany;
665 m_rsimd_align = ioalign;
666 m_rn = dims;
667
668 // Note reversal of dimensions for column major storage in FFTW.
670 OCTAVE_LOCAL_BUFFER (int, tmp, rank);
671
672 for (int i = 0, j = rank-1; i < rank; i++, j--)
673 {
674 tmp[i] = dims(j);
675 nn *= dims(j);
676 }
677
678 int plan_flags = 0;
679 bool plan_destroys_in = true;
680
681 switch (m_meth)
682 {
683 case UNKNOWN:
684 case ESTIMATE:
685 plan_flags |= FFTW_ESTIMATE;
686 plan_destroys_in = false;
687 break;
688 case MEASURE:
689 plan_flags |= FFTW_MEASURE;
690 break;
691 case PATIENT:
692 plan_flags |= FFTW_PATIENT;
693 break;
694 case EXHAUSTIVE:
695 plan_flags |= FFTW_EXHAUSTIVE;
696 break;
697 case HYBRID:
698 if (nn < 8193)
699 plan_flags |= FFTW_MEASURE;
700 else
701 {
702 plan_flags |= FFTW_ESTIMATE;
703 plan_destroys_in = false;
704 }
705 break;
706 }
707
708 if (ioalign)
709 plan_flags &= ~FFTW_UNALIGNED;
710 else
711 plan_flags |= FFTW_UNALIGNED;
712
713 if (*cur_plan_p)
714 fftwf_destroy_plan (*cur_plan_p);
715
716 if (plan_destroys_in)
717 {
718 // Create matrix with the same size and 16-byte alignment as input
719 OCTAVE_LOCAL_BUFFER (float, itmp, nn + 32);
720 itmp = reinterpret_cast<float *>
721 (((reinterpret_cast<std::ptrdiff_t> (itmp) + 15) & ~ 0xF) +
722 ((reinterpret_cast<std::ptrdiff_t> (in)) & 0xF));
723
724 *cur_plan_p
725 = fftwf_plan_many_dft_r2c (rank, tmp, howmany, itmp,
726 nullptr, stride, dist,
727 reinterpret_cast<fftwf_complex *> (out),
728 nullptr, stride, dist, plan_flags);
729 }
730 else
731 {
732 *cur_plan_p
733 = fftwf_plan_many_dft_r2c (rank, tmp, howmany,
734 (const_cast<float *> (in)),
735 nullptr, stride, dist,
736 reinterpret_cast<fftwf_complex *> (out),
737 nullptr, stride, dist, plan_flags);
738 }
739
740 if (*cur_plan_p == nullptr)
741 (*current_liboctave_error_handler) ("Error creating FFTW plan");
742 }
743
744 return *cur_plan_p;
745 }
746
749 {
750 return m_meth;
751 }
752
755 {
756 FftwMethod ret = m_meth;
757 if (_meth == ESTIMATE || _meth == MEASURE
758 || _meth == PATIENT || _meth == EXHAUSTIVE
759 || _meth == HYBRID)
760 {
761 if (m_meth != _meth)
762 {
763 m_meth = _meth;
764 if (m_rplan)
765 fftwf_destroy_plan (reinterpret_cast<fftwf_plan> (m_rplan));
766 if (m_plan[0])
767 fftwf_destroy_plan (reinterpret_cast<fftwf_plan> (m_plan[0]));
768 if (m_plan[1])
769 fftwf_destroy_plan (reinterpret_cast<fftwf_plan> (m_plan[1]));
770 m_rplan = m_plan[0] = m_plan[1] = nullptr;
771 }
772 }
773 else
774 ret = UNKNOWN;
775 return ret;
776 }
777
778 template <typename T>
779 static inline void
780 convert_packcomplex_1d (T *out, std::size_t nr, std::size_t nc,
781 octave_idx_type stride, octave_idx_type dist)
782 {
783 octave_quit ();
784
785 // Fill in the missing data.
786
787 for (std::size_t i = 0; i < nr; i++)
788 for (std::size_t j = nc/2+1; j < nc; j++)
789 out[j*stride + i*dist] = conj (out[(nc - j)*stride + i*dist]);
790
791 octave_quit ();
792 }
793
794 template <typename T>
795 static inline void
797 {
798 std::size_t nc = dv(0);
799 std::size_t nr = dv(1);
800 std::size_t np = (dv.ndims () > 2 ? dv.numel () / nc / nr : 1);
801 std::size_t nrp = nr * np;
802 T *ptr1, *ptr2;
803
804 octave_quit ();
805
806 // Create space for the missing elements.
807
808 for (std::size_t i = 0; i < nrp; i++)
809 {
810 ptr1 = out + i * (nc/2 + 1) + nrp*((nc-1)/2);
811 ptr2 = out + i * nc;
812 for (std::size_t j = 0; j < nc/2+1; j++)
813 *ptr2++ = *ptr1++;
814 }
815
816 octave_quit ();
817
818 // Fill in the missing data for the rank = 2 case directly for speed.
819
820 for (std::size_t i = 0; i < np; i++)
821 {
822 for (std::size_t j = 1; j < nr; j++)
823 for (std::size_t k = nc/2+1; k < nc; k++)
824 out[k + (j + i*nr)*nc] = conj (out[nc - k + ((i+1)*nr - j)*nc]);
825
826 for (std::size_t j = nc/2+1; j < nc; j++)
827 out[j + i*nr*nc] = conj (out[(i*nr+1)*nc - j]);
828 }
829
830 octave_quit ();
831
832 // Now do the permutations needed for rank > 2 cases.
833
834 std::size_t jstart = dv(0) * dv(1);
835 std::size_t kstep = dv(0);
836 std::size_t nel = dv.numel ();
837
838 for (int inner = 2; inner < dv.ndims (); inner++)
839 {
840 std::size_t jmax = jstart * dv(inner);
841 for (std::size_t i = 0; i < nel; i+=jmax)
842 for (std::size_t j = jstart, jj = jmax-jstart; j < jj;
843 j+=jstart, jj-=jstart)
844 for (std::size_t k = 0; k < jstart; k+= kstep)
845 for (std::size_t l = nc/2+1; l < nc; l++)
846 {
847 T tmp = out[i+ j + k + l];
848 out[i + j + k + l] = out[i + jj + k + l];
849 out[i + jj + k + l] = tmp;
850 }
851 jstart = jmax;
852 }
853
854 octave_quit ();
855 }
856
857 int
858 fftw::fft (const double *in, Complex *out, std::size_t npts,
859 std::size_t nsamples, octave_idx_type stride,
860 octave_idx_type dist)
861 {
862 dist = (dist < 0 ? npts : dist);
863
864 dim_vector dv (npts, 1);
865 void *vplan = fftw_planner::create_plan (1, dv, nsamples,
866 stride, dist, in, out);
867 fftw_plan m_plan = reinterpret_cast<fftw_plan> (vplan);
868
869 fftw_execute_dft_r2c (m_plan, (const_cast<double *>(in)),
870 reinterpret_cast<fftw_complex *> (out));
871
872 // Need to create other half of the transform.
873
874 convert_packcomplex_1d (out, nsamples, npts, stride, dist);
875
876 return 0;
877 }
878
879 int
880 fftw::fft (const Complex *in, Complex *out, std::size_t npts,
881 std::size_t nsamples, octave_idx_type stride,
882 octave_idx_type dist)
883 {
884 dist = (dist < 0 ? npts : dist);
885
886 dim_vector dv (npts, 1);
887 void *vplan = fftw_planner::create_plan (FFTW_FORWARD, 1, dv,
888 nsamples, stride,
889 dist, in, out);
890 fftw_plan m_plan = reinterpret_cast<fftw_plan> (vplan);
891
892 fftw_execute_dft (m_plan,
893 reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)),
894 reinterpret_cast<fftw_complex *> (out));
895
896 return 0;
897 }
898
899 int
900 fftw::ifft (const Complex *in, Complex *out, std::size_t npts,
901 std::size_t nsamples, octave_idx_type stride,
902 octave_idx_type dist)
903 {
904 dist = (dist < 0 ? npts : dist);
905
906 dim_vector dv (npts, 1);
907 void *vplan = fftw_planner::create_plan (FFTW_BACKWARD, 1, dv, nsamples,
908 stride, dist, in, out);
909 fftw_plan m_plan = reinterpret_cast<fftw_plan> (vplan);
910
911 fftw_execute_dft (m_plan,
912 reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)),
913 reinterpret_cast<fftw_complex *> (out));
914
915 const Complex scale = npts;
916 for (std::size_t j = 0; j < nsamples; j++)
917 for (std::size_t i = 0; i < npts; i++)
918 out[i*stride + j*dist] /= scale;
919
920 return 0;
921 }
922
923 int
924 fftw::fftNd (const double *in, Complex *out, const int rank,
925 const dim_vector& dv)
926 {
927 octave_idx_type dist = 1;
928 for (int i = 0; i < rank; i++)
929 dist *= dv(i);
930
931 // Fool with the position of the start of the output matrix, so that
932 // creating other half of the matrix won't cause cache problems.
933
934 octave_idx_type offset = (dv.numel () / dv(0)) * ((dv(0) - 1) / 2);
935
936 void *vplan = fftw_planner::create_plan (rank, dv, 1, 1, dist,
937 in, out + offset);
938 fftw_plan m_plan = reinterpret_cast<fftw_plan> (vplan);
939
940 fftw_execute_dft_r2c (m_plan, (const_cast<double *>(in)),
941 reinterpret_cast<fftw_complex *> (out+ offset));
942
943 // Need to create other half of the transform.
944
945 convert_packcomplex_Nd (out, dv);
946
947 return 0;
948 }
949
950 int
951 fftw::fftNd (const Complex *in, Complex *out, const int rank,
952 const dim_vector& dv)
953 {
954 octave_idx_type dist = 1;
955 for (int i = 0; i < rank; i++)
956 dist *= dv(i);
957
958 void *vplan = fftw_planner::create_plan (FFTW_FORWARD, rank, dv,
959 1, 1, dist, in, out);
960 fftw_plan m_plan = reinterpret_cast<fftw_plan> (vplan);
961
962 fftw_execute_dft (m_plan,
963 reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)),
964 reinterpret_cast<fftw_complex *> (out));
965
966 return 0;
967 }
968
969 int
970 fftw::ifftNd (const Complex *in, Complex *out, const int rank,
971 const dim_vector& dv)
972 {
973 octave_idx_type dist = 1;
974 for (int i = 0; i < rank; i++)
975 dist *= dv(i);
976
977 void *vplan = fftw_planner::create_plan (FFTW_BACKWARD, rank, dv,
978 1, 1, dist, in, out);
979 fftw_plan m_plan = reinterpret_cast<fftw_plan> (vplan);
980
981 fftw_execute_dft (m_plan,
982 reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)),
983 reinterpret_cast<fftw_complex *> (out));
984
985 const std::size_t npts = dv.numel ();
986 const Complex scale = npts;
987 for (std::size_t i = 0; i < npts; i++)
988 out[i] /= scale;
989
990 return 0;
991 }
992
993 int
994 fftw::fft (const float *in, FloatComplex *out, std::size_t npts,
995 std::size_t nsamples, octave_idx_type stride,
996 octave_idx_type dist)
997 {
998 dist = (dist < 0 ? npts : dist);
999
1000 dim_vector dv (npts, 1);
1001 void *vplan = float_fftw_planner::create_plan (1, dv, nsamples, stride,
1002 dist, in, out);
1003 fftwf_plan m_plan = reinterpret_cast<fftwf_plan> (vplan);
1004
1005 fftwf_execute_dft_r2c (m_plan, (const_cast<float *>(in)),
1006 reinterpret_cast<fftwf_complex *> (out));
1007
1008 // Need to create other half of the transform.
1009
1010 convert_packcomplex_1d (out, nsamples, npts, stride, dist);
1011
1012 return 0;
1013 }
1014
1015 int
1016 fftw::fft (const FloatComplex *in, FloatComplex *out, std::size_t npts,
1017 std::size_t nsamples, octave_idx_type stride,
1018 octave_idx_type dist)
1019 {
1020 dist = (dist < 0 ? npts : dist);
1021
1022 dim_vector dv (npts, 1);
1023 void *vplan = float_fftw_planner::create_plan (FFTW_FORWARD, 1, dv,
1024 nsamples, stride, dist,
1025 in, out);
1026 fftwf_plan m_plan = reinterpret_cast<fftwf_plan> (vplan);
1027
1028 fftwf_execute_dft (m_plan,
1029 reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)),
1030 reinterpret_cast<fftwf_complex *> (out));
1031
1032 return 0;
1033 }
1034
1035 int
1036 fftw::ifft (const FloatComplex *in, FloatComplex *out, std::size_t npts,
1037 std::size_t nsamples, octave_idx_type stride,
1038 octave_idx_type dist)
1039 {
1040 dist = (dist < 0 ? npts : dist);
1041
1042 dim_vector dv (npts, 1);
1043 void *vplan = float_fftw_planner::create_plan (FFTW_BACKWARD, 1, dv,
1044 nsamples, stride, dist,
1045 in, out);
1046 fftwf_plan m_plan = reinterpret_cast<fftwf_plan> (vplan);
1047
1048 fftwf_execute_dft (m_plan,
1049 reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)),
1050 reinterpret_cast<fftwf_complex *> (out));
1051
1052 const FloatComplex scale = npts;
1053 for (std::size_t j = 0; j < nsamples; j++)
1054 for (std::size_t i = 0; i < npts; i++)
1055 out[i*stride + j*dist] /= scale;
1056
1057 return 0;
1058 }
1059
1060 int
1061 fftw::fftNd (const float *in, FloatComplex *out, const int rank,
1062 const dim_vector& dv)
1063 {
1064 octave_idx_type dist = 1;
1065 for (int i = 0; i < rank; i++)
1066 dist *= dv(i);
1067
1068 // Fool with the position of the start of the output matrix, so that
1069 // creating other half of the matrix won't cause cache problems.
1070
1071 octave_idx_type offset = (dv.numel () / dv(0)) * ((dv(0) - 1) / 2);
1072
1073 void *vplan = float_fftw_planner::create_plan (rank, dv, 1, 1, dist,
1074 in, out + offset);
1075 fftwf_plan m_plan = reinterpret_cast<fftwf_plan> (vplan);
1076
1077 fftwf_execute_dft_r2c (m_plan, (const_cast<float *>(in)),
1078 reinterpret_cast<fftwf_complex *> (out+ offset));
1079
1080 // Need to create other half of the transform.
1081
1082 convert_packcomplex_Nd (out, dv);
1083
1084 return 0;
1085 }
1086
1087 int
1088 fftw::fftNd (const FloatComplex *in, FloatComplex *out, const int rank,
1089 const dim_vector& dv)
1090 {
1091 octave_idx_type dist = 1;
1092 for (int i = 0; i < rank; i++)
1093 dist *= dv(i);
1094
1095 void *vplan = float_fftw_planner::create_plan (FFTW_FORWARD, rank, dv,
1096 1, 1, dist, in, out);
1097 fftwf_plan m_plan = reinterpret_cast<fftwf_plan> (vplan);
1098
1099 fftwf_execute_dft (m_plan,
1100 reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)),
1101 reinterpret_cast<fftwf_complex *> (out));
1102
1103 return 0;
1104 }
1105
1106 int
1107 fftw::ifftNd (const FloatComplex *in, FloatComplex *out, const int rank,
1108 const dim_vector& dv)
1109 {
1110 octave_idx_type dist = 1;
1111 for (int i = 0; i < rank; i++)
1112 dist *= dv(i);
1113
1114 void *vplan = float_fftw_planner::create_plan (FFTW_BACKWARD, rank, dv,
1115 1, 1, dist, in, out);
1116 fftwf_plan m_plan = reinterpret_cast<fftwf_plan> (vplan);
1117
1118 fftwf_execute_dft (m_plan,
1119 reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)),
1120 reinterpret_cast<fftwf_complex *> (out));
1121
1122 const std::size_t npts = dv.numel ();
1123 const FloatComplex scale = npts;
1124 for (std::size_t i = 0; i < npts; i++)
1125 out[i] /= scale;
1126
1127 return 0;
1128 }
1129
1130#endif
1131
1132 std::string
1134 {
1135#if defined (HAVE_FFTW)
1137#else
1138 return "none";
1139#endif
1140 }
1141
1142 std::string
1144 {
1145#if defined (HAVE_FFTW)
1147#else
1148 return "none";
1149#endif
1150 }
1151}
ComplexColumnVector conj(const ComplexColumnVector &a)
Definition: CColVector.cc:217
static F77_INT nn
Definition: DASPK.cc:66
Vector representing the dimensions (size) of an Array.
Definition: dim-vector.h:94
octave_idx_type numel(int n=0) const
Number of elements that a matrix with this dimensions would have.
Definition: dim-vector.h:335
octave_idx_type ndims(void) const
Number of dimensions.
Definition: dim-vector.h:257
octave_idx_type m_rs
Definition: oct-fftw.h:168
void * m_plan[2]
Definition: oct-fftw.h:141
octave_idx_type m_d[2]
Definition: oct-fftw.h:144
dim_vector m_n[2]
Definition: oct-fftw.h:156
FftwMethod m_meth
Definition: oct-fftw.h:136
static fftw_planner * s_instance
Definition: oct-fftw.h:116
FftwMethod do_method(void)
Definition: oct-fftw.cc:393
static int threads(void)
Definition: oct-fftw.h:109
static void * create_plan(int dir, const int rank, const dim_vector &dims, octave_idx_type howmany, octave_idx_type stride, octave_idx_type dist, const Complex *in, Complex *out)
Definition: oct-fftw.h:71
octave_idx_type m_rd
Definition: oct-fftw.h:165
octave_idx_type m_h[2]
Definition: oct-fftw.h:153
dim_vector m_rn
Definition: oct-fftw.h:177
octave_idx_type m_rh
Definition: oct-fftw.h:174
bool m_simd_align[2]
Definition: oct-fftw.h:158
static bool instance_ok(void)
Definition: oct-fftw.cc:113
static void cleanup_instance(void)
Definition: oct-fftw.h:118
void * do_create_plan(int dir, const int rank, const dim_vector &dims, octave_idx_type howmany, octave_idx_type stride, octave_idx_type dist, const Complex *in, Complex *out)
Definition: oct-fftw.cc:150
octave_idx_type m_s[2]
Definition: oct-fftw.h:147
static int ifft(const Complex *in, Complex *out, std::size_t npts, std::size_t nsamples=1, octave_idx_type stride=1, octave_idx_type dist=-1)
Definition: oct-fftw.cc:900
static int fftNd(const double *, Complex *, const int, const dim_vector &)
Definition: oct-fftw.cc:924
static int ifftNd(const Complex *, Complex *, const int, const dim_vector &)
Definition: oct-fftw.cc:970
static int fft(const double *in, Complex *out, std::size_t npts, std::size_t nsamples=1, octave_idx_type stride=1, octave_idx_type dist=-1)
Definition: oct-fftw.cc:858
static void cleanup_instance(void)
Definition: oct-fftw.h:265
octave_idx_type m_d[2]
Definition: oct-fftw.h:291
octave_idx_type m_rd
Definition: oct-fftw.h:312
void * do_create_plan(int dir, const int rank, const dim_vector &dims, octave_idx_type howmany, octave_idx_type stride, octave_idx_type dist, const FloatComplex *in, FloatComplex *out)
Definition: oct-fftw.cc:504
static bool instance_ok(void)
Definition: oct-fftw.cc:470
static void * create_plan(int dir, const int rank, const dim_vector &dims, octave_idx_type howmany, octave_idx_type stride, octave_idx_type dist, const FloatComplex *in, FloatComplex *out)
Definition: oct-fftw.h:218
octave_idx_type m_h[2]
Definition: oct-fftw.h:300
FftwMethod do_method(void)
Definition: oct-fftw.cc:748
static float_fftw_planner * s_instance
Definition: oct-fftw.h:263
octave_idx_type m_rs
Definition: oct-fftw.h:315
octave_idx_type m_s[2]
Definition: oct-fftw.h:294
static int threads(void)
Definition: oct-fftw.h:256
octave_idx_type m_rh
Definition: oct-fftw.h:321
static void add(fptr f)
void scale(Matrix &m, double x, double y, double z)
Definition: graphics.cc:5893
static void convert_packcomplex_Nd(T *out, const dim_vector &dv)
Definition: oct-fftw.cc:796
std::string fftw_version(void)
Definition: oct-fftw.cc:1133
std::string fftwf_version(void)
Definition: oct-fftw.cc:1143
static void convert_packcomplex_1d(T *out, std::size_t nr, std::size_t nc, octave_idx_type stride, octave_idx_type dist)
Definition: oct-fftw.cc:780
unsigned long int octave_num_processors_wrapper(enum octave_nproc_query octave_query)
Definition: nproc-wrapper.c:40
@ OCTAVE_NPROC_CURRENT_OVERRIDABLE
Definition: nproc-wrapper.h:37
std::complex< double > Complex
Definition: oct-cmplx.h:33
std::complex< float > FloatComplex
Definition: oct-cmplx.h:34
#define CHECK_SIMD_ALIGNMENT(x)
Definition: oct-fftw.cc:146
#define OCTAVE_LOCAL_BUFFER(T, buf, size)
Definition: oct-locbuf.h:44