GNU Octave 10.1.0
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab
 
Loading...
Searching...
No Matches
oct-fftw.cc
Go to the documentation of this file.
1////////////////////////////////////////////////////////////////////////
2//
3// Copyright (C) 2001-2025 The Octave Project Developers
4//
5// See the file COPYRIGHT.md in the top-level directory of this
6// distribution or <https://octave.org/copyright/>.
7//
8// This file is part of Octave.
9//
10// Octave is free software: you can redistribute it and/or modify it
11// under the terms of the GNU General Public License as published by
12// the Free Software Foundation, either version 3 of the License, or
13// (at your option) any later version.
14//
15// Octave is distributed in the hope that it will be useful, but
16// WITHOUT ANY WARRANTY; without even the implied warranty of
17// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18// GNU General Public License for more details.
19//
20// You should have received a copy of the GNU General Public License
21// along with Octave; see the file COPYING. If not, see
22// <https://www.gnu.org/licenses/>.
23//
24////////////////////////////////////////////////////////////////////////
25
26#if defined (HAVE_CONFIG_H)
27# include "config.h"
28#endif
29
30#if defined (HAVE_FFTW3_H)
31# include <fftw3.h>
32#endif
33
34#include "lo-error.h"
35#include "oct-fftw.h"
36#include "oct-locbuf.h"
37#include "quit.h"
38#include "singleton-cleanup.h"
39
40#if defined (HAVE_FFTW3_THREADS) || defined (HAVE_FFTW3F_THREADS)
41# include "nproc-wrapper.h"
42#endif
43
45
46#if defined (HAVE_FFTW)
47
48fftw_planner *fftw_planner::s_instance = nullptr;
49
50// Helper class to create and cache FFTW plans for both 1D and
51// 2D. This implementation defaults to using FFTW_ESTIMATE to create
52// the plans, which in theory is suboptimal, but provides quite
53// reasonable performance in practice.
54
55// Also note that if FFTW_ESTIMATE is not used then the planner in FFTW3
56// will destroy the input and output arrays. We must, therefore, create a
57// temporary input array with the same size and 16-byte alignment as
58// the original array when using a different planner strategy.
59// Note that we also use any wisdom that is available, either in a
60// FFTW3 system wide file or as supplied by the user.
61
62// FIXME: if we can ensure 16 byte alignment in Array<T>
63// (<T> *data) the FFTW3 can use SIMD instructions for further
64// acceleration.
65
66// Note that it is profitable to store the FFTW3 plans, for small FFTs.
67
69 : m_meth (ESTIMATE), m_rplan (nullptr), m_rd (0), m_rs (0), m_rr (0),
70 m_rh (0), m_rn (), m_rsimd_align (false), m_nthreads (1)
71{
72 m_plan[0] = m_plan[1] = nullptr;
73 m_d[0] = m_d[1] = m_s[0] = m_s[1] = m_r[0] = m_r[1] = m_h[0] = m_h[1] = 0;
74 m_simd_align[0] = m_simd_align[1] = false;
75 m_inplace[0] = m_inplace[1] = false;
76 m_n[0] = m_n[1] = dim_vector ();
77
78#if defined (HAVE_FFTW3_THREADS)
79 int init_ret = fftw_init_threads ();
80 if (! init_ret)
81 (*current_liboctave_error_handler) ("Error initializing FFTW threads");
82
83 // Check number of processors available to the current process
84 m_nthreads =
86
87 // Limit number of threads to 3 by default
88 // See: https://octave.discourse.group/t/3121
89 // This can be later changed with fftw ("threads", nthreads).
90 if (m_nthreads > 3)
91 m_nthreads = 3;
92
93 fftw_plan_with_nthreads (m_nthreads);
94#endif
95
96 // If we have a system wide wisdom file, import it.
97 fftw_import_system_wisdom ();
98}
99
101{
102 fftw_plan *plan_p;
103
104 plan_p = reinterpret_cast<fftw_plan *> (&m_rplan);
105 if (*plan_p)
106 fftw_destroy_plan (*plan_p);
107
108 plan_p = reinterpret_cast<fftw_plan *> (&m_plan[0]);
109 if (*plan_p)
110 fftw_destroy_plan (*plan_p);
111
112 plan_p = reinterpret_cast<fftw_plan *> (&m_plan[1]);
113 if (*plan_p)
114 fftw_destroy_plan (*plan_p);
115}
116
117bool
119{
120 bool retval = true;
121
122 if (! s_instance)
123 {
124 s_instance = new fftw_planner ();
125 singleton_cleanup_list::add (cleanup_instance);
126 }
127
128 return retval;
129}
130
131void
133{
134#if defined (HAVE_FFTW3_THREADS)
135 if (instance_ok () && nt != threads ())
136 {
137 s_instance->m_nthreads = nt;
138 fftw_plan_with_nthreads (nt);
139 // Clear the current plans.
140 s_instance->m_rplan = nullptr;
141 s_instance->m_plan[0] = s_instance->m_plan[1] = nullptr;
142 }
143#else
144 octave_unused_parameter (nt);
145
146 (*current_liboctave_warning_handler)
147 ("unable to change number of threads without FFTW thread support");
148#endif
149}
150
151#define CHECK_SIMD_ALIGNMENT(x) \
152 (((reinterpret_cast<std::ptrdiff_t> (x)) & 0xF) == 0)
153
154void *
155fftw_planner::do_create_plan (int dir, const int rank,
156 const dim_vector& dims,
157 octave_idx_type howmany,
158 octave_idx_type stride,
159 octave_idx_type dist,
160 const Complex *in, Complex *out)
161{
162 int which = (dir == FFTW_FORWARD) ? 0 : 1;
163 fftw_plan *cur_plan_p = reinterpret_cast<fftw_plan *> (&m_plan[which]);
164 bool create_new_plan = false;
165 bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out);
166 bool ioinplace = (in == out);
167
168 // Don't create a new plan if we have a non SIMD plan already but
169 // can do SIMD. This prevents endlessly recreating plans if we
170 // change the alignment.
171
172 if (m_plan[which] == nullptr || m_d[which] != dist || m_s[which] != stride
173 || m_r[which] != rank || m_h[which] != howmany
174 || ioinplace != m_inplace[which]
175 || ((ioalign != m_simd_align[which]) ? ! ioalign : false))
176 create_new_plan = true;
177 else
178 {
179 // We still might not have the same shape of array.
180
181 for (int i = 0; i < rank; i++)
182 if (dims(i) != m_n[which](i))
183 {
184 create_new_plan = true;
185 break;
186 }
187 }
188
189 if (create_new_plan)
190 {
191 m_d[which] = dist;
192 m_s[which] = stride;
193 m_r[which] = rank;
194 m_h[which] = howmany;
195 m_simd_align[which] = ioalign;
196 m_inplace[which] = ioinplace;
197 m_n[which] = dims;
198
199 // Note reversal of dimensions for column major storage in FFTW.
200 octave_idx_type nn = 1;
201 OCTAVE_LOCAL_BUFFER (int, tmp, rank);
202
203 for (int i = 0, j = rank-1; i < rank; i++, j--)
204 {
205 tmp[i] = dims(j);
206 nn *= dims(j);
207 }
208
209 int plan_flags = 0;
210 bool plan_destroys_in = true;
211
212 switch (m_meth)
213 {
214 case UNKNOWN:
215 case ESTIMATE:
216 plan_flags |= FFTW_ESTIMATE;
217 plan_destroys_in = false;
218 break;
219 case MEASURE:
220 plan_flags |= FFTW_MEASURE;
221 break;
222 case PATIENT:
223 plan_flags |= FFTW_PATIENT;
224 break;
225 case EXHAUSTIVE:
226 plan_flags |= FFTW_EXHAUSTIVE;
227 break;
228 case HYBRID:
229 if (nn < 8193)
230 plan_flags |= FFTW_MEASURE;
231 else
232 {
233 plan_flags |= FFTW_ESTIMATE;
234 plan_destroys_in = false;
235 }
236 break;
237 }
238
239 if (ioalign)
240 plan_flags &= ~FFTW_UNALIGNED;
241 else
242 plan_flags |= FFTW_UNALIGNED;
243
244 if (*cur_plan_p)
245 fftw_destroy_plan (*cur_plan_p);
246
248 itmp = const_cast<Complex *> (in);
249 Complex *otmp = out;
250
251 if (plan_destroys_in)
252 {
253 // Create matrix with the same size and 16-byte alignment as input
254 OCTAVE_SCOPED_BUFFER (Complex, itmp, nn * howmany + 32);
255 itmp = reinterpret_cast<Complex *>
256 (((reinterpret_cast<std::ptrdiff_t> (itmp) + 15) & ~ 0xF)
257 + ((reinterpret_cast<std::ptrdiff_t> (in)) & 0xF));
258
259 if (in == out)
260 otmp = itmp;
261 }
262
263 *cur_plan_p
264 = fftw_plan_many_dft (rank, tmp, howmany,
265 reinterpret_cast<fftw_complex *> (itmp),
266 nullptr, stride, dist,
267 reinterpret_cast<fftw_complex *> (otmp),
268 nullptr, stride, dist, dir, plan_flags);
269
270 if (*cur_plan_p == nullptr)
271 (*current_liboctave_error_handler) ("Error creating FFTW plan");
272 }
273
274 return *cur_plan_p;
275}
276
277void *
278fftw_planner::do_create_plan (const int rank, const dim_vector& dims,
279 octave_idx_type howmany,
280 octave_idx_type stride,
281 octave_idx_type dist,
282 const double *in, Complex *out)
283{
284 fftw_plan *cur_plan_p = reinterpret_cast<fftw_plan *> (&m_rplan);
285 bool create_new_plan = false;
286 bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out);
287
288 // Don't create a new plan if we have a non SIMD plan already but
289 // can do SIMD. This prevents endlessly recreating plans if we
290 // change the alignment.
291
292 if (m_rplan == nullptr || m_rd != dist || m_rs != stride || m_rr != rank
293 || m_rh != howmany || ((ioalign != m_rsimd_align) ? ! ioalign : false))
294 create_new_plan = true;
295 else
296 {
297 // We still might not have the same shape of array.
298
299 for (int i = 0; i < rank; i++)
300 if (dims(i) != m_rn(i))
301 {
302 create_new_plan = true;
303 break;
304 }
305 }
306
307 if (create_new_plan)
308 {
309 m_rd = dist;
310 m_rs = stride;
311 m_rr = rank;
312 m_rh = howmany;
313 m_rsimd_align = ioalign;
314 m_rn = dims;
315
316 // Note reversal of dimensions for column major storage in FFTW.
317 octave_idx_type nn = 1;
318 OCTAVE_LOCAL_BUFFER (int, tmp, rank);
319
320 for (int i = 0, j = rank-1; i < rank; i++, j--)
321 {
322 tmp[i] = dims(j);
323 nn *= dims(j);
324 }
325
326 int plan_flags = 0;
327 bool plan_destroys_in = true;
328
329 switch (m_meth)
330 {
331 case UNKNOWN:
332 case ESTIMATE:
333 plan_flags |= FFTW_ESTIMATE;
334 plan_destroys_in = false;
335 break;
336 case MEASURE:
337 plan_flags |= FFTW_MEASURE;
338 break;
339 case PATIENT:
340 plan_flags |= FFTW_PATIENT;
341 break;
342 case EXHAUSTIVE:
343 plan_flags |= FFTW_EXHAUSTIVE;
344 break;
345 case HYBRID:
346 if (nn < 8193)
347 plan_flags |= FFTW_MEASURE;
348 else
349 {
350 plan_flags |= FFTW_ESTIMATE;
351 plan_destroys_in = false;
352 }
353 break;
354 }
355
356 if (ioalign)
357 plan_flags &= ~FFTW_UNALIGNED;
358 else
359 plan_flags |= FFTW_UNALIGNED;
360
361 if (*cur_plan_p)
362 fftw_destroy_plan (*cur_plan_p);
363
364 OCTAVE_SCOPED_BUFFER_ANCHOR (double, itmp);
365 itmp = const_cast<double *> (in);
366 Complex *otmp = out;
367
368 if (plan_destroys_in)
369 {
370 // Create matrix with the same size and 16-byte alignment as input
371 octave_idx_type in_place = reinterpret_cast<double *> (out) == in;
372 OCTAVE_SCOPED_BUFFER (double, itmp,
373 nn * howmany * (in_place + 1) + 32);
374 itmp = reinterpret_cast<double *>
375 (((reinterpret_cast<std::ptrdiff_t> (itmp) + 15) & ~ 0xF)
376 + ((reinterpret_cast<std::ptrdiff_t> (in)) & 0xF));
377
378 if (in_place)
379 otmp = reinterpret_cast<Complex *> (itmp);
380 }
381
382 *cur_plan_p
383 = fftw_plan_many_dft_r2c (rank, tmp, howmany, itmp,
384 nullptr, stride, dist,
385 reinterpret_cast<fftw_complex *> (otmp),
386 nullptr, stride, dist, plan_flags);
387
388 if (*cur_plan_p == nullptr)
389 (*current_liboctave_error_handler) ("Error creating FFTW plan");
390 }
391
392 return *cur_plan_p;
393}
394
396fftw_planner::do_method ()
397{
398 return m_meth;
399}
400
402fftw_planner::do_method (FftwMethod _meth)
403{
404 FftwMethod ret = m_meth;
405 if (_meth == ESTIMATE || _meth == MEASURE
406 || _meth == PATIENT || _meth == EXHAUSTIVE
407 || _meth == HYBRID)
408 {
409 if (m_meth != _meth)
410 {
411 m_meth = _meth;
412 if (m_rplan)
413 fftw_destroy_plan (reinterpret_cast<fftw_plan> (m_rplan));
414 if (m_plan[0])
415 fftw_destroy_plan (reinterpret_cast<fftw_plan> (m_plan[0]));
416 if (m_plan[1])
417 fftw_destroy_plan (reinterpret_cast<fftw_plan> (m_plan[1]));
418 m_rplan = m_plan[0] = m_plan[1] = nullptr;
419 }
420 }
421 else
422 ret = UNKNOWN;
423 return ret;
424}
425
426float_fftw_planner *float_fftw_planner::s_instance = nullptr;
427
429 : m_meth (ESTIMATE), m_rplan (nullptr), m_rd (0), m_rs (0), m_rr (0),
430 m_rh (0), m_rn (), m_rsimd_align (false), m_nthreads (1)
431{
432 m_plan[0] = m_plan[1] = nullptr;
433 m_d[0] = m_d[1] = m_s[0] = m_s[1] = m_r[0] = m_r[1] = m_h[0] = m_h[1] = 0;
434 m_simd_align[0] = m_simd_align[1] = false;
435 m_inplace[0] = m_inplace[1] = false;
436 m_n[0] = m_n[1] = dim_vector ();
437
438#if defined (HAVE_FFTW3F_THREADS)
439 int init_ret = fftwf_init_threads ();
440 if (! init_ret)
441 (*current_liboctave_error_handler) ("Error initializing FFTW3F threads");
442
443 // Use number of processors available to the current process
444 // This can be later changed with fftw ("threads", nthreads).
445 m_nthreads =
447
448 fftwf_plan_with_nthreads (m_nthreads);
449#endif
450
451 // If we have a system wide wisdom file, import it.
452 fftwf_import_system_wisdom ();
453}
454
456{
457 fftwf_plan *plan_p;
458
459 plan_p = reinterpret_cast<fftwf_plan *> (&m_rplan);
460 if (*plan_p)
461 fftwf_destroy_plan (*plan_p);
462
463 plan_p = reinterpret_cast<fftwf_plan *> (&m_plan[0]);
464 if (*plan_p)
465 fftwf_destroy_plan (*plan_p);
466
467 plan_p = reinterpret_cast<fftwf_plan *> (&m_plan[1]);
468 if (*plan_p)
469 fftwf_destroy_plan (*plan_p);
470}
471
472bool
474{
475 bool retval = true;
476
477 if (! s_instance)
478 {
479 s_instance = new float_fftw_planner ();
480 singleton_cleanup_list::add (cleanup_instance);
481 }
482
483 return retval;
484}
485
486void
488{
489#if defined (HAVE_FFTW3F_THREADS)
490 if (instance_ok () && nt != threads ())
491 {
492 s_instance->m_nthreads = nt;
493 fftwf_plan_with_nthreads (nt);
494 // Clear the current plans.
495 s_instance->m_rplan = nullptr;
496 s_instance->m_plan[0] = s_instance->m_plan[1] = nullptr;
497 }
498#else
499 octave_unused_parameter (nt);
500
501 (*current_liboctave_warning_handler)
502 ("unable to change number of threads without FFTW thread support");
503#endif
504}
505
506void *
507float_fftw_planner::do_create_plan (int dir, const int rank,
508 const dim_vector& dims,
509 octave_idx_type howmany,
510 octave_idx_type stride,
511 octave_idx_type dist,
512 const FloatComplex *in,
513 FloatComplex *out)
514{
515 int which = (dir == FFTW_FORWARD) ? 0 : 1;
516 fftwf_plan *cur_plan_p = reinterpret_cast<fftwf_plan *> (&m_plan[which]);
517 bool create_new_plan = false;
518 bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out);
519 bool ioinplace = (in == out);
520
521 // Don't create a new plan if we have a non SIMD plan already but
522 // can do SIMD. This prevents endlessly recreating plans if we
523 // change the alignment.
524
525 if (m_plan[which] == nullptr || m_d[which] != dist || m_s[which] != stride
526 || m_r[which] != rank || m_h[which] != howmany
527 || ioinplace != m_inplace[which]
528 || ((ioalign != m_simd_align[which]) ? ! ioalign : false))
529 create_new_plan = true;
530 else
531 {
532 // We still might not have the same shape of array.
533
534 for (int i = 0; i < rank; i++)
535 if (dims(i) != m_n[which](i))
536 {
537 create_new_plan = true;
538 break;
539 }
540 }
541
542 if (create_new_plan)
543 {
544 m_d[which] = dist;
545 m_s[which] = stride;
546 m_r[which] = rank;
547 m_h[which] = howmany;
548 m_simd_align[which] = ioalign;
549 m_inplace[which] = ioinplace;
550 m_n[which] = dims;
551
552 // Note reversal of dimensions for column major storage in FFTW.
553 octave_idx_type nn = 1;
554 OCTAVE_LOCAL_BUFFER (int, tmp, rank);
555
556 for (int i = 0, j = rank-1; i < rank; i++, j--)
557 {
558 tmp[i] = dims(j);
559 nn *= dims(j);
560 }
561
562 int plan_flags = 0;
563 bool plan_destroys_in = true;
564
565 switch (m_meth)
566 {
567 case UNKNOWN:
568 case ESTIMATE:
569 plan_flags |= FFTW_ESTIMATE;
570 plan_destroys_in = false;
571 break;
572 case MEASURE:
573 plan_flags |= FFTW_MEASURE;
574 break;
575 case PATIENT:
576 plan_flags |= FFTW_PATIENT;
577 break;
578 case EXHAUSTIVE:
579 plan_flags |= FFTW_EXHAUSTIVE;
580 break;
581 case HYBRID:
582 if (nn < 8193)
583 plan_flags |= FFTW_MEASURE;
584 else
585 {
586 plan_flags |= FFTW_ESTIMATE;
587 plan_destroys_in = false;
588 }
589 break;
590 }
591
592 if (ioalign)
593 plan_flags &= ~FFTW_UNALIGNED;
594 else
595 plan_flags |= FFTW_UNALIGNED;
596
597 if (*cur_plan_p)
598 fftwf_destroy_plan (*cur_plan_p);
599
601 itmp = const_cast<FloatComplex *> (in);
602 FloatComplex *otmp = out;
603
604 if (plan_destroys_in)
605 {
606 // Create matrix with the same size and 16-byte alignment as input
607 OCTAVE_SCOPED_BUFFER (FloatComplex, itmp, nn * howmany + 32);
608 itmp = reinterpret_cast<FloatComplex *>
609 (((reinterpret_cast<std::ptrdiff_t> (itmp) + 15) & ~ 0xF)
610 + ((reinterpret_cast<std::ptrdiff_t> (in)) & 0xF));
611
612 if (out == in)
613 otmp = itmp;
614 }
615
616 *cur_plan_p
617 = fftwf_plan_many_dft (rank, tmp, howmany,
618 reinterpret_cast<fftwf_complex *> (itmp),
619 nullptr, stride, dist,
620 reinterpret_cast<fftwf_complex *> (otmp),
621 nullptr, stride, dist, dir, plan_flags);
622
623 if (*cur_plan_p == nullptr)
624 (*current_liboctave_error_handler) ("Error creating FFTW plan");
625 }
626
627 return *cur_plan_p;
628}
629
630void *
631float_fftw_planner::do_create_plan (const int rank, const dim_vector& dims,
632 octave_idx_type howmany,
633 octave_idx_type stride,
634 octave_idx_type dist,
635 const float *in, FloatComplex *out)
636{
637 fftwf_plan *cur_plan_p = reinterpret_cast<fftwf_plan *> (&m_rplan);
638 bool create_new_plan = false;
639 bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out);
640
641 // Don't create a new plan if we have a non SIMD plan already but
642 // can do SIMD. This prevents endlessly recreating plans if we
643 // change the alignment.
644
645 if (m_rplan == nullptr || m_rd != dist || m_rs != stride || m_rr != rank
646 || m_rh != howmany || ((ioalign != m_rsimd_align) ? ! ioalign : false))
647 create_new_plan = true;
648 else
649 {
650 // We still might not have the same shape of array.
651
652 for (int i = 0; i < rank; i++)
653 if (dims(i) != m_rn(i))
654 {
655 create_new_plan = true;
656 break;
657 }
658 }
659
660 if (create_new_plan)
661 {
662 m_rd = dist;
663 m_rs = stride;
664 m_rr = rank;
665 m_rh = howmany;
666 m_rsimd_align = ioalign;
667 m_rn = dims;
668
669 // Note reversal of dimensions for column major storage in FFTW.
670 octave_idx_type nn = 1;
671 OCTAVE_LOCAL_BUFFER (int, tmp, rank);
672
673 for (int i = 0, j = rank-1; i < rank; i++, j--)
674 {
675 tmp[i] = dims(j);
676 nn *= dims(j);
677 }
678
679 int plan_flags = 0;
680 bool plan_destroys_in = true;
681
682 switch (m_meth)
683 {
684 case UNKNOWN:
685 case ESTIMATE:
686 plan_flags |= FFTW_ESTIMATE;
687 plan_destroys_in = false;
688 break;
689 case MEASURE:
690 plan_flags |= FFTW_MEASURE;
691 break;
692 case PATIENT:
693 plan_flags |= FFTW_PATIENT;
694 break;
695 case EXHAUSTIVE:
696 plan_flags |= FFTW_EXHAUSTIVE;
697 break;
698 case HYBRID:
699 if (nn < 8193)
700 plan_flags |= FFTW_MEASURE;
701 else
702 {
703 plan_flags |= FFTW_ESTIMATE;
704 plan_destroys_in = false;
705 }
706 break;
707 }
708
709 if (ioalign)
710 plan_flags &= ~FFTW_UNALIGNED;
711 else
712 plan_flags |= FFTW_UNALIGNED;
713
714 if (*cur_plan_p)
715 fftwf_destroy_plan (*cur_plan_p);
716
717 OCTAVE_SCOPED_BUFFER_ANCHOR (float, itmp);
718 itmp = const_cast<float *> (in);
719 FloatComplex *otmp = out;
720
721 if (plan_destroys_in)
722 {
723 // Create matrix with the same size and 16-byte alignment as input
724 octave_idx_type in_place = reinterpret_cast<float *> (out) == in;
725 OCTAVE_SCOPED_BUFFER (float, itmp,
726 nn * howmany * (in_place + 1) + 32);
727 itmp = reinterpret_cast<float *>
728 (((reinterpret_cast<std::ptrdiff_t> (itmp) + 15) & ~ 0xF)
729 + ((reinterpret_cast<std::ptrdiff_t> (in)) & 0xF));
730
731 if (in_place)
732 otmp = reinterpret_cast<FloatComplex *> (itmp);
733 }
734
735 *cur_plan_p
736 = fftwf_plan_many_dft_r2c (rank, tmp, howmany, itmp,
737 nullptr, stride, dist,
738 reinterpret_cast<fftwf_complex *> (otmp),
739 nullptr, stride, dist, plan_flags);
740
741 if (*cur_plan_p == nullptr)
742 (*current_liboctave_error_handler) ("Error creating FFTW plan");
743 }
744
745 return *cur_plan_p;
746}
747
749float_fftw_planner::do_method ()
750{
751 return m_meth;
752}
753
755float_fftw_planner::do_method (FftwMethod _meth)
756{
757 FftwMethod ret = m_meth;
758 if (_meth == ESTIMATE || _meth == MEASURE
759 || _meth == PATIENT || _meth == EXHAUSTIVE
760 || _meth == HYBRID)
761 {
762 if (m_meth != _meth)
763 {
764 m_meth = _meth;
765 if (m_rplan)
766 fftwf_destroy_plan (reinterpret_cast<fftwf_plan> (m_rplan));
767 if (m_plan[0])
768 fftwf_destroy_plan (reinterpret_cast<fftwf_plan> (m_plan[0]));
769 if (m_plan[1])
770 fftwf_destroy_plan (reinterpret_cast<fftwf_plan> (m_plan[1]));
771 m_rplan = m_plan[0] = m_plan[1] = nullptr;
772 }
773 }
774 else
775 ret = UNKNOWN;
776 return ret;
777}
778
779template <typename T>
780static inline void
781convert_packcomplex_1d (T *out, std::size_t nr, std::size_t nc,
782 octave_idx_type stride, octave_idx_type dist)
783{
784 octave_quit ();
785
786 // Fill in the missing data.
787
788 for (std::size_t i = 0; i < nr; i++)
789 for (std::size_t j = nc/2+1; j < nc; j++)
790 out[j*stride + i*dist] = conj (out[(nc - j)*stride + i*dist]);
791
792 octave_quit ();
793}
794
795template <typename T>
796static inline void
797convert_packcomplex_Nd (T *out, const dim_vector& dv)
798{
799 std::size_t nc = dv(0);
800 std::size_t nr = dv(1);
801 std::size_t np = (dv.ndims () > 2 ? dv.numel () / nc / nr : 1);
802 std::size_t nrp = nr * np;
803 T *ptr1, *ptr2;
804
805 octave_quit ();
806
807 // Create space for the missing elements.
808
809 for (std::size_t i = 0; i < nrp; i++)
810 {
811 ptr1 = out + i * (nc/2 + 1) + nrp*((nc-1)/2);
812 ptr2 = out + i * nc;
813 for (std::size_t j = 0; j < nc/2+1; j++)
814 *ptr2++ = *ptr1++;
815 }
816
817 octave_quit ();
818
819 // Fill in the missing data for the rank = 2 case directly for speed.
820
821 for (std::size_t i = 0; i < np; i++)
822 {
823 for (std::size_t j = 1; j < nr; j++)
824 for (std::size_t k = nc/2+1; k < nc; k++)
825 out[k + (j + i*nr)*nc] = conj (out[nc - k + ((i+1)*nr - j)*nc]);
826
827 for (std::size_t j = nc/2+1; j < nc; j++)
828 out[j + i*nr*nc] = conj (out[(i*nr+1)*nc - j]);
829 }
830
831 octave_quit ();
832
833 // Now do the permutations needed for rank > 2 cases.
834
835 std::size_t jstart = dv(0) * dv(1);
836 std::size_t kstep = dv(0);
837 std::size_t nel = dv.numel ();
838
839 for (int inner = 2; inner < dv.ndims (); inner++)
840 {
841 std::size_t jmax = jstart * dv(inner);
842 for (std::size_t i = 0; i < nel; i+=jmax)
843 for (std::size_t j = jstart, jj = jmax-jstart; j < jj;
844 j+=jstart, jj-=jstart)
845 for (std::size_t k = 0; k < jstart; k+= kstep)
846 for (std::size_t l = nc/2+1; l < nc; l++)
847 {
848 T tmp = out[i+ j + k + l];
849 out[i + j + k + l] = out[i + jj + k + l];
850 out[i + jj + k + l] = tmp;
851 }
852 jstart = jmax;
853 }
854
855 octave_quit ();
856}
857
858int
859fftw::fft (const double *in, Complex *out, std::size_t npts,
860 std::size_t nsamples, octave_idx_type stride,
861 octave_idx_type dist)
862{
863 dist = (dist < 0 ? npts : dist);
864
865 dim_vector dv (npts, 1);
866 void *vplan = fftw_planner::create_plan (1, dv, nsamples,
867 stride, dist, in, out);
868 fftw_plan m_plan = reinterpret_cast<fftw_plan> (vplan);
869
870 fftw_execute_dft_r2c (m_plan, (const_cast<double *> (in)),
871 reinterpret_cast<fftw_complex *> (out));
872
873 // Need to create other half of the transform.
874
875 convert_packcomplex_1d (out, nsamples, npts, stride, dist);
876
877 return 0;
878}
879
880int
881fftw::fft (const Complex *in, Complex *out, std::size_t npts,
882 std::size_t nsamples, octave_idx_type stride,
883 octave_idx_type dist)
884{
885 dist = (dist < 0 ? npts : dist);
886
887 dim_vector dv (npts, 1);
888 void *vplan = fftw_planner::create_plan (FFTW_FORWARD, 1, dv,
889 nsamples, stride,
890 dist, in, out);
891 fftw_plan m_plan = reinterpret_cast<fftw_plan> (vplan);
892
893 fftw_execute_dft (m_plan,
894 reinterpret_cast<fftw_complex *> (const_cast<Complex *> (in)),
895 reinterpret_cast<fftw_complex *> (out));
896
897 return 0;
898}
899
900int
901fftw::ifft (const Complex *in, Complex *out, std::size_t npts,
902 std::size_t nsamples, octave_idx_type stride,
903 octave_idx_type dist)
904{
905 dist = (dist < 0 ? npts : dist);
906
907 dim_vector dv (npts, 1);
908 void *vplan = fftw_planner::create_plan (FFTW_BACKWARD, 1, dv, nsamples,
909 stride, dist, in, out);
910 fftw_plan m_plan = reinterpret_cast<fftw_plan> (vplan);
911
912 fftw_execute_dft (m_plan,
913 reinterpret_cast<fftw_complex *> (const_cast<Complex *> (in)),
914 reinterpret_cast<fftw_complex *> (out));
915
916 const Complex scale = npts;
917 for (std::size_t j = 0; j < nsamples; j++)
918 for (std::size_t i = 0; i < npts; i++)
919 out[i*stride + j*dist] /= scale;
920
921 return 0;
922}
923
924int
925fftw::fftNd (const double *in, Complex *out, const int rank,
926 const dim_vector& dv)
927{
928 octave_idx_type dist = 1;
929 for (int i = 0; i < rank; i++)
930 dist *= dv(i);
931
932 // Fool with the position of the start of the output matrix, so that
933 // creating other half of the matrix won't cause cache problems.
934
935 octave_idx_type offset = (dv.numel () / dv(0)) * ((dv(0) - 1) / 2);
936
937 void *vplan = fftw_planner::create_plan (rank, dv, 1, 1, dist,
938 in, out + offset);
939 fftw_plan m_plan = reinterpret_cast<fftw_plan> (vplan);
940
941 fftw_execute_dft_r2c (m_plan, (const_cast<double *> (in)),
942 reinterpret_cast<fftw_complex *> (out+ offset));
943
944 // Need to create other half of the transform.
945
946 convert_packcomplex_Nd (out, dv);
947
948 return 0;
949}
950
951int
952fftw::fftNd (const Complex *in, Complex *out, const int rank,
953 const dim_vector& dv)
954{
955 octave_idx_type dist = 1;
956 for (int i = 0; i < rank; i++)
957 dist *= dv(i);
958
959 void *vplan = fftw_planner::create_plan (FFTW_FORWARD, rank, dv,
960 1, 1, dist, in, out);
961 fftw_plan m_plan = reinterpret_cast<fftw_plan> (vplan);
962
963 fftw_execute_dft (m_plan,
964 reinterpret_cast<fftw_complex *> (const_cast<Complex *> (in)),
965 reinterpret_cast<fftw_complex *> (out));
966
967 return 0;
968}
969
970int
971fftw::ifftNd (const Complex *in, Complex *out, const int rank,
972 const dim_vector& dv)
973{
974 octave_idx_type dist = 1;
975 for (int i = 0; i < rank; i++)
976 dist *= dv(i);
977
978 void *vplan = fftw_planner::create_plan (FFTW_BACKWARD, rank, dv,
979 1, 1, dist, in, out);
980 fftw_plan m_plan = reinterpret_cast<fftw_plan> (vplan);
981
982 fftw_execute_dft (m_plan,
983 reinterpret_cast<fftw_complex *> (const_cast<Complex *> (in)),
984 reinterpret_cast<fftw_complex *> (out));
985
986 const std::size_t npts = dv.numel ();
987 const Complex scale = npts;
988 for (std::size_t i = 0; i < npts; i++)
989 out[i] /= scale;
990
991 return 0;
992}
993
994int
995fftw::fft (const float *in, FloatComplex *out, std::size_t npts,
996 std::size_t nsamples, octave_idx_type stride,
997 octave_idx_type dist)
998{
999 dist = (dist < 0 ? npts : dist);
1000
1001 dim_vector dv (npts, 1);
1002 void *vplan = float_fftw_planner::create_plan (1, dv, nsamples, stride,
1003 dist, in, out);
1004 fftwf_plan m_plan = reinterpret_cast<fftwf_plan> (vplan);
1005
1006 fftwf_execute_dft_r2c (m_plan, (const_cast<float *> (in)),
1007 reinterpret_cast<fftwf_complex *> (out));
1008
1009 // Need to create other half of the transform.
1010
1011 convert_packcomplex_1d (out, nsamples, npts, stride, dist);
1012
1013 return 0;
1014}
1015
1016int
1017fftw::fft (const FloatComplex *in, FloatComplex *out, std::size_t npts,
1018 std::size_t nsamples, octave_idx_type stride,
1019 octave_idx_type dist)
1020{
1021 dist = (dist < 0 ? npts : dist);
1022
1023 dim_vector dv (npts, 1);
1024 void *vplan = float_fftw_planner::create_plan (FFTW_FORWARD, 1, dv,
1025 nsamples, stride, dist,
1026 in, out);
1027 fftwf_plan m_plan = reinterpret_cast<fftwf_plan> (vplan);
1028
1029 fftwf_execute_dft (m_plan,
1030 reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *> (in)),
1031 reinterpret_cast<fftwf_complex *> (out));
1032
1033 return 0;
1034}
1035
1036int
1037fftw::ifft (const FloatComplex *in, FloatComplex *out, std::size_t npts,
1038 std::size_t nsamples, octave_idx_type stride,
1039 octave_idx_type dist)
1040{
1041 dist = (dist < 0 ? npts : dist);
1042
1043 dim_vector dv (npts, 1);
1044 void *vplan = float_fftw_planner::create_plan (FFTW_BACKWARD, 1, dv,
1045 nsamples, stride, dist,
1046 in, out);
1047 fftwf_plan m_plan = reinterpret_cast<fftwf_plan> (vplan);
1048
1049 fftwf_execute_dft (m_plan,
1050 reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *> (in)),
1051 reinterpret_cast<fftwf_complex *> (out));
1052
1053 const FloatComplex scale = npts;
1054 for (std::size_t j = 0; j < nsamples; j++)
1055 for (std::size_t i = 0; i < npts; i++)
1056 out[i*stride + j*dist] /= scale;
1057
1058 return 0;
1059}
1060
1061int
1062fftw::fftNd (const float *in, FloatComplex *out, const int rank,
1063 const dim_vector& dv)
1064{
1065 octave_idx_type dist = 1;
1066 for (int i = 0; i < rank; i++)
1067 dist *= dv(i);
1068
1069 // Fool with the position of the start of the output matrix, so that
1070 // creating other half of the matrix won't cause cache problems.
1071
1072 octave_idx_type offset = (dv.numel () / dv(0)) * ((dv(0) - 1) / 2);
1073
1074 void *vplan = float_fftw_planner::create_plan (rank, dv, 1, 1, dist,
1075 in, out + offset);
1076 fftwf_plan m_plan = reinterpret_cast<fftwf_plan> (vplan);
1077
1078 fftwf_execute_dft_r2c (m_plan, (const_cast<float *> (in)),
1079 reinterpret_cast<fftwf_complex *> (out+ offset));
1080
1081 // Need to create other half of the transform.
1082
1083 convert_packcomplex_Nd (out, dv);
1084
1085 return 0;
1086}
1087
1088int
1089fftw::fftNd (const FloatComplex *in, FloatComplex *out, const int rank,
1090 const dim_vector& dv)
1091{
1092 octave_idx_type dist = 1;
1093 for (int i = 0; i < rank; i++)
1094 dist *= dv(i);
1095
1096 void *vplan = float_fftw_planner::create_plan (FFTW_FORWARD, rank, dv,
1097 1, 1, dist, in, out);
1098 fftwf_plan m_plan = reinterpret_cast<fftwf_plan> (vplan);
1099
1100 fftwf_execute_dft (m_plan,
1101 reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *> (in)),
1102 reinterpret_cast<fftwf_complex *> (out));
1103
1104 return 0;
1105}
1106
1107int
1108fftw::ifftNd (const FloatComplex *in, FloatComplex *out, const int rank,
1109 const dim_vector& dv)
1110{
1111 octave_idx_type dist = 1;
1112 for (int i = 0; i < rank; i++)
1113 dist *= dv(i);
1114
1115 void *vplan = float_fftw_planner::create_plan (FFTW_BACKWARD, rank, dv,
1116 1, 1, dist, in, out);
1117 fftwf_plan m_plan = reinterpret_cast<fftwf_plan> (vplan);
1118
1119 fftwf_execute_dft (m_plan,
1120 reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *> (in)),
1121 reinterpret_cast<fftwf_complex *> (out));
1122
1123 const std::size_t npts = dv.numel ();
1124 const FloatComplex scale = npts;
1125 for (std::size_t i = 0; i < npts; i++)
1126 out[i] /= scale;
1127
1128 return 0;
1129}
1130
1131#endif
1132
1133std::string
1135{
1136#if defined (HAVE_FFTW)
1137 return ::fftw_version;
1138#else
1139 return "none";
1140#endif
1141}
1142
1143std::string
1145{
1146#if defined (HAVE_FFTW)
1147 return ::fftwf_version;
1148#else
1149 return "none";
1150#endif
1151}
1152
1153OCTAVE_END_NAMESPACE(octave)
ComplexColumnVector conj(const ComplexColumnVector &a)
Vector representing the dimensions (size) of an Array.
Definition dim-vector.h:90
octave_idx_type numel(int n=0) const
Number of elements that a matrix with this dimensions would have.
Definition dim-vector.h:331
octave_idx_type ndims() const
Number of dimensions.
Definition dim-vector.h:253
static bool instance_ok()
Definition oct-fftw.cc:118
static void * create_plan(int dir, const int rank, const dim_vector &dims, octave_idx_type howmany, octave_idx_type stride, octave_idx_type dist, const Complex *in, Complex *out)
Definition oct-fftw.h:65
static int threads()
Definition oct-fftw.h:103
static int ifftNd(const Complex *, Complex *, const int, const dim_vector &)
Definition oct-fftw.cc:971
static int fft(const double *in, Complex *out, std::size_t npts, std::size_t nsamples=1, octave_idx_type stride=1, octave_idx_type dist=-1)
Definition oct-fftw.cc:859
static int ifft(const Complex *in, Complex *out, std::size_t npts, std::size_t nsamples=1, octave_idx_type stride=1, octave_idx_type dist=-1)
Definition oct-fftw.cc:901
static int fftNd(const double *, Complex *, const int, const dim_vector &)
Definition oct-fftw.cc:925
static bool instance_ok()
Definition oct-fftw.cc:473
static int threads()
Definition oct-fftw.h:243
static void * create_plan(int dir, const int rank, const dim_vector &dims, octave_idx_type howmany, octave_idx_type stride, octave_idx_type dist, const FloatComplex *in, FloatComplex *out)
Definition oct-fftw.h:205
static void add(fptr f)
OCTAVE_BEGIN_NAMESPACE(octave) static octave_value daspk_fcn
void scale(Matrix &m, double x, double y, double z)
Definition graphics.cc:5731
unsigned long int octave_num_processors_wrapper(enum octave_nproc_query octave_query)
@ OCTAVE_NPROC_CURRENT_OVERRIDABLE
std::complex< double > Complex
Definition oct-cmplx.h:33
std::complex< float > FloatComplex
Definition oct-cmplx.h:34
std::string fftwf_version()
Definition oct-fftw.cc:1144
std::string fftw_version()
Definition oct-fftw.cc:1134
#define CHECK_SIMD_ALIGNMENT(x)
Definition oct-fftw.cc:151
#define OCTAVE_LOCAL_BUFFER(T, buf, size)
Definition oct-locbuf.h:44
#define OCTAVE_SCOPED_BUFFER_ANCHOR(T, buf)
Definition oct-locbuf.h:57
#define OCTAVE_SCOPED_BUFFER(T, buf, size)
Definition oct-locbuf.h:69