GNU Octave  3.8.0
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
oct-fftw.cc
Go to the documentation of this file.
1 /*
2 
3 Copyright (C) 2001-2013 John W. Eaton
4 
5 This file is part of Octave.
6 
7 Octave is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 3 of the License, or (at your
10 option) any later version.
11 
12 Octave is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with Octave; see the file COPYING. If not, see
19 <http://www.gnu.org/licenses/>.
20 
21 */
22 
23 #ifdef HAVE_CONFIG_H
24 #include <config.h>
25 #endif
26 
27 #if defined (HAVE_FFTW)
28 
29 #include <iostream>
30 #include <vector>
31 
32 #include "lo-error.h"
33 #include "oct-fftw.h"
34 #include "quit.h"
35 #include "oct-locbuf.h"
36 #include "singleton-cleanup.h"
37 
38 #if defined (HAVE_FFTW3_THREADS)
39 #include "nproc.h"
40 #endif
41 
43 
44 // Helper class to create and cache FFTW plans for both 1D and
45 // 2D. This implementation defaults to using FFTW_ESTIMATE to create
46 // the plans, which in theory is suboptimal, but provides quite
47 // reasonable performance in practice.
48 
49 // Also note that if FFTW_ESTIMATE is not used then the planner in FFTW3
50 // will destroy the input and output arrays. We must, therefore, create a
51 // temporary input array with the same size and 16-byte alignment as
52 // the original array when using a different planner strategy.
53 // Note that we also use any wisdom that is available, either in a
54 // FFTW3 system wide file or as supplied by the user.
55 
56 // FIXME: if we can ensure 16 byte alignment in Array<T>
57 // (<T> *data) the FFTW3 can use SIMD instructions for further
58 // acceleration.
59 
60 // Note that it is profitable to store the FFTW3 plans, for small FFTs.
61 
63  : meth (ESTIMATE), rplan (0), rd (0), rs (0), rr (0), rh (0), rn (),
64  rsimd_align (false)
65 {
66  plan[0] = plan[1] = 0;
67  d[0] = d[1] = s[0] = s[1] = r[0] = r[1] = h[0] = h[1] = 0;
68  simd_align[0] = simd_align[1] = false;
69  inplace[0] = inplace[1] = false;
70  n[0] = n[1] = dim_vector ();
71 
72 #if defined (HAVE_FFTW3_THREADS)
73  int init_ret = fftw_init_threads ();
74  if (!init_ret)
75  (*current_liboctave_error_handler) ("Error initializing FFTW threads");
76  //Use number of processors available to the current process
77  //This can be later changed with fftw ("threads", nthreads)
78  nthreads = num_processors (NPROC_CURRENT);
79  fftw_plan_with_nthreads (nthreads);
80 #endif
81 
82  // If we have a system wide wisdom file, import it.
83  fftw_import_system_wisdom ();
84 }
85 
87 {
88  fftw_plan *plan_p;
89 
90  plan_p = &rplan;
91  if (*plan_p)
92  fftw_destroy_plan (*plan_p);
93 
94  plan_p = &plan[0];
95  if (*plan_p)
96  fftw_destroy_plan (*plan_p);
97 
98  plan_p = &plan[1];
99  if (*plan_p)
100  fftw_destroy_plan (*plan_p);
101 }
102 
103 bool
105 {
106  bool retval = true;
107 
108  if (! instance)
109  {
110  instance = new octave_fftw_planner ();
111 
112  if (instance)
114  }
115 
116  if (! instance)
117  {
118  (*current_liboctave_error_handler)
119  ("unable to create octave_fftw_planner object!");
120 
121  retval = false;
122  }
123 
124  return retval;
125 }
126 
127 #define CHECK_SIMD_ALIGNMENT(x) \
128  (((reinterpret_cast<ptrdiff_t> (x)) & 0xF) == 0)
129 
130 fftw_plan
131 octave_fftw_planner::do_create_plan (int dir, const int rank,
132  const dim_vector dims,
133  octave_idx_type howmany,
134  octave_idx_type stride,
135  octave_idx_type dist,
136  const Complex *in, Complex *out)
137 {
138  int which = (dir == FFTW_FORWARD) ? 0 : 1;
139  fftw_plan *cur_plan_p = &plan[which];
140  bool create_new_plan = false;
141  bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out);
142  bool ioinplace = (in == out);
143 
144  // Don't create a new plan if we have a non SIMD plan already but
145  // can do SIMD. This prevents endlessly recreating plans if we
146  // change the alignment.
147 
148  if (plan[which] == 0 || d[which] != dist || s[which] != stride
149  || r[which] != rank || h[which] != howmany
150  || ioinplace != inplace[which]
151  || ((ioalign != simd_align[which]) ? !ioalign : false))
152  create_new_plan = true;
153  else
154  {
155  // We still might not have the same shape of array.
156 
157  for (int i = 0; i < rank; i++)
158  if (dims(i) != n[which](i))
159  {
160  create_new_plan = true;
161  break;
162  }
163  }
164 
165  if (create_new_plan)
166  {
167  d[which] = dist;
168  s[which] = stride;
169  r[which] = rank;
170  h[which] = howmany;
171  simd_align[which] = ioalign;
172  inplace[which] = ioinplace;
173  n[which] = dims;
174 
175  // Note reversal of dimensions for column major storage in FFTW.
176  octave_idx_type nn = 1;
177  OCTAVE_LOCAL_BUFFER (int, tmp, rank);
178 
179  for (int i = 0, j = rank-1; i < rank; i++, j--)
180  {
181  tmp[i] = dims(j);
182  nn *= dims(j);
183  }
184 
185  int plan_flags = 0;
186  bool plan_destroys_in = true;
187 
188  switch (meth)
189  {
190  case UNKNOWN:
191  case ESTIMATE:
192  plan_flags |= FFTW_ESTIMATE;
193  plan_destroys_in = false;
194  break;
195  case MEASURE:
196  plan_flags |= FFTW_MEASURE;
197  break;
198  case PATIENT:
199  plan_flags |= FFTW_PATIENT;
200  break;
201  case EXHAUSTIVE:
202  plan_flags |= FFTW_EXHAUSTIVE;
203  break;
204  case HYBRID:
205  if (nn < 8193)
206  plan_flags |= FFTW_MEASURE;
207  else
208  {
209  plan_flags |= FFTW_ESTIMATE;
210  plan_destroys_in = false;
211  }
212  break;
213  }
214 
215  if (ioalign)
216  plan_flags &= ~FFTW_UNALIGNED;
217  else
218  plan_flags |= FFTW_UNALIGNED;
219 
220  if (*cur_plan_p)
221  fftw_destroy_plan (*cur_plan_p);
222 
223  if (plan_destroys_in)
224  {
225  // Create matrix with the same size and 16-byte alignment as input
226  OCTAVE_LOCAL_BUFFER (Complex, itmp, nn * howmany + 32);
227  itmp = reinterpret_cast<Complex *>
228  (((reinterpret_cast<ptrdiff_t>(itmp) + 15) & ~ 0xF) +
229  ((reinterpret_cast<ptrdiff_t> (in)) & 0xF));
230 
231  *cur_plan_p =
232  fftw_plan_many_dft (rank, tmp, howmany,
233  reinterpret_cast<fftw_complex *> (itmp),
234  0, stride, dist,
235  reinterpret_cast<fftw_complex *> (out),
236  0, stride, dist, dir, plan_flags);
237  }
238  else
239  {
240  *cur_plan_p =
241  fftw_plan_many_dft (rank, tmp, howmany,
242  reinterpret_cast<fftw_complex *> (const_cast<Complex *> (in)),
243  0, stride, dist,
244  reinterpret_cast<fftw_complex *> (out),
245  0, stride, dist, dir, plan_flags);
246  }
247 
248  if (*cur_plan_p == 0)
249  (*current_liboctave_error_handler) ("Error creating fftw plan");
250  }
251 
252  return *cur_plan_p;
253 }
254 
255 fftw_plan
257  octave_idx_type howmany,
258  octave_idx_type stride,
259  octave_idx_type dist,
260  const double *in, Complex *out)
261 {
262  fftw_plan *cur_plan_p = &rplan;
263  bool create_new_plan = false;
264  bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out);
265 
266  // Don't create a new plan if we have a non SIMD plan already but
267  // can do SIMD. This prevents endlessly recreating plans if we
268  // change the alignment.
269 
270  if (rplan == 0 || rd != dist || rs != stride || rr != rank
271  || rh != howmany || ((ioalign != rsimd_align) ? !ioalign : false))
272  create_new_plan = true;
273  else
274  {
275  // We still might not have the same shape of array.
276 
277  for (int i = 0; i < rank; i++)
278  if (dims(i) != rn(i))
279  {
280  create_new_plan = true;
281  break;
282  }
283  }
284 
285  if (create_new_plan)
286  {
287  rd = dist;
288  rs = stride;
289  rr = rank;
290  rh = howmany;
291  rsimd_align = ioalign;
292  rn = dims;
293 
294  // Note reversal of dimensions for column major storage in FFTW.
295  octave_idx_type nn = 1;
296  OCTAVE_LOCAL_BUFFER (int, tmp, rank);
297 
298  for (int i = 0, j = rank-1; i < rank; i++, j--)
299  {
300  tmp[i] = dims(j);
301  nn *= dims(j);
302  }
303 
304  int plan_flags = 0;
305  bool plan_destroys_in = true;
306 
307  switch (meth)
308  {
309  case UNKNOWN:
310  case ESTIMATE:
311  plan_flags |= FFTW_ESTIMATE;
312  plan_destroys_in = false;
313  break;
314  case MEASURE:
315  plan_flags |= FFTW_MEASURE;
316  break;
317  case PATIENT:
318  plan_flags |= FFTW_PATIENT;
319  break;
320  case EXHAUSTIVE:
321  plan_flags |= FFTW_EXHAUSTIVE;
322  break;
323  case HYBRID:
324  if (nn < 8193)
325  plan_flags |= FFTW_MEASURE;
326  else
327  {
328  plan_flags |= FFTW_ESTIMATE;
329  plan_destroys_in = false;
330  }
331  break;
332  }
333 
334  if (ioalign)
335  plan_flags &= ~FFTW_UNALIGNED;
336  else
337  plan_flags |= FFTW_UNALIGNED;
338 
339  if (*cur_plan_p)
340  fftw_destroy_plan (*cur_plan_p);
341 
342  if (plan_destroys_in)
343  {
344  // Create matrix with the same size and 16-byte alignment as input
345  OCTAVE_LOCAL_BUFFER (double, itmp, nn + 32);
346  itmp = reinterpret_cast<double *>
347  (((reinterpret_cast<ptrdiff_t>(itmp) + 15) & ~ 0xF) +
348  ((reinterpret_cast<ptrdiff_t> (in)) & 0xF));
349 
350  *cur_plan_p =
351  fftw_plan_many_dft_r2c (rank, tmp, howmany, itmp,
352  0, stride, dist,
353  reinterpret_cast<fftw_complex *> (out),
354  0, stride, dist, plan_flags);
355  }
356  else
357  {
358  *cur_plan_p =
359  fftw_plan_many_dft_r2c (rank, tmp, howmany,
360  (const_cast<double *> (in)),
361  0, stride, dist,
362  reinterpret_cast<fftw_complex *> (out),
363  0, stride, dist, plan_flags);
364  }
365 
366  if (*cur_plan_p == 0)
367  (*current_liboctave_error_handler) ("Error creating fftw plan");
368  }
369 
370  return *cur_plan_p;
371 }
372 
375 {
376  return meth;
377 }
378 
381 {
382  FftwMethod ret = meth;
383  if (_meth == ESTIMATE || _meth == MEASURE
384  || _meth == PATIENT || _meth == EXHAUSTIVE
385  || _meth == HYBRID)
386  {
387  if (meth != _meth)
388  {
389  meth = _meth;
390  if (rplan)
391  fftw_destroy_plan (rplan);
392  if (plan[0])
393  fftw_destroy_plan (plan[0]);
394  if (plan[1])
395  fftw_destroy_plan (plan[1]);
396  rplan = plan[0] = plan[1] = 0;
397  }
398  }
399  else
400  ret = UNKNOWN;
401  return ret;
402 }
403 
405 
407  : meth (ESTIMATE), rplan (0), rd (0), rs (0), rr (0), rh (0), rn (),
408  rsimd_align (false)
409 {
410  plan[0] = plan[1] = 0;
411  d[0] = d[1] = s[0] = s[1] = r[0] = r[1] = h[0] = h[1] = 0;
412  simd_align[0] = simd_align[1] = false;
413  inplace[0] = inplace[1] = false;
414  n[0] = n[1] = dim_vector ();
415 
416 #if defined (HAVE_FFTW3F_THREADS)
417  int init_ret = fftwf_init_threads ();
418  if (!init_ret)
419  (*current_liboctave_error_handler) ("Error initializing FFTW3F threads");
420  //Use number of processors available to the current process
421  //This can be later changed with fftw ("threads", nthreads)
422  nthreads = num_processors (NPROC_CURRENT);
423  fftwf_plan_with_nthreads (nthreads);
424 #endif
425 
426  // If we have a system wide wisdom file, import it.
427  fftwf_import_system_wisdom ();
428 }
429 
431 {
432  fftwf_plan *plan_p;
433 
434  plan_p = &rplan;
435  if (*plan_p)
436  fftwf_destroy_plan (*plan_p);
437 
438  plan_p = &plan[0];
439  if (*plan_p)
440  fftwf_destroy_plan (*plan_p);
441 
442  plan_p = &plan[1];
443  if (*plan_p)
444  fftwf_destroy_plan (*plan_p);
445 }
446 
447 bool
449 {
450  bool retval = true;
451 
452  if (! instance)
453  {
455 
456  if (instance)
458  }
459 
460  if (! instance)
461  {
462  (*current_liboctave_error_handler)
463  ("unable to create octave_fftw_planner object!");
464 
465  retval = false;
466  }
467 
468  return retval;
469 }
470 
471 fftwf_plan
473  const dim_vector dims,
474  octave_idx_type howmany,
475  octave_idx_type stride,
476  octave_idx_type dist,
477  const FloatComplex *in,
478  FloatComplex *out)
479 {
480  int which = (dir == FFTW_FORWARD) ? 0 : 1;
481  fftwf_plan *cur_plan_p = &plan[which];
482  bool create_new_plan = false;
483  bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out);
484  bool ioinplace = (in == out);
485 
486  // Don't create a new plan if we have a non SIMD plan already but
487  // can do SIMD. This prevents endlessly recreating plans if we
488  // change the alignment.
489 
490  if (plan[which] == 0 || d[which] != dist || s[which] != stride
491  || r[which] != rank || h[which] != howmany
492  || ioinplace != inplace[which]
493  || ((ioalign != simd_align[which]) ? !ioalign : false))
494  create_new_plan = true;
495  else
496  {
497  // We still might not have the same shape of array.
498 
499  for (int i = 0; i < rank; i++)
500  if (dims(i) != n[which](i))
501  {
502  create_new_plan = true;
503  break;
504  }
505  }
506 
507  if (create_new_plan)
508  {
509  d[which] = dist;
510  s[which] = stride;
511  r[which] = rank;
512  h[which] = howmany;
513  simd_align[which] = ioalign;
514  inplace[which] = ioinplace;
515  n[which] = dims;
516 
517  // Note reversal of dimensions for column major storage in FFTW.
518  octave_idx_type nn = 1;
519  OCTAVE_LOCAL_BUFFER (int, tmp, rank);
520 
521  for (int i = 0, j = rank-1; i < rank; i++, j--)
522  {
523  tmp[i] = dims(j);
524  nn *= dims(j);
525  }
526 
527  int plan_flags = 0;
528  bool plan_destroys_in = true;
529 
530  switch (meth)
531  {
532  case UNKNOWN:
533  case ESTIMATE:
534  plan_flags |= FFTW_ESTIMATE;
535  plan_destroys_in = false;
536  break;
537  case MEASURE:
538  plan_flags |= FFTW_MEASURE;
539  break;
540  case PATIENT:
541  plan_flags |= FFTW_PATIENT;
542  break;
543  case EXHAUSTIVE:
544  plan_flags |= FFTW_EXHAUSTIVE;
545  break;
546  case HYBRID:
547  if (nn < 8193)
548  plan_flags |= FFTW_MEASURE;
549  else
550  {
551  plan_flags |= FFTW_ESTIMATE;
552  plan_destroys_in = false;
553  }
554  break;
555  }
556 
557  if (ioalign)
558  plan_flags &= ~FFTW_UNALIGNED;
559  else
560  plan_flags |= FFTW_UNALIGNED;
561 
562  if (*cur_plan_p)
563  fftwf_destroy_plan (*cur_plan_p);
564 
565  if (plan_destroys_in)
566  {
567  // Create matrix with the same size and 16-byte alignment as input
568  OCTAVE_LOCAL_BUFFER (FloatComplex, itmp, nn * howmany + 32);
569  itmp = reinterpret_cast<FloatComplex *>
570  (((reinterpret_cast<ptrdiff_t>(itmp) + 15) & ~ 0xF) +
571  ((reinterpret_cast<ptrdiff_t> (in)) & 0xF));
572 
573  *cur_plan_p =
574  fftwf_plan_many_dft (rank, tmp, howmany,
575  reinterpret_cast<fftwf_complex *> (itmp),
576  0, stride, dist,
577  reinterpret_cast<fftwf_complex *> (out),
578  0, stride, dist, dir, plan_flags);
579  }
580  else
581  {
582  *cur_plan_p =
583  fftwf_plan_many_dft (rank, tmp, howmany,
584  reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *> (in)),
585  0, stride, dist,
586  reinterpret_cast<fftwf_complex *> (out),
587  0, stride, dist, dir, plan_flags);
588  }
589 
590  if (*cur_plan_p == 0)
591  (*current_liboctave_error_handler) ("Error creating fftw plan");
592  }
593 
594  return *cur_plan_p;
595 }
596 
597 fftwf_plan
599  const dim_vector dims,
600  octave_idx_type howmany,
601  octave_idx_type stride,
602  octave_idx_type dist,
603  const float *in, FloatComplex *out)
604 {
605  fftwf_plan *cur_plan_p = &rplan;
606  bool create_new_plan = false;
607  bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out);
608 
609  // Don't create a new plan if we have a non SIMD plan already but
610  // can do SIMD. This prevents endlessly recreating plans if we
611  // change the alignment.
612 
613  if (rplan == 0 || rd != dist || rs != stride || rr != rank
614  || rh != howmany || ((ioalign != rsimd_align) ? !ioalign : false))
615  create_new_plan = true;
616  else
617  {
618  // We still might not have the same shape of array.
619 
620  for (int i = 0; i < rank; i++)
621  if (dims(i) != rn(i))
622  {
623  create_new_plan = true;
624  break;
625  }
626  }
627 
628  if (create_new_plan)
629  {
630  rd = dist;
631  rs = stride;
632  rr = rank;
633  rh = howmany;
634  rsimd_align = ioalign;
635  rn = dims;
636 
637  // Note reversal of dimensions for column major storage in FFTW.
638  octave_idx_type nn = 1;
639  OCTAVE_LOCAL_BUFFER (int, tmp, rank);
640 
641  for (int i = 0, j = rank-1; i < rank; i++, j--)
642  {
643  tmp[i] = dims(j);
644  nn *= dims(j);
645  }
646 
647  int plan_flags = 0;
648  bool plan_destroys_in = true;
649 
650  switch (meth)
651  {
652  case UNKNOWN:
653  case ESTIMATE:
654  plan_flags |= FFTW_ESTIMATE;
655  plan_destroys_in = false;
656  break;
657  case MEASURE:
658  plan_flags |= FFTW_MEASURE;
659  break;
660  case PATIENT:
661  plan_flags |= FFTW_PATIENT;
662  break;
663  case EXHAUSTIVE:
664  plan_flags |= FFTW_EXHAUSTIVE;
665  break;
666  case HYBRID:
667  if (nn < 8193)
668  plan_flags |= FFTW_MEASURE;
669  else
670  {
671  plan_flags |= FFTW_ESTIMATE;
672  plan_destroys_in = false;
673  }
674  break;
675  }
676 
677  if (ioalign)
678  plan_flags &= ~FFTW_UNALIGNED;
679  else
680  plan_flags |= FFTW_UNALIGNED;
681 
682  if (*cur_plan_p)
683  fftwf_destroy_plan (*cur_plan_p);
684 
685  if (plan_destroys_in)
686  {
687  // Create matrix with the same size and 16-byte alignment as input
688  OCTAVE_LOCAL_BUFFER (float, itmp, nn + 32);
689  itmp = reinterpret_cast<float *>
690  (((reinterpret_cast<ptrdiff_t>(itmp) + 15) & ~ 0xF) +
691  ((reinterpret_cast<ptrdiff_t> (in)) & 0xF));
692 
693  *cur_plan_p =
694  fftwf_plan_many_dft_r2c (rank, tmp, howmany, itmp,
695  0, stride, dist,
696  reinterpret_cast<fftwf_complex *> (out),
697  0, stride, dist, plan_flags);
698  }
699  else
700  {
701  *cur_plan_p =
702  fftwf_plan_many_dft_r2c (rank, tmp, howmany,
703  (const_cast<float *> (in)),
704  0, stride, dist,
705  reinterpret_cast<fftwf_complex *> (out),
706  0, stride, dist, plan_flags);
707  }
708 
709  if (*cur_plan_p == 0)
710  (*current_liboctave_error_handler) ("Error creating fftw plan");
711  }
712 
713  return *cur_plan_p;
714 }
715 
718 {
719  return meth;
720 }
721 
724 {
725  FftwMethod ret = meth;
726  if (_meth == ESTIMATE || _meth == MEASURE
727  || _meth == PATIENT || _meth == EXHAUSTIVE
728  || _meth == HYBRID)
729  {
730  if (meth != _meth)
731  {
732  meth = _meth;
733  if (rplan)
734  fftwf_destroy_plan (rplan);
735  if (plan[0])
736  fftwf_destroy_plan (plan[0]);
737  if (plan[1])
738  fftwf_destroy_plan (plan[1]);
739  rplan = plan[0] = plan[1] = 0;
740  }
741  }
742  else
743  ret = UNKNOWN;
744  return ret;
745 }
746 
747 template <class T>
748 static inline void
749 convert_packcomplex_1d (T *out, size_t nr, size_t nc,
750  octave_idx_type stride, octave_idx_type dist)
751 {
752  octave_quit ();
753 
754  // Fill in the missing data.
755 
756  for (size_t i = 0; i < nr; i++)
757  for (size_t j = nc/2+1; j < nc; j++)
758  out[j*stride + i*dist] = conj (out[(nc - j)*stride + i*dist]);
759 
760  octave_quit ();
761 }
762 
763 template <class T>
764 static inline void
766 {
767  size_t nc = dv(0);
768  size_t nr = dv(1);
769  size_t np = (dv.length () > 2 ? dv.numel () / nc / nr : 1);
770  size_t nrp = nr * np;
771  T *ptr1, *ptr2;
772 
773  octave_quit ();
774 
775  // Create space for the missing elements.
776 
777  for (size_t i = 0; i < nrp; i++)
778  {
779  ptr1 = out + i * (nc/2 + 1) + nrp*((nc-1)/2);
780  ptr2 = out + i * nc;
781  for (size_t j = 0; j < nc/2+1; j++)
782  *ptr2++ = *ptr1++;
783  }
784 
785  octave_quit ();
786 
787  // Fill in the missing data for the rank = 2 case directly for speed.
788 
789  for (size_t i = 0; i < np; i++)
790  {
791  for (size_t j = 1; j < nr; j++)
792  for (size_t k = nc/2+1; k < nc; k++)
793  out[k + (j + i*nr)*nc] = conj (out[nc - k + ((i+1)*nr - j)*nc]);
794 
795  for (size_t j = nc/2+1; j < nc; j++)
796  out[j + i*nr*nc] = conj (out[(i*nr+1)*nc - j]);
797  }
798 
799  octave_quit ();
800 
801  // Now do the permutations needed for rank > 2 cases.
802 
803  size_t jstart = dv(0) * dv(1);
804  size_t kstep = dv(0);
805  size_t nel = dv.numel ();
806 
807  for (int inner = 2; inner < dv.length (); inner++)
808  {
809  size_t jmax = jstart * dv(inner);
810  for (size_t i = 0; i < nel; i+=jmax)
811  for (size_t j = jstart, jj = jmax-jstart; j < jj;
812  j+=jstart, jj-=jstart)
813  for (size_t k = 0; k < jstart; k+= kstep)
814  for (size_t l = nc/2+1; l < nc; l++)
815  {
816  T tmp = out[i+ j + k + l];
817  out[i + j + k + l] = out[i + jj + k + l];
818  out[i + jj + k + l] = tmp;
819  }
820  jstart = jmax;
821  }
822 
823  octave_quit ();
824 }
825 
826 int
827 octave_fftw::fft (const double *in, Complex *out, size_t npts,
828  size_t nsamples, octave_idx_type stride, octave_idx_type dist)
829 {
830  dist = (dist < 0 ? npts : dist);
831 
832  dim_vector dv (npts, 1);
833  fftw_plan plan = octave_fftw_planner::create_plan (1, dv, nsamples,
834  stride, dist, in, out);
835 
836  fftw_execute_dft_r2c (plan, (const_cast<double *>(in)),
837  reinterpret_cast<fftw_complex *> (out));
838 
839  // Need to create other half of the transform.
840 
841  convert_packcomplex_1d (out, nsamples, npts, stride, dist);
842 
843  return 0;
844 }
845 
846 int
847 octave_fftw::fft (const Complex *in, Complex *out, size_t npts,
848  size_t nsamples, octave_idx_type stride, octave_idx_type dist)
849 {
850  dist = (dist < 0 ? npts : dist);
851 
852  dim_vector dv (npts, 1);
853  fftw_plan plan = octave_fftw_planner::create_plan (FFTW_FORWARD, 1, dv,
854  nsamples, stride,
855  dist, in, out);
856 
857  fftw_execute_dft (plan,
858  reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)),
859  reinterpret_cast<fftw_complex *> (out));
860 
861  return 0;
862 }
863 
864 int
865 octave_fftw::ifft (const Complex *in, Complex *out, size_t npts,
866  size_t nsamples, octave_idx_type stride,
867  octave_idx_type dist)
868 {
869  dist = (dist < 0 ? npts : dist);
870 
871  dim_vector dv (npts, 1);
872  fftw_plan plan = octave_fftw_planner::create_plan (FFTW_BACKWARD, 1, dv,
873  nsamples, stride,
874  dist, in, out);
875 
876  fftw_execute_dft (plan,
877  reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)),
878  reinterpret_cast<fftw_complex *> (out));
879 
880  const Complex scale = npts;
881  for (size_t j = 0; j < nsamples; j++)
882  for (size_t i = 0; i < npts; i++)
883  out[i*stride + j*dist] /= scale;
884 
885  return 0;
886 }
887 
888 int
889 octave_fftw::fftNd (const double *in, Complex *out, const int rank,
890  const dim_vector &dv)
891 {
892  octave_idx_type dist = 1;
893  for (int i = 0; i < rank; i++)
894  dist *= dv(i);
895 
896  // Fool with the position of the start of the output matrix, so that
897  // creating other half of the matrix won't cause cache problems.
898 
899  octave_idx_type offset = (dv.numel () / dv(0)) * ((dv(0) - 1) / 2);
900 
901  fftw_plan plan = octave_fftw_planner::create_plan (rank, dv, 1, 1, dist,
902  in, out + offset);
903 
904  fftw_execute_dft_r2c (plan, (const_cast<double *>(in)),
905  reinterpret_cast<fftw_complex *> (out+ offset));
906 
907  // Need to create other half of the transform.
908 
909  convert_packcomplex_Nd (out, dv);
910 
911  return 0;
912 }
913 
914 int
915 octave_fftw::fftNd (const Complex *in, Complex *out, const int rank,
916  const dim_vector &dv)
917 {
918  octave_idx_type dist = 1;
919  for (int i = 0; i < rank; i++)
920  dist *= dv(i);
921 
922  fftw_plan plan = octave_fftw_planner::create_plan (FFTW_FORWARD, rank,
923  dv, 1, 1, dist, in, out);
924 
925  fftw_execute_dft (plan,
926  reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)),
927  reinterpret_cast<fftw_complex *> (out));
928 
929  return 0;
930 }
931 
932 int
933 octave_fftw::ifftNd (const Complex *in, Complex *out, const int rank,
934  const dim_vector &dv)
935 {
936  octave_idx_type dist = 1;
937  for (int i = 0; i < rank; i++)
938  dist *= dv(i);
939 
940  fftw_plan plan = octave_fftw_planner::create_plan (FFTW_BACKWARD, rank,
941  dv, 1, 1, dist, in, out);
942 
943  fftw_execute_dft (plan,
944  reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)),
945  reinterpret_cast<fftw_complex *> (out));
946 
947  const size_t npts = dv.numel ();
948  const Complex scale = npts;
949  for (size_t i = 0; i < npts; i++)
950  out[i] /= scale;
951 
952  return 0;
953 }
954 
955 int
956 octave_fftw::fft (const float *in, FloatComplex *out, size_t npts,
957  size_t nsamples, octave_idx_type stride, octave_idx_type dist)
958 {
959  dist = (dist < 0 ? npts : dist);
960 
961  dim_vector dv (npts, 1);
962  fftwf_plan plan = octave_float_fftw_planner::create_plan (1, dv, nsamples,
963  stride, dist,
964  in, out);
965 
966  fftwf_execute_dft_r2c (plan, (const_cast<float *>(in)),
967  reinterpret_cast<fftwf_complex *> (out));
968 
969  // Need to create other half of the transform.
970 
971  convert_packcomplex_1d (out, nsamples, npts, stride, dist);
972 
973  return 0;
974 }
975 
976 int
977 octave_fftw::fft (const FloatComplex *in, FloatComplex *out, size_t npts,
978  size_t nsamples, octave_idx_type stride, octave_idx_type dist)
979 {
980  dist = (dist < 0 ? npts : dist);
981 
982  dim_vector dv (npts, 1);
983  fftwf_plan plan = octave_float_fftw_planner::create_plan (FFTW_FORWARD, 1,
984  dv, nsamples,
985  stride, dist,
986  in, out);
987 
988  fftwf_execute_dft (plan,
989  reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)),
990  reinterpret_cast<fftwf_complex *> (out));
991 
992  return 0;
993 }
994 
995 int
996 octave_fftw::ifft (const FloatComplex *in, FloatComplex *out, size_t npts,
997  size_t nsamples, octave_idx_type stride,
998  octave_idx_type dist)
999 {
1000  dist = (dist < 0 ? npts : dist);
1001 
1002  dim_vector dv (npts, 1);
1003  fftwf_plan plan = octave_float_fftw_planner::create_plan (FFTW_BACKWARD, 1,
1004  dv, nsamples,
1005  stride, dist,
1006  in, out);
1007 
1008  fftwf_execute_dft (plan,
1009  reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)),
1010  reinterpret_cast<fftwf_complex *> (out));
1011 
1012  const FloatComplex scale = npts;
1013  for (size_t j = 0; j < nsamples; j++)
1014  for (size_t i = 0; i < npts; i++)
1015  out[i*stride + j*dist] /= scale;
1016 
1017  return 0;
1018 }
1019 
1020 int
1021 octave_fftw::fftNd (const float *in, FloatComplex *out, const int rank,
1022  const dim_vector &dv)
1023 {
1024  octave_idx_type dist = 1;
1025  for (int i = 0; i < rank; i++)
1026  dist *= dv(i);
1027 
1028  // Fool with the position of the start of the output matrix, so that
1029  // creating other half of the matrix won't cause cache problems.
1030 
1031  octave_idx_type offset = (dv.numel () / dv(0)) * ((dv(0) - 1) / 2);
1032 
1033  fftwf_plan plan = octave_float_fftw_planner::create_plan (rank, dv, 1, 1,
1034  dist, in,
1035  out + offset);
1036 
1037  fftwf_execute_dft_r2c (plan, (const_cast<float *>(in)),
1038  reinterpret_cast<fftwf_complex *> (out+ offset));
1039 
1040  // Need to create other half of the transform.
1041 
1042  convert_packcomplex_Nd (out, dv);
1043 
1044  return 0;
1045 }
1046 
1047 int
1048 octave_fftw::fftNd (const FloatComplex *in, FloatComplex *out, const int rank,
1049  const dim_vector &dv)
1050 {
1051  octave_idx_type dist = 1;
1052  for (int i = 0; i < rank; i++)
1053  dist *= dv(i);
1054 
1055  fftwf_plan plan = octave_float_fftw_planner::create_plan (FFTW_FORWARD,
1056  rank, dv, 1, 1,
1057  dist, in, out);
1058 
1059  fftwf_execute_dft (plan,
1060  reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)),
1061  reinterpret_cast<fftwf_complex *> (out));
1062 
1063  return 0;
1064 }
1065 
1066 int
1067 octave_fftw::ifftNd (const FloatComplex *in, FloatComplex *out, const int rank,
1068  const dim_vector &dv)
1069 {
1070  octave_idx_type dist = 1;
1071  for (int i = 0; i < rank; i++)
1072  dist *= dv(i);
1073 
1074  fftwf_plan plan = octave_float_fftw_planner::create_plan (FFTW_BACKWARD,
1075  rank, dv, 1, 1,
1076  dist, in, out);
1077 
1078  fftwf_execute_dft (plan,
1079  reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)),
1080  reinterpret_cast<fftwf_complex *> (out));
1081 
1082  const size_t npts = dv.numel ();
1083  const FloatComplex scale = npts;
1084  for (size_t i = 0; i < npts; i++)
1085  out[i] /= scale;
1086 
1087  return 0;
1088 }
1089 
1090 #endif