GNU Octave 6.2.0
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab.
oct-fftw.cc
1 ////////////////////////////////////////////////////////////////////////
2 //
3 // Copyright (C) 2001-2021 The Octave Project Developers
4 //
5 // See the file COPYRIGHT.md in the top-level directory of this
6 // distribution or <https://octave.org/copyright/>.
7 //
8 // This file is part of Octave.
9 //
10 // Octave is free software: you can redistribute it and/or modify it
11 // under the terms of the GNU General Public License as published by
12 // the Free Software Foundation, either version 3 of the License, or
13 // (at your option) any later version.
14 //
15 // Octave is distributed in the hope that it will be useful, but
16 // WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 // GNU General Public License for more details.
19 //
20 // You should have received a copy of the GNU General Public License
21 // along with Octave; see the file COPYING. If not, see
22 // <https://www.gnu.org/licenses/>.
23 //
24 ////////////////////////////////////////////////////////////////////////
25 
26 #if defined (HAVE_CONFIG_H)
27 # include "config.h"
28 #endif
29 
30 #if defined (HAVE_FFTW3_H)
31 # include <fftw3.h>
32 #endif
33 
34 #include "lo-error.h"
35 #include "oct-fftw.h"
36 #include "oct-locbuf.h"
37 #include "quit.h"
38 #include "singleton-cleanup.h"
39 
40 #if defined (HAVE_FFTW3_THREADS) || defined (HAVE_FFTW3F_THREADS)
41 # include "nproc-wrapper.h"
42 #endif
43 
44 namespace octave
45 {
46 #if defined (HAVE_FFTW)
47 
48  fftw_planner *fftw_planner::instance = nullptr;
49 
50  // Helper class to create and cache FFTW plans for both 1D and
51  // 2D. This implementation defaults to using FFTW_ESTIMATE to create
52  // the plans, which in theory is suboptimal, but provides quite
53  // reasonable performance in practice.
54 
55  // Also note that if FFTW_ESTIMATE is not used then the planner in FFTW3
56  // will destroy the input and output arrays. We must, therefore, create a
57  // temporary input array with the same size and 16-byte alignment as
58  // the original array when using a different planner strategy.
59  // Note that we also use any wisdom that is available, either in a
60  // FFTW3 system wide file or as supplied by the user.
61 
62  // FIXME: If we can ensure 16-byte alignment in Array<T>
63  // (<T> *data), then FFTW3 can use SIMD instructions for further
64  // acceleration.
65 
66  // Note that it is profitable to store the FFTW3 plans, for small FFTs.
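 // Illustrative sketch (editorial addition, not part of this file): the raw
 // FFTW calls that the planner class below caches and automates.  With
 // FFTW_ESTIMATE the planner leaves the caller's buffers untouched, and any
 // imported wisdom can improve the chosen plan.  The helper name example_fft
 // and its use of std::vector are assumptions for illustration only.
 //
 //   #include <fftw3.h>
 //   #include <complex>
 //   #include <vector>
 //
 //   void example_fft (std::vector<std::complex<double>>& x)
 //   {
 //     fftw_import_system_wisdom ();  // reuse any system-wide wisdom
 //     fftw_plan p
 //       = fftw_plan_dft_1d (static_cast<int> (x.size ()),
 //                           reinterpret_cast<fftw_complex *> (x.data ()),
 //                           reinterpret_cast<fftw_complex *> (x.data ()),
 //                           FFTW_FORWARD, FFTW_ESTIMATE);
 //     fftw_execute (p);              // in-place transform of x
 //     fftw_destroy_plan (p);         // the planner below keeps plans cached
 //   }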
67 
68  fftw_planner::fftw_planner (void)
69  : meth (ESTIMATE), rplan (nullptr), rd (0), rs (0), rr (0), rh (0), rn (),
70  rsimd_align (false), nthreads (1)
71  {
72  plan[0] = plan[1] = nullptr;
73  d[0] = d[1] = s[0] = s[1] = r[0] = r[1] = h[0] = h[1] = 0;
74  simd_align[0] = simd_align[1] = false;
75  inplace[0] = inplace[1] = false;
76  n[0] = n[1] = dim_vector ();
77 
78 #if defined (HAVE_FFTW3_THREADS)
79  int init_ret = fftw_init_threads ();
80  if (! init_ret)
81  (*current_liboctave_error_handler) ("Error initializing FFTW threads");
82 
83  // Use number of processors available to the current process
84  // This can be later changed with fftw ("threads", nthreads).
85  nthreads = octave_num_processors_wrapper (OCTAVE_NPROC_CURRENT_OVERRIDABLE);
86  fftw_plan_with_nthreads (nthreads);
87 #endif
88 
89  // If we have a system wide wisdom file, import it.
90  fftw_import_system_wisdom ();
91  }
92 
93  fftw_planner::~fftw_planner (void)
94  {
95  fftw_plan *plan_p;
96 
97  plan_p = reinterpret_cast<fftw_plan *> (&rplan);
98  if (*plan_p)
99  fftw_destroy_plan (*plan_p);
100 
101  plan_p = reinterpret_cast<fftw_plan *> (&plan[0]);
102  if (*plan_p)
103  fftw_destroy_plan (*plan_p);
104 
105  plan_p = reinterpret_cast<fftw_plan *> (&plan[1]);
106  if (*plan_p)
107  fftw_destroy_plan (*plan_p);
108  }
109 
110  bool
111  fftw_planner::instance_ok (void)
112  {
113  bool retval = true;
114 
115  if (! instance)
116  {
117  instance = new fftw_planner ();
118  singleton_cleanup_list::add (cleanup_instance);
119  }
120 
121  return retval;
122  }
123 
124  void
125  fftw_planner::threads (int nt)
126  {
127 #if defined (HAVE_FFTW3_THREADS)
128  if (instance_ok () && nt != threads ())
129  {
130  instance->nthreads = nt;
131  fftw_plan_with_nthreads (nt);
132  // Clear the current plans.
133  instance->rplan = instance->plan[0] = instance->plan[1] = nullptr;
134  }
135 #else
136  octave_unused_parameter (nt);
137 
138  (*current_liboctave_warning_handler)
139  ("unable to change number of threads without FFTW thread support");
140 #endif
141  }
142 
143 #define CHECK_SIMD_ALIGNMENT(x) \
144  (((reinterpret_cast<ptrdiff_t> (x)) & 0xF) == 0)
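 // The macro above treats a pointer as 16-byte aligned when the low four bits
 // of its address are zero.  A minimal illustration (editorial addition):
 //
 //   double buf[8];
 //   bool aligned = CHECK_SIMD_ALIGNMENT (&buf[0]);  // true iff the address
 //                                                   // is a multiple of 16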
145 
146  void *
147  fftw_planner::do_create_plan (int dir, const int rank,
148  const dim_vector& dims,
149  octave_idx_type howmany,
150  octave_idx_type stride,
151  octave_idx_type dist,
152  const Complex *in, Complex *out)
153  {
154  int which = (dir == FFTW_FORWARD) ? 0 : 1;
155  fftw_plan *cur_plan_p = reinterpret_cast<fftw_plan *> (&plan[which]);
156  bool create_new_plan = false;
157  bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out);
158  bool ioinplace = (in == out);
159 
160  // Don't create a new plan if we have a non SIMD plan already but
161  // can do SIMD. This prevents endlessly recreating plans if we
162  // change the alignment.
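 // (That is, an existing unaligned plan is still reused when the new buffers
 // happen to be aligned; a new plan is forced only when an aligned plan would
 // be handed unaligned data.)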
163 
164  if (plan[which] == nullptr || d[which] != dist || s[which] != stride
165  || r[which] != rank || h[which] != howmany
166  || ioinplace != inplace[which]
167  || ((ioalign != simd_align[which]) ? ! ioalign : false))
168  create_new_plan = true;
169  else
170  {
171  // We still might not have the same shape of array.
172 
173  for (int i = 0; i < rank; i++)
174  if (dims(i) != n[which](i))
175  {
176  create_new_plan = true;
177  break;
178  }
179  }
180 
181  if (create_new_plan)
182  {
183  d[which] = dist;
184  s[which] = stride;
185  r[which] = rank;
186  h[which] = howmany;
187  simd_align[which] = ioalign;
188  inplace[which] = ioinplace;
189  n[which] = dims;
190 
191  // Note reversal of dimensions for column major storage in FFTW.
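 // (Octave stores arrays column-major while FFTW expects row-major
 // dimensions, so, for example, an Octave array of size 3x4x5 is described
 // to FFTW as {5, 4, 3}.)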
192  octave_idx_type nn = 1;
193  OCTAVE_LOCAL_BUFFER (int, tmp, rank);
194 
195  for (int i = 0, j = rank-1; i < rank; i++, j--)
196  {
197  tmp[i] = dims(j);
198  nn *= dims(j);
199  }
200 
201  int plan_flags = 0;
202  bool plan_destroys_in = true;
203 
204  switch (meth)
205  {
206  case UNKNOWN:
207  case ESTIMATE:
208  plan_flags |= FFTW_ESTIMATE;
209  plan_destroys_in = false;
210  break;
211  case MEASURE:
212  plan_flags |= FFTW_MEASURE;
213  break;
214  case PATIENT:
215  plan_flags |= FFTW_PATIENT;
216  break;
217  case EXHAUSTIVE:
218  plan_flags |= FFTW_EXHAUSTIVE;
219  break;
220  case HYBRID:
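 // HYBRID: take the time to measure plans for transforms of up to 8192
 // points; fall back to the cheap estimate (which leaves the input intact)
 // for larger ones.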
221  if (nn < 8193)
222  plan_flags |= FFTW_MEASURE;
223  else
224  {
225  plan_flags |= FFTW_ESTIMATE;
226  plan_destroys_in = false;
227  }
228  break;
229  }
230 
231  if (ioalign)
232  plan_flags &= ~FFTW_UNALIGNED;
233  else
234  plan_flags |= FFTW_UNALIGNED;
235 
236  if (*cur_plan_p)
237  fftw_destroy_plan (*cur_plan_p);
238 
239  if (plan_destroys_in)
240  {
241  // Create matrix with the same size and 16-byte alignment as input
242  OCTAVE_LOCAL_BUFFER (Complex, itmp, nn * howmany + 32);
243  itmp = reinterpret_cast<Complex *>
244  (((reinterpret_cast<ptrdiff_t> (itmp) + 15) & ~ 0xF) +
245  ((reinterpret_cast<ptrdiff_t> (in)) & 0xF));
246 
247  *cur_plan_p
248  = fftw_plan_many_dft (rank, tmp, howmany,
249  reinterpret_cast<fftw_complex *> (itmp),
250  nullptr, stride, dist,
251  reinterpret_cast<fftw_complex *> (out),
252  nullptr, stride, dist, dir, plan_flags);
253  }
254  else
255  {
256  *cur_plan_p
257  = fftw_plan_many_dft (rank, tmp, howmany,
258  reinterpret_cast<fftw_complex *> (const_cast<Complex *> (in)),
259  nullptr, stride, dist,
260  reinterpret_cast<fftw_complex *> (out),
261  nullptr, stride, dist, dir, plan_flags);
262  }
263 
264  if (*cur_plan_p == nullptr)
265  (*current_liboctave_error_handler) ("Error creating fftw plan");
266  }
267 
268  return *cur_plan_p;
269  }
270 
271  void *
272  fftw_planner::do_create_plan (const int rank, const dim_vector& dims,
273  octave_idx_type howmany,
274  octave_idx_type stride,
275  octave_idx_type dist,
276  const double *in, Complex *out)
277  {
278  fftw_plan *cur_plan_p = reinterpret_cast<fftw_plan *> (&rplan);
279  bool create_new_plan = false;
280  bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out);
281 
282  // Don't create a new plan if we have a non SIMD plan already but
283  // can do SIMD. This prevents endlessly recreating plans if we
284  // change the alignment.
285 
286  if (rplan == nullptr || rd != dist || rs != stride || rr != rank
287  || rh != howmany || ((ioalign != rsimd_align) ? ! ioalign : false))
288  create_new_plan = true;
289  else
290  {
291  // We still might not have the same shape of array.
292 
293  for (int i = 0; i < rank; i++)
294  if (dims(i) != rn(i))
295  {
296  create_new_plan = true;
297  break;
298  }
299  }
300 
301  if (create_new_plan)
302  {
303  rd = dist;
304  rs = stride;
305  rr = rank;
306  rh = howmany;
307  rsimd_align = ioalign;
308  rn = dims;
309 
310  // Note reversal of dimensions for column major storage in FFTW.
311  octave_idx_type nn = 1;
312  OCTAVE_LOCAL_BUFFER (int, tmp, rank);
313 
314  for (int i = 0, j = rank-1; i < rank; i++, j--)
315  {
316  tmp[i] = dims(j);
317  nn *= dims(j);
318  }
319 
320  int plan_flags = 0;
321  bool plan_destroys_in = true;
322 
323  switch (meth)
324  {
325  case UNKNOWN:
326  case ESTIMATE:
327  plan_flags |= FFTW_ESTIMATE;
328  plan_destroys_in = false;
329  break;
330  case MEASURE:
331  plan_flags |= FFTW_MEASURE;
332  break;
333  case PATIENT:
334  plan_flags |= FFTW_PATIENT;
335  break;
336  case EXHAUSTIVE:
337  plan_flags |= FFTW_EXHAUSTIVE;
338  break;
339  case HYBRID:
340  if (nn < 8193)
341  plan_flags |= FFTW_MEASURE;
342  else
343  {
344  plan_flags |= FFTW_ESTIMATE;
345  plan_destroys_in = false;
346  }
347  break;
348  }
349 
350  if (ioalign)
351  plan_flags &= ~FFTW_UNALIGNED;
352  else
353  plan_flags |= FFTW_UNALIGNED;
354 
355  if (*cur_plan_p)
356  fftw_destroy_plan (*cur_plan_p);
357 
358  if (plan_destroys_in)
359  {
360  // Create matrix with the same size and 16-byte alignment as input
361  OCTAVE_LOCAL_BUFFER (double, itmp, nn + 32);
362  itmp = reinterpret_cast<double *>
363  (((reinterpret_cast<ptrdiff_t> (itmp) + 15) & ~ 0xF) +
364  ((reinterpret_cast<ptrdiff_t> (in)) & 0xF));
365 
366  *cur_plan_p
367  = fftw_plan_many_dft_r2c (rank, tmp, howmany, itmp,
368  nullptr, stride, dist,
369  reinterpret_cast<fftw_complex *> (out),
370  nullptr, stride, dist, plan_flags);
371  }
372  else
373  {
374  *cur_plan_p
375  = fftw_plan_many_dft_r2c (rank, tmp, howmany,
376  (const_cast<double *> (in)),
377  nullptr, stride, dist,
378  reinterpret_cast<fftw_complex *> (out),
379  nullptr, stride, dist, plan_flags);
380  }
381 
382  if (*cur_plan_p == nullptr)
383  (*current_liboctave_error_handler) ("Error creating fftw plan");
384  }
385 
386  return *cur_plan_p;
387  }
388 
389  fftw_planner::FftwMethod
390  fftw_planner::do_method (void)
391  {
392  return meth;
393  }
394 
395  fftw_planner::FftwMethod
396  fftw_planner::do_method (FftwMethod _meth)
397  {
398  FftwMethod ret = meth;
399  if (_meth == ESTIMATE || _meth == MEASURE
400  || _meth == PATIENT || _meth == EXHAUSTIVE
401  || _meth == HYBRID)
402  {
403  if (meth != _meth)
404  {
405  meth = _meth;
406  if (rplan)
407  fftw_destroy_plan (reinterpret_cast<fftw_plan> (rplan));
408  if (plan[0])
409  fftw_destroy_plan (reinterpret_cast<fftw_plan> (plan[0]));
410  if (plan[1])
411  fftw_destroy_plan (reinterpret_cast<fftw_plan> (plan[1]));
412  rplan = plan[0] = plan[1] = nullptr;
413  }
414  }
415  else
416  ret = UNKNOWN;
417  return ret;
418  }
419 
420  float_fftw_planner *float_fftw_planner::instance = nullptr;
421 
422  float_fftw_planner::float_fftw_planner (void)
423  : meth (ESTIMATE), rplan (nullptr), rd (0), rs (0), rr (0), rh (0), rn (),
424  rsimd_align (false), nthreads (1)
425  {
426  plan[0] = plan[1] = nullptr;
427  d[0] = d[1] = s[0] = s[1] = r[0] = r[1] = h[0] = h[1] = 0;
428  simd_align[0] = simd_align[1] = false;
429  inplace[0] = inplace[1] = false;
430  n[0] = n[1] = dim_vector ();
431 
432 #if defined (HAVE_FFTW3F_THREADS)
433  int init_ret = fftwf_init_threads ();
434  if (! init_ret)
435  (*current_liboctave_error_handler) ("Error initializing FFTW3F threads");
436 
437  // Use number of processors available to the current process
438  // This can be later changed with fftw ("threads", nthreads).
439  nthreads = octave_num_processors_wrapper (OCTAVE_NPROC_CURRENT_OVERRIDABLE);
440  fftwf_plan_with_nthreads (nthreads);
441 #endif
442 
443  // If we have a system wide wisdom file, import it.
444  fftwf_import_system_wisdom ();
445  }
446 
447  float_fftw_planner::~float_fftw_planner (void)
448  {
449  fftwf_plan *plan_p;
450 
451  plan_p = reinterpret_cast<fftwf_plan *> (&rplan);
452  if (*plan_p)
453  fftwf_destroy_plan (*plan_p);
454 
455  plan_p = reinterpret_cast<fftwf_plan *> (&plan[0]);
456  if (*plan_p)
457  fftwf_destroy_plan (*plan_p);
458 
459  plan_p = reinterpret_cast<fftwf_plan *> (&plan[1]);
460  if (*plan_p)
461  fftwf_destroy_plan (*plan_p);
462  }
463 
464  bool
465  float_fftw_planner::instance_ok (void)
466  {
467  bool retval = true;
468 
469  if (! instance)
470  {
471  instance = new float_fftw_planner ();
472  singleton_cleanup_list::add (cleanup_instance);
473  }
474 
475  return retval;
476  }
477 
478  void
479  float_fftw_planner::threads (int nt)
480  {
481 #if defined (HAVE_FFTW3F_THREADS)
482  if (instance_ok () && nt != threads ())
483  {
484  instance->nthreads = nt;
485  fftwf_plan_with_nthreads (nt);
486  // Clear the current plans.
487  instance->rplan = instance->plan[0] = instance->plan[1] = nullptr;
488  }
489 #else
490  octave_unused_parameter (nt);
491 
492  (*current_liboctave_warning_handler)
493  ("unable to change number of threads without FFTW thread support");
494 #endif
495  }
496 
497  void *
498  float_fftw_planner::do_create_plan (int dir, const int rank,
499  const dim_vector& dims,
500  octave_idx_type howmany,
501  octave_idx_type stride,
502  octave_idx_type dist,
503  const FloatComplex *in,
504  FloatComplex *out)
505  {
506  int which = (dir == FFTW_FORWARD) ? 0 : 1;
507  fftwf_plan *cur_plan_p = reinterpret_cast<fftwf_plan *> (&plan[which]);
508  bool create_new_plan = false;
509  bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out);
510  bool ioinplace = (in == out);
511 
512  // Don't create a new plan if we have a non SIMD plan already but
513  // can do SIMD. This prevents endlessly recreating plans if we
514  // change the alignment.
515 
516  if (plan[which] == nullptr || d[which] != dist || s[which] != stride
517  || r[which] != rank || h[which] != howmany
518  || ioinplace != inplace[which]
519  || ((ioalign != simd_align[which]) ? ! ioalign : false))
520  create_new_plan = true;
521  else
522  {
523  // We still might not have the same shape of array.
524 
525  for (int i = 0; i < rank; i++)
526  if (dims(i) != n[which](i))
527  {
528  create_new_plan = true;
529  break;
530  }
531  }
532 
533  if (create_new_plan)
534  {
535  d[which] = dist;
536  s[which] = stride;
537  r[which] = rank;
538  h[which] = howmany;
539  simd_align[which] = ioalign;
540  inplace[which] = ioinplace;
541  n[which] = dims;
542 
543  // Note reversal of dimensions for column major storage in FFTW.
544  octave_idx_type nn = 1;
545  OCTAVE_LOCAL_BUFFER (int, tmp, rank);
546 
547  for (int i = 0, j = rank-1; i < rank; i++, j--)
548  {
549  tmp[i] = dims(j);
550  nn *= dims(j);
551  }
552 
553  int plan_flags = 0;
554  bool plan_destroys_in = true;
555 
556  switch (meth)
557  {
558  case UNKNOWN:
559  case ESTIMATE:
560  plan_flags |= FFTW_ESTIMATE;
561  plan_destroys_in = false;
562  break;
563  case MEASURE:
564  plan_flags |= FFTW_MEASURE;
565  break;
566  case PATIENT:
567  plan_flags |= FFTW_PATIENT;
568  break;
569  case EXHAUSTIVE:
570  plan_flags |= FFTW_EXHAUSTIVE;
571  break;
572  case HYBRID:
573  if (nn < 8193)
574  plan_flags |= FFTW_MEASURE;
575  else
576  {
577  plan_flags |= FFTW_ESTIMATE;
578  plan_destroys_in = false;
579  }
580  break;
581  }
582 
583  if (ioalign)
584  plan_flags &= ~FFTW_UNALIGNED;
585  else
586  plan_flags |= FFTW_UNALIGNED;
587 
588  if (*cur_plan_p)
589  fftwf_destroy_plan (*cur_plan_p);
590 
591  if (plan_destroys_in)
592  {
593  // Create matrix with the same size and 16-byte alignment as input
594  OCTAVE_LOCAL_BUFFER (FloatComplex, itmp, nn * howmany + 32);
595  itmp = reinterpret_cast<FloatComplex *>
596  (((reinterpret_cast<ptrdiff_t> (itmp) + 15) & ~ 0xF) +
597  ((reinterpret_cast<ptrdiff_t> (in)) & 0xF));
598 
599  *cur_plan_p
600  = fftwf_plan_many_dft (rank, tmp, howmany,
601  reinterpret_cast<fftwf_complex *> (itmp),
602  nullptr, stride, dist,
603  reinterpret_cast<fftwf_complex *> (out),
604  nullptr, stride, dist, dir, plan_flags);
605  }
606  else
607  {
608  *cur_plan_p
609  = fftwf_plan_many_dft (rank, tmp, howmany,
610  reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *> (in)),
611  nullptr, stride, dist,
612  reinterpret_cast<fftwf_complex *> (out),
613  nullptr, stride, dist, dir, plan_flags);
614  }
615 
616  if (*cur_plan_p == nullptr)
617  (*current_liboctave_error_handler) ("Error creating fftw plan");
618  }
619 
620  return *cur_plan_p;
621  }
622 
623  void *
624  float_fftw_planner::do_create_plan (const int rank, const dim_vector& dims,
625  octave_idx_type howmany,
626  octave_idx_type stride,
627  octave_idx_type dist,
628  const float *in, FloatComplex *out)
629  {
630  fftwf_plan *cur_plan_p = reinterpret_cast<fftwf_plan *> (&rplan);
631  bool create_new_plan = false;
632  bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out);
633 
634  // Don't create a new plan if we have a non SIMD plan already but
635  // can do SIMD. This prevents endlessly recreating plans if we
636  // change the alignment.
637 
638  if (rplan == nullptr || rd != dist || rs != stride || rr != rank
639  || rh != howmany || ((ioalign != rsimd_align) ? ! ioalign : false))
640  create_new_plan = true;
641  else
642  {
643  // We still might not have the same shape of array.
644 
645  for (int i = 0; i < rank; i++)
646  if (dims(i) != rn(i))
647  {
648  create_new_plan = true;
649  break;
650  }
651  }
652 
653  if (create_new_plan)
654  {
655  rd = dist;
656  rs = stride;
657  rr = rank;
658  rh = howmany;
659  rsimd_align = ioalign;
660  rn = dims;
661 
662  // Note reversal of dimensions for column major storage in FFTW.
663  octave_idx_type nn = 1;
664  OCTAVE_LOCAL_BUFFER (int, tmp, rank);
665 
666  for (int i = 0, j = rank-1; i < rank; i++, j--)
667  {
668  tmp[i] = dims(j);
669  nn *= dims(j);
670  }
671 
672  int plan_flags = 0;
673  bool plan_destroys_in = true;
674 
675  switch (meth)
676  {
677  case UNKNOWN:
678  case ESTIMATE:
679  plan_flags |= FFTW_ESTIMATE;
680  plan_destroys_in = false;
681  break;
682  case MEASURE:
683  plan_flags |= FFTW_MEASURE;
684  break;
685  case PATIENT:
686  plan_flags |= FFTW_PATIENT;
687  break;
688  case EXHAUSTIVE:
689  plan_flags |= FFTW_EXHAUSTIVE;
690  break;
691  case HYBRID:
692  if (nn < 8193)
693  plan_flags |= FFTW_MEASURE;
694  else
695  {
696  plan_flags |= FFTW_ESTIMATE;
697  plan_destroys_in = false;
698  }
699  break;
700  }
701 
702  if (ioalign)
703  plan_flags &= ~FFTW_UNALIGNED;
704  else
705  plan_flags |= FFTW_UNALIGNED;
706 
707  if (*cur_plan_p)
708  fftwf_destroy_plan (*cur_plan_p);
709 
710  if (plan_destroys_in)
711  {
712  // Create matrix with the same size and 16-byte alignment as input
713  OCTAVE_LOCAL_BUFFER (float, itmp, nn + 32);
714  itmp = reinterpret_cast<float *>
715  (((reinterpret_cast<ptrdiff_t> (itmp) + 15) & ~ 0xF) +
716  ((reinterpret_cast<ptrdiff_t> (in)) & 0xF));
717 
718  *cur_plan_p
719  = fftwf_plan_many_dft_r2c (rank, tmp, howmany, itmp,
720  nullptr, stride, dist,
721  reinterpret_cast<fftwf_complex *> (out),
722  nullptr, stride, dist, plan_flags);
723  }
724  else
725  {
726  *cur_plan_p
727  = fftwf_plan_many_dft_r2c (rank, tmp, howmany,
728  (const_cast<float *> (in)),
729  nullptr, stride, dist,
730  reinterpret_cast<fftwf_complex *> (out),
731  nullptr, stride, dist, plan_flags);
732  }
733 
734  if (*cur_plan_p == nullptr)
735  (*current_liboctave_error_handler) ("Error creating fftw plan");
736  }
737 
738  return *cur_plan_p;
739  }
740 
741  float_fftw_planner::FftwMethod
742  float_fftw_planner::do_method (void)
743  {
744  return meth;
745  }
746 
747  float_fftw_planner::FftwMethod
748  float_fftw_planner::do_method (FftwMethod _meth)
749  {
750  FftwMethod ret = meth;
751  if (_meth == ESTIMATE || _meth == MEASURE
752  || _meth == PATIENT || _meth == EXHAUSTIVE
753  || _meth == HYBRID)
754  {
755  if (meth != _meth)
756  {
757  meth = _meth;
758  if (rplan)
759  fftwf_destroy_plan (reinterpret_cast<fftwf_plan> (rplan));
760  if (plan[0])
761  fftwf_destroy_plan (reinterpret_cast<fftwf_plan> (plan[0]));
762  if (plan[1])
763  fftwf_destroy_plan (reinterpret_cast<fftwf_plan> (plan[1]));
764  rplan = plan[0] = plan[1] = nullptr;
765  }
766  }
767  else
768  ret = UNKNOWN;
769  return ret;
770  }
771 
772  template <typename T>
773  static inline void
774  convert_packcomplex_1d (T *out, size_t nr, size_t nc,
775  octave_idx_type stride, octave_idx_type dist)
776  {
777  octave_quit ();
778 
779  // Fill in the missing data.
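 // (FFTW's r2c transform computes only elements 0 .. nc/2 of each row; the
 // remaining elements follow from conjugate symmetry, X[k] = conj (X[nc-k]).)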
780 
781  for (size_t i = 0; i < nr; i++)
782  for (size_t j = nc/2+1; j < nc; j++)
783  out[j*stride + i*dist] = conj (out[(nc - j)*stride + i*dist]);
784 
785  octave_quit ();
786  }
787 
788  template <typename T>
789  static inline void
790  convert_packcomplex_Nd (T *out, const dim_vector& dv)
791  {
792  size_t nc = dv(0);
793  size_t nr = dv(1);
794  size_t np = (dv.ndims () > 2 ? dv.numel () / nc / nr : 1);
795  size_t nrp = nr * np;
796  T *ptr1, *ptr2;
797 
798  octave_quit ();
799 
800  // Create space for the missing elements.
801 
802  for (size_t i = 0; i < nrp; i++)
803  {
804  ptr1 = out + i * (nc/2 + 1) + nrp*((nc-1)/2);
805  ptr2 = out + i * nc;
806  for (size_t j = 0; j < nc/2+1; j++)
807  *ptr2++ = *ptr1++;
808  }
809 
810  octave_quit ();
811 
812  // Fill in the missing data for the rank = 2 case directly for speed.
813 
814  for (size_t i = 0; i < np; i++)
815  {
816  for (size_t j = 1; j < nr; j++)
817  for (size_t k = nc/2+1; k < nc; k++)
818  out[k + (j + i*nr)*nc] = conj (out[nc - k + ((i+1)*nr - j)*nc]);
819 
820  for (size_t j = nc/2+1; j < nc; j++)
821  out[j + i*nr*nc] = conj (out[(i*nr+1)*nc - j]);
822  }
823 
824  octave_quit ();
825 
826  // Now do the permutations needed for rank > 2 cases.
827 
828  size_t jstart = dv(0) * dv(1);
829  size_t kstep = dv(0);
830  size_t nel = dv.numel ();
831 
832  for (int inner = 2; inner < dv.ndims (); inner++)
833  {
834  size_t jmax = jstart * dv(inner);
835  for (size_t i = 0; i < nel; i+=jmax)
836  for (size_t j = jstart, jj = jmax-jstart; j < jj;
837  j+=jstart, jj-=jstart)
838  for (size_t k = 0; k < jstart; k+= kstep)
839  for (size_t l = nc/2+1; l < nc; l++)
840  {
841  T tmp = out[i + j + k + l];
842  out[i + j + k + l] = out[i + jj + k + l];
843  out[i + jj + k + l] = tmp;
844  }
845  jstart = jmax;
846  }
847 
848  octave_quit ();
849  }
850 
851  int
852  fftw::fft (const double *in, Complex *out, size_t npts,
853  size_t nsamples, octave_idx_type stride, octave_idx_type dist)
854  {
855  dist = (dist < 0 ? npts : dist);
856 
857  dim_vector dv (npts, 1);
858  void *vplan = fftw_planner::create_plan (1, dv, nsamples,
859  stride, dist, in, out);
860  fftw_plan plan = reinterpret_cast<fftw_plan> (vplan);
861 
862  fftw_execute_dft_r2c (plan, (const_cast<double *>(in)),
863  reinterpret_cast<fftw_complex *> (out));
864 
865  // Need to create other half of the transform.
866 
867  convert_packcomplex_1d (out, nsamples, npts, stride, dist);
868 
869  return 0;
870  }
871 
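 // Illustrative use of the wrapper above (editorial addition, not part of
 // this file).  The buffers data/spec and the sizes N and M are hypothetical;
 // each of the M columns of an N-point real signal is transformed and the
 // full complex spectrum is returned column by column:
 //
 //   std::vector<double> data (N * M);   // real input, column-major
 //   std::vector<Complex> spec (N * M);  // receives the full spectrum
 //   octave::fftw::fft (data.data (), spec.data (), N, M);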
872  int
873  fftw::fft (const Complex *in, Complex *out, size_t npts,
874  size_t nsamples, octave_idx_type stride, octave_idx_type dist)
875  {
876  dist = (dist < 0 ? npts : dist);
877 
878  dim_vector dv (npts, 1);
879  void *vplan = fftw_planner::create_plan (FFTW_FORWARD, 1, dv,
880  nsamples, stride,
881  dist, in, out);
882  fftw_plan plan = reinterpret_cast<fftw_plan> (vplan);
883 
884  fftw_execute_dft (plan,
885  reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)),
886  reinterpret_cast<fftw_complex *> (out));
887 
888  return 0;
889  }
890 
891  int
892  fftw::ifft (const Complex *in, Complex *out, size_t npts,
893  size_t nsamples, octave_idx_type stride,
894  octave_idx_type dist)
895  {
896  dist = (dist < 0 ? npts : dist);
897 
898  dim_vector dv (npts, 1);
899  void *vplan = fftw_planner::create_plan (FFTW_BACKWARD, 1, dv, nsamples,
900  stride, dist, in, out);
901  fftw_plan plan = reinterpret_cast<fftw_plan> (vplan);
902 
903  fftw_execute_dft (plan,
904  reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)),
905  reinterpret_cast<fftw_complex *> (out));
906 
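 // FFTW computes unnormalized transforms, so scale the inverse by 1/npts.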
907  const Complex scale = npts;
908  for (size_t j = 0; j < nsamples; j++)
909  for (size_t i = 0; i < npts; i++)
910  out[i*stride + j*dist] /= scale;
911 
912  return 0;
913  }
914 
915  int
916  fftw::fftNd (const double *in, Complex *out, const int rank,
917  const dim_vector& dv)
918  {
919  octave_idx_type dist = 1;
920  for (int i = 0; i < rank; i++)
921  dist *= dv(i);
922 
923  // Fool with the position of the start of the output matrix, so that
924  // creating other half of the matrix won't cause cache problems.
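 // (The r2c result needs only dv(0)/2 + 1 of every dv(0) output elements, so
 // it is written starting at out + offset; convert_packcomplex_Nd then
 // expands it in place toward the start of the buffer.)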
925 
926  octave_idx_type offset = (dv.numel () / dv(0)) * ((dv(0) - 1) / 2);
927 
928  void *vplan = fftw_planner::create_plan (rank, dv, 1, 1, dist,
929  in, out + offset);
930  fftw_plan plan = reinterpret_cast<fftw_plan> (vplan);
931 
932  fftw_execute_dft_r2c (plan, (const_cast<double *>(in)),
933  reinterpret_cast<fftw_complex *> (out + offset));
934 
935  // Need to create other half of the transform.
936 
937  convert_packcomplex_Nd (out, dv);
938 
939  return 0;
940  }
941 
942  int
943  fftw::fftNd (const Complex *in, Complex *out, const int rank,
944  const dim_vector& dv)
945  {
946  octave_idx_type dist = 1;
947  for (int i = 0; i < rank; i++)
948  dist *= dv(i);
949 
950  void *vplan = fftw_planner::create_plan (FFTW_FORWARD, rank, dv,
951  1, 1, dist, in, out);
952  fftw_plan plan = reinterpret_cast<fftw_plan> (vplan);
953 
954  fftw_execute_dft (plan,
955  reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)),
956  reinterpret_cast<fftw_complex *> (out));
957 
958  return 0;
959  }
960 
961  int
962  fftw::ifftNd (const Complex *in, Complex *out, const int rank,
963  const dim_vector& dv)
964  {
965  octave_idx_type dist = 1;
966  for (int i = 0; i < rank; i++)
967  dist *= dv(i);
968 
969  void *vplan = fftw_planner::create_plan (FFTW_BACKWARD, rank, dv,
970  1, 1, dist, in, out);
971  fftw_plan plan = reinterpret_cast<fftw_plan> (vplan);
972 
973  fftw_execute_dft (plan,
974  reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)),
975  reinterpret_cast<fftw_complex *> (out));
976 
977  const size_t npts = dv.numel ();
978  const Complex scale = npts;
979  for (size_t i = 0; i < npts; i++)
980  out[i] /= scale;
981 
982  return 0;
983  }
984 
985  int
986  fftw::fft (const float *in, FloatComplex *out, size_t npts,
987  size_t nsamples, octave_idx_type stride, octave_idx_type dist)
988  {
989  dist = (dist < 0 ? npts : dist);
990 
991  dim_vector dv (npts, 1);
992  void *vplan = float_fftw_planner::create_plan (1, dv, nsamples, stride,
993  dist, in, out);
994  fftwf_plan plan = reinterpret_cast<fftwf_plan> (vplan);
995 
996  fftwf_execute_dft_r2c (plan, (const_cast<float *>(in)),
997  reinterpret_cast<fftwf_complex *> (out));
998 
999  // Need to create other half of the transform.
1000 
1001  convert_packcomplex_1d (out, nsamples, npts, stride, dist);
1002 
1003  return 0;
1004  }
1005 
1006  int
1007  fftw::fft (const FloatComplex *in, FloatComplex *out, size_t npts,
1008  size_t nsamples, octave_idx_type stride, octave_idx_type dist)
1009  {
1010  dist = (dist < 0 ? npts : dist);
1011 
1012  dim_vector dv (npts, 1);
1013  void *vplan = float_fftw_planner::create_plan (FFTW_FORWARD, 1, dv,
1014  nsamples, stride, dist,
1015  in, out);
1016  fftwf_plan plan = reinterpret_cast<fftwf_plan> (vplan);
1017 
1018  fftwf_execute_dft (plan,
1019  reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)),
1020  reinterpret_cast<fftwf_complex *> (out));
1021 
1022  return 0;
1023  }
1024 
1025  int
1026  fftw::ifft (const FloatComplex *in, FloatComplex *out, size_t npts,
1027  size_t nsamples, octave_idx_type stride,
1028  octave_idx_type dist)
1029  {
1030  dist = (dist < 0 ? npts : dist);
1031 
1032  dim_vector dv (npts, 1);
1033  void *vplan = float_fftw_planner::create_plan (FFTW_BACKWARD, 1, dv,
1034  nsamples, stride, dist,
1035  in, out);
1036  fftwf_plan plan = reinterpret_cast<fftwf_plan> (vplan);
1037 
1038  fftwf_execute_dft (plan,
1039  reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)),
1040  reinterpret_cast<fftwf_complex *> (out));
1041 
1042  const FloatComplex scale = npts;
1043  for (size_t j = 0; j < nsamples; j++)
1044  for (size_t i = 0; i < npts; i++)
1045  out[i*stride + j*dist] /= scale;
1046 
1047  return 0;
1048  }
1049 
1050  int
1051  fftw::fftNd (const float *in, FloatComplex *out, const int rank,
1052  const dim_vector& dv)
1053  {
1054  octave_idx_type dist = 1;
1055  for (int i = 0; i < rank; i++)
1056  dist *= dv(i);
1057 
1058  // Fool with the position of the start of the output matrix, so that
1059  // creating other half of the matrix won't cause cache problems.
1060 
1061  octave_idx_type offset = (dv.numel () / dv(0)) * ((dv(0) - 1) / 2);
1062 
1063  void *vplan = float_fftw_planner::create_plan (rank, dv, 1, 1, dist,
1064  in, out + offset);
1065  fftwf_plan plan = reinterpret_cast<fftwf_plan> (vplan);
1066 
1067  fftwf_execute_dft_r2c (plan, (const_cast<float *>(in)),
1068  reinterpret_cast<fftwf_complex *> (out + offset));
1069 
1070  // Need to create other half of the transform.
1071 
1072  convert_packcomplex_Nd (out, dv);
1073 
1074  return 0;
1075  }
1076 
1077  int
1078  fftw::fftNd (const FloatComplex *in, FloatComplex *out, const int rank,
1079  const dim_vector& dv)
1080  {
1081  octave_idx_type dist = 1;
1082  for (int i = 0; i < rank; i++)
1083  dist *= dv(i);
1084 
1085  void *vplan = float_fftw_planner::create_plan (FFTW_FORWARD, rank, dv,
1086  1, 1, dist, in, out);
1087  fftwf_plan plan = reinterpret_cast<fftwf_plan> (vplan);
1088 
1089  fftwf_execute_dft (plan,
1090  reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)),
1091  reinterpret_cast<fftwf_complex *> (out));
1092 
1093  return 0;
1094  }
1095 
1096  int
1097  fftw::ifftNd (const FloatComplex *in, FloatComplex *out, const int rank,
1098  const dim_vector& dv)
1099  {
1100  octave_idx_type dist = 1;
1101  for (int i = 0; i < rank; i++)
1102  dist *= dv(i);
1103 
1104  void *vplan = float_fftw_planner::create_plan (FFTW_BACKWARD, rank, dv,
1105  1, 1, dist, in, out);
1106  fftwf_plan plan = reinterpret_cast<fftwf_plan> (vplan);
1107 
1108  fftwf_execute_dft (plan,
1109  reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)),
1110  reinterpret_cast<fftwf_complex *> (out));
1111 
1112  const size_t npts = dv.numel ();
1113  const FloatComplex scale = npts;
1114  for (size_t i = 0; i < npts; i++)
1115  out[i] /= scale;
1116 
1117  return 0;
1118  }
1119 
1120 #endif
1121 
1122  std::string
1123  fftw_version (void)
1124  {
1125 #if defined (HAVE_FFTW)
1126  return ::fftw_version;
1127 #else
1128  return "none";
1129 #endif
1130  }
1131 
1132  std::string
1133  fftwf_version (void)
1134  {
1135 #if defined (HAVE_FFTW)
1136  return ::fftwf_version;
1137 #else
1138  return "none";
1139 #endif
1140  }
1141 }