GNU Octave 7.1.0
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab
oct-fftw.cc
Go to the documentation of this file.
1////////////////////////////////////////////////////////////////////////
2//
3// Copyright (C) 2001-2022 The Octave Project Developers
4//
5// See the file COPYRIGHT.md in the top-level directory of this
6// distribution or <https://octave.org/copyright/>.
7//
8// This file is part of Octave.
9//
10// Octave is free software: you can redistribute it and/or modify it
11// under the terms of the GNU General Public License as published by
12// the Free Software Foundation, either version 3 of the License, or
13// (at your option) any later version.
14//
15// Octave is distributed in the hope that it will be useful, but
16// WITHOUT ANY WARRANTY; without even the implied warranty of
17// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18// GNU General Public License for more details.
19//
20// You should have received a copy of the GNU General Public License
21// along with Octave; see the file COPYING. If not, see
22// <https://www.gnu.org/licenses/>.
23//
24////////////////////////////////////////////////////////////////////////
25
26#if defined (HAVE_CONFIG_H)
27# include "config.h"
28#endif
29
30#if defined (HAVE_FFTW3_H)
31# include <fftw3.h>
32#endif
33
34#include "lo-error.h"
35#include "oct-fftw.h"
36#include "oct-locbuf.h"
37#include "quit.h"
38#include "singleton-cleanup.h"
39
40#if defined (HAVE_FFTW3_THREADS) || defined (HAVE_FFTW3F_THREADS)
41# include "nproc-wrapper.h"
42#endif
43
44namespace octave
45{
46#if defined (HAVE_FFTW)
47
  // Singleton instance; created lazily by instance_ok () and registered
  // with singleton_cleanup_list for destruction at interpreter shutdown.
  fftw_planner *fftw_planner::s_instance = nullptr;
49
50 // Helper class to create and cache FFTW plans for both 1D and
51 // 2D. This implementation defaults to using FFTW_ESTIMATE to create
52 // the plans, which in theory is suboptimal, but provides quite
53 // reasonable performance in practice.
54
55 // Also note that if FFTW_ESTIMATE is not used then the planner in FFTW3
56 // will destroy the input and output arrays. We must, therefore, create a
57 // temporary input array with the same size and 16-byte alignment as
58 // the original array when using a different planner strategy.
59 // Note that we also use any wisdom that is available, either in a
60 // FFTW3 system wide file or as supplied by the user.
61
62 // FIXME: if we can ensure 16 byte alignment in Array<T>
63 // (<T> *data) the FFTW3 can use SIMD instructions for further
64 // acceleration.
65
66 // Note that it is profitable to store the FFTW3 plans, for small FFTs.
67
69 : m_meth (ESTIMATE), m_rplan (nullptr), m_rd (0), m_rs (0), m_rr (0),
70 m_rh (0), m_rn (), m_rsimd_align (false), m_nthreads (1)
71 {
72 m_plan[0] = m_plan[1] = nullptr;
73 m_d[0] = m_d[1] = m_s[0] = m_s[1] = m_r[0] = m_r[1] = m_h[0] = m_h[1] = 0;
74 m_simd_align[0] = m_simd_align[1] = false;
75 m_inplace[0] = m_inplace[1] = false;
76 m_n[0] = m_n[1] = dim_vector ();
77
78#if defined (HAVE_FFTW3_THREADS)
79 int init_ret = fftw_init_threads ();
80 if (! init_ret)
81 (*current_liboctave_error_handler) ("Error initializing FFTW threads");
82
83 // Use number of processors available to the current process
84 // This can be later changed with fftw ("threads", nthreads).
87
88 fftw_plan_with_nthreads (m_nthreads);
89#endif
90
91 // If we have a system wide wisdom file, import it.
92 fftw_import_system_wisdom ();
93 }
94
96 {
97 fftw_plan *plan_p;
98
99 plan_p = reinterpret_cast<fftw_plan *> (&m_rplan);
100 if (*plan_p)
101 fftw_destroy_plan (*plan_p);
102
103 plan_p = reinterpret_cast<fftw_plan *> (&m_plan[0]);
104 if (*plan_p)
105 fftw_destroy_plan (*plan_p);
106
107 plan_p = reinterpret_cast<fftw_plan *> (&m_plan[1]);
108 if (*plan_p)
109 fftw_destroy_plan (*plan_p);
110 }
111
112 bool
114 {
115 bool retval = true;
116
117 if (! s_instance)
118 {
119 s_instance = new fftw_planner ();
121 }
122
123 return retval;
124 }
125
126 void
128 {
129#if defined (HAVE_FFTW3_THREADS)
130 if (instance_ok () && nt != threads ())
131 {
133 fftw_plan_with_nthreads (nt);
134 // Clear the current plans.
135 s_instance->m_rplan = nullptr;
136 s_instance->m_plan[0] = s_instance->m_plan[1] = nullptr;
137 }
138#else
139 octave_unused_parameter (nt);
140
141 (*current_liboctave_warning_handler)
142 ("unable to change number of threads without FFTW thread support");
143#endif
144 }
145
// True when pointer X is 16-byte aligned (low four address bits zero).
#define CHECK_SIMD_ALIGNMENT(x)                         \
  (((reinterpret_cast<std::ptrdiff_t> (x)) & 0xF) == 0)
148
149 void *
150 fftw_planner::do_create_plan (int dir, const int rank,
151 const dim_vector& dims,
152 octave_idx_type howmany,
153 octave_idx_type stride,
154 octave_idx_type dist,
155 const Complex *in, Complex *out)
156 {
157 int which = (dir == FFTW_FORWARD) ? 0 : 1;
158 fftw_plan *cur_plan_p = reinterpret_cast<fftw_plan *> (&m_plan[which]);
159 bool create_new_plan = false;
160 bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out);
161 bool ioinplace = (in == out);
162
163 // Don't create a new plan if we have a non SIMD plan already but
164 // can do SIMD. This prevents endlessly recreating plans if we
165 // change the alignment.
166
167 if (m_plan[which] == nullptr || m_d[which] != dist || m_s[which] != stride
168 || m_r[which] != rank || m_h[which] != howmany
169 || ioinplace != m_inplace[which]
170 || ((ioalign != m_simd_align[which]) ? ! ioalign : false))
171 create_new_plan = true;
172 else
173 {
174 // We still might not have the same shape of array.
175
176 for (int i = 0; i < rank; i++)
177 if (dims(i) != m_n[which](i))
178 {
179 create_new_plan = true;
180 break;
181 }
182 }
183
184 if (create_new_plan)
185 {
186 m_d[which] = dist;
187 m_s[which] = stride;
188 m_r[which] = rank;
189 m_h[which] = howmany;
190 m_simd_align[which] = ioalign;
191 m_inplace[which] = ioinplace;
192 m_n[which] = dims;
193
194 // Note reversal of dimensions for column major storage in FFTW.
196 OCTAVE_LOCAL_BUFFER (int, tmp, rank);
197
198 for (int i = 0, j = rank-1; i < rank; i++, j--)
199 {
200 tmp[i] = dims(j);
201 nn *= dims(j);
202 }
203
204 int plan_flags = 0;
205 bool plan_destroys_in = true;
206
207 switch (m_meth)
208 {
209 case UNKNOWN:
210 case ESTIMATE:
211 plan_flags |= FFTW_ESTIMATE;
212 plan_destroys_in = false;
213 break;
214 case MEASURE:
215 plan_flags |= FFTW_MEASURE;
216 break;
217 case PATIENT:
218 plan_flags |= FFTW_PATIENT;
219 break;
220 case EXHAUSTIVE:
221 plan_flags |= FFTW_EXHAUSTIVE;
222 break;
223 case HYBRID:
224 if (nn < 8193)
225 plan_flags |= FFTW_MEASURE;
226 else
227 {
228 plan_flags |= FFTW_ESTIMATE;
229 plan_destroys_in = false;
230 }
231 break;
232 }
233
234 if (ioalign)
235 plan_flags &= ~FFTW_UNALIGNED;
236 else
237 plan_flags |= FFTW_UNALIGNED;
238
239 if (*cur_plan_p)
240 fftw_destroy_plan (*cur_plan_p);
241
242 if (plan_destroys_in)
243 {
244 // Create matrix with the same size and 16-byte alignment as input
245 OCTAVE_LOCAL_BUFFER (Complex, itmp, nn * howmany + 32);
246 itmp = reinterpret_cast<Complex *>
247 (((reinterpret_cast<std::ptrdiff_t> (itmp) + 15) & ~ 0xF) +
248 ((reinterpret_cast<std::ptrdiff_t> (in)) & 0xF));
249
250 *cur_plan_p
251 = fftw_plan_many_dft (rank, tmp, howmany,
252 reinterpret_cast<fftw_complex *> (itmp),
253 nullptr, stride, dist,
254 reinterpret_cast<fftw_complex *> (out),
255 nullptr, stride, dist, dir, plan_flags);
256 }
257 else
258 {
259 *cur_plan_p
260 = fftw_plan_many_dft (rank, tmp, howmany,
261 reinterpret_cast<fftw_complex *> (const_cast<Complex *> (in)),
262 nullptr, stride, dist,
263 reinterpret_cast<fftw_complex *> (out),
264 nullptr, stride, dist, dir, plan_flags);
265 }
266
267 if (*cur_plan_p == nullptr)
268 (*current_liboctave_error_handler) ("Error creating FFTW plan");
269 }
270
271 return *cur_plan_p;
272 }
273
274 void *
275 fftw_planner::do_create_plan (const int rank, const dim_vector& dims,
276 octave_idx_type howmany,
277 octave_idx_type stride,
278 octave_idx_type dist,
279 const double *in, Complex *out)
280 {
281 fftw_plan *cur_plan_p = reinterpret_cast<fftw_plan *> (&m_rplan);
282 bool create_new_plan = false;
283 bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out);
284
285 // Don't create a new plan if we have a non SIMD plan already but
286 // can do SIMD. This prevents endlessly recreating plans if we
287 // change the alignment.
288
289 if (m_rplan == nullptr || m_rd != dist || m_rs != stride || m_rr != rank
290 || m_rh != howmany || ((ioalign != m_rsimd_align) ? ! ioalign : false))
291 create_new_plan = true;
292 else
293 {
294 // We still might not have the same shape of array.
295
296 for (int i = 0; i < rank; i++)
297 if (dims(i) != m_rn(i))
298 {
299 create_new_plan = true;
300 break;
301 }
302 }
303
304 if (create_new_plan)
305 {
306 m_rd = dist;
307 m_rs = stride;
308 m_rr = rank;
309 m_rh = howmany;
310 m_rsimd_align = ioalign;
311 m_rn = dims;
312
313 // Note reversal of dimensions for column major storage in FFTW.
315 OCTAVE_LOCAL_BUFFER (int, tmp, rank);
316
317 for (int i = 0, j = rank-1; i < rank; i++, j--)
318 {
319 tmp[i] = dims(j);
320 nn *= dims(j);
321 }
322
323 int plan_flags = 0;
324 bool plan_destroys_in = true;
325
326 switch (m_meth)
327 {
328 case UNKNOWN:
329 case ESTIMATE:
330 plan_flags |= FFTW_ESTIMATE;
331 plan_destroys_in = false;
332 break;
333 case MEASURE:
334 plan_flags |= FFTW_MEASURE;
335 break;
336 case PATIENT:
337 plan_flags |= FFTW_PATIENT;
338 break;
339 case EXHAUSTIVE:
340 plan_flags |= FFTW_EXHAUSTIVE;
341 break;
342 case HYBRID:
343 if (nn < 8193)
344 plan_flags |= FFTW_MEASURE;
345 else
346 {
347 plan_flags |= FFTW_ESTIMATE;
348 plan_destroys_in = false;
349 }
350 break;
351 }
352
353 if (ioalign)
354 plan_flags &= ~FFTW_UNALIGNED;
355 else
356 plan_flags |= FFTW_UNALIGNED;
357
358 if (*cur_plan_p)
359 fftw_destroy_plan (*cur_plan_p);
360
361 if (plan_destroys_in)
362 {
363 // Create matrix with the same size and 16-byte alignment as input
364 OCTAVE_LOCAL_BUFFER (double, itmp, nn + 32);
365 itmp = reinterpret_cast<double *>
366 (((reinterpret_cast<std::ptrdiff_t> (itmp) + 15) & ~ 0xF) +
367 ((reinterpret_cast<std::ptrdiff_t> (in)) & 0xF));
368
369 *cur_plan_p
370 = fftw_plan_many_dft_r2c (rank, tmp, howmany, itmp,
371 nullptr, stride, dist,
372 reinterpret_cast<fftw_complex *> (out),
373 nullptr, stride, dist, plan_flags);
374 }
375 else
376 {
377 *cur_plan_p
378 = fftw_plan_many_dft_r2c (rank, tmp, howmany,
379 (const_cast<double *> (in)),
380 nullptr, stride, dist,
381 reinterpret_cast<fftw_complex *> (out),
382 nullptr, stride, dist, plan_flags);
383 }
384
385 if (*cur_plan_p == nullptr)
386 (*current_liboctave_error_handler) ("Error creating FFTW plan");
387 }
388
389 return *cur_plan_p;
390 }
391
394 {
395 return m_meth;
396 }
397
400 {
401 FftwMethod ret = m_meth;
402 if (_meth == ESTIMATE || _meth == MEASURE
403 || _meth == PATIENT || _meth == EXHAUSTIVE
404 || _meth == HYBRID)
405 {
406 if (m_meth != _meth)
407 {
408 m_meth = _meth;
409 if (m_rplan)
410 fftw_destroy_plan (reinterpret_cast<fftw_plan> (m_rplan));
411 if (m_plan[0])
412 fftw_destroy_plan (reinterpret_cast<fftw_plan> (m_plan[0]));
413 if (m_plan[1])
414 fftw_destroy_plan (reinterpret_cast<fftw_plan> (m_plan[1]));
415 m_rplan = m_plan[0] = m_plan[1] = nullptr;
416 }
417 }
418 else
419 ret = UNKNOWN;
420 return ret;
421 }
422
424
426 : m_meth (ESTIMATE), m_rplan (nullptr), m_rd (0), m_rs (0), m_rr (0),
427 m_rh (0), m_rn (), m_rsimd_align (false), m_nthreads (1)
428 {
429 m_plan[0] = m_plan[1] = nullptr;
430 m_d[0] = m_d[1] = m_s[0] = m_s[1] = m_r[0] = m_r[1] = m_h[0] = m_h[1] = 0;
431 m_simd_align[0] = m_simd_align[1] = false;
432 m_inplace[0] = m_inplace[1] = false;
433 m_n[0] = m_n[1] = dim_vector ();
434
435#if defined (HAVE_FFTW3F_THREADS)
436 int init_ret = fftwf_init_threads ();
437 if (! init_ret)
438 (*current_liboctave_error_handler) ("Error initializing FFTW3F threads");
439
440 // Use number of processors available to the current process
441 // This can be later changed with fftw ("threads", nthreads).
442 m_nthreads =
444
445 fftwf_plan_with_nthreads (m_nthreads);
446#endif
447
448 // If we have a system wide wisdom file, import it.
449 fftwf_import_system_wisdom ();
450 }
451
453 {
454 fftwf_plan *plan_p;
455
456 plan_p = reinterpret_cast<fftwf_plan *> (&m_rplan);
457 if (*plan_p)
458 fftwf_destroy_plan (*plan_p);
459
460 plan_p = reinterpret_cast<fftwf_plan *> (&m_plan[0]);
461 if (*plan_p)
462 fftwf_destroy_plan (*plan_p);
463
464 plan_p = reinterpret_cast<fftwf_plan *> (&m_plan[1]);
465 if (*plan_p)
466 fftwf_destroy_plan (*plan_p);
467 }
468
469 bool
471 {
472 bool retval = true;
473
474 if (! s_instance)
475 {
478 }
479
480 return retval;
481 }
482
483 void
485 {
486#if defined (HAVE_FFTW3F_THREADS)
487 if (instance_ok () && nt != threads ())
488 {
490 fftwf_plan_with_nthreads (nt);
491 // Clear the current plans.
492 s_instance->m_rplan = nullptr;
493 s_instance->m_plan[0] = s_instance->m_plan[1] = nullptr;
494 }
495#else
496 octave_unused_parameter (nt);
497
498 (*current_liboctave_warning_handler)
499 ("unable to change number of threads without FFTW thread support");
500#endif
501 }
502
503 void *
504 float_fftw_planner::do_create_plan (int dir, const int rank,
505 const dim_vector& dims,
506 octave_idx_type howmany,
507 octave_idx_type stride,
508 octave_idx_type dist,
509 const FloatComplex *in,
510 FloatComplex *out)
511 {
512 int which = (dir == FFTW_FORWARD) ? 0 : 1;
513 fftwf_plan *cur_plan_p = reinterpret_cast<fftwf_plan *> (&m_plan[which]);
514 bool create_new_plan = false;
515 bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out);
516 bool ioinplace = (in == out);
517
518 // Don't create a new plan if we have a non SIMD plan already but
519 // can do SIMD. This prevents endlessly recreating plans if we
520 // change the alignment.
521
522 if (m_plan[which] == nullptr || m_d[which] != dist || m_s[which] != stride
523 || m_r[which] != rank || m_h[which] != howmany
524 || ioinplace != m_inplace[which]
525 || ((ioalign != m_simd_align[which]) ? ! ioalign : false))
526 create_new_plan = true;
527 else
528 {
529 // We still might not have the same shape of array.
530
531 for (int i = 0; i < rank; i++)
532 if (dims(i) != m_n[which](i))
533 {
534 create_new_plan = true;
535 break;
536 }
537 }
538
539 if (create_new_plan)
540 {
541 m_d[which] = dist;
542 m_s[which] = stride;
543 m_r[which] = rank;
544 m_h[which] = howmany;
545 m_simd_align[which] = ioalign;
546 m_inplace[which] = ioinplace;
547 m_n[which] = dims;
548
549 // Note reversal of dimensions for column major storage in FFTW.
551 OCTAVE_LOCAL_BUFFER (int, tmp, rank);
552
553 for (int i = 0, j = rank-1; i < rank; i++, j--)
554 {
555 tmp[i] = dims(j);
556 nn *= dims(j);
557 }
558
559 int plan_flags = 0;
560 bool plan_destroys_in = true;
561
562 switch (m_meth)
563 {
564 case UNKNOWN:
565 case ESTIMATE:
566 plan_flags |= FFTW_ESTIMATE;
567 plan_destroys_in = false;
568 break;
569 case MEASURE:
570 plan_flags |= FFTW_MEASURE;
571 break;
572 case PATIENT:
573 plan_flags |= FFTW_PATIENT;
574 break;
575 case EXHAUSTIVE:
576 plan_flags |= FFTW_EXHAUSTIVE;
577 break;
578 case HYBRID:
579 if (nn < 8193)
580 plan_flags |= FFTW_MEASURE;
581 else
582 {
583 plan_flags |= FFTW_ESTIMATE;
584 plan_destroys_in = false;
585 }
586 break;
587 }
588
589 if (ioalign)
590 plan_flags &= ~FFTW_UNALIGNED;
591 else
592 plan_flags |= FFTW_UNALIGNED;
593
594 if (*cur_plan_p)
595 fftwf_destroy_plan (*cur_plan_p);
596
597 if (plan_destroys_in)
598 {
599 // Create matrix with the same size and 16-byte alignment as input
600 OCTAVE_LOCAL_BUFFER (FloatComplex, itmp, nn * howmany + 32);
601 itmp = reinterpret_cast<FloatComplex *>
602 (((reinterpret_cast<std::ptrdiff_t> (itmp) + 15) & ~ 0xF) +
603 ((reinterpret_cast<std::ptrdiff_t> (in)) & 0xF));
604
605 *cur_plan_p
606 = fftwf_plan_many_dft (rank, tmp, howmany,
607 reinterpret_cast<fftwf_complex *> (itmp),
608 nullptr, stride, dist,
609 reinterpret_cast<fftwf_complex *> (out),
610 nullptr, stride, dist, dir, plan_flags);
611 }
612 else
613 {
614 *cur_plan_p
615 = fftwf_plan_many_dft (rank, tmp, howmany,
616 reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *> (in)),
617 nullptr, stride, dist,
618 reinterpret_cast<fftwf_complex *> (out),
619 nullptr, stride, dist, dir, plan_flags);
620 }
621
622 if (*cur_plan_p == nullptr)
623 (*current_liboctave_error_handler) ("Error creating FFTW plan");
624 }
625
626 return *cur_plan_p;
627 }
628
629 void *
630 float_fftw_planner::do_create_plan (const int rank, const dim_vector& dims,
631 octave_idx_type howmany,
632 octave_idx_type stride,
633 octave_idx_type dist,
634 const float *in, FloatComplex *out)
635 {
636 fftwf_plan *cur_plan_p = reinterpret_cast<fftwf_plan *> (&m_rplan);
637 bool create_new_plan = false;
638 bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out);
639
640 // Don't create a new plan if we have a non SIMD plan already but
641 // can do SIMD. This prevents endlessly recreating plans if we
642 // change the alignment.
643
644 if (m_rplan == nullptr || m_rd != dist || m_rs != stride || m_rr != rank
645 || m_rh != howmany || ((ioalign != m_rsimd_align) ? ! ioalign : false))
646 create_new_plan = true;
647 else
648 {
649 // We still might not have the same shape of array.
650
651 for (int i = 0; i < rank; i++)
652 if (dims(i) != m_rn(i))
653 {
654 create_new_plan = true;
655 break;
656 }
657 }
658
659 if (create_new_plan)
660 {
661 m_rd = dist;
662 m_rs = stride;
663 m_rr = rank;
664 m_rh = howmany;
665 m_rsimd_align = ioalign;
666 m_rn = dims;
667
668 // Note reversal of dimensions for column major storage in FFTW.
670 OCTAVE_LOCAL_BUFFER (int, tmp, rank);
671
672 for (int i = 0, j = rank-1; i < rank; i++, j--)
673 {
674 tmp[i] = dims(j);
675 nn *= dims(j);
676 }
677
678 int plan_flags = 0;
679 bool plan_destroys_in = true;
680
681 switch (m_meth)
682 {
683 case UNKNOWN:
684 case ESTIMATE:
685 plan_flags |= FFTW_ESTIMATE;
686 plan_destroys_in = false;
687 break;
688 case MEASURE:
689 plan_flags |= FFTW_MEASURE;
690 break;
691 case PATIENT:
692 plan_flags |= FFTW_PATIENT;
693 break;
694 case EXHAUSTIVE:
695 plan_flags |= FFTW_EXHAUSTIVE;
696 break;
697 case HYBRID:
698 if (nn < 8193)
699 plan_flags |= FFTW_MEASURE;
700 else
701 {
702 plan_flags |= FFTW_ESTIMATE;
703 plan_destroys_in = false;
704 }
705 break;
706 }
707
708 if (ioalign)
709 plan_flags &= ~FFTW_UNALIGNED;
710 else
711 plan_flags |= FFTW_UNALIGNED;
712
713 if (*cur_plan_p)
714 fftwf_destroy_plan (*cur_plan_p);
715
716 if (plan_destroys_in)
717 {
718 // Create matrix with the same size and 16-byte alignment as input
719 OCTAVE_LOCAL_BUFFER (float, itmp, nn + 32);
720 itmp = reinterpret_cast<float *>
721 (((reinterpret_cast<std::ptrdiff_t> (itmp) + 15) & ~ 0xF) +
722 ((reinterpret_cast<std::ptrdiff_t> (in)) & 0xF));
723
724 *cur_plan_p
725 = fftwf_plan_many_dft_r2c (rank, tmp, howmany, itmp,
726 nullptr, stride, dist,
727 reinterpret_cast<fftwf_complex *> (out),
728 nullptr, stride, dist, plan_flags);
729 }
730 else
731 {
732 *cur_plan_p
733 = fftwf_plan_many_dft_r2c (rank, tmp, howmany,
734 (const_cast<float *> (in)),
735 nullptr, stride, dist,
736 reinterpret_cast<fftwf_complex *> (out),
737 nullptr, stride, dist, plan_flags);
738 }
739
740 if (*cur_plan_p == nullptr)
741 (*current_liboctave_error_handler) ("Error creating FFTW plan");
742 }
743
744 return *cur_plan_p;
745 }
746
749 {
750 return m_meth;
751 }
752
755 {
756 FftwMethod ret = m_meth;
757 if (_meth == ESTIMATE || _meth == MEASURE
758 || _meth == PATIENT || _meth == EXHAUSTIVE
759 || _meth == HYBRID)
760 {
761 if (m_meth != _meth)
762 {
763 m_meth = _meth;
764 if (m_rplan)
765 fftwf_destroy_plan (reinterpret_cast<fftwf_plan> (m_rplan));
766 if (m_plan[0])
767 fftwf_destroy_plan (reinterpret_cast<fftwf_plan> (m_plan[0]));
768 if (m_plan[1])
769 fftwf_destroy_plan (reinterpret_cast<fftwf_plan> (m_plan[1]));
770 m_rplan = m_plan[0] = m_plan[1] = nullptr;
771 }
772 }
773 else
774 ret = UNKNOWN;
775 return ret;
776 }
777
778 template <typename T>
779 static inline void
780 convert_packcomplex_1d (T *out, std::size_t nr, std::size_t nc,
781 octave_idx_type stride, octave_idx_type dist)
782 {
783 octave_quit ();
784
785 // Fill in the missing data.
786
787 for (std::size_t i = 0; i < nr; i++)
788 for (std::size_t j = nc/2+1; j < nc; j++)
789 out[j*stride + i*dist] = conj (out[(nc - j)*stride + i*dist]);
790
791 octave_quit ();
792 }
793
794 template <typename T>
795 static inline void
797 {
798 std::size_t nc = dv(0);
799 std::size_t nr = dv(1);
800 std::size_t np = (dv.ndims () > 2 ? dv.numel () / nc / nr : 1);
801 std::size_t nrp = nr * np;
802 T *ptr1, *ptr2;
803
804 octave_quit ();
805
806 // Create space for the missing elements.
807
808 for (std::size_t i = 0; i < nrp; i++)
809 {
810 ptr1 = out + i * (nc/2 + 1) + nrp*((nc-1)/2);
811 ptr2 = out + i * nc;
812 for (std::size_t j = 0; j < nc/2+1; j++)
813 *ptr2++ = *ptr1++;
814 }
815
816 octave_quit ();
817
818 // Fill in the missing data for the rank = 2 case directly for speed.
819
820 for (std::size_t i = 0; i < np; i++)
821 {
822 for (std::size_t j = 1; j < nr; j++)
823 for (std::size_t k = nc/2+1; k < nc; k++)
824 out[k + (j + i*nr)*nc] = conj (out[nc - k + ((i+1)*nr - j)*nc]);
825
826 for (std::size_t j = nc/2+1; j < nc; j++)
827 out[j + i*nr*nc] = conj (out[(i*nr+1)*nc - j]);
828 }
829
830 octave_quit ();
831
832 // Now do the permutations needed for rank > 2 cases.
833
834 std::size_t jstart = dv(0) * dv(1);
835 std::size_t kstep = dv(0);
836 std::size_t nel = dv.numel ();
837
838 for (int inner = 2; inner < dv.ndims (); inner++)
839 {
840 std::size_t jmax = jstart * dv(inner);
841 for (std::size_t i = 0; i < nel; i+=jmax)
842 for (std::size_t j = jstart, jj = jmax-jstart; j < jj;
843 j+=jstart, jj-=jstart)
844 for (std::size_t k = 0; k < jstart; k+= kstep)
845 for (std::size_t l = nc/2+1; l < nc; l++)
846 {
847 T tmp = out[i+ j + k + l];
848 out[i + j + k + l] = out[i + jj + k + l];
849 out[i + jj + k + l] = tmp;
850 }
851 jstart = jmax;
852 }
853
854 octave_quit ();
855 }
856
857 int
858 fftw::fft (const double *in, Complex *out, std::size_t npts,
859 std::size_t nsamples, octave_idx_type stride,
860 octave_idx_type dist)
861 {
862 dist = (dist < 0 ? npts : dist);
863
864 dim_vector dv (npts, 1);
865 void *vplan = fftw_planner::create_plan (1, dv, nsamples,
866 stride, dist, in, out);
867 fftw_plan m_plan = reinterpret_cast<fftw_plan> (vplan);
868
869 fftw_execute_dft_r2c (m_plan, (const_cast<double *>(in)),
870 reinterpret_cast<fftw_complex *> (out));
871
872 // Need to create other half of the transform.
873
874 convert_packcomplex_1d (out, nsamples, npts, stride, dist);
875
876 return 0;
877 }
878
879 int
880 fftw::fft (const Complex *in, Complex *out, std::size_t npts,
881 std::size_t nsamples, octave_idx_type stride,
882 octave_idx_type dist)
883 {
884 dist = (dist < 0 ? npts : dist);
885
886 dim_vector dv (npts, 1);
887 void *vplan = fftw_planner::create_plan (FFTW_FORWARD, 1, dv,
888 nsamples, stride,
889 dist, in, out);
890 fftw_plan m_plan = reinterpret_cast<fftw_plan> (vplan);
891
892 fftw_execute_dft (m_plan,
893 reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)),
894 reinterpret_cast<fftw_complex *> (out));
895
896 return 0;
897 }
898
899 int
900 fftw::ifft (const Complex *in, Complex *out, std::size_t npts,
901 std::size_t nsamples, octave_idx_type stride,
902 octave_idx_type dist)
903 {
904 dist = (dist < 0 ? npts : dist);
905
906 dim_vector dv (npts, 1);
907 void *vplan = fftw_planner::create_plan (FFTW_BACKWARD, 1, dv, nsamples,
908 stride, dist, in, out);
909 fftw_plan m_plan = reinterpret_cast<fftw_plan> (vplan);
910
911 fftw_execute_dft (m_plan,
912 reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)),
913 reinterpret_cast<fftw_complex *> (out));
914
915 const Complex scale = npts;
916 for (std::size_t j = 0; j < nsamples; j++)
917 for (std::size_t i = 0; i < npts; i++)
918 out[i*stride + j*dist] /= scale;
919
920 return 0;
921 }
922
923 int
924 fftw::fftNd (const double *in, Complex *out, const int rank,
925 const dim_vector& dv)
926 {
927 octave_idx_type dist = 1;
928 for (int i = 0; i < rank; i++)
929 dist *= dv(i);
930
931 // Fool with the position of the start of the output matrix, so that
932 // creating other half of the matrix won't cause cache problems.
933
934 octave_idx_type offset = (dv.numel () / dv(0)) * ((dv(0) - 1) / 2);
935
936 void *vplan = fftw_planner::create_plan (rank, dv, 1, 1, dist,
937 in, out + offset);
938 fftw_plan m_plan = reinterpret_cast<fftw_plan> (vplan);
939
940 fftw_execute_dft_r2c (m_plan, (const_cast<double *>(in)),
941 reinterpret_cast<fftw_complex *> (out+ offset));
942
943 // Need to create other half of the transform.
944
945 convert_packcomplex_Nd (out, dv);
946
947 return 0;
948 }
949
950 int
951 fftw::fftNd (const Complex *in, Complex *out, const int rank,
952 const dim_vector& dv)
953 {
954 octave_idx_type dist = 1;
955 for (int i = 0; i < rank; i++)
956 dist *= dv(i);
957
958 void *vplan = fftw_planner::create_plan (FFTW_FORWARD, rank, dv,
959 1, 1, dist, in, out);
960 fftw_plan m_plan = reinterpret_cast<fftw_plan> (vplan);
961
962 fftw_execute_dft (m_plan,
963 reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)),
964 reinterpret_cast<fftw_complex *> (out));
965
966 return 0;
967 }
968
969 int
970 fftw::ifftNd (const Complex *in, Complex *out, const int rank,
971 const dim_vector& dv)
972 {
973 octave_idx_type dist = 1;
974 for (int i = 0; i < rank; i++)
975 dist *= dv(i);
976
977 void *vplan = fftw_planner::create_plan (FFTW_BACKWARD, rank, dv,
978 1, 1, dist, in, out);
979 fftw_plan m_plan = reinterpret_cast<fftw_plan> (vplan);
980
981 fftw_execute_dft (m_plan,
982 reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)),
983 reinterpret_cast<fftw_complex *> (out));
984
985 const std::size_t npts = dv.numel ();
986 const Complex scale = npts;
987 for (std::size_t i = 0; i < npts; i++)
988 out[i] /= scale;
989
990 return 0;
991 }
992
993 int
994 fftw::fft (const float *in, FloatComplex *out, std::size_t npts,
995 std::size_t nsamples, octave_idx_type stride,
996 octave_idx_type dist)
997 {
998 dist = (dist < 0 ? npts : dist);
999
1000 dim_vector dv (npts, 1);
1001 void *vplan = float_fftw_planner::create_plan (1, dv, nsamples, stride,
1002 dist, in, out);
1003 fftwf_plan m_plan = reinterpret_cast<fftwf_plan> (vplan);
1004
1005 fftwf_execute_dft_r2c (m_plan, (const_cast<float *>(in)),
1006 reinterpret_cast<fftwf_complex *> (out));
1007
1008 // Need to create other half of the transform.
1009
1010 convert_packcomplex_1d (out, nsamples, npts, stride, dist);
1011
1012 return 0;
1013 }
1014
1015 int
1016 fftw::fft (const FloatComplex *in, FloatComplex *out, std::size_t npts,
1017 std::size_t nsamples, octave_idx_type stride,
1018 octave_idx_type dist)
1019 {
1020 dist = (dist < 0 ? npts : dist);
1021
1022 dim_vector dv (npts, 1);
1023 void *vplan = float_fftw_planner::create_plan (FFTW_FORWARD, 1, dv,
1024 nsamples, stride, dist,
1025 in, out);
1026 fftwf_plan m_plan = reinterpret_cast<fftwf_plan> (vplan);
1027
1028 fftwf_execute_dft (m_plan,
1029 reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)),
1030 reinterpret_cast<fftwf_complex *> (out));
1031
1032 return 0;
1033 }
1034
1035 int
1036 fftw::ifft (const FloatComplex *in, FloatComplex *out, std::size_t npts,
1037 std::size_t nsamples, octave_idx_type stride,
1038 octave_idx_type dist)
1039 {
1040 dist = (dist < 0 ? npts : dist);
1041
1042 dim_vector dv (npts, 1);
1043 void *vplan = float_fftw_planner::create_plan (FFTW_BACKWARD, 1, dv,
1044 nsamples, stride, dist,
1045 in, out);
1046 fftwf_plan m_plan = reinterpret_cast<fftwf_plan> (vplan);
1047
1048 fftwf_execute_dft (m_plan,
1049 reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)),
1050 reinterpret_cast<fftwf_complex *> (out));
1051
1052 const FloatComplex scale = npts;
1053 for (std::size_t j = 0; j < nsamples; j++)
1054 for (std::size_t i = 0; i < npts; i++)
1055 out[i*stride + j*dist] /= scale;
1056
1057 return 0;
1058 }
1059
1060 int
1061 fftw::fftNd (const float *in, FloatComplex *out, const int rank,
1062 const dim_vector& dv)
1063 {
1064 octave_idx_type dist = 1;
1065 for (int i = 0; i < rank; i++)
1066 dist *= dv(i);
1067
1068 // Fool with the position of the start of the output matrix, so that
1069 // creating other half of the matrix won't cause cache problems.
1070
1071 octave_idx_type offset = (dv.numel () / dv(0)) * ((dv(0) - 1) / 2);
1072
1073 void *vplan = float_fftw_planner::create_plan (rank, dv, 1, 1, dist,
1074 in, out + offset);
1075 fftwf_plan m_plan = reinterpret_cast<fftwf_plan> (vplan);
1076
1077 fftwf_execute_dft_r2c (m_plan, (const_cast<float *>(in)),
1078 reinterpret_cast<fftwf_complex *> (out+ offset));
1079
1080 // Need to create other half of the transform.
1081
1082 convert_packcomplex_Nd (out, dv);
1083
1084 return 0;
1085 }
1086
1087 int
1088 fftw::fftNd (const FloatComplex *in, FloatComplex *out, const int rank,
1089 const dim_vector& dv)
1090 {
1091 octave_idx_type dist = 1;
1092 for (int i = 0; i < rank; i++)
1093 dist *= dv(i);
1094
1095 void *vplan = float_fftw_planner::create_plan (FFTW_FORWARD, rank, dv,
1096 1, 1, dist, in, out);
1097 fftwf_plan m_plan = reinterpret_cast<fftwf_plan> (vplan);
1098
1099 fftwf_execute_dft (m_plan,
1100 reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)),
1101 reinterpret_cast<fftwf_complex *> (out));
1102
1103 return 0;
1104 }
1105
1106 int
1107 fftw::ifftNd (const FloatComplex *in, FloatComplex *out, const int rank,
1108 const dim_vector& dv)
1109 {
1110 octave_idx_type dist = 1;
1111 for (int i = 0; i < rank; i++)
1112 dist *= dv(i);
1113
1114 void *vplan = float_fftw_planner::create_plan (FFTW_BACKWARD, rank, dv,
1115 1, 1, dist, in, out);
1116 fftwf_plan m_plan = reinterpret_cast<fftwf_plan> (vplan);
1117
1118 fftwf_execute_dft (m_plan,
1119 reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)),
1120 reinterpret_cast<fftwf_complex *> (out));
1121
1122 const std::size_t npts = dv.numel ();
1123 const FloatComplex scale = npts;
1124 for (std::size_t i = 0; i < npts; i++)
1125 out[i] /= scale;
1126
1127 return 0;
1128 }
1129
1130#endif
1131
1132 std::string
1134 {
1135#if defined (HAVE_FFTW)
1137#else
1138 return "none";
1139#endif
1140 }
1141
1142 std::string
1144 {
1145#if defined (HAVE_FFTW)
1147#else
1148 return "none";
1149#endif
1150 }
1151}
ComplexColumnVector conj(const ComplexColumnVector &a)
Definition: CColVector.cc:217
static F77_INT nn
Definition: DASPK.cc:66
Vector representing the dimensions (size) of an Array.
Definition: dim-vector.h:94
octave_idx_type numel(int n=0) const
Number of elements that a matrix with this dimensions would have.
Definition: dim-vector.h:335
octave_idx_type ndims(void) const
Number of dimensions.
Definition: dim-vector.h:257
octave_idx_type m_rs
Definition: oct-fftw.h:168
void * m_plan[2]
Definition: oct-fftw.h:141
octave_idx_type m_d[2]
Definition: oct-fftw.h:144
dim_vector m_n[2]
Definition: oct-fftw.h:156
FftwMethod m_meth
Definition: oct-fftw.h:136
static fftw_planner * s_instance
Definition: oct-fftw.h:116
FftwMethod do_method(void)
Definition: oct-fftw.cc:393
static int threads(void)
Definition: oct-fftw.h:109
static void * create_plan(int dir, const int rank, const dim_vector &dims, octave_idx_type howmany, octave_idx_type stride, octave_idx_type dist, const Complex *in, Complex *out)
Definition: oct-fftw.h:71
octave_idx_type m_rd
Definition: oct-fftw.h:165
octave_idx_type m_h[2]
Definition: oct-fftw.h:153
dim_vector m_rn
Definition: oct-fftw.h:177
octave_idx_type m_rh
Definition: oct-fftw.h:174
bool m_simd_align[2]
Definition: oct-fftw.h:158
static bool instance_ok(void)
Definition: oct-fftw.cc:113
static void cleanup_instance(void)
Definition: oct-fftw.h:118
void * do_create_plan(int dir, const int rank, const dim_vector &dims, octave_idx_type howmany, octave_idx_type stride, octave_idx_type dist, const Complex *in, Complex *out)
Definition: oct-fftw.cc:150
octave_idx_type m_s[2]
Definition: oct-fftw.h:147
static int ifft(const Complex *in, Complex *out, std::size_t npts, std::size_t nsamples=1, octave_idx_type stride=1, octave_idx_type dist=-1)
Definition: oct-fftw.cc:900
static int fftNd(const double *, Complex *, const int, const dim_vector &)
Definition: oct-fftw.cc:924
static int ifftNd(const Complex *, Complex *, const int, const dim_vector &)
Definition: oct-fftw.cc:970
static int fft(const double *in, Complex *out, std::size_t npts, std::size_t nsamples=1, octave_idx_type stride=1, octave_idx_type dist=-1)
Definition: oct-fftw.cc:858
static void cleanup_instance(void)
Definition: oct-fftw.h:265
octave_idx_type m_d[2]
Definition: oct-fftw.h:291
octave_idx_type m_rd
Definition: oct-fftw.h:312
void * do_create_plan(int dir, const int rank, const dim_vector &dims, octave_idx_type howmany, octave_idx_type stride, octave_idx_type dist, const FloatComplex *in, FloatComplex *out)
Definition: oct-fftw.cc:504
static bool instance_ok(void)
Definition: oct-fftw.cc:470
static void * create_plan(int dir, const int rank, const dim_vector &dims, octave_idx_type howmany, octave_idx_type stride, octave_idx_type dist, const FloatComplex *in, FloatComplex *out)
Definition: oct-fftw.h:218
octave_idx_type m_h[2]
Definition: oct-fftw.h:300
FftwMethod do_method(void)
Definition: oct-fftw.cc:748
static float_fftw_planner * s_instance
Definition: oct-fftw.h:263
octave_idx_type m_rs
Definition: oct-fftw.h:315
octave_idx_type m_s[2]
Definition: oct-fftw.h:294
static int threads(void)
Definition: oct-fftw.h:256
octave_idx_type m_rh
Definition: oct-fftw.h:321
static void add(fptr f)
void scale(Matrix &m, double x, double y, double z)
Definition: graphics.cc:5893
static void convert_packcomplex_Nd(T *out, const dim_vector &dv)
Definition: oct-fftw.cc:796
std::string fftw_version(void)
Definition: oct-fftw.cc:1133
std::string fftwf_version(void)
Definition: oct-fftw.cc:1143
static void convert_packcomplex_1d(T *out, std::size_t nr, std::size_t nc, octave_idx_type stride, octave_idx_type dist)
Definition: oct-fftw.cc:780
unsigned long int octave_num_processors_wrapper(enum octave_nproc_query octave_query)
Definition: nproc-wrapper.c:40
@ OCTAVE_NPROC_CURRENT_OVERRIDABLE
Definition: nproc-wrapper.h:37
std::complex< double > Complex
Definition: oct-cmplx.h:33
std::complex< float > FloatComplex
Definition: oct-cmplx.h:34
#define CHECK_SIMD_ALIGNMENT(x)
Definition: oct-fftw.cc:146
#define OCTAVE_LOCAL_BUFFER(T, buf, size)
Definition: oct-locbuf.h:44