GNU Octave: liboctave/mx-inlines.cc Source File

Go to the documentation of this file.
00001 /*
00002 
00003 Copyright (C) 1993-2012 John W. Eaton
00004 Copyright (C) 2009 Jaroslav Hajek
00005 Copyright (C) 2009 VZLU Prague
00006 
00007 This file is part of Octave.
00008 
00009 Octave is free software; you can redistribute it and/or modify it
00010 under the terms of the GNU General Public License as published by the
00011 Free Software Foundation; either version 3 of the License, or (at your
00012 option) any later version.
00013 
00014 Octave is distributed in the hope that it will be useful, but WITHOUT
00015 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
00016 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
00017 for more details.
00018 
00019 You should have received a copy of the GNU General Public License
00020 along with Octave; see the file COPYING.  If not, see
00021 <http://www.gnu.org/licenses/>.
00022 
00023 */
00024 
00025 #if !defined (octave_mx_inlines_h)
00026 #define octave_mx_inlines_h 1
00027 
#include <cmath>
#include <cstddef>
#include <cstring>
#include <memory>

#include "quit.h"

#include "oct-cmplx.h"
#include "oct-locbuf.h"
#include "oct-inttypes.h"
#include "Array.h"
#include "Array-util.h"

#include "bsxfun.h"
00041 
00042 // Provides some commonly repeated, basic loop templates.
00043 
// Store the value S in each of the first N elements of R.
template <class R, class S>
inline void
mx_inline_fill (size_t n, R *r, S s) throw ()
{
  for (size_t k = 0; k != n; ++k)
    r[k] = s;
}
00047 
// Define the unary loop F (n, r, x): r[i] = OP x[i] for every i < n.
#define DEFMXUNOP(F, OP) \
template <class R, class X> \
inline void \
F (size_t n, R *r, const X *x) throw () \
{ \
  for (size_t k = 0; k != n; ++k) \
    r[k] = OP x[k]; \
}

// Element-wise negation.
DEFMXUNOP (mx_inline_uminus, -)
00054 
// Define the in-place unary loop F (n, r): r[i] = OP r[i] for every i < n.
#define DEFMXUNOPEQ(F, OP) \
template <class R> \
inline void \
F (size_t n, R *r) throw () \
{ \
  for (size_t k = 0; k != n; ++k) \
    r[k] = OP r[k]; \
}

// In-place element-wise negation.
DEFMXUNOPEQ (mx_inline_uminus2, -)
00061 
// Define F (n, r, x), which stores in r[i] the result of comparing x[i]
// with a value-initialized X using OP.
#define DEFMXUNBOOLOP(F, OP) \
template <class X> \
inline void \
F (size_t n, bool *r, const X *x) throw () \
{ \
  const X zero = X (); \
  for (size_t k = 0; k != n; ++k) \
    r[k] = x[k] OP zero; \
}

// r[i] = (x[i] == 0) and r[i] = (x[i] != 0), respectively.
DEFMXUNBOOLOP (mx_inline_iszero, ==)
DEFMXUNBOOLOP (mx_inline_notzero, !=)
00069 
// Define three overloads of the binary loop F, each storing N results in R:
//   array  OP array  : r[i] = x[i] OP y[i]
//   array  OP scalar : r[i] = x[i] OP y
//   scalar OP array  : r[i] = x OP y[i]
#define DEFMXBINOP(F, OP) \
template <class R, class X, class Y> \
inline void \
F (size_t n, R *r, const X *x, const Y *y) throw () \
{ \
  for (size_t k = 0; k != n; ++k) \
    r[k] = x[k] OP y[k]; \
} \
template <class R, class X, class Y> \
inline void \
F (size_t n, R *r, const X *x, Y y) throw () \
{ \
  for (size_t k = 0; k != n; ++k) \
    r[k] = x[k] OP y; \
} \
template <class R, class X, class Y> \
inline void \
F (size_t n, R *r, X x, const Y *y) throw () \
{ \
  for (size_t k = 0; k != n; ++k) \
    r[k] = x OP y[k]; \
}

// Element-wise arithmetic.
DEFMXBINOP (mx_inline_add, +)
DEFMXBINOP (mx_inline_sub, -)
DEFMXBINOP (mx_inline_mul, *)
DEFMXBINOP (mx_inline_div, /)
00085 
// Define two overloads of the in-place loop F, which update the N elements
// of R with a compound assignment operator OP:
//   array  operand : r[i] OP x[i]
//   scalar operand : r[i] OP x
#define DEFMXBINOPEQ(F, OP) \
template <class R, class X> \
inline void \
F (size_t n, R *r, const X *x) throw () \
{ \
  for (size_t k = 0; k != n; ++k) \
    r[k] OP x[k]; \
} \
template <class R, class X> \
inline void \
F (size_t n, R *r, X x) throw () \
{ \
  for (size_t k = 0; k != n; ++k) \
    r[k] OP x; \
}

// In-place element-wise arithmetic.
DEFMXBINOPEQ (mx_inline_add2, +=)
DEFMXBINOPEQ (mx_inline_sub2, -=)
DEFMXBINOPEQ (mx_inline_mul2, *=)
DEFMXBINOPEQ (mx_inline_div2, /=)
00098 
// Define three overloads of the comparison loop F, each storing N bool
// results in R:
//   array  OP array  : r[i] = x[i] OP y[i]
//   array  OP scalar : r[i] = x[i] OP y
//   scalar OP array  : r[i] = x OP y[i]
#define DEFMXCMPOP(F, OP) \
template <class X, class Y> \
inline void \
F (size_t n, bool *r, const X *x, const Y *y) throw () \
{ \
  for (size_t k = 0; k != n; ++k) \
    r[k] = x[k] OP y[k]; \
} \
template <class X, class Y> \
inline void \
F (size_t n, bool *r, const X *x, Y y) throw () \
{ \
  for (size_t k = 0; k != n; ++k) \
    r[k] = x[k] OP y; \
} \
template <class X, class Y> \
inline void \
F (size_t n, bool *r, X x, const Y *y) throw () \
{ \
  for (size_t k = 0; k != n; ++k) \
    r[k] = x OP y[k]; \
}

// Element-wise relational operators.
DEFMXCMPOP (mx_inline_lt, <)
DEFMXCMPOP (mx_inline_le, <=)
DEFMXCMPOP (mx_inline_gt, >)
DEFMXCMPOP (mx_inline_ge, >=)
DEFMXCMPOP (mx_inline_eq, ==)
DEFMXCMPOP (mx_inline_ne, !=)
00116 
00117 // Convert to logical value, for logical op purposes.
00118 template <class T> inline bool logical_value (T x) { return x; }
00119 template <class T> inline bool logical_value (const std::complex<T>& x)
00120 { return x.real () != 0 || x.imag () != 0; }
00121 template <class T> inline bool logical_value (const octave_int<T>& x)
00122 { return x.value (); }
00123 
// Logical negation: r[i] = ! logical_value (x[i]) for every i < n.
template <class X>
void
mx_inline_not (size_t n, bool *r, const X* x) throw ()
{
  for (size_t k = 0; k != n; ++k)
    {
      const bool truth = logical_value (x[k]);
      r[k] = ! truth;
    }
}
00130 
// In-place logical negation of the N elements of R.
inline void
mx_inline_not2 (size_t n, bool *r) throw ()
{
  for (size_t k = 0; k != n; ++k)
    r[k] = ! r[k];
}
00135 
// Define three overloads of the logical loop F, each storing N bool results
// in R.  Every operand is first converted with logical_value; NOT1 and NOT2
// are either empty or "!" and are applied to the converted left and right
// operand, respectively, before the two are combined with OP.  A scalar
// operand is converted once, outside the loop.
#define DEFMXBOOLOP(F, NOT1, OP, NOT2) \
template <class X, class Y> \
inline void \
F (size_t n, bool *r, const X *x, const Y *y) throw () \
{ \
  for (size_t k = 0; k != n; ++k) \
    r[k] = (NOT1 logical_value (x[k])) OP (NOT2 logical_value (y[k])); \
} \
template <class X, class Y> \
inline void \
F (size_t n, bool *r, const X *x, Y y) throw () \
{ \
  const bool rhs = (NOT2 logical_value (y)); \
  for (size_t k = 0; k != n; ++k) \
    r[k] = (NOT1 logical_value (x[k])) OP rhs; \
} \
template <class X, class Y> \
inline void \
F (size_t n, bool *r, X x, const Y *y) throw () \
{ \
  const bool lhs = (NOT1 logical_value (x)); \
  for (size_t k = 0; k != n; ++k) \
    r[k] = lhs OP (NOT2 logical_value (y[k])); \
}

// x & y, x | y, !x & y, !x | y, x & !y and x | !y.
DEFMXBOOLOP (mx_inline_and, , &, )
DEFMXBOOLOP (mx_inline_or, , |, )
DEFMXBOOLOP (mx_inline_not_and, !, &, )
DEFMXBOOLOP (mx_inline_not_or, !, |, )
DEFMXBOOLOP (mx_inline_and_not, , &, !)
DEFMXBOOLOP (mx_inline_or_not, , |, !)
00164 
// Define two overloads of the in-place logical loop F, which update the N
// bool elements of R with the compound assignment operator OP:
//   array  operand : r[i] OP logical_value (x[i])
//   scalar operand : r[i] OP logical_value (x)
// The scalar operand is converted with logical_value as well (once, outside
// the loop).  Applying OP to the raw value would combine R bitwise with a
// non-bool operand, so that e.g. "true &= 2" would yield false.
#define DEFMXBOOLOPEQ(F, OP) \
template <class X> \
inline void F (size_t n, bool *r, const X *x) throw () \
{ \
  for (size_t i = 0; i < n; i++) \
    r[i] OP logical_value (x[i]); \
} \
template <class X> \
inline void F (size_t n, bool *r, X x) throw () \
{ \
  const bool xx = logical_value (x); \
  for (size_t i = 0; i < n; i++) \
    r[i] OP xx; \
}

// In-place logical AND and OR.
DEFMXBOOLOPEQ (mx_inline_and2, &=)
DEFMXBOOLOPEQ (mx_inline_or2, |=)
00178 
// Return true if xisnan holds for at least one of the N elements of X.
template <class T>
inline bool
mx_inline_any_nan (size_t n, const T* x)  throw ()
{
  size_t k = 0;
  while (k != n)
    {
      if (xisnan (x[k]))
        return true;
      ++k;
    }

  return false;
}
00191 
// Return true if xfinite holds for every one of the N elements of X.
template <class T>
inline bool
mx_inline_all_finite (size_t n, const T* x)  throw ()
{
  size_t k = 0;
  while (k != n)
    {
      if (! xfinite (x[k]))
        return false;
      ++k;
    }

  return true;
}
00204 
// Return true if at least one of the N elements of X is less than zero.
template <class T>
inline bool
mx_inline_any_negative (size_t n, const T* x) throw ()
{
  const T *const last = x + n;
  for (const T *p = x; p != last; ++p)
    if (*p < 0)
      return true;

  return false;
}
00217 
// Return true if at least one of the N elements of X is greater than zero.
template <class T>
inline bool
mx_inline_any_positive (size_t n, const T* x) throw ()
{
  const T *const last = x + n;
  for (const T *p = x; p != last; ++p)
    if (*p > 0)
      return true;

  return false;
}
00230 
// Return true if the imaginary part of every one of the N elements of X
// compares equal to zero.
template<class T>
inline bool
mx_inline_all_real (size_t n, const std::complex<T>* x) throw ()
{
  const std::complex<T> *const last = x + n;
  for (const std::complex<T> *p = x; p != last; ++p)
    if (p->imag () != 0)
      return false;

  return true;
}
00243 
// Define the mapper loop F (n, r, x): r[i] = FUN (x[i]) for every i < n,
// where the argument and the result have the same element type.
#define DEFMXMAPPER(F, FUN) \
template <class T> \
inline void \
F (size_t n, T *r, const T *x) throw () \
{ \
  for (size_t k = 0; k != n; ++k) \
    r[k] = FUN (x[k]); \
}

// Extract the real parts of the N complex elements of X into R.
template<class T>
inline void
mx_inline_real (size_t n, T *r, const std::complex<T>* x) throw ()
{
  for (size_t k = 0; k != n; ++k)
    r[k] = x[k].real ();
}

// Extract the imaginary parts of the N complex elements of X into R.
template<class T>
inline void
mx_inline_imag (size_t n, T *r, const std::complex<T>* x) throw ()
{
  for (size_t k = 0; k != n; ++k)
    r[k] = x[k].imag ();
}
00255 
// Pairwise minimums/maximums.
// Define three overloads of the two-argument mapper loop F, each storing N
// results in R, with all operands sharing the element type T:
//   array, array  : r[i] = FUN (x[i], y[i])
//   array, scalar : r[i] = FUN (x[i], y)
//   scalar, array : r[i] = FUN (x, y[i])
#define DEFMXMAPPER2(F, FUN) \
template <class T> \
inline void \
F (size_t n, T *r, const T *x, const T *y) throw () \
{ \
  for (size_t k = 0; k != n; ++k) \
    r[k] = FUN (x[k], y[k]); \
} \
template <class T> \
inline void \
F (size_t n, T *r, const T *x, T y) throw () \
{ \
  for (size_t k = 0; k != n; ++k) \
    r[k] = FUN (x[k], y); \
} \
template <class T> \
inline void \
F (size_t n, T *r, T x, const T *y) throw () \
{ \
  for (size_t k = 0; k != n; ++k) \
    r[k] = FUN (x, y[k]); \
}

DEFMXMAPPER2 (mx_inline_xmin, xmin)
DEFMXMAPPER2 (mx_inline_xmax, xmax)
00270 
00271 // Specialize array-scalar max/min
00272 #define DEFMINMAXSPEC(T, F, OP) \
00273 template <> \
00274 inline void F<T> (size_t n, T *r, const T *x, T y) throw () \
00275 { \
00276   if (xisnan (y)) \
00277     std::memcpy (r, x, n * sizeof (T)); \
00278   else \
00279     for (size_t i = 0; i < n; i++) r[i] = (x[i] OP y) ? x[i] : y; \
00280 } \
00281 template <> \
00282 inline void F<T> (size_t n, T *r, T x, const T *y) throw () \
00283 { \
00284   if (xisnan (x)) \
00285     std::memcpy (r, y, n * sizeof (T)); \
00286   else \
00287     for (size_t i = 0; i < n; i++) r[i] = (y[i] OP x) ? y[i] : x; \
00288 }
00289 
00290 DEFMINMAXSPEC (double, mx_inline_xmin, <=)
00291 DEFMINMAXSPEC (double, mx_inline_xmax, >=)
00292 DEFMINMAXSPEC (float, mx_inline_xmin, <=)
00293 DEFMINMAXSPEC (float, mx_inline_xmax, >=)
00294 
// Pairwise power.
// Define three overloads of the two-argument mapper loop F in which the
// result and both operands may have different element types:
//   array, array  : r[i] = FUN (x[i], y[i])
//   array, scalar : r[i] = FUN (x[i], y)
//   scalar, array : r[i] = FUN (x, y[i])
#define DEFMXMAPPER2X(F, FUN) \
template <class R, class X, class Y> \
inline void \
F (size_t n, R *r, const X *x, const Y *y) throw () \
{ \
  for (size_t k = 0; k != n; ++k) \
    r[k] = FUN (x[k], y[k]); \
} \
template <class R, class X, class Y> \
inline void \
F (size_t n, R *r, const X *x, Y y) throw () \
{ \
  for (size_t k = 0; k != n; ++k) \
    r[k] = FUN (x[k], y); \
} \
template <class R, class X, class Y> \
inline void \
F (size_t n, R *r, X x, const Y *y) throw () \
{ \
  for (size_t k = 0; k != n; ++k) \
    r[k] = FUN (x, y[k]); \
}

// Bring std::pow into scope and call pow unqualified, so that overload
// resolution picks whichever pow best matches the arguments provided.
using std::pow;
DEFMXMAPPER2X (mx_inline_pow, pow)
00311 
// Arbitrary function appliers.  The function is a template parameter (not a
// run-time argument) so that the call can be inlined.

// Apply FUN, which takes its argument by value, to each of the N elements
// of X and store the results in R.
template <class R, class X, R fun (X x)>
inline void
mx_inline_map (size_t n, R *r, const X *x) throw ()
{
  for (size_t k = 0; k != n; ++k)
    r[k] = fun (x[k]);
}

// Same as above for a FUN that takes its argument by const reference.
template <class R, class X, R fun (const X& x)>
inline void
mx_inline_map (size_t n, R *r, const X *x) throw ()
{
  for (size_t k = 0; k != n; ++k)
    r[k] = fun (x[k]);
}
00321 
00322 // Appliers. Since these call the operation just once, we pass it as
00323 // a pointer, to allow the compiler reduce number of instances.
00324 
00325 template <class R, class X>
00326 inline Array<R>
00327 do_mx_unary_op (const Array<X>& x,
00328                 void (*op) (size_t, R *, const X *) throw ())
00329 {
00330   Array<R> r (x.dims ());
00331   op (r.numel (), r.fortran_vec (), x.data ());
00332   return r;
00333 }
00334 
00335 // Shortcuts for applying mx_inline_map.
00336 
00337 template <class R, class X, R fun (X)>
00338 inline Array<R>
00339 do_mx_unary_map (const Array<X>& x)
00340 {
00341   return do_mx_unary_op<R, X> (x, mx_inline_map<R, X, fun>);
00342 }
00343 
00344 template <class R, class X, R fun (const X&)>
00345 inline Array<R>
00346 do_mx_unary_map (const Array<X>& x)
00347 {
00348   return do_mx_unary_op<R, X> (x, mx_inline_map<R, X, fun>);
00349 }
00350 
00351 template <class R>
00352 inline Array<R>&
00353 do_mx_inplace_op (Array<R>& r,
00354                   void (*op) (size_t, R *) throw ())
00355 {
00356   op (r.numel (), r.fortran_vec ());
00357   return r;
00358 }
00359 
00360 template <class R, class X, class Y>
00361 inline Array<R>
00362 do_mm_binary_op (const Array<X>& x, const Array<Y>& y,
00363                  void (*op) (size_t, R *, const X *, const Y *) throw (),
00364                  void (*op1) (size_t, R *, X, const Y *) throw (),
00365                  void (*op2) (size_t, R *, const X *, Y) throw (),
00366                  const char *opname)
00367 {
00368   dim_vector dx = x.dims (), dy = y.dims ();
00369   if (dx == dy)
00370     {
00371       Array<R> r (dx);
00372       op (r.length (), r.fortran_vec (), x.data (), y.data ());
00373       return r;
00374     }
00375   else if (is_valid_bsxfun (opname, dx, dy))
00376     {
00377       return do_bsxfun_op (x, y, op, op1, op2);
00378     }
00379   else
00380     {
00381       gripe_nonconformant (opname, dx, dy);
00382       return Array<R> ();
00383     }
00384 }
00385 
00386 template <class R, class X, class Y>
00387 inline Array<R>
00388 do_ms_binary_op (const Array<X>& x, const Y& y,
00389                  void (*op) (size_t, R *, const X *, Y) throw ())
00390 {
00391   Array<R> r (x.dims ());
00392   op (r.length (), r.fortran_vec (), x.data (), y);
00393   return r;
00394 }
00395 
00396 template <class R, class X, class Y>
00397 inline Array<R>
00398 do_sm_binary_op (const X& x, const Array<Y>& y,
00399                  void (*op) (size_t, R *, X, const Y *) throw ())
00400 {
00401   Array<R> r (y.dims ());
00402   op (r.length (), r.fortran_vec (), x, y.data ());
00403   return r;
00404 }
00405 
00406 template <class R, class X>
00407 inline Array<R>&
00408 do_mm_inplace_op (Array<R>& r, const Array<X>& x,
00409                   void (*op) (size_t, R *, const X *) throw (),
00410                   void (*op1) (size_t, R *, X) throw (),
00411                   const char *opname)
00412 {
00413   dim_vector dr = r.dims (), dx = x.dims ();
00414   if (dr == dx)
00415     {
00416       op (r.length (), r.fortran_vec (), x.data ());
00417     }
00418   else if (is_valid_inplace_bsxfun (opname, dr, dx))
00419     {
00420       do_inplace_bsxfun_op (r, x, op, op1);
00421     }
00422   else
00423     gripe_nonconformant (opname, dr, dx);
00424   return r;
00425 }
00426 
00427 template <class R, class X>
00428 inline Array<R>&
00429 do_ms_inplace_op (Array<R>& r, const X& x,
00430                   void (*op) (size_t, R *, X) throw ())
00431 {
00432   op (r.length (), r.fortran_vec (), x);
00433   return r;
00434 }
00435 
// Return true if x[i] and y[i] compare equal for every i < n, stopping at
// the first pair that differs.
template <class T1, class T2>
inline bool
mx_inline_equal (size_t n, const T1 *x, const T2 *y) throw ()
{
  size_t k = 0;
  while (k != n)
    {
      if (x[k] != y[k])
        return false;
      ++k;
    }

  return true;
}
00445 
00446 template <class T>
00447 inline bool
00448 do_mx_check (const Array<T>& a,
00449              bool (*op) (size_t, const T *) throw ())
00450 {
00451   return op (a.numel (), a.data ());
00452 }
00453 
// Squared magnitude of a complex number: re*re + im*im.
// NOTE: std::norm is not used because it typically does some heavyweight
// magic to avoid underflows, which is not needed here.
template <class T>
inline T
cabsq (const std::complex<T>& c)
{
  return c.real () * c.real () + c.imag () * c.imag ();
}
00459 
00460 // default. works for integers and bool.
00461 template <class T>
00462 inline bool xis_true (T x) { return x; }
00463 template <class T>
00464 inline bool xis_false (T x) { return ! x; }
00465 // for octave_ints
00466 template <class T>
00467 inline bool xis_true (const octave_int<T>& x) { return x.value (); }
00468 template <class T>
00469 inline bool xis_false (const octave_int<T>& x) { return ! x.value (); }
00470 // for reals, we want to ignore NaNs.
00471 inline bool xis_true (double x) { return ! xisnan (x) && x != 0.0; }
00472 inline bool xis_false (double x) { return x == 0.0; }
00473 inline bool xis_true (float x) { return ! xisnan (x) && x != 0.0f; }
00474 inline bool xis_false (float x) { return x == 0.0f; }
00475 // Ditto for complex.
00476 inline bool xis_true (const Complex& x) { return ! xisnan (x) && x != 0.0; }
00477 inline bool xis_false (const Complex& x) { return x == 0.0; }
00478 inline bool xis_true (const FloatComplex& x) { return ! xisnan (x) && x != 0.0f; }
00479 inline bool xis_false (const FloatComplex& x) { return x == 0.0f; }
00480 
00481 #define OP_RED_SUM(ac, el) ac += el
00482 #define OP_RED_PROD(ac, el) ac *= el
00483 #define OP_RED_SUMSQ(ac, el) ac += el*el
00484 #define OP_RED_SUMSQC(ac, el) ac += cabsq (el)
00485 
00486 inline void op_dble_sum(double& ac, float el)
00487 { ac += el; }
00488 inline void op_dble_sum(Complex& ac, const FloatComplex& el)
00489 { ac += el; } // FIXME: guaranteed?
00490 template <class T>
00491 inline void op_dble_sum(double& ac, const octave_int<T>& el)
00492 { ac += el.double_value (); }
00493 
00494 // The following two implement a simple short-circuiting.
00495 #define OP_RED_ANYC(ac, el) if (xis_true (el)) { ac = true; break; } else continue
00496 #define OP_RED_ALLC(ac, el) if (xis_false (el)) { ac = false; break; } else continue
00497 
00498 #define OP_RED_FCN(F, TSRC, TRES, OP, ZERO) \
00499 template <class T> \
00500 inline TRES \
00501 F (const TSRC* v, octave_idx_type n) \
00502 { \
00503   TRES ac = ZERO; \
00504   for (octave_idx_type i = 0; i < n; i++) \
00505     OP(ac, v[i]); \
00506   return ac; \
00507 }
00508 
00509 #define PROMOTE_DOUBLE(T) typename subst_template_param<std::complex, T, double>::type
00510 
00511 OP_RED_FCN (mx_inline_sum, T, T, OP_RED_SUM, 0)
00512 OP_RED_FCN (mx_inline_dsum, T, PROMOTE_DOUBLE(T), op_dble_sum, 0.0)
00513 OP_RED_FCN (mx_inline_count, bool, T, OP_RED_SUM, 0)
00514 OP_RED_FCN (mx_inline_prod, T, T, OP_RED_PROD, 1)
00515 OP_RED_FCN (mx_inline_sumsq, T, T, OP_RED_SUMSQ, 0)
00516 OP_RED_FCN (mx_inline_sumsq, std::complex<T>, T, OP_RED_SUMSQC, 0)
00517 OP_RED_FCN (mx_inline_any, T, bool, OP_RED_ANYC, false)
00518 OP_RED_FCN (mx_inline_all, T, bool, OP_RED_ALLC, true)
00519 
00520 
00521 #define OP_RED_FCN2(F, TSRC, TRES, OP, ZERO) \
00522 template <class T> \
00523 inline void \
00524 F (const TSRC* v, TRES *r, octave_idx_type m, octave_idx_type n) \
00525 { \
00526   for (octave_idx_type i = 0; i < m; i++) \
00527     r[i] = ZERO; \
00528   for (octave_idx_type j = 0; j < n; j++) \
00529     { \
00530       for (octave_idx_type i = 0; i < m; i++) \
00531         OP(r[i], v[i]); \
00532       v += m; \
00533     } \
00534 }
00535 
00536 OP_RED_FCN2 (mx_inline_sum, T, T, OP_RED_SUM, 0)
00537 OP_RED_FCN2 (mx_inline_dsum, T, PROMOTE_DOUBLE(T), op_dble_sum, 0.0)
00538 OP_RED_FCN2 (mx_inline_count, bool, T, OP_RED_SUM, 0)
00539 OP_RED_FCN2 (mx_inline_prod, T, T, OP_RED_PROD, 1)
00540 OP_RED_FCN2 (mx_inline_sumsq, T, T, OP_RED_SUMSQ, 0)
00541 OP_RED_FCN2 (mx_inline_sumsq, std::complex<T>, T, OP_RED_SUMSQC, 0)
00542 
00543 #define OP_RED_ANYR(ac, el) ac |= xis_true (el)
00544 #define OP_RED_ALLR(ac, el) ac &= xis_true (el)
00545 
00546 OP_RED_FCN2 (mx_inline_any_r, T, bool, OP_RED_ANYR, false)
00547 OP_RED_FCN2 (mx_inline_all_r, T, bool, OP_RED_ALLR, true)
00548 
00549 // Using the general code for any/all would sacrifice short-circuiting.
00550 // OTOH, going by rows would sacrifice cache-coherence. The following algorithm
00551 // will achieve both, at the cost of a temporary octave_idx_type array.
00552 
00553 #define OP_ROW_SHORT_CIRCUIT(F, PRED, ZERO) \
00554 template <class T> \
00555 inline void \
00556 F (const T* v, bool *r, octave_idx_type m, octave_idx_type n) \
00557 { \
00558   if (n <= 8) \
00559     return F ## _r (v, r, m, n); \
00560   \
00561   /* FIXME: it may be sub-optimal to allocate the buffer here. */ \
00562   OCTAVE_LOCAL_BUFFER (octave_idx_type, iact, m); \
00563   for (octave_idx_type i = 0; i < m; i++) iact[i] = i; \
00564   octave_idx_type nact = m; \
00565   for (octave_idx_type j = 0; j < n; j++) \
00566     { \
00567       octave_idx_type k = 0; \
00568       for (octave_idx_type i = 0; i < nact; i++) \
00569         { \
00570           octave_idx_type ia = iact[i]; \
00571           if (! PRED (v[ia])) \
00572             iact[k++] = ia; \
00573         } \
00574       nact = k; \
00575       v += m; \
00576     } \
00577   for (octave_idx_type i = 0; i < m; i++) r[i] = ! ZERO; \
00578   for (octave_idx_type i = 0; i < nact; i++) r[iact[i]] = ZERO; \
00579 }
00580 
00581 OP_ROW_SHORT_CIRCUIT (mx_inline_any, xis_true, false)
00582 OP_ROW_SHORT_CIRCUIT (mx_inline_all, xis_false, true)
00583 
00584 #define OP_RED_FCNN(F, TSRC, TRES) \
00585 template <class T> \
00586 inline void \
00587 F (const TSRC *v, TRES *r, octave_idx_type l, \
00588    octave_idx_type n, octave_idx_type u) \
00589 { \
00590   if (l == 1) \
00591     { \
00592       for (octave_idx_type i = 0; i < u; i++) \
00593         { \
00594           r[i] = F<T> (v, n); \
00595           v += n; \
00596         } \
00597     } \
00598   else \
00599     { \
00600       for (octave_idx_type i = 0; i < u; i++) \
00601         { \
00602           F (v, r, l, n); \
00603           v += l*n; \
00604           r += l; \
00605         } \
00606     } \
00607 }
00608 
00609 OP_RED_FCNN (mx_inline_sum, T, T)
00610 OP_RED_FCNN (mx_inline_dsum, T, PROMOTE_DOUBLE(T))
00611 OP_RED_FCNN (mx_inline_count, bool, T)
00612 OP_RED_FCNN (mx_inline_prod, T, T)
00613 OP_RED_FCNN (mx_inline_sumsq, T, T)
00614 OP_RED_FCNN (mx_inline_sumsq, std::complex<T>, T)
00615 OP_RED_FCNN (mx_inline_any, T, bool)
00616 OP_RED_FCNN (mx_inline_all, T, bool)
00617 
00618 #define OP_CUM_FCN(F, TSRC, TRES, OP) \
00619 template <class T> \
00620 inline void \
00621 F (const TSRC *v, TRES *r, octave_idx_type n) \
00622 { \
00623   if (n) \
00624     { \
00625       TRES t = r[0] = v[0]; \
00626       for (octave_idx_type i = 1; i < n; i++) \
00627         r[i] = t = t OP v[i]; \
00628     } \
00629 }
00630 
00631 OP_CUM_FCN (mx_inline_cumsum, T, T, +)
00632 OP_CUM_FCN (mx_inline_cumprod, T, T, *)
00633 OP_CUM_FCN (mx_inline_cumcount, bool, T, +)
00634 
00635 #define OP_CUM_FCN2(F, TSRC, TRES, OP) \
00636 template <class T> \
00637 inline void \
00638 F (const TSRC *v, TRES *r, octave_idx_type m, octave_idx_type n) \
00639 { \
00640   if (n) \
00641     { \
00642       for (octave_idx_type i = 0; i < m; i++) \
00643         r[i] = v[i]; \
00644       const T *r0 = r; \
00645       for (octave_idx_type j = 1; j < n; j++) \
00646         { \
00647           r += m; v += m; \
00648           for (octave_idx_type i = 0; i < m; i++) \
00649             r[i] = r0[i] OP v[i]; \
00650           r0 += m; \
00651         } \
00652     } \
00653 }
00654 
00655 OP_CUM_FCN2 (mx_inline_cumsum, T, T, +)
00656 OP_CUM_FCN2 (mx_inline_cumprod, T, T, *)
00657 OP_CUM_FCN2 (mx_inline_cumcount, bool, T, +)
00658 
00659 #define OP_CUM_FCNN(F, TSRC, TRES) \
00660 template <class T> \
00661 inline void \
00662 F (const TSRC *v, TRES *r, octave_idx_type l, \
00663    octave_idx_type n, octave_idx_type u) \
00664 { \
00665   if (l == 1) \
00666     { \
00667       for (octave_idx_type i = 0; i < u; i++) \
00668         { \
00669           F (v, r, n); \
00670           v += n; r += n; \
00671         } \
00672     } \
00673   else \
00674     { \
00675       for (octave_idx_type i = 0; i < u; i++) \
00676         { \
00677           F (v, r, l, n); \
00678           v += l*n; \
00679           r += l*n; \
00680         } \
00681     } \
00682 }
00683 
00684 OP_CUM_FCNN (mx_inline_cumsum, T, T)
00685 OP_CUM_FCNN (mx_inline_cumprod, T, T)
00686 OP_CUM_FCNN (mx_inline_cumcount, bool, T)
00687 
00688 #define OP_MINMAX_FCN(F, OP) \
00689 template <class T> \
00690 void F (const T *v, T *r, octave_idx_type n) \
00691 { \
00692   if (! n) return; \
00693   T tmp = v[0]; \
00694   octave_idx_type i = 1; \
00695   if (xisnan (tmp)) \
00696     { \
00697       for (; i < n && xisnan (v[i]); i++) ; \
00698       if (i < n) tmp = v[i]; \
00699     } \
00700   for (; i < n; i++) \
00701     if (v[i] OP tmp) tmp = v[i]; \
00702   *r = tmp; \
00703 } \
00704 template <class T> \
00705 void F (const T *v, T *r, octave_idx_type *ri, octave_idx_type n) \
00706 { \
00707   if (! n) return; \
00708   T tmp = v[0]; \
00709   octave_idx_type tmpi = 0; \
00710   octave_idx_type i = 1; \
00711   if (xisnan (tmp)) \
00712     { \
00713       for (; i < n && xisnan (v[i]); i++) ; \
00714       if (i < n) { tmp = v[i]; tmpi = i; } \
00715     } \
00716   for (; i < n; i++) \
00717     if (v[i] OP tmp) { tmp = v[i]; tmpi = i; }\
00718   *r = tmp; \
00719   *ri = tmpi; \
00720 }
00721 
00722 OP_MINMAX_FCN (mx_inline_min, <)
00723 OP_MINMAX_FCN (mx_inline_max, >)
00724 
00725 // Row reductions will be slightly complicated.  We will proceed with checks
00726 // for NaNs until we detect that no row will yield a NaN, in which case we
00727 // proceed to a faster code.
00728 
00729 #define OP_MINMAX_FCN2(F, OP) \
00730 template <class T> \
00731 inline void \
00732 F (const T *v, T *r, octave_idx_type m, octave_idx_type n) \
00733 { \
00734   if (! n) return; \
00735   bool nan = false; \
00736   octave_idx_type j = 0; \
00737   for (octave_idx_type i = 0; i < m; i++) \
00738     {  \
00739       r[i] = v[i]; \
00740       if (xisnan (v[i])) nan = true;  \
00741     } \
00742   j++; v += m; \
00743   while (nan && j < n) \
00744     { \
00745       nan = false; \
00746       for (octave_idx_type i = 0; i < m; i++) \
00747         {  \
00748           if (xisnan (v[i])) \
00749             nan = true;  \
00750           else if (xisnan (r[i]) || v[i] OP r[i]) \
00751             r[i] = v[i]; \
00752         } \
00753       j++; v += m; \
00754     } \
00755   while (j < n) \
00756     { \
00757       for (octave_idx_type i = 0; i < m; i++) \
00758         if (v[i] OP r[i]) r[i] = v[i]; \
00759       j++; v += m; \
00760     } \
00761 } \
00762 template <class T> \
00763 inline void \
00764 F (const T *v, T *r, octave_idx_type *ri, \
00765    octave_idx_type m, octave_idx_type n) \
00766 { \
00767   if (! n) return; \
00768   bool nan = false; \
00769   octave_idx_type j = 0; \
00770   for (octave_idx_type i = 0; i < m; i++) \
00771     {  \
00772       r[i] = v[i]; ri[i] = j; \
00773       if (xisnan (v[i])) nan = true;  \
00774     } \
00775   j++; v += m; \
00776   while (nan && j < n) \
00777     { \
00778       nan = false; \
00779       for (octave_idx_type i = 0; i < m; i++) \
00780         {  \
00781           if (xisnan (v[i])) \
00782             nan = true;  \
00783           else if (xisnan (r[i]) || v[i] OP r[i]) \
00784             { r[i] = v[i]; ri[i] = j; } \
00785         } \
00786       j++; v += m; \
00787     } \
00788   while (j < n) \
00789     { \
00790       for (octave_idx_type i = 0; i < m; i++) \
00791         if (v[i] OP r[i]) \
00792           { r[i] = v[i]; ri[i] = j; } \
00793       j++; v += m; \
00794     } \
00795 }
00796 
00797 OP_MINMAX_FCN2 (mx_inline_min, <)
00798 OP_MINMAX_FCN2 (mx_inline_max, >)
00799 
00800 #define OP_MINMAX_FCNN(F) \
00801 template <class T> \
00802 inline void \
00803 F (const T *v, T *r, octave_idx_type l, \
00804    octave_idx_type n, octave_idx_type u) \
00805 { \
00806   if (! n) return; \
00807   if (l == 1) \
00808     { \
00809       for (octave_idx_type i = 0; i < u; i++) \
00810         { \
00811           F (v, r, n); \
00812           v += n; r++; \
00813         } \
00814     } \
00815   else \
00816     { \
00817       for (octave_idx_type i = 0; i < u; i++) \
00818         { \
00819           F (v, r, l, n); \
00820           v += l*n; \
00821           r += l; \
00822         } \
00823     } \
00824 } \
00825 template <class T> \
00826 inline void \
00827 F (const T *v, T *r, octave_idx_type *ri, \
00828    octave_idx_type l, octave_idx_type n, octave_idx_type u) \
00829 { \
00830   if (! n) return; \
00831   if (l == 1) \
00832     { \
00833       for (octave_idx_type i = 0; i < u; i++) \
00834         { \
00835           F (v, r, ri, n); \
00836           v += n; r++; ri++; \
00837         } \
00838     } \
00839   else \
00840     { \
00841       for (octave_idx_type i = 0; i < u; i++) \
00842         { \
00843           F (v, r, ri, l, n); \
00844           v += l*n; \
00845           r += l; ri += l; \
00846         } \
00847     } \
00848 }
00849 
00850 OP_MINMAX_FCNN (mx_inline_min)
00851 OP_MINMAX_FCNN (mx_inline_max)
00852 
00853 #define OP_CUMMINMAX_FCN(F, OP) \
00854 template <class T> \
00855 void F (const T *v, T *r, octave_idx_type n) \
00856 { \
00857   if (! n) return; \
00858   T tmp = v[0]; \
00859   octave_idx_type i = 1, j = 0; \
00860   if (xisnan (tmp)) \
00861     { \
00862       for (; i < n && xisnan (v[i]); i++) ; \
00863       for (; j < i; j++) r[j] = tmp; \
00864       if (i < n) tmp = v[i]; \
00865     } \
00866   for (; i < n; i++) \
00867     if (v[i] OP tmp) \
00868       { \
00869         for (; j < i; j++) r[j] = tmp; \
00870         tmp = v[i]; \
00871       } \
00872   for (; j < i; j++) r[j] = tmp; \
00873 } \
00874 template <class T> \
00875 void F (const T *v, T *r, octave_idx_type *ri, octave_idx_type n) \
00876 { \
00877   if (! n) return; \
00878   T tmp = v[0]; octave_idx_type tmpi = 0; \
00879   octave_idx_type i = 1, j = 0; \
00880   if (xisnan (tmp)) \
00881     { \
00882       for (; i < n && xisnan (v[i]); i++) ; \
00883       for (; j < i; j++) { r[j] = tmp; ri[j] = tmpi; } \
00884       if (i < n) { tmp = v[i]; tmpi = i; } \
00885     } \
00886   for (; i < n; i++) \
00887     if (v[i] OP tmp) \
00888       { \
00889         for (; j < i; j++) { r[j] = tmp; ri[j] = tmpi; } \
00890         tmp = v[i]; tmpi = i; \
00891       } \
00892   for (; j < i; j++) { r[j] = tmp; ri[j] = tmpi; } \
00893 }
00894 
00895 OP_CUMMINMAX_FCN (mx_inline_cummin, <)
00896 OP_CUMMINMAX_FCN (mx_inline_cummax, >)
00897 
00898 // Row reductions will be slightly complicated.  We will proceed with checks
00899 // for NaNs until we detect that no row will yield a NaN, in which case we
00900 // proceed to a faster code.
00901 
00902 #define OP_CUMMINMAX_FCN2(F, OP) \
00903 template <class T> \
00904 inline void \
00905 F (const T *v, T *r, octave_idx_type m, octave_idx_type n) \
00906 { \
00907   if (! n) return; \
00908   bool nan = false; \
00909   const T *r0; \
00910   octave_idx_type j = 0; \
00911   for (octave_idx_type i = 0; i < m; i++) \
00912     {  \
00913       r[i] = v[i]; \
00914       if (xisnan (v[i])) nan = true;  \
00915     } \
00916   j++; v += m; r0 = r; r += m; \
00917   while (nan && j < n) \
00918     { \
00919       nan = false; \
00920       for (octave_idx_type i = 0; i < m; i++) \
00921         {  \
00922           if (xisnan (v[i])) \
00923             { r[i] = r0[i]; nan = true; } \
00924           else if (xisnan (r0[i]) || v[i] OP r0[i]) \
00925             r[i] = v[i]; \
00926         } \
00927       j++; v += m; r0 = r; r += m; \
00928     } \
00929   while (j < n) \
00930     { \
00931       for (octave_idx_type i = 0; i < m; i++) \
00932         if (v[i] OP r0[i]) \
00933           r[i] = v[i]; \
00934         else \
00935           r[i] = r0[i]; \
00936       j++; v += m; r0 = r; r += m; \
00937     } \
00938 } \
00939 template <class T> \
00940 inline void \
00941 F (const T *v, T *r, octave_idx_type *ri, \
00942    octave_idx_type m, octave_idx_type n) \
00943 { \
00944   if (! n) return; \
00945   bool nan = false; \
00946   const T *r0; const octave_idx_type *r0i; \
00947   octave_idx_type j = 0; \
00948   for (octave_idx_type i = 0; i < m; i++) \
00949     {  \
00950       r[i] = v[i]; ri[i] = 0; \
00951       if (xisnan (v[i])) nan = true;  \
00952     } \
00953   j++; v += m; r0 = r; r += m; r0i = ri; ri += m;  \
00954   while (nan && j < n) \
00955     { \
00956       nan = false; \
00957       for (octave_idx_type i = 0; i < m; i++) \
00958         {  \
00959           if (xisnan (v[i])) \
00960             { r[i] = r0[i]; ri[i] = r0i[i]; nan = true; } \
00961           else if (xisnan (r0[i]) || v[i] OP r0[i]) \
00962             { r[i] = v[i]; ri[i] = j; }\
00963         } \
00964       j++; v += m; r0 = r; r += m; r0i = ri; ri += m;  \
00965     } \
00966   while (j < n) \
00967     { \
00968       for (octave_idx_type i = 0; i < m; i++) \
00969         if (v[i] OP r0[i]) \
00970           { r[i] = v[i]; ri[i] = j; } \
00971         else \
00972           { r[i] = r0[i]; ri[i] = r0i[i]; } \
00973       j++; v += m; r0 = r; r += m; r0i = ri; ri += m;  \
00974     } \
00975 }
00976 
00977 OP_CUMMINMAX_FCN2 (mx_inline_cummin, <)
00978 OP_CUMMINMAX_FCN2 (mx_inline_cummax, >)
00979 
00980 #define OP_CUMMINMAX_FCNN(F) \
00981 template <class T> \
00982 inline void \
00983 F (const T *v, T *r, octave_idx_type l, \
00984    octave_idx_type n, octave_idx_type u) \
00985 { \
00986   if (! n) return; \
00987   if (l == 1) \
00988     { \
00989       for (octave_idx_type i = 0; i < u; i++) \
00990         { \
00991           F (v, r, n); \
00992           v += n; r += n; \
00993         } \
00994     } \
00995   else \
00996     { \
00997       for (octave_idx_type i = 0; i < u; i++) \
00998         { \
00999           F (v, r, l, n); \
01000           v += l*n; \
01001           r += l*n; \
01002         } \
01003     } \
01004 } \
01005 template <class T> \
01006 inline void \
01007 F (const T *v, T *r, octave_idx_type *ri, \
01008    octave_idx_type l, octave_idx_type n, octave_idx_type u) \
01009 { \
01010   if (! n) return; \
01011   if (l == 1) \
01012     { \
01013       for (octave_idx_type i = 0; i < u; i++) \
01014         { \
01015           F (v, r, ri, n); \
01016           v += n; r += n; ri += n; \
01017         } \
01018     } \
01019   else \
01020     { \
01021       for (octave_idx_type i = 0; i < u; i++) \
01022         { \
01023           F (v, r, ri, l, n); \
01024           v += l*n; \
01025           r += l*n; ri += l*n; \
01026         } \
01027     } \
01028 }
01029 
01030 OP_CUMMINMAX_FCNN (mx_inline_cummin)
01031 OP_CUMMINMAX_FCNN (mx_inline_cummax)
01032 
01033 template <class T>
01034 void mx_inline_diff (const T *v, T *r, octave_idx_type n,
01035                      octave_idx_type order)
01036 {
01037   switch (order)
01038     {
01039     case 1:
01040       for (octave_idx_type i = 0; i < n-1; i++)
01041         r[i] = v[i+1] - v[i];
01042       break;
01043     case 2:
01044       if (n > 1)
01045         {
01046           T lst = v[1] - v[0];
01047           for (octave_idx_type i = 0; i < n-2; i++)
01048             {
01049               T dif = v[i+2] - v[i+1];
01050               r[i] = dif - lst;
01051               lst = dif;
01052             }
01053         }
01054       break;
01055     default:
01056         {
01057           OCTAVE_LOCAL_BUFFER (T, buf, n-1);
01058 
01059           for (octave_idx_type i = 0; i < n-1; i++)
01060             buf[i] = v[i+1] - v[i];
01061 
01062           for (octave_idx_type o = 2; o <= order; o++)
01063             {
01064               for (octave_idx_type i = 0; i < n-o; i++)
01065                 buf[i] = buf[i+1] - buf[i];
01066             }
01067 
01068           for (octave_idx_type i = 0; i < n-order; i++)
01069             r[i] = buf[i];
01070         }
01071     }
01072 }
01073 
01074 template <class T>
01075 void mx_inline_diff (const T *v, T *r,
01076                      octave_idx_type m, octave_idx_type n,
01077                      octave_idx_type order)
01078 {
01079   switch (order)
01080     {
01081     case 1:
01082       for (octave_idx_type i = 0; i < m*(n-1); i++)
01083         r[i] = v[i+m] - v[i];
01084       break;
01085     case 2:
01086       for (octave_idx_type i = 0; i < n-2; i++)
01087         {
01088           for (octave_idx_type j = i*m; j < i*m+m; j++)
01089             r[j] = (v[j+m+m] - v[j+m]) - (v[j+m] - v[j]);
01090         }
01091       break;
01092     default:
01093         {
01094           OCTAVE_LOCAL_BUFFER (T, buf, n-1);
01095 
01096           for (octave_idx_type j = 0; j < m; j++)
01097             {
01098               for (octave_idx_type i = 0; i < n-1; i++)
01099                 buf[i] = v[i*m+j+m] - v[i*m+j];
01100 
01101               for (octave_idx_type o = 2; o <= order; o++)
01102                 {
01103                   for (octave_idx_type i = 0; i < n-o; i++)
01104                     buf[i] = buf[i+1] - buf[i];
01105                 }
01106 
01107               for (octave_idx_type i = 0; i < n-order; i++)
01108                 r[i*m+j] = buf[i];
01109             }
01110         }
01111     }
01112 }
01113 
01114 template <class T>
01115 inline void
01116 mx_inline_diff (const T *v, T *r,
01117                 octave_idx_type l, octave_idx_type n, octave_idx_type u,
01118                 octave_idx_type order)
01119 {
01120   if (! n) return;
01121   if (l == 1)
01122     {
01123       for (octave_idx_type i = 0; i < u; i++)
01124         {
01125           mx_inline_diff (v, r, n, order);
01126           v += n; r += n-order;
01127         }
01128     }
01129   else
01130     {
01131       for (octave_idx_type i = 0; i < u; i++)
01132         {
01133           mx_inline_diff (v, r, l, n, order);
01134           v += l*n;
01135           r += l*(n-order);
01136         }
01137     }
01138 }
01139 
01140 // Assistant function
01141 
01142 inline void
01143 get_extent_triplet (const dim_vector& dims, int& dim,
01144                     octave_idx_type& l, octave_idx_type& n,
01145                     octave_idx_type& u)
01146 {
01147   octave_idx_type ndims = dims.length ();
01148   if (dim >= ndims)
01149     {
01150       l = dims.numel ();
01151       n = 1;
01152       u = 1;
01153     }
01154   else
01155     {
01156       if (dim < 0)
01157         dim = dims.first_non_singleton ();
01158 
01159       // calculate extent triplet.
01160       l = 1, n = dims(dim), u = 1;
01161       for (octave_idx_type i = 0; i < dim; i++)
01162         l *= dims (i);
01163       for (octave_idx_type i = dim + 1; i < ndims; i++)
01164         u *= dims (i);
01165     }
01166 }
01167 
01168 // Appliers.
01169 // FIXME: is this the best design? C++ gives a lot of options here...
01170 // maybe it can be done without an explicit parameter?
01171 
01172 template <class R, class T>
01173 inline Array<R>
01174 do_mx_red_op (const Array<T>& src, int dim,
01175               void (*mx_red_op) (const T *, R *, octave_idx_type,
01176                                  octave_idx_type, octave_idx_type))
01177 {
01178   octave_idx_type l, n, u;
01179   dim_vector dims = src.dims ();
01180   // M*b inconsistency: sum([]) = 0 etc.
01181   if (dims.length () == 2 && dims(0) == 0 && dims(1) == 0)
01182     dims (1) = 1;
01183 
01184   get_extent_triplet (dims, dim, l, n, u);
01185 
01186   // Reduction operation reduces the array size.
01187   if (dim < dims.length ()) dims(dim) = 1;
01188   dims.chop_trailing_singletons ();
01189 
01190   Array<R> ret (dims);
01191   mx_red_op (src.data (), ret.fortran_vec (), l, n, u);
01192 
01193   return ret;
01194 }
01195 
01196 template <class R, class T>
01197 inline Array<R>
01198 do_mx_cum_op (const Array<T>& src, int dim,
01199               void (*mx_cum_op) (const T *, R *, octave_idx_type,
01200                                  octave_idx_type, octave_idx_type))
01201 {
01202   octave_idx_type l, n, u;
01203   dim_vector dims = src.dims ();
01204   get_extent_triplet (dims, dim, l, n, u);
01205 
01206   // Cumulative operation doesn't reduce the array size.
01207   Array<R> ret (dims);
01208   mx_cum_op (src.data (), ret.fortran_vec (), l, n, u);
01209 
01210   return ret;
01211 }
01212 
01213 template <class R>
01214 inline Array<R>
01215 do_mx_minmax_op (const Array<R>& src, int dim,
01216                  void (*mx_minmax_op) (const R *, R *, octave_idx_type,
01217                                        octave_idx_type, octave_idx_type))
01218 {
01219   octave_idx_type l, n, u;
01220   dim_vector dims = src.dims ();
01221   get_extent_triplet (dims, dim, l, n, u);
01222 
01223   // If the dimension is zero, we don't do anything.
01224   if (dim < dims.length () && dims(dim) != 0) dims(dim) = 1;
01225   dims.chop_trailing_singletons ();
01226 
01227   Array<R> ret (dims);
01228   mx_minmax_op (src.data (), ret.fortran_vec (), l, n, u);
01229 
01230   return ret;
01231 }
01232 
01233 template <class R>
01234 inline Array<R>
01235 do_mx_minmax_op (const Array<R>& src, Array<octave_idx_type>& idx, int dim,
01236                  void (*mx_minmax_op) (const R *, R *, octave_idx_type *,
01237                                        octave_idx_type, octave_idx_type, octave_idx_type))
01238 {
01239   octave_idx_type l, n, u;
01240   dim_vector dims = src.dims ();
01241   get_extent_triplet (dims, dim, l, n, u);
01242 
01243   // If the dimension is zero, we don't do anything.
01244   if (dim < dims.length () && dims(dim) != 0) dims(dim) = 1;
01245   dims.chop_trailing_singletons ();
01246 
01247   Array<R> ret (dims);
01248   if (idx.dims () != dims) idx = Array<octave_idx_type> (dims);
01249 
01250   mx_minmax_op (src.data (), ret.fortran_vec (), idx.fortran_vec (),
01251                 l, n, u);
01252 
01253   return ret;
01254 }
01255 
01256 template <class R>
01257 inline Array<R>
01258 do_mx_cumminmax_op (const Array<R>& src, int dim,
01259                     void (*mx_cumminmax_op) (const R *, R *, octave_idx_type,
01260                                              octave_idx_type, octave_idx_type))
01261 {
01262   octave_idx_type l, n, u;
01263   dim_vector dims = src.dims ();
01264   get_extent_triplet (dims, dim, l, n, u);
01265 
01266   Array<R> ret (dims);
01267   mx_cumminmax_op (src.data (), ret.fortran_vec (), l, n, u);
01268 
01269   return ret;
01270 }
01271 
01272 template <class R>
01273 inline Array<R>
01274 do_mx_cumminmax_op (const Array<R>& src, Array<octave_idx_type>& idx, int dim,
01275                     void (*mx_cumminmax_op) (const R *, R *, octave_idx_type *,
01276                                              octave_idx_type, octave_idx_type, octave_idx_type))
01277 {
01278   octave_idx_type l, n, u;
01279   dim_vector dims = src.dims ();
01280   get_extent_triplet (dims, dim, l, n, u);
01281 
01282   Array<R> ret (dims);
01283   if (idx.dims () != dims) idx = Array<octave_idx_type> (dims);
01284 
01285   mx_cumminmax_op (src.data (), ret.fortran_vec (), idx.fortran_vec (),
01286                    l, n, u);
01287 
01288   return ret;
01289 }
01290 
01291 template <class R>
01292 inline Array<R>
01293 do_mx_diff_op (const Array<R>& src, int dim, octave_idx_type order,
01294                void (*mx_diff_op) (const R *, R *,
01295                                    octave_idx_type, octave_idx_type, octave_idx_type,
01296                                    octave_idx_type))
01297 {
01298   octave_idx_type l, n, u;
01299   if (order <= 0)
01300     return src;
01301 
01302   dim_vector dims = src.dims ();
01303 
01304   get_extent_triplet (dims, dim, l, n, u);
01305   if (dim >= dims.length ())
01306     dims.resize (dim+1, 1);
01307 
01308   if (dims(dim) <= order)
01309     {
01310       dims (dim) = 0;
01311       return Array<R> (dims);
01312     }
01313   else
01314     {
01315       dims(dim) -= order;
01316     }
01317 
01318   Array<R> ret (dims);
01319   mx_diff_op (src.data (), ret.fortran_vec (), l, n, u, order);
01320 
01321   return ret;
01322 }
01323 
// Fast extra-precise summation, following
// T. Ogita, S. M. Rump, S. Oishi:
// Accurate Sum And Dot Product,
// SIAM J. Sci. Computing, Vol. 26, 2005
//
// One accumulation step: add X to the running sum S and add the rounding
// error of that addition to the running correction E.

template <class T>
inline void twosum_accum (T& s, T& e,
                          const T& x)
{
  const T total = s + x;
  // Portion of the rounded total that is attributed to x.
  const T x_part = total - s;
  // What was lost from s and from x, respectively, in forming total.
  const T s_lost = s - (total - x_part);
  const T x_lost = x - x_part;

  s = total;
  e += s_lost + x_lost;
}
01337 
01338 template <class T>
01339 inline T
01340 mx_inline_xsum (const T *v, octave_idx_type n)
01341 {
01342   T s = 0, e = 0;
01343   for (octave_idx_type i = 0; i < n; i++)
01344     twosum_accum (s, e, v[i]);
01345 
01346   return s + e;
01347 }
01348 
01349 template <class T>
01350 inline void
01351 mx_inline_xsum (const T *v, T *r,
01352                 octave_idx_type m, octave_idx_type n)
01353 {
01354   OCTAVE_LOCAL_BUFFER (T, e, m);
01355   for (octave_idx_type i = 0; i < m; i++)
01356     e[i] = r[i] = T ();
01357 
01358   for (octave_idx_type j = 0; j < n; j++)
01359     {
01360       for (octave_idx_type i = 0; i < m; i++)
01361         twosum_accum (r[i], e[i], v[i]);
01362 
01363       v += m;
01364     }
01365 
01366   for (octave_idx_type i = 0; i < m; i++)
01367     r[i] += e[i];
01368 }
01369 
01370 OP_RED_FCNN (mx_inline_xsum, T, T)
01371 
01372 #endif