d5/d22/oct-sort_8cc_source.html

 ////////////////////////////////////////////////////////////////////////

 //

 // Copyright (C) 2003-2021 The Octave Project Developers

 //

 // See the file COPYRIGHT.md in the top-level directory of this

 // distribution or <https://octave.org/copyright/>.

 //

 // This file is part of Octave.

 //

 // Octave is free software: you can redistribute it and/or modify it

 // under the terms of the GNU General Public License as published by

 // the Free Software Foundation, either version 3 of the License, or

 // (at your option) any later version.

 //

 // Octave is distributed in the hope that it will be useful, but

 // WITHOUT ANY WARRANTY; without even the implied warranty of

 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

 // GNU General Public License for more details.

 //

 // You should have received a copy of the GNU General Public License

 // along with Octave; see the file COPYING.  If not, see

 // <https://www.gnu.org/licenses/>.

 //

 // Code stolen in large part from Python's, listobject.c, which itself had

 // no license header.  However, thanks to Tim Peters for the parts of the

 // code I ripped-off.

 //

 // As required in the Python license the short description of the changes

 // made are

 //

 // * convert the sorting code in listobject.cc into a generic class,

 //   replacing PyObject* with the type of the class T.

 //

 // * replaced usages of malloc, free, memcpy and memmove by standard C++

 //   new [], delete [] and std::copy and std::copy_backward.  Note that replacing

 //   memmove by std::copy is possible if the destination starts before the source.

 //   If not, std::copy_backward needs to be used.

 //

 // * templatize comparison operator in most methods, provide possible dispatch

 //

 // * duplicate methods to avoid by-the-way indexed sorting

 //

 // * add methods for verifying sortedness of array

 //

 // * row sorting via breadth-first tree subsorting

 //

 // * binary lookup and sequential binary lookup optimized for dense downsampling.

 //

 // * NOTE: the memory management routines rely on the fact that delete [] silently

 //   ignores null pointers.  Don't gripe about the missing checks - they're there.

 //

 //

 // The Python license is

 //

 //   PSF LICENSE AGREEMENT FOR PYTHON 2.3

 //   --------------------------------------

 //

 //   1. This LICENSE AGREEMENT is between the Python Software Foundation

 //   ("PSF"), and the Individual or Organization ("Licensee") accessing and

 //   otherwise using Python 2.3 software in source or binary form and its

 //   associated documentation.

 //

 //   2. Subject to the terms and conditions of this License Agreement, PSF

 //   hereby grants Licensee a nonexclusive, royalty-free, world-wide

 //   license to reproduce, analyze, test, perform and/or display publicly,

 //   prepare derivative works, distribute, and otherwise use Python 2.3

 //   alone or in any derivative version, provided, however, that PSF's

 //   License Agreement and PSF's notice of copyright, i.e., "Copyright (c)

 //   2001, 2002, 2003 Python Software Foundation; All Rights Reserved" are

 //   retained in Python 2.3 alone or in any derivative version prepared by

 //   Licensee.

 //

 //   3. In the event Licensee prepares a derivative work that is based on

 //   or incorporates Python 2.3 or any part thereof, and wants to make

 //   the derivative work available to others as provided herein, then

 //   Licensee hereby agrees to include in any such work a brief summary of

 //   the changes made to Python 2.3.

 //

 //   4. PSF is making Python 2.3 available to Licensee on an "AS IS"

 //   basis.  PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR

 //   IMPLIED.  BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND

 //   DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS

 //   FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 2.3 WILL NOT

 //   INFRINGE ANY THIRD PARTY RIGHTS.

 //

 //   5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON

 //   2.3 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS

 //   A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 2.3,

 //   OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.

 //

 //   6. This License Agreement will automatically terminate upon a material

 //   breach of its terms and conditions.

 //

 //   7. Nothing in this License Agreement shall be deemed to create any

 //   relationship of agency, partnership, or joint venture between PSF and

 //   Licensee.  This License Agreement does not grant permission to use PSF

 //   trademarks or trade name in a trademark sense to endorse or promote

 //   products or services of Licensee, or any third party.

 //

 //   8. By copying, installing or otherwise using Python 2.3, Licensee

 //   agrees to be bound by the terms and conditions of this License

 //   Agreement.

 //

 ////////////////////////////////////////////////////////////////////////


 // This file should not include config.h.  It is only included in other

 // C++ source files that should have included config.h before including

 // this file.


 #include <cassert>

 #include <algorithm>

 #include <functional>

 #include <cstring>

 #include <stack>


 #include "lo-error.h"

 #include "lo-mappers.h"

 #include "quit.h"

 #include "oct-sort.h"

 #include "oct-locbuf.h"


 template <typename T>

 octave_sort<T>::octave_sort (void) :

   m_compare (ascending_compare), m_ms (nullptr)

 { }


 template <typename T>

 octave_sort<T>::octave_sort (compare_fcn_type comp)

   : m_compare (comp), m_ms (nullptr)

 { }


 template <typename T>

 octave_sort<T>::~octave_sort ()

 {

   delete m_ms;

 }


 template <typename T>

 void

 octave_sort<T>::set_compare (sortmode mode)

 {

   if (mode == ASCENDING)

     m_compare = ascending_compare;

   else if (mode == DESCENDING)

     m_compare = descending_compare;

   else

     m_compare = nullptr;

 }


 template <typename T>

 template <typename Comp>

 void

 octave_sort<T>::binarysort (T *data, octave_idx_type nel,

                             octave_idx_type start, Comp comp)

 {

   if (start == 0)

     ++start;


   for (; start < nel; ++start)

     {

       /* set l to where *start belongs */

       octave_idx_type l = 0;

       octave_idx_type r = start;

       T pivot = data[start];

       /* Invariants:

        * pivot >= all in [lo, l).

        * pivot  < all in [r, start).

        * The second is vacuously true at the start.

        */

       do

         {

           octave_idx_type p = l + ((r - l) >> 1);

           if (comp (pivot, data[p]))

             r = p;

           else

             l = p+1;

         }

       while (l < r);

       /* The invariants still hold, so pivot >= all in [lo, l) and

          pivot < all in [l, start), so pivot belongs at l.  Note

          that if there are elements equal to pivot, l points to the

          first slot after them -- that's why this sort is stable.

          Slide over to make room.

          Caution: using memmove is much slower under MSVC 5;

          we're not usually moving many slots. */

       // NOTE: using swap and going upwards appears to be faster.

       for (octave_idx_type p = l; p < start; p++)

         std::swap (pivot, data[p]);

       data[start] = pivot;

     }


   return;

 }


 template <typename T>

 template <typename Comp>

 void

 octave_sort<T>::binarysort (T *data, octave_idx_type *idx, octave_idx_type nel,

                             octave_idx_type start, Comp comp)

 {

   if (start == 0)

     ++start;


   for (; start < nel; ++start)

     {

       /* set l to where *start belongs */

       octave_idx_type l = 0;

       octave_idx_type r = start;

       T pivot = data[start];

       /* Invariants:

        * pivot >= all in [lo, l).

        * pivot  < all in [r, start).

        * The second is vacuously true at the start.

        */

       do

         {

           octave_idx_type p = l + ((r - l) >> 1);

           if (comp (pivot, data[p]))

             r = p;

           else

             l = p+1;

         }

       while (l < r);

       /* The invariants still hold, so pivot >= all in [lo, l) and

          pivot < all in [l, start), so pivot belongs at l.  Note

          that if there are elements equal to pivot, l points to the

          first slot after them -- that's why this sort is stable.

          Slide over to make room.

          Caution: using memmove is much slower under MSVC 5;

          we're not usually moving many slots. */

       // NOTE: using swap and going upwards appears to be faster.

       for (octave_idx_type p = l; p < start; p++)

         std::swap (pivot, data[p]);

       data[start] = pivot;

       octave_idx_type ipivot = idx[start];

       for (octave_idx_type p = l; p < start; p++)

         std::swap (ipivot, idx[p]);

       idx[start] = ipivot;

     }


   return;

 }


 /*

 Return the length of the run beginning at lo, in the slice [lo, hi).  lo < hi

 is required on entry.  "A run" is the longest ascending sequence, with


     lo[0] <= lo[1] <= lo[2] <= ...


 or the longest descending sequence, with


     lo[0] > lo[1] > lo[2] > ...


 DESCENDING is set to false in the former case, or to true in the latter.

 For its intended use in a stable mergesort, the strictness of the defn of

 "descending" is needed so that the caller can safely reverse a descending

 sequence without violating stability (strict > ensures there are no equal

 elements to get out of order).


 Returns -1 in case of error.

 */

 template <typename T>

 template <typename Comp>

 octave_idx_type

 octave_sort<T>::count_run (T *lo, octave_idx_type nel, bool& descending,

                            Comp comp)

 {

   octave_idx_type n;

   T *hi = lo + nel;


   descending = false;

   ++lo;

   if (lo == hi)

     return 1;


   n = 2;


   if (comp (*lo, *(lo-1)))

     {

       descending = true;

       for (lo = lo+1; lo < hi; ++lo, ++n)

         {

           if (comp (*lo, *(lo-1)))

             ;

           else

             break;

         }

     }

   else

     {

       for (lo = lo+1; lo < hi; ++lo, ++n)

         {

           if (comp (*lo, *(lo-1)))

             break;

         }

     }


   return n;

 }


 /*

 Locate the proper position of key in a sorted vector; if the vector contains

 an element equal to key, return the position immediately to the left of

 the leftmost equal element.  [gallop_right() does the same except returns

 the position to the right of the rightmost equal element (if any).]


 "a" is a sorted vector with n elements, starting at a[0].  n must be > 0.


 "hint" is an index at which to begin the search, 0 <= hint < n.  The closer

 hint is to the final result, the faster this runs.


 The return value is the int k in 0..n such that


     a[k-1] < key <= a[k]


 pretending that *(a-1) is minus infinity and a[n] is plus infinity.  IOW,

 key belongs at index k; or, IOW, the first k elements of a should precede

 key, and the last n-k should follow key.


 Returns -1 on error.  See listsort.txt for info on the method.

 */

 template <typename T>

 template <typename Comp>

 octave_idx_type

 octave_sort<T>::gallop_left (T key, T *a, octave_idx_type n,

                              octave_idx_type hint,

                              Comp comp)

 {

   octave_idx_type ofs;

   octave_idx_type lastofs;

   octave_idx_type k;


   a += hint;

   lastofs = 0;

   ofs = 1;

   if (comp (*a, key))

     {

       /* a[hint] < key -- gallop right, until

        * a[hint + lastofs] < key <= a[hint + ofs]

        */

       const octave_idx_type maxofs = n - hint;  /* &a[n-1] is highest */

       while (ofs < maxofs)

         {

           if (comp (a[ofs], key))

             {

               lastofs = ofs;

               ofs = (ofs << 1) + 1;

               if (ofs <= 0)     /* int overflow */

                 ofs = maxofs;

             }

           else  /* key <= a[hint + ofs] */

             break;

         }

       if (ofs > maxofs)

         ofs = maxofs;

       /* Translate back to offsets relative to &a[0]. */

       lastofs += hint;

       ofs += hint;

     }

   else

     {

       /* key <= a[hint] -- gallop left, until

        * a[hint - ofs] < key <= a[hint - lastofs]

        */

       const octave_idx_type maxofs = hint + 1;  /* &a[0] is lowest */

       while (ofs < maxofs)

         {

           if (comp (*(a-ofs), key))

             break;

           /* key <= a[hint - ofs] */

           lastofs = ofs;

           ofs = (ofs << 1) + 1;

           if (ofs <= 0) /* int overflow */

             ofs = maxofs;

         }

       if (ofs > maxofs)

         ofs = maxofs;

       /* Translate back to positive offsets relative to &a[0]. */

       k = lastofs;

       lastofs = hint - ofs;

       ofs = hint - k;

     }

   a -= hint;


   /* Now a[lastofs] < key <= a[ofs], so key belongs somewhere to the

    * right of lastofs but no farther right than ofs.  Do a binary

    * search, with invariant a[lastofs-1] < key <= a[ofs].

    */

   ++lastofs;

   while (lastofs < ofs)

     {

       octave_idx_type m = lastofs + ((ofs - lastofs) >> 1);


       if (comp (a[m], key))

         lastofs = m+1;  /* a[m] < key */

       else

         ofs = m;        /* key <= a[m] */

     }


   return ofs;

 }


 /*

 Exactly like gallop_left(), except that if key already exists in a[0:n],

 finds the position immediately to the right of the rightmost equal value.


 The return value is the int k in 0..n such that


     a[k-1] <= key < a[k]


 or -1 if error.


 The code duplication is massive, but this is enough different given that

 we're sticking to "<" comparisons that it's much harder to follow if

 written as one routine with yet another "left or right?" flag.

 */

 template <typename T>

 template <typename Comp>

 octave_idx_type

 octave_sort<T>::gallop_right (T key, T *a, octave_idx_type n,

                               octave_idx_type hint,

                               Comp comp)

 {

   octave_idx_type ofs;

   octave_idx_type lastofs;

   octave_idx_type k;


   a += hint;

   lastofs = 0;

   ofs = 1;

   if (comp (key, *a))

     {

       /* key < a[hint] -- gallop left, until

        * a[hint - ofs] <= key < a[hint - lastofs]

        */

       const octave_idx_type maxofs = hint + 1;  /* &a[0] is lowest */

       while (ofs < maxofs)

         {

           if (comp (key, *(a-ofs)))

             {

               lastofs = ofs;

               ofs = (ofs << 1) + 1;

               if (ofs <= 0)     /* int overflow */

                 ofs = maxofs;

             }

           else  /* a[hint - ofs] <= key */

             break;

         }

       if (ofs > maxofs)

         ofs = maxofs;

       /* Translate back to positive offsets relative to &a[0]. */

       k = lastofs;

       lastofs = hint - ofs;

       ofs = hint - k;

     }

   else

     {

       /* a[hint] <= key -- gallop right, until

        * a[hint + lastofs] <= key < a[hint + ofs]

        */

       const octave_idx_type maxofs = n - hint;  /* &a[n-1] is highest */

       while (ofs < maxofs)

         {

           if (comp (key, a[ofs]))

             break;

           /* a[hint + ofs] <= key */

           lastofs = ofs;

           ofs = (ofs << 1) + 1;

           if (ofs <= 0) /* int overflow */

             ofs = maxofs;

         }

       if (ofs > maxofs)

         ofs = maxofs;

       /* Translate back to offsets relative to &a[0]. */

       lastofs += hint;

       ofs += hint;

     }

   a -= hint;


   /* Now a[lastofs] <= key < a[ofs], so key belongs somewhere to the

    * right of lastofs but no farther right than ofs.  Do a binary

    * search, with invariant a[lastofs-1] <= key < a[ofs].

    */

   ++lastofs;

   while (lastofs < ofs)

     {

       octave_idx_type m = lastofs + ((ofs - lastofs) >> 1);


       if (comp (key, a[m]))

         ofs = m;        /* key < a[m] */

       else

         lastofs = m+1;  /* a[m] <= key */

     }


   return ofs;

 }


 static inline octave_idx_type

 roundupsize (size_t n)

 {

   unsigned int nbits = 3;

   size_t n2 = n >> 8;


   /* Round up:

    * If n <       256, to a multiple of        8.

    * If n <      2048, to a multiple of       64.

    * If n <     16384, to a multiple of      512.

    * If n <    131072, to a multiple of     4096.

    * If n <   1048576, to a multiple of    32768.

    * If n <   8388608, to a multiple of   262144.

    * If n <  67108864, to a multiple of  2097152.

    * If n < 536870912, to a multiple of 16777216.

    * ...

    * If n < 2**(5+3*i), to a multiple of 2**(3*i).

    *

    * This over-allocates proportional to the list size, making room

    * for additional growth.  The over-allocation is mild, but is

    * enough to give linear-time amortized behavior over a long

    * sequence of appends() in the presence of a poorly-performing

    * system realloc() (which is a reality, e.g., across all flavors

    * of Windows, with Win9x behavior being particularly bad -- and

    * we've still got address space fragmentation problems on Win9x

    * even with this scheme, although it requires much longer lists to

    * provoke them than it used to).

    */

   while (n2)

     {

       n2 >>= 3;

       nbits += 3;

     }


   size_t new_size = ((n >> nbits) + 1) << nbits;


   if (new_size == 0

       || new_size

          > static_cast<size_t> (std::numeric_limits<octave_idx_type>::max ()))

     (*current_liboctave_error_handler)

       ("unable to allocate sufficient memory for sort");


   return static_cast<octave_idx_type> (new_size);

 }


 /* Ensure enough temp memory for 'need' array slots is available.

  * Returns 0 on success and -1 if the memory can't be gotten.

  */

 template <typename T>

 void

 octave_sort<T>::MergeState::getmem (octave_idx_type need)

 {

   if (need <= m_alloced)

     return;


   need = roundupsize (need);

   /* Don't realloc!  That can cost cycles to copy the old data, but

    * we don't care what's in the block.

    */

   delete [] m_a;

   delete [] m_ia; // Must do this or fool possible next getmemi.

   m_a = new T [need];

   m_alloced = need;


 }


 template <typename T>

 void

 octave_sort<T>::MergeState::getmemi (octave_idx_type need)

 {

   if (m_ia && need <= m_alloced)

     return;


   need = roundupsize (need);

   /* Don't realloc!  That can cost cycles to copy the old data, but

    * we don't care what's in the block.

    */

   delete [] m_a;

   delete [] m_ia;


   m_a = new T [need];

   m_ia = new octave_idx_type [need];

   m_alloced = need;

 }


 /* Merge the na elements starting at pa with the nb elements starting at pb

  * in a stable way, in-place.  na and nb must be > 0, and pa + na == pb.

  * Must also have that *pb < *pa, that pa[na-1] belongs at the end of the

  * merge, and should have na <= nb.  See listsort.txt for more info.

  * Return 0 if successful, -1 if error.

  */

 template <typename T>

 template <typename Comp>

 int

 octave_sort<T>::merge_lo (T *pa, octave_idx_type na,

                           T *pb, octave_idx_type nb,

                           Comp comp)

 {

   octave_idx_type k;

   T *dest;

   int result = -1;      /* guilty until proved innocent */

   octave_idx_type min_gallop = m_ms->m_min_gallop;


   m_ms->getmem (na);


   std::copy (pa, pa + na, m_ms->m_a);

   dest = pa;

   pa = m_ms->m_a;


   *dest++ = *pb++;

   --nb;

   if (nb == 0)

     goto Succeed;

   if (na == 1)

     goto CopyB;


   for (;;)

     {

       octave_idx_type acount = 0;       /* # of times A won in a row */

       octave_idx_type bcount = 0;       /* # of times B won in a row */


       /* Do the straightforward thing until (if ever) one run

        * appears to win consistently.

        */

       for (;;)

         {


           // FIXME: these loops are candidates for further optimizations.

           // Rather than testing everything in each cycle, it may be more

           // efficient to do it in hunks.

           if (comp (*pb, *pa))

             {

               *dest++ = *pb++;

               ++bcount;

               acount = 0;

               --nb;

               if (nb == 0)

                 goto Succeed;

               if (bcount >= min_gallop)

                 break;

             }

           else

             {

               *dest++ = *pa++;

               ++acount;

               bcount = 0;

               --na;

               if (na == 1)

                 goto CopyB;

               if (acount >= min_gallop)

                 break;

             }

         }


       /* One run is winning so consistently that galloping may

        * be a huge win.  So try that, and continue galloping until

        * (if ever) neither run appears to be winning consistently

        * anymore.

        */

       ++min_gallop;

       do

         {

           min_gallop -= (min_gallop > 1);

           m_ms->m_min_gallop = min_gallop;

           k = gallop_right (*pb, pa, na, 0, comp);

           acount = k;

           if (k)

             {

               if (k < 0)

                 goto Fail;

               dest = std::copy (pa, pa + k, dest);

               pa += k;

               na -= k;

               if (na == 1)

                 goto CopyB;

               /* na==0 is impossible now if the comparison

                * function is consistent, but we can't assume

                * that it is.

                */

               if (na == 0)

                 goto Succeed;

             }

           *dest++ = *pb++;

           --nb;

           if (nb == 0)

             goto Succeed;


           k = gallop_left (*pa, pb, nb, 0, comp);

           bcount = k;

           if (k)

             {

               if (k < 0)

                 goto Fail;

               dest = std::copy (pb, pb + k, dest);

               pb += k;

               nb -= k;

               if (nb == 0)

                 goto Succeed;

             }

           *dest++ = *pa++;

           --na;

           if (na == 1)

             goto CopyB;

         }

       while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP);


       ++min_gallop;     /* penalize it for leaving galloping mode */

       m_ms->m_min_gallop = min_gallop;

     }


 Succeed:

   result = 0;


 Fail:

   if (na)

     std::copy (pa, pa + na, dest);

   return result;


 CopyB:

   /* The last element of pa belongs at the end of the merge. */

   std::copy (pb, pb + nb, dest);

   dest[nb] = *pa;


   return 0;

 }


 template <typename T>

 template <typename Comp>

 int

 octave_sort<T>::merge_lo (T *pa, octave_idx_type *ipa, octave_idx_type na,

                           T *pb, octave_idx_type *ipb, octave_idx_type nb,

                           Comp comp)

 {

   octave_idx_type k;

   T *dest;

   octave_idx_type *idest;

   int result = -1;      /* guilty until proved innocent */

   octave_idx_type min_gallop = m_ms->m_min_gallop;


   m_ms->getmemi (na);


   std::copy (pa, pa + na, m_ms->m_a);

   std::copy (ipa, ipa + na, m_ms->m_ia);

   dest = pa; idest = ipa;

   pa = m_ms->m_a; ipa = m_ms->m_ia;


   *dest++ = *pb++; *idest++ = *ipb++;

   --nb;

   if (nb == 0)

     goto Succeed;

   if (na == 1)

     goto CopyB;


   for (;;)

     {

       octave_idx_type acount = 0;       /* # of times A won in a row */

       octave_idx_type bcount = 0;       /* # of times B won in a row */


       /* Do the straightforward thing until (if ever) one run

        * appears to win consistently.

        */

       for (;;)

         {


           if (comp (*pb, *pa))

             {

               *dest++ = *pb++; *idest++ = *ipb++;

               ++bcount;

               acount = 0;

               --nb;

               if (nb == 0)

                 goto Succeed;

               if (bcount >= min_gallop)

                 break;

             }

           else

             {

               *dest++ = *pa++; *idest++ = *ipa++;

               ++acount;

               bcount = 0;

               --na;

               if (na == 1)

                 goto CopyB;

               if (acount >= min_gallop)

                 break;

             }

         }


       /* One run is winning so consistently that galloping may

        * be a huge win.  So try that, and continue galloping until

        * (if ever) neither run appears to be winning consistently

        * anymore.

        */

       ++min_gallop;

       do

         {

           min_gallop -= (min_gallop > 1);

           m_ms->m_min_gallop = min_gallop;

           k = gallop_right (*pb, pa, na, 0, comp);

           acount = k;

           if (k)

             {

               if (k < 0)

                 goto Fail;

               dest = std::copy (pa, pa + k, dest);

               idest = std::copy (ipa, ipa + k, idest);

               pa += k; ipa += k;

               na -= k;

               if (na == 1)

                 goto CopyB;

               /* na==0 is impossible now if the comparison

                * function is consistent, but we can't assume

                * that it is.

                */

               if (na == 0)

                 goto Succeed;

             }

           *dest++ = *pb++; *idest++ = *ipb++;

           --nb;

           if (nb == 0)

             goto Succeed;


           k = gallop_left (*pa, pb, nb, 0, comp);

           bcount = k;

           if (k)

             {

               if (k < 0)

                 goto Fail;

               dest = std::copy (pb, pb + k, dest);

               idest = std::copy (ipb, ipb + k, idest);

               pb += k; ipb += k;

               nb -= k;

               if (nb == 0)

                 goto Succeed;

             }

           *dest++ = *pa++; *idest++ = *ipa++;

           --na;

           if (na == 1)

             goto CopyB;

         }

       while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP);


       ++min_gallop;     /* penalize it for leaving galloping mode */

       m_ms->m_min_gallop = min_gallop;

     }


 Succeed:

   result = 0;


 Fail:

   if (na)

     {

       std::copy (pa, pa + na, dest);

       std::copy (ipa, ipa + na, idest);

     }

   return result;


 CopyB:

   /* The last element of pa belongs at the end of the merge. */

   std::copy (pb, pb + nb, dest);

   std::copy (ipb, ipb + nb, idest);

   dest[nb] = *pa;

   idest[nb] = *ipa;


   return 0;

 }


 /* Merge the na elements starting at pa with the nb elements starting at pb

  * in a stable way, in-place.  na and nb must be > 0, and pa + na == pb.

  * Must also have that *pb < *pa, that pa[na-1] belongs at the end of the

  * merge, and should have na >= nb.  See listsort.txt for more info.

  * Return 0 if successful, -1 if error.

  */

 template <typename T>

 template <typename Comp>

 int

 octave_sort<T>::merge_hi (T *pa, octave_idx_type na,

                           T *pb, octave_idx_type nb,

                           Comp comp)

 {

   octave_idx_type k;

   T *dest;

   int result = -1;      /* guilty until proved innocent */

   T *basea, *baseb;

   octave_idx_type min_gallop = m_ms->m_min_gallop;


   m_ms->getmem (nb);


   dest = pb + nb - 1;

   std::copy (pb, pb + nb, m_ms->m_a);

   basea = pa;

   baseb = m_ms->m_a;

   pb = m_ms->m_a + nb - 1;

   pa += na - 1;


   *dest-- = *pa--;

   --na;

   if (na == 0)

     goto Succeed;

   if (nb == 1)

     goto CopyA;


   for (;;)

     {

       octave_idx_type acount = 0;       /* # of times A won in a row */

       octave_idx_type bcount = 0;       /* # of times B won in a row */


       /* Do the straightforward thing until (if ever) one run

        * appears to win consistently.

        */

       for (;;)

         {

           if (comp (*pb, *pa))

             {

               *dest-- = *pa--;

               ++acount;

               bcount = 0;

               --na;

               if (na == 0)

                 goto Succeed;

               if (acount >= min_gallop)

                 break;

             }

           else

             {

               *dest-- = *pb--;

               ++bcount;

               acount = 0;

               --nb;

               if (nb == 1)

                 goto CopyA;

               if (bcount >= min_gallop)

                 break;

             }

         }


       /* One run is winning so consistently that galloping may

        * be a huge win.  So try that, and continue galloping until

        * (if ever) neither run appears to be winning consistently

        * anymore.

        */

       ++min_gallop;

       do

         {

           min_gallop -= (min_gallop > 1);

           m_ms->m_min_gallop = min_gallop;

           k = gallop_right (*pb, basea, na, na-1, comp);

           if (k < 0)

             goto Fail;

           k = na - k;

           acount = k;

           if (k)

             {

               dest = std::copy_backward (pa+1 - k, pa+1, dest+1) - 1;

               pa -= k;

               na -= k;

               if (na == 0)

                 goto Succeed;

             }

           *dest-- = *pb--;

           --nb;

           if (nb == 1)

             goto CopyA;


           k = gallop_left (*pa, baseb, nb, nb-1, comp);

           if (k < 0)

             goto Fail;

           k = nb - k;

           bcount = k;

           if (k)

             {

               dest -= k;

               pb -= k;

               std::copy (pb+1, pb+1 + k, dest+1);

               nb -= k;

               if (nb == 1)

                 goto CopyA;

               /* nb==0 is impossible now if the comparison

                * function is consistent, but we can't assume

                * that it is.

                */

               if (nb == 0)

                 goto Succeed;

             }

           *dest-- = *pa--;

           --na;

           if (na == 0)

             goto Succeed;

         } while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP);

       ++min_gallop;     /* penalize it for leaving galloping mode */

       m_ms->m_min_gallop = min_gallop;

     }


 Succeed:

   result = 0;


 Fail:

   if (nb)

     std::copy (baseb, baseb + nb, dest-(nb-1));

   return result;


 CopyA:

   /* The first element of pb belongs at the front of the merge. */

   dest = std::copy_backward (pa+1 - na, pa+1, dest+1) - 1;

   pa -= na;

   *dest = *pb;


   return 0;

 }


 template <typename T>

 template <typename Comp>

 int

 octave_sort<T>::merge_hi (T *pa, octave_idx_type *ipa, octave_idx_type na,

                           T *pb, octave_idx_type *ipb, octave_idx_type nb,

                           Comp comp)

 {

   octave_idx_type k;

   T *dest;

   octave_idx_type *idest;

   int result = -1;      /* guilty until proved innocent */

   T *basea, *baseb;

   octave_idx_type *ibaseb;

   octave_idx_type min_gallop = m_ms->m_min_gallop;


   m_ms->getmemi (nb);


   dest = pb + nb - 1;

   idest = ipb + nb - 1;

   std::copy (pb, pb + nb, m_ms->m_a);

   std::copy (ipb, ipb + nb, m_ms->m_ia);

   basea = pa;

   baseb = m_ms->m_a; ibaseb = m_ms->m_ia;

   pb = m_ms->m_a + nb - 1; ipb = m_ms->m_ia + nb - 1;

   pa += na - 1; ipa += na - 1;


   *dest-- = *pa--; *idest-- = *ipa--;

   --na;

   if (na == 0)

     goto Succeed;

   if (nb == 1)

     goto CopyA;


   for (;;)

     {

       octave_idx_type acount = 0;       /* # of times A won in a row */

       octave_idx_type bcount = 0;       /* # of times B won in a row */


       /* Do the straightforward thing until (if ever) one run

        * appears to win consistently.

        */

       for (;;)

         {

           if (comp (*pb, *pa))

             {

               *dest-- = *pa--; *idest-- = *ipa--;

               ++acount;

               bcount = 0;

               --na;

               if (na == 0)

                 goto Succeed;

               if (acount >= min_gallop)

                 break;

             }

           else

             {

               *dest-- = *pb--; *idest-- = *ipb--;

               ++bcount;

               acount = 0;

               --nb;

               if (nb == 1)

                 goto CopyA;

               if (bcount >= min_gallop)

                 break;

             }

         }


       /* One run is winning so consistently that galloping may

        * be a huge win.  So try that, and continue galloping until

        * (if ever) neither run appears to be winning consistently

        * anymore.

        */

       ++min_gallop;

       do

         {

           min_gallop -= (min_gallop > 1);

           m_ms->m_min_gallop = min_gallop;

           k = gallop_right (*pb, basea, na, na-1, comp);

           if (k < 0)

             goto Fail;

           k = na - k;

           acount = k;

           if (k)

             {

               dest = std::copy_backward (pa+1 - k, pa+1, dest+1) - 1;

               idest = std::copy_backward (ipa+1 - k, ipa+1, idest+1) - 1;

               pa -= k; ipa -= k;

               na -= k;

               if (na == 0)

                 goto Succeed;

             }

           *dest-- = *pb--; *idest-- = *ipb--;

           --nb;

           if (nb == 1)

             goto CopyA;


           k = gallop_left (*pa, baseb, nb, nb-1, comp);

           if (k < 0)

             goto Fail;

           k = nb - k;

           bcount = k;

           if (k)

             {

               dest -= k; idest -= k;

               pb -= k; ipb -= k;

               std::copy (pb+1, pb+1 + k, dest+1);

               std::copy (ipb+1, ipb+1 + k, idest+1);

               nb -= k;

               if (nb == 1)

                 goto CopyA;

               /* nb==0 is impossible now if the comparison

                * function is consistent, but we can't assume

                * that it is.

                */

               if (nb == 0)

                 goto Succeed;

             }

           *dest-- = *pa--; *idest-- = *ipa--;

           --na;

           if (na == 0)

             goto Succeed;

         } while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP);

       ++min_gallop;     /* penalize it for leaving galloping mode */

       m_ms->m_min_gallop = min_gallop;

     }


 Succeed:

   result = 0;


 Fail:

   if (nb)

     {

       std::copy (baseb, baseb + nb, dest-(nb-1));

       std::copy (ibaseb, ibaseb + nb, idest-(nb-1));

     }

   return result;


 CopyA:

   /* The first element of pb belongs at the front of the merge. */

   dest = std::copy_backward (pa+1 - na, pa+1, dest+1) - 1;

   idest = std::copy_backward (ipa+1 - na, ipa+1, idest+1) - 1;

   pa -= na; ipa -= na;

   *dest = *pb; *idest = *ipb;


   return 0;

 }


 /* Merge the two runs at stack indices i and i+1.

  * Returns 0 on success, -1 on error.

  */

 template <typename T>

 template <typename Comp>

 int

 octave_sort<T>::merge_at (octave_idx_type i, T *data,

                           Comp comp)

 {

   T *pa, *pb;

   octave_idx_type na, nb;

   octave_idx_type k;


   pa = data + m_ms->m_pending[i].m_base;

   na = m_ms->m_pending[i].m_len;

   pb = data + m_ms->m_pending[i+1].m_base;

   nb = m_ms->m_pending[i+1].m_len;


   /* Record the length of the combined runs; if i is the 3rd-last

    * run now, also slide over the last run (which isn't involved

    * in this merge).  The current run i+1 goes away in any case.

    */

   m_ms->m_pending[i].m_len = na + nb;

   if (i == m_ms->m_n - 3)

     m_ms->m_pending[i+1] = m_ms->m_pending[i+2];

   m_ms->m_n--;


   /* Where does b start in a?  Elements in a before that can be

    * ignored (already in place).

    */

   k = gallop_right (*pb, pa, na, 0, comp);

   if (k < 0)

     return -1;

   pa += k;

   na -= k;

   if (na == 0)

     return 0;


   /* Where does a end in b?  Elements in b after that can be

    * ignored (already in place).

    */

   nb = gallop_left (pa[na-1], pb, nb, nb-1, comp);

   if (nb <= 0)

     return nb;


   /* Merge what remains of the runs, using a temp array with

    * min (na, nb) elements.

    */

   if (na <= nb)

     return merge_lo (pa, na, pb, nb, comp);

   else

     return merge_hi (pa, na, pb, nb, comp);

 }


 template <typename T>

 template <typename Comp>

 int

 octave_sort<T>::merge_at (octave_idx_type i, T *data, octave_idx_type *idx,

                           Comp comp)

 {

   T *pa, *pb;

   octave_idx_type *ipa, *ipb;

   octave_idx_type na, nb;

   octave_idx_type k;


   pa = data + m_ms->m_pending[i].m_base;

   ipa = idx + m_ms->m_pending[i].m_base;

   na = m_ms->m_pending[i].m_len;

   pb = data + m_ms->m_pending[i+1].m_base;

   ipb = idx + m_ms->m_pending[i+1].m_base;

   nb = m_ms->m_pending[i+1].m_len;


   /* Record the length of the combined runs; if i is the 3rd-last

    * run now, also slide over the last run (which isn't involved

    * in this merge).  The current run i+1 goes away in any case.

    */

   m_ms->m_pending[i].m_len = na + nb;

   if (i == m_ms->m_n - 3)

     m_ms->m_pending[i+1] = m_ms->m_pending[i+2];

   m_ms->m_n--;


   /* Where does b start in a?  Elements in a before that can be

    * ignored (already in place).

    */

   k = gallop_right (*pb, pa, na, 0, comp);

   if (k < 0)

     return -1;

   pa += k; ipa += k;

   na -= k;

   if (na == 0)

     return 0;


   /* Where does a end in b?  Elements in b after that can be

    * ignored (already in place).

    */

   nb = gallop_left (pa[na-1], pb, nb, nb-1, comp);

   if (nb <= 0)

     return nb;


   /* Merge what remains of the runs, using a temp array with

    * min (na, nb) elements.

    */

   if (na <= nb)

     return merge_lo (pa, ipa, na, pb, ipb, nb, comp);

   else

     return merge_hi (pa, ipa, na, pb, ipb, nb, comp);

 }


 /* Examine the stack of runs waiting to be merged, merging adjacent runs

  * until the stack invariants are re-established:

  *

  * 1. len[-3] > len[-2] + len[-1]

  * 2. len[-2] > len[-1]

  *

  * See listsort.txt for more info.

  *

  * Returns 0 on success, -1 on error.

  */

 template <typename T>

 template <typename Comp>

 int

 octave_sort<T>::merge_collapse (T *data, Comp comp)

 {

   struct s_slice *p = m_ms->m_pending;


   while (m_ms->m_n > 1)

     {

       octave_idx_type n = m_ms->m_n - 2;

       if (n > 0 && p[n-1].m_len <= p[n].m_len + p[n+1].m_len)

         {

           if (p[n-1].m_len < p[n+1].m_len)

             --n;

           if (merge_at (n, data, comp) < 0)

             return -1;

         }

       else if (p[n].m_len <= p[n+1].m_len)

         {

           if (merge_at (n, data, comp) < 0)

             return -1;

         }

       else

         break;

     }


   return 0;

 }


 template <typename T>

 template <typename Comp>

 int

 octave_sort<T>::merge_collapse (T *data, octave_idx_type *idx, Comp comp)

 {

   struct s_slice *p = m_ms->m_pending;


   while (m_ms->m_n > 1)

     {

       octave_idx_type n = m_ms->m_n - 2;

       if (n > 0 && p[n-1].m_len <= p[n].m_len + p[n+1].m_len)

         {

           if (p[n-1].m_len < p[n+1].m_len)

             --n;

           if (merge_at (n, data, idx, comp) < 0)

             return -1;

         }

       else if (p[n].m_len <= p[n+1].m_len)

         {

           if (merge_at (n, data, idx, comp) < 0)

             return -1;

         }

       else

         break;

     }


   return 0;

 }


 /* Regardless of invariants, merge all runs on the stack until only one

  * remains.  This is used at the end of the mergesort.

  *

  * Returns 0 on success, -1 on error.

  */

 template <typename T>

 template <typename Comp>

 int

 octave_sort<T>::merge_force_collapse (T *data, Comp comp)

 {

   struct s_slice *p = m_ms->m_pending;


   while (m_ms->m_n > 1)

     {

       octave_idx_type n = m_ms->m_n - 2;

       if (n > 0 && p[n-1].m_len < p[n+1].m_len)

         --n;

       if (merge_at (n, data, comp) < 0)

         return -1;

     }


   return 0;

 }


 template <typename T>

 template <typename Comp>

 int

 octave_sort<T>::merge_force_collapse (T *data, octave_idx_type *idx, Comp comp)

 {

   struct s_slice *p = m_ms->m_pending;


   while (m_ms->m_n > 1)

     {

       octave_idx_type n = m_ms->m_n - 2;

       if (n > 0 && p[n-1].m_len < p[n+1].m_len)

         --n;

       if (merge_at (n, data, idx, comp) < 0)

         return -1;

     }


   return 0;

 }


 /* Compute a good value for the minimum run length; natural runs shorter

  * than this are boosted artificially via binary insertion.

  *

  * If n < 64, return n (it's too small to bother with fancy stuff).

  * Else if n is an exact power of 2, return 32.

  * Else return an int k, 32 <= k <= 64, such that n/k is close to, but

  * strictly less than, an exact power of 2.

  *

  * See listsort.txt for more info.

  */

 template <typename T>

 octave_idx_type

 octave_sort<T>::merge_compute_minrun (octave_idx_type n)

 {

   octave_idx_type r = 0;        /* becomes 1 if any 1 bits are shifted off */


   while (n >= 64)

     {

       r |= n & 1;

       n >>= 1;

     }


   return n + r;

 }


 template <typename T>

 template <typename Comp>

 void

 octave_sort<T>::sort (T *data, octave_idx_type nel, Comp comp)

 {

   /* Re-initialize the Mergestate as this might be the second time called */

   if (! m_ms) m_ms = new MergeState;


   m_ms->reset ();

   m_ms->getmem (MERGESTATE_TEMP_SIZE);


   if (nel > 1)

     {

       octave_idx_type nremaining = nel;

       octave_idx_type lo = 0;


       /* March over the array once, left to right, finding natural runs,

        * and extending short natural runs to minrun elements.

        */

       octave_idx_type minrun = merge_compute_minrun (nremaining);

       do

         {

           bool descending;

           octave_idx_type n;


           /* Identify next run. */

           n = count_run (data + lo, nremaining, descending, comp);

           if (n < 0)

             return;

           if (descending)

             std::reverse (data + lo, data + lo + n);

           /* If short, extend to min (minrun, nremaining). */

           if (n < minrun)

             {

               const octave_idx_type force = (nremaining <= minrun ? nremaining

                                                                   : minrun);

               binarysort (data + lo, force, n, comp);

               n = force;

             }

           /* Push run onto m_pending-runs stack, and maybe merge. */

           assert (m_ms->m_n < MAX_MERGE_PENDING);

           m_ms->m_pending[m_ms->m_n].m_base = lo;

           m_ms->m_pending[m_ms->m_n].m_len = n;

           m_ms->m_n++;

           if (merge_collapse (data, comp) < 0)

             return;

           /* Advance to find next run. */

           lo += n;

           nremaining -= n;

         }

       while (nremaining);


       merge_force_collapse (data, comp);

     }

 }


 template <typename T>

 template <typename Comp>

 void

 octave_sort<T>::sort (T *data, octave_idx_type *idx, octave_idx_type nel,

                       Comp comp)

 {

   /* Re-initialize the Mergestate as this might be the second time called */

   if (! m_ms) m_ms = new MergeState;


   m_ms->reset ();

   m_ms->getmemi (MERGESTATE_TEMP_SIZE);


   if (nel > 1)

     {

       octave_idx_type nremaining = nel;

       octave_idx_type lo = 0;


       /* March over the array once, left to right, finding natural runs,

        * and extending short natural runs to minrun elements.

        */

       octave_idx_type minrun = merge_compute_minrun (nremaining);

       do

         {

           bool descending;

           octave_idx_type n;


           /* Identify next run. */

           n = count_run (data + lo, nremaining, descending, comp);

           if (n < 0)

             return;

           if (descending)

             {

               std::reverse (data + lo, data + lo + n);

               std::reverse (idx + lo, idx + lo + n);

             }

           /* If short, extend to min (minrun, nremaining). */

           if (n < minrun)

             {

               const octave_idx_type force = (nremaining <= minrun ? nremaining

                                                                   : minrun);

               binarysort (data + lo, idx + lo, force, n, comp);

               n = force;

             }

           /* Push run onto m_pending-runs stack, and maybe merge. */

           assert (m_ms->m_n < MAX_MERGE_PENDING);

           m_ms->m_pending[m_ms->m_n].m_base = lo;

           m_ms->m_pending[m_ms->m_n].m_len = n;

           m_ms->m_n++;

           if (merge_collapse (data, idx, comp) < 0)

             return;

           /* Advance to find next run. */

           lo += n;

           nremaining -= n;

         }

       while (nremaining);


       merge_force_collapse (data, idx, comp);

     }

 }


 template <typename T>

 void

 octave_sort<T>::sort (T *data, octave_idx_type nel)

 {

 #if defined (INLINE_ASCENDING_SORT)

   if (m_compare == ascending_compare)

     sort (data, nel, std::less<T> ());

   else

 #endif

 #if defined (INLINE_DESCENDING_SORT)

     if (m_compare == descending_compare)

       sort (data, nel, std::greater<T> ());

     else

 #endif

       if (m_compare)

         sort (data, nel, m_compare);

 }


 template <typename T>

 void

 octave_sort<T>::sort (T *data, octave_idx_type *idx, octave_idx_type nel)

 {

 #if defined (INLINE_ASCENDING_SORT)

   if (m_compare == ascending_compare)

     sort (data, idx, nel, std::less<T> ());

   else

 #endif

 #if defined (INLINE_DESCENDING_SORT)

     if (m_compare == descending_compare)

       sort (data, idx, nel, std::greater<T> ());

     else

 #endif

       if (m_compare)

         sort (data, idx, nel, m_compare);

 }


 template <typename T>

 template <typename Comp>

 bool

 octave_sort<T>::issorted (const T *data, octave_idx_type nel, Comp comp)

 {

   const T *end = data + nel;

   if (data != end)

     {

       const T *next = data;

       while (++next != end)

         {

           if (comp (*next, *data))

             break;

           data = next;

         }

       data = next;

     }


   return data == end;

 }


 template <typename T>

 bool

 octave_sort<T>::issorted (const T *data, octave_idx_type nel)

 {

   bool retval = false;

 #if defined (INLINE_ASCENDING_SORT)

   if (m_compare == ascending_compare)

     retval = issorted (data, nel, std::less<T> ());

   else

 #endif

 #if defined (INLINE_DESCENDING_SORT)

     if (m_compare == descending_compare)

       retval = issorted (data, nel, std::greater<T> ());

     else

 #endif

       if (m_compare)

         retval = issorted (data, nel, m_compare);


   return retval;

 }


 // FIXME: is there really no way to make this local to the following function?

 struct sortrows_run_t

 {

   sortrows_run_t (octave_idx_type c, octave_idx_type o, octave_idx_type n)

     : col (c), ofs (o), nel (n) { }

   octave_idx_type col, ofs, nel;

 };


 template <typename T>

 template <typename Comp>

 void

 octave_sort<T>::sort_rows (const T *data, octave_idx_type *idx,

                            octave_idx_type rows, octave_idx_type cols,

                            Comp comp)

 {

   OCTAVE_LOCAL_BUFFER (T, buf, rows);

   for (octave_idx_type i = 0; i < rows; i++)

     idx[i] = i;


   if (cols == 0 || rows <= 1)

     return;


   // This is a breadth-first traversal.

   typedef sortrows_run_t run_t;

   std::stack<run_t> runs;


   runs.push (run_t (0, 0, rows));


   while (! runs.empty ())

     {

       octave_idx_type col = runs.top ().col;

       octave_idx_type ofs = runs.top ().ofs;

       octave_idx_type nel = runs.top ().nel;

       runs.pop ();

       assert (nel > 1);


       T *lbuf = buf + ofs;

       const T *ldata = data + rows*col;

       octave_idx_type *lidx = idx + ofs;


       // Gather.

       for (octave_idx_type i = 0; i < nel; i++)

         lbuf[i] = ldata[lidx[i]];


       // Sort.

       sort (lbuf, lidx, nel, comp);


       // Identify constant runs and schedule subsorts.

       if (col < cols-1)

         {

           octave_idx_type lst = 0;

           for (octave_idx_type i = 0; i < nel; i++)

             {

               if (comp (lbuf[lst], lbuf[i]))

                 {

                   if (i > lst + 1)

                     runs.push (run_t (col+1, ofs + lst, i - lst));

                   lst = i;

                 }

             }

           if (nel > lst + 1)

             runs.push (run_t (col+1, ofs + lst, nel - lst));

         }

     }

 }


 template <typename T>

 void

 octave_sort<T>::sort_rows (const T *data, octave_idx_type *idx,

                            octave_idx_type rows, octave_idx_type cols)

 {

 #if defined (INLINE_ASCENDING_SORT)

   if (m_compare == ascending_compare)

     sort_rows (data, idx, rows, cols, std::less<T> ());

   else

 #endif

 #if defined (INLINE_DESCENDING_SORT)

     if (m_compare == descending_compare)

       sort_rows (data, idx, rows, cols, std::greater<T> ());

     else

 #endif

       if (m_compare)

         sort_rows (data, idx, rows, cols, m_compare);

 }


 template <typename T>

 template <typename Comp>

 bool

 octave_sort<T>::is_sorted_rows (const T *data, octave_idx_type rows,

                                 octave_idx_type cols, Comp comp)

 {

   if (rows <= 1 || cols == 0)

     return true;


   // This is a breadth-first traversal.

   const T *lastrow = data + rows*(cols - 1);

   typedef std::pair<const T *, octave_idx_type> run_t;

   std::stack<run_t> runs;


   bool sorted = true;

   runs.push (run_t (data, rows));

   while (sorted && ! runs.empty ())

     {

       const T *lo = runs.top ().first;

       octave_idx_type n = runs.top ().second;

       runs.pop ();

       if (lo < lastrow)

         {

           // Not the final column.

           assert (n > 1);

           const T *hi = lo + n;

           const T *lst = lo;

           for (lo++; lo < hi; lo++)

             {

               if (comp (*lst, *lo))

                 {

                   if (lo > lst + 1)

                     runs.push (run_t (lst + rows, lo - lst));

                   lst = lo;

                 }

               else if (comp (*lo, *lst))

                 break;


             }

           if (lo == hi)

             {

               if (lo > lst + 1)

                 runs.push (run_t (lst + rows, lo - lst));

             }

           else

             {

               sorted = false;

               break;

             }

         }

       else

         // The final column - use fast code.

         sorted = issorted (lo, n, comp);

     }


   return sorted;

 }


 template <typename T>

 bool

 octave_sort<T>::is_sorted_rows (const T *data, octave_idx_type rows,

                                 octave_idx_type cols)

 {

   bool retval = false;


 #if defined (INLINE_ASCENDING_SORT)

   if (m_compare == ascending_compare)

     retval = is_sorted_rows (data, rows, cols, std::less<T> ());

   else

 #endif

 #if defined (INLINE_DESCENDING_SORT)

     if (m_compare == descending_compare)

       retval = is_sorted_rows (data, rows, cols, std::greater<T> ());

     else

 #endif

       if (m_compare)

         retval = is_sorted_rows (data, rows, cols, m_compare);


   return retval;

 }


 // The simple binary lookup.


 template <typename T>

 template <typename Comp>

 octave_idx_type

 octave_sort<T>::lookup (const T *data, octave_idx_type nel,

                         const T& value, Comp comp)

 {

   octave_idx_type lo = 0;

   octave_idx_type hi = nel;


   while (lo < hi)

     {

       octave_idx_type mid = lo + ((hi-lo) >> 1);

       if (comp (value, data[mid]))

         hi = mid;

       else

         lo = mid + 1;

     }


   return lo;

 }


 template <typename T>

 octave_idx_type

 octave_sort<T>::lookup (const T *data, octave_idx_type nel,

                         const T& value)

 {

   octave_idx_type retval = 0;


 #if defined (INLINE_ASCENDING_SORT)

   if (m_compare == ascending_compare)

     retval = lookup (data, nel, value, std::less<T> ());

   else

 #endif

 #if defined (INLINE_DESCENDING_SORT)

     if (m_compare == descending_compare)

       retval = lookup (data, nel, value, std::greater<T> ());

     else

 #endif

       if (m_compare)

         retval = lookup (data, nel, value, std::ptr_fun (m_compare));


   return retval;

 }


 template <typename T>

 template <typename Comp>

 void

 octave_sort<T>::lookup (const T *data, octave_idx_type nel,

                         const T *values, octave_idx_type nvalues,

                         octave_idx_type *idx, Comp comp)

 {

   // Use a sequence of binary lookups.

   // FIXME: Can this be sped up generally?  The sorted merge case is dealt with

   // elsewhere.

   for (octave_idx_type j = 0; j < nvalues; j++)

     idx[j] = lookup (data, nel, values[j], comp);

 }


 template <typename T>

 void

 octave_sort<T>::lookup (const T *data, octave_idx_type nel,

                         const T *values, octave_idx_type nvalues,

                         octave_idx_type *idx)

 {

 #if defined (INLINE_ASCENDING_SORT)

   if (m_compare == ascending_compare)

     lookup (data, nel, values, nvalues, idx, std::less<T> ());

   else

 #endif

 #if defined (INLINE_DESCENDING_SORT)

     if (m_compare == descending_compare)

       lookup (data, nel, values, nvalues, idx, std::greater<T> ());

     else

 #endif

       if (m_compare)

         lookup (data, nel, values, nvalues, idx, std::ptr_fun (m_compare));

 }


 template <typename T>

 template <typename Comp>

 void

 octave_sort<T>::lookup_sorted (const T *data, octave_idx_type nel,

                                const T *values, octave_idx_type nvalues,

                                octave_idx_type *idx, bool rev, Comp comp)

 {

   if (rev)

     {

       octave_idx_type i = 0;

       octave_idx_type j = nvalues - 1;


       if (nvalues > 0 && nel > 0)

         {

           while (true)

             {

               if (comp (values[j], data[i]))

                 {

                   idx[j] = i;

                   if (--j < 0)

                     break;

                 }

               else if (++i == nel)

                 break;

             }

         }


       for (; j >= 0; j--)

         idx[j] = i;

     }

   else

     {

       octave_idx_type i = 0;

       octave_idx_type j = 0;


       if (nvalues > 0 && nel > 0)

         {

           while (true)

             {

               if (comp (values[j], data[i]))

                 {

                   idx[j] = i;

                   if (++j == nvalues)

                     break;

                 }

               else if (++i == nel)

                 break;

             }

         }


       for (; j != nvalues; j++)

         idx[j] = i;

     }

 }


 template <typename T>

 void

 octave_sort<T>::lookup_sorted (const T *data, octave_idx_type nel,

                                const T *values, octave_idx_type nvalues,

                                octave_idx_type *idx, bool rev)

 {

 #if defined (INLINE_ASCENDING_SORT)

   if (m_compare == ascending_compare)

     lookup_sorted (data, nel, values, nvalues, idx, rev, std::less<T> ());

   else

 #endif

 #if defined (INLINE_DESCENDING_SORT)

     if (m_compare == descending_compare)

       lookup_sorted (data, nel, values, nvalues, idx, rev, std::greater<T> ());

     else

 #endif

       if (m_compare)

         lookup_sorted (data, nel, values, nvalues, idx, rev,

                        std::ptr_fun (m_compare));

 }


 template <typename T>

 template <typename Comp>

 void

 octave_sort<T>::nth_element (T *data, octave_idx_type nel,

                              octave_idx_type lo, octave_idx_type up,

                              Comp comp)

 {

   // Simply wrap the STL algorithms.

   // FIXME: this will fail if we attempt to inline <,> for Complex.

   if (up == lo+1)

     std::nth_element (data, data + lo, data + nel, comp);

   else if (lo == 0)

     std::partial_sort (data, data + up, data + nel, comp);

   else

     {

       std::nth_element (data, data + lo, data + nel, comp);

       if (up == lo + 2)

         {

           // Finding two subsequent elements.

           std::swap (data[lo+1],

                      *std::min_element (data + lo + 1, data + nel, comp));

         }

       else

         std::partial_sort (data + lo + 1, data + up, data + nel, comp);

     }

 }


 template <typename T>

 void

 octave_sort<T>::nth_element (T *data, octave_idx_type nel,

                              octave_idx_type lo, octave_idx_type up)

 {

   if (up < 0)

     up = lo + 1;

 #if defined (INLINE_ASCENDING_SORT)

   if (m_compare == ascending_compare)

     nth_element (data, nel, lo, up, std::less<T> ());

   else

 #endif

 #if defined (INLINE_DESCENDING_SORT)

     if (m_compare == descending_compare)

       nth_element (data, nel, lo, up, std::greater<T> ());

     else

 #endif

       if (m_compare)

         nth_element (data, nel, lo, up, std::ptr_fun (m_compare));

 }


 template <typename T>

 bool

 octave_sort<T>::ascending_compare (typename ref_param<T>::type x,

                                    typename ref_param<T>::type y)

 {

   return x < y;

 }


 template <typename T>

 bool

 octave_sort<T>::descending_compare (typename ref_param<T>::type x,

                                     typename ref_param<T>::type y)

 {

   return x > y;

 }

max
charNDArray max(char d, const charNDArray &m)
Definition: chNDArray.cc:230

octave_idx_type

octave_sort::octave_sort
octave_sort(void)
Definition: oct-sort.cc:123

octave_sort::MERGESTATE_TEMP_SIZE
static const int MERGESTATE_TEMP_SIZE
Definition: oct-sort.h:180

octave_sort::set_compare
void set_compare(compare_fcn_type comp)
Definition: oct-sort.h:118

octave_sort::merge_collapse
int merge_collapse(T *data, Comp comp)
Definition: oct-sort.cc:1276

octave_sort::~octave_sort
~octave_sort(void)
Definition: oct-sort.cc:133

octave_sort::sort_rows
void sort_rows(const T *data, octave_idx_type *idx, octave_idx_type rows, octave_idx_type cols)
Definition: oct-sort.cc:1661

octave_sort::gallop_right
octave_idx_type gallop_right(T key, T *a, octave_idx_type n, octave_idx_type hint, Comp comp)
Definition: oct-sort.cc:420

octave_sort::merge_hi
int merge_hi(T *pa, octave_idx_type na, T *pb, octave_idx_type nb, Comp comp)
Definition: oct-sort.cc:874

octave_sort::count_run
octave_idx_type count_run(T *lo, octave_idx_type n, bool &descending, Comp comp)
Definition: oct-sort.cc:265

octave_sort::m_compare
compare_fcn_type m_compare
Definition: oct-sort.h:241

octave_sort::gallop_left
octave_idx_type gallop_left(T key, T *a, octave_idx_type n, octave_idx_type hint, Comp comp)
Definition: oct-sort.cc:325

octave_sort::descending_compare
static bool descending_compare(typename ref_param< T >::type, typename ref_param< T >::type)
Definition: oct-sort.cc:1973

octave_sort::issorted
bool issorted(const T *data, octave_idx_type nel)
Definition: oct-sort.cc:1574

octave_sort::ascending_compare
static bool ascending_compare(typename ref_param< T >::type, typename ref_param< T >::type)
Definition: oct-sort.cc:1965

octave_sort::lookup
octave_idx_type lookup(const T *data, octave_idx_type nel, const T &value)
Definition: oct-sort.cc:1784

octave_sort::m_ms
MergeState * m_ms
Definition: oct-sort.h:243

octave_sort::merge_at
int merge_at(octave_idx_type i, T *data, Comp comp)
Definition: oct-sort.cc:1161

octave_sort::merge_compute_minrun
octave_idx_type merge_compute_minrun(octave_idx_type n)
Definition: oct-sort.cc:1386

octave_sort::binarysort
void binarysort(T *data, octave_idx_type nel, octave_idx_type start, Comp comp)
Definition: oct-sort.cc:153

octave_sort::sort
void sort(T *data, octave_idx_type nel)
Definition: oct-sort.cc:1517

octave_sort::MIN_GALLOP
static const int MIN_GALLOP
Definition: oct-sort.h:177

octave_sort::nth_element
void nth_element(T *data, octave_idx_type nel, octave_idx_type lo, octave_idx_type up=-1)
Definition: oct-sort.cc:1944

octave_sort::is_sorted_rows
bool is_sorted_rows(const T *data, octave_idx_type rows, octave_idx_type cols)
Definition: oct-sort.cc:1738

octave_sort::MAX_MERGE_PENDING
static const int MAX_MERGE_PENDING
Definition: oct-sort.h:173

octave_sort::merge_force_collapse
int merge_force_collapse(T *data, Comp comp)
Definition: oct-sort.cc:1339

octave_sort::merge_lo
int merge_lo(T *pa, octave_idx_type na, T *pb, octave_idx_type nb, Comp comp)
Definition: oct-sort.cc:592

octave_sort::lookup_sorted
void lookup_sorted(const T *data, octave_idx_type nel, const T *values, octave_idx_type nvalues, octave_idx_type *idx, bool rev=false)
Definition: oct-sort.cc:1896

ref_param::type
if_then_else< is_class_type< T >::no, T, T const  & >::result type
Definition: lo-traits.h:121

lo-error.h

lo-mappers.h

x
F77_RET_T const F77_DBLE * x
Definition: lo-slatec-proto.h:38

m
T octave_idx_type m
Definition: mx-inlines.cc:773

n
octave_idx_type n
Definition: mx-inlines.cc:753

r
T * r
Definition: mx-inlines.cc:773

octave::next
static uint32_t * next
Definition: randmtzig.cc:189

oct-locbuf.h

OCTAVE_LOCAL_BUFFER
#define OCTAVE_LOCAL_BUFFER(T, buf, size)
Definition: oct-locbuf.h:44

roundupsize
static octave_idx_type roundupsize(size_t n)
Definition: oct-sort.cc:499

oct-sort.h

sortmode
sortmode
Definition: oct-sort.h:95

ASCENDING
@ ASCENDING
Definition: oct-sort.h:95

DESCENDING
@ DESCENDING
Definition: oct-sort.h:95

retval
octave_value::octave_value(const Array< char > &chm, char type) return retval
Definition: ov.cc:811

quit.h

octave_sort::MergeState
Definition: oct-sort.h:196

octave_sort::MergeState::getmem
void getmem(octave_idx_type need)
Definition: oct-sort.cc:548

octave_sort::MergeState::m_a
T * m_a
Definition: oct-sort.h:225

octave_sort::MergeState::m_pending
struct s_slice m_pending[MAX_MERGE_PENDING]
Definition: oct-sort.h:238

octave_sort::MergeState::m_n
octave_idx_type m_n
Definition: oct-sort.h:237

octave_sort::MergeState::m_alloced
octave_idx_type m_alloced
Definition: oct-sort.h:227

octave_sort::MergeState::getmemi
void getmemi(octave_idx_type need)
Definition: oct-sort.cc:566

octave_sort::MergeState::m_ia
octave_idx_type * m_ia
Definition: oct-sort.h:226

octave_sort::MergeState::reset
void reset(void)
Definition: oct-sort.h:210

octave_sort::MergeState::m_min_gallop
octave_idx_type m_min_gallop
Definition: oct-sort.h:221

octave_sort::s_slice
Definition: oct-sort.h:191

octave_sort::s_slice::m_base
octave_idx_type m_base
Definition: oct-sort.h:192

octave_sort::s_slice::m_len
octave_idx_type m_len
Definition: oct-sort.h:192

sortrows_run_t
Definition: oct-sort.cc:1595

sortrows_run_t::col
octave_idx_type col
Definition: oct-sort.cc:1598

sortrows_run_t::sortrows_run_t
sortrows_run_t(octave_idx_type c, octave_idx_type o, octave_idx_type n)
Definition: oct-sort.cc:1596