GNU Octave  6.2.0
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab
oct-sort.cc
Go to the documentation of this file.
1 ////////////////////////////////////////////////////////////////////////
2 //
3 // Copyright (C) 2003-2021 The Octave Project Developers
4 //
5 // See the file COPYRIGHT.md in the top-level directory of this
6 // distribution or <https://octave.org/copyright/>.
7 //
8 // This file is part of Octave.
9 //
10 // Octave is free software: you can redistribute it and/or modify it
11 // under the terms of the GNU General Public License as published by
12 // the Free Software Foundation, either version 3 of the License, or
13 // (at your option) any later version.
14 //
15 // Octave is distributed in the hope that it will be useful, but
16 // WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 // GNU General Public License for more details.
19 //
20 // You should have received a copy of the GNU General Public License
21 // along with Octave; see the file COPYING. If not, see
22 // <https://www.gnu.org/licenses/>.
23 //
24 // Code stolen in large part from Python's, listobject.c, which itself had
25 // no license header. However, thanks to Tim Peters for the parts of the
26 // code I ripped-off.
27 //
28 // As required in the Python license the short description of the changes
29 // made are
30 //
31 // * convert the sorting code in listobject.cc into a generic class,
32 // replacing PyObject* with the type of the class T.
33 //
34 // * replaced usages of malloc, free, memcpy and memmove by standard C++
35 // new [], delete [] and std::copy and std::copy_backward. Note that replacing
36 // memmove by std::copy is possible if the destination starts before the source.
37 // If not, std::copy_backward needs to be used.
38 //
39 // * templatize comparison operator in most methods, provide possible dispatch
40 //
41 // * duplicate methods to avoid by-the-way indexed sorting
42 //
43 // * add methods for verifying sortedness of array
44 //
45 // * row sorting via breadth-first tree subsorting
46 //
47 // * binary lookup and sequential binary lookup optimized for dense downsampling.
48 //
49 // * NOTE: the memory management routines rely on the fact that delete [] silently
50 // ignores null pointers. Don't gripe about the missing checks - they're there.
51 //
52 //
53 // The Python license is
54 //
55 // PSF LICENSE AGREEMENT FOR PYTHON 2.3
56 // --------------------------------------
57 //
58 // 1. This LICENSE AGREEMENT is between the Python Software Foundation
59 // ("PSF"), and the Individual or Organization ("Licensee") accessing and
60 // otherwise using Python 2.3 software in source or binary form and its
61 // associated documentation.
62 //
63 // 2. Subject to the terms and conditions of this License Agreement, PSF
64 // hereby grants Licensee a nonexclusive, royalty-free, world-wide
65 // license to reproduce, analyze, test, perform and/or display publicly,
66 // prepare derivative works, distribute, and otherwise use Python 2.3
67 // alone or in any derivative version, provided, however, that PSF's
68 // License Agreement and PSF's notice of copyright, i.e., "Copyright (c)
69 // 2001, 2002, 2003 Python Software Foundation; All Rights Reserved" are
70 // retained in Python 2.3 alone or in any derivative version prepared by
71 // Licensee.
72 //
73 // 3. In the event Licensee prepares a derivative work that is based on
74 // or incorporates Python 2.3 or any part thereof, and wants to make
75 // the derivative work available to others as provided herein, then
76 // Licensee hereby agrees to include in any such work a brief summary of
77 // the changes made to Python 2.3.
78 //
79 // 4. PSF is making Python 2.3 available to Licensee on an "AS IS"
80 // basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
81 // IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND
82 // DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
83 // FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 2.3 WILL NOT
84 // INFRINGE ANY THIRD PARTY RIGHTS.
85 //
86 // 5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
87 // 2.3 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
88 // A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 2.3,
89 // OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
90 //
91 // 6. This License Agreement will automatically terminate upon a material
92 // breach of its terms and conditions.
93 //
94 // 7. Nothing in this License Agreement shall be deemed to create any
95 // relationship of agency, partnership, or joint venture between PSF and
96 // Licensee. This License Agreement does not grant permission to use PSF
97 // trademarks or trade name in a trademark sense to endorse or promote
98 // products or services of Licensee, or any third party.
99 //
100 // 8. By copying, installing or otherwise using Python 2.3, Licensee
101 // agrees to be bound by the terms and conditions of this License
102 // Agreement.
103 //
104 ////////////////////////////////////////////////////////////////////////
105 
106 // This file should not include config.h. It is only included in other
107 // C++ source files that should have included config.h before including
108 // this file.
109 
110 #include <cassert>
111 #include <algorithm>
112 #include <functional>
113 #include <cstring>
114 #include <stack>
115 
116 #include "lo-error.h"
117 #include "lo-mappers.h"
118 #include "quit.h"
119 #include "oct-sort.h"
120 #include "oct-locbuf.h"
121 
122 template <typename T>
124  m_compare (ascending_compare), m_ms (nullptr)
125 { }
126 
127 template <typename T>
128 octave_sort<T>::octave_sort (compare_fcn_type comp)
129  : m_compare (comp), m_ms (nullptr)
130 { }
131 
132 template <typename T>
134 {
135  delete m_ms;
136 }
137 
138 template <typename T>
139 void
141 {
142  if (mode == ASCENDING)
143  m_compare = ascending_compare;
144  else if (mode == DESCENDING)
145  m_compare = descending_compare;
146  else
147  m_compare = nullptr;
148 }
149 
150 template <typename T>
151 template <typename Comp>
152 void
154  octave_idx_type start, Comp comp)
155 {
156  if (start == 0)
157  ++start;
158 
159  for (; start < nel; ++start)
160  {
161  /* set l to where *start belongs */
162  octave_idx_type l = 0;
163  octave_idx_type r = start;
164  T pivot = data[start];
165  /* Invariants:
166  * pivot >= all in [lo, l).
167  * pivot < all in [r, start).
168  * The second is vacuously true at the start.
169  */
170  do
171  {
172  octave_idx_type p = l + ((r - l) >> 1);
173  if (comp (pivot, data[p]))
174  r = p;
175  else
176  l = p+1;
177  }
178  while (l < r);
179  /* The invariants still hold, so pivot >= all in [lo, l) and
180  pivot < all in [l, start), so pivot belongs at l. Note
181  that if there are elements equal to pivot, l points to the
182  first slot after them -- that's why this sort is stable.
183  Slide over to make room.
184  Caution: using memmove is much slower under MSVC 5;
185  we're not usually moving many slots. */
186  // NOTE: using swap and going upwards appears to be faster.
187  for (octave_idx_type p = l; p < start; p++)
188  std::swap (pivot, data[p]);
189  data[start] = pivot;
190  }
191 
192  return;
193 }
194 
195 template <typename T>
196 template <typename Comp>
197 void
199  octave_idx_type start, Comp comp)
200 {
201  if (start == 0)
202  ++start;
203 
204  for (; start < nel; ++start)
205  {
206  /* set l to where *start belongs */
207  octave_idx_type l = 0;
208  octave_idx_type r = start;
209  T pivot = data[start];
210  /* Invariants:
211  * pivot >= all in [lo, l).
212  * pivot < all in [r, start).
213  * The second is vacuously true at the start.
214  */
215  do
216  {
217  octave_idx_type p = l + ((r - l) >> 1);
218  if (comp (pivot, data[p]))
219  r = p;
220  else
221  l = p+1;
222  }
223  while (l < r);
224  /* The invariants still hold, so pivot >= all in [lo, l) and
225  pivot < all in [l, start), so pivot belongs at l. Note
226  that if there are elements equal to pivot, l points to the
227  first slot after them -- that's why this sort is stable.
228  Slide over to make room.
229  Caution: using memmove is much slower under MSVC 5;
230  we're not usually moving many slots. */
231  // NOTE: using swap and going upwards appears to be faster.
232  for (octave_idx_type p = l; p < start; p++)
233  std::swap (pivot, data[p]);
234  data[start] = pivot;
235  octave_idx_type ipivot = idx[start];
236  for (octave_idx_type p = l; p < start; p++)
237  std::swap (ipivot, idx[p]);
238  idx[start] = ipivot;
239  }
240 
241  return;
242 }
243 
244 /*
245 Return the length of the run beginning at lo, in the slice [lo, hi). lo < hi
246 is required on entry. "A run" is the longest ascending sequence, with
247 
248  lo[0] <= lo[1] <= lo[2] <= ...
249 
250 or the longest descending sequence, with
251 
252  lo[0] > lo[1] > lo[2] > ...
253 
254 DESCENDING is set to false in the former case, or to true in the latter.
255 For its intended use in a stable mergesort, the strictness of the defn of
256 "descending" is needed so that the caller can safely reverse a descending
257 sequence without violating stability (strict > ensures there are no equal
258 elements to get out of order).
259 
260 Returns -1 in case of error.
261 */
262 template <typename T>
263 template <typename Comp>
265 octave_sort<T>::count_run (T *lo, octave_idx_type nel, bool& descending,
266  Comp comp)
267 {
269  T *hi = lo + nel;
270 
271  descending = false;
272  ++lo;
273  if (lo == hi)
274  return 1;
275 
276  n = 2;
277 
278  if (comp (*lo, *(lo-1)))
279  {
280  descending = true;
281  for (lo = lo+1; lo < hi; ++lo, ++n)
282  {
283  if (comp (*lo, *(lo-1)))
284  ;
285  else
286  break;
287  }
288  }
289  else
290  {
291  for (lo = lo+1; lo < hi; ++lo, ++n)
292  {
293  if (comp (*lo, *(lo-1)))
294  break;
295  }
296  }
297 
298  return n;
299 }
300 
301 /*
302 Locate the proper position of key in a sorted vector; if the vector contains
303 an element equal to key, return the position immediately to the left of
304 the leftmost equal element. [gallop_right() does the same except returns
305 the position to the right of the rightmost equal element (if any).]
306 
307 "a" is a sorted vector with n elements, starting at a[0]. n must be > 0.
308 
309 "hint" is an index at which to begin the search, 0 <= hint < n. The closer
310 hint is to the final result, the faster this runs.
311 
312 The return value is the int k in 0..n such that
313 
314  a[k-1] < key <= a[k]
315 
316 pretending that *(a-1) is minus infinity and a[n] is plus infinity. IOW,
317 key belongs at index k; or, IOW, the first k elements of a should precede
318 key, and the last n-k should follow key.
319 
320 Returns -1 on error. See listsort.txt for info on the method.
321 */
322 template <typename T>
323 template <typename Comp>
326  octave_idx_type hint,
327  Comp comp)
328 {
329  octave_idx_type ofs;
330  octave_idx_type lastofs;
331  octave_idx_type k;
332 
333  a += hint;
334  lastofs = 0;
335  ofs = 1;
336  if (comp (*a, key))
337  {
338  /* a[hint] < key -- gallop right, until
339  * a[hint + lastofs] < key <= a[hint + ofs]
340  */
341  const octave_idx_type maxofs = n - hint; /* &a[n-1] is highest */
342  while (ofs < maxofs)
343  {
344  if (comp (a[ofs], key))
345  {
346  lastofs = ofs;
347  ofs = (ofs << 1) + 1;
348  if (ofs <= 0) /* int overflow */
349  ofs = maxofs;
350  }
351  else /* key <= a[hint + ofs] */
352  break;
353  }
354  if (ofs > maxofs)
355  ofs = maxofs;
356  /* Translate back to offsets relative to &a[0]. */
357  lastofs += hint;
358  ofs += hint;
359  }
360  else
361  {
362  /* key <= a[hint] -- gallop left, until
363  * a[hint - ofs] < key <= a[hint - lastofs]
364  */
365  const octave_idx_type maxofs = hint + 1; /* &a[0] is lowest */
366  while (ofs < maxofs)
367  {
368  if (comp (*(a-ofs), key))
369  break;
370  /* key <= a[hint - ofs] */
371  lastofs = ofs;
372  ofs = (ofs << 1) + 1;
373  if (ofs <= 0) /* int overflow */
374  ofs = maxofs;
375  }
376  if (ofs > maxofs)
377  ofs = maxofs;
378  /* Translate back to positive offsets relative to &a[0]. */
379  k = lastofs;
380  lastofs = hint - ofs;
381  ofs = hint - k;
382  }
383  a -= hint;
384 
385  /* Now a[lastofs] < key <= a[ofs], so key belongs somewhere to the
386  * right of lastofs but no farther right than ofs. Do a binary
387  * search, with invariant a[lastofs-1] < key <= a[ofs].
388  */
389  ++lastofs;
390  while (lastofs < ofs)
391  {
392  octave_idx_type m = lastofs + ((ofs - lastofs) >> 1);
393 
394  if (comp (a[m], key))
395  lastofs = m+1; /* a[m] < key */
396  else
397  ofs = m; /* key <= a[m] */
398  }
399 
400  return ofs;
401 }
402 
403 /*
404 Exactly like gallop_left(), except that if key already exists in a[0:n],
405 finds the position immediately to the right of the rightmost equal value.
406 
407 The return value is the int k in 0..n such that
408 
409  a[k-1] <= key < a[k]
410 
411 or -1 if error.
412 
413 The code duplication is massive, but this is enough different given that
414 we're sticking to "<" comparisons that it's much harder to follow if
415 written as one routine with yet another "left or right?" flag.
416 */
417 template <typename T>
418 template <typename Comp>
421  octave_idx_type hint,
422  Comp comp)
423 {
424  octave_idx_type ofs;
425  octave_idx_type lastofs;
426  octave_idx_type k;
427 
428  a += hint;
429  lastofs = 0;
430  ofs = 1;
431  if (comp (key, *a))
432  {
433  /* key < a[hint] -- gallop left, until
434  * a[hint - ofs] <= key < a[hint - lastofs]
435  */
436  const octave_idx_type maxofs = hint + 1; /* &a[0] is lowest */
437  while (ofs < maxofs)
438  {
439  if (comp (key, *(a-ofs)))
440  {
441  lastofs = ofs;
442  ofs = (ofs << 1) + 1;
443  if (ofs <= 0) /* int overflow */
444  ofs = maxofs;
445  }
446  else /* a[hint - ofs] <= key */
447  break;
448  }
449  if (ofs > maxofs)
450  ofs = maxofs;
451  /* Translate back to positive offsets relative to &a[0]. */
452  k = lastofs;
453  lastofs = hint - ofs;
454  ofs = hint - k;
455  }
456  else
457  {
458  /* a[hint] <= key -- gallop right, until
459  * a[hint + lastofs] <= key < a[hint + ofs]
460  */
461  const octave_idx_type maxofs = n - hint; /* &a[n-1] is highest */
462  while (ofs < maxofs)
463  {
464  if (comp (key, a[ofs]))
465  break;
466  /* a[hint + ofs] <= key */
467  lastofs = ofs;
468  ofs = (ofs << 1) + 1;
469  if (ofs <= 0) /* int overflow */
470  ofs = maxofs;
471  }
472  if (ofs > maxofs)
473  ofs = maxofs;
474  /* Translate back to offsets relative to &a[0]. */
475  lastofs += hint;
476  ofs += hint;
477  }
478  a -= hint;
479 
480  /* Now a[lastofs] <= key < a[ofs], so key belongs somewhere to the
481  * right of lastofs but no farther right than ofs. Do a binary
482  * search, with invariant a[lastofs-1] <= key < a[ofs].
483  */
484  ++lastofs;
485  while (lastofs < ofs)
486  {
487  octave_idx_type m = lastofs + ((ofs - lastofs) >> 1);
488 
489  if (comp (key, a[m]))
490  ofs = m; /* key < a[m] */
491  else
492  lastofs = m+1; /* a[m] <= key */
493  }
494 
495  return ofs;
496 }
497 
498 static inline octave_idx_type
499 roundupsize (size_t n)
500 {
501  unsigned int nbits = 3;
502  size_t n2 = n >> 8;
503 
504  /* Round up:
505  * If n < 256, to a multiple of 8.
506  * If n < 2048, to a multiple of 64.
507  * If n < 16384, to a multiple of 512.
508  * If n < 131072, to a multiple of 4096.
509  * If n < 1048576, to a multiple of 32768.
510  * If n < 8388608, to a multiple of 262144.
511  * If n < 67108864, to a multiple of 2097152.
512  * If n < 536870912, to a multiple of 16777216.
513  * ...
514  * If n < 2**(5+3*i), to a multiple of 2**(3*i).
515  *
516  * This over-allocates proportional to the list size, making room
517  * for additional growth. The over-allocation is mild, but is
518  * enough to give linear-time amortized behavior over a long
519  * sequence of appends() in the presence of a poorly-performing
520  * system realloc() (which is a reality, e.g., across all flavors
521  * of Windows, with Win9x behavior being particularly bad -- and
522  * we've still got address space fragmentation problems on Win9x
523  * even with this scheme, although it requires much longer lists to
524  * provoke them than it used to).
525  */
526  while (n2)
527  {
528  n2 >>= 3;
529  nbits += 3;
530  }
531 
532  size_t new_size = ((n >> nbits) + 1) << nbits;
533 
534  if (new_size == 0
535  || new_size
536  > static_cast<size_t> (std::numeric_limits<octave_idx_type>::max ()))
537  (*current_liboctave_error_handler)
538  ("unable to allocate sufficient memory for sort");
539 
540  return static_cast<octave_idx_type> (new_size);
541 }
542 
543 /* Ensure enough temp memory for 'need' array slots is available.
544  * Returns 0 on success and -1 if the memory can't be gotten.
545  */
546 template <typename T>
547 void
549 {
550  if (need <= m_alloced)
551  return;
552 
553  need = roundupsize (need);
554  /* Don't realloc! That can cost cycles to copy the old data, but
555  * we don't care what's in the block.
556  */
557  delete [] m_a;
558  delete [] m_ia; // Must do this or fool possible next getmemi.
559  m_a = new T [need];
560  m_alloced = need;
561 
562 }
563 
564 template <typename T>
565 void
567 {
568  if (m_ia && need <= m_alloced)
569  return;
570 
571  need = roundupsize (need);
572  /* Don't realloc! That can cost cycles to copy the old data, but
573  * we don't care what's in the block.
574  */
575  delete [] m_a;
576  delete [] m_ia;
577 
578  m_a = new T [need];
579  m_ia = new octave_idx_type [need];
580  m_alloced = need;
581 }
582 
583 /* Merge the na elements starting at pa with the nb elements starting at pb
584  * in a stable way, in-place. na and nb must be > 0, and pa + na == pb.
585  * Must also have that *pb < *pa, that pa[na-1] belongs at the end of the
586  * merge, and should have na <= nb. See listsort.txt for more info.
587  * Return 0 if successful, -1 if error.
588  */
589 template <typename T>
590 template <typename Comp>
591 int
593  T *pb, octave_idx_type nb,
594  Comp comp)
595 {
596  octave_idx_type k;
597  T *dest;
598  int result = -1; /* guilty until proved innocent */
599  octave_idx_type min_gallop = m_ms->m_min_gallop;
600 
601  m_ms->getmem (na);
602 
603  std::copy (pa, pa + na, m_ms->m_a);
604  dest = pa;
605  pa = m_ms->m_a;
606 
607  *dest++ = *pb++;
608  --nb;
609  if (nb == 0)
610  goto Succeed;
611  if (na == 1)
612  goto CopyB;
613 
614  for (;;)
615  {
616  octave_idx_type acount = 0; /* # of times A won in a row */
617  octave_idx_type bcount = 0; /* # of times B won in a row */
618 
619  /* Do the straightforward thing until (if ever) one run
620  * appears to win consistently.
621  */
622  for (;;)
623  {
624 
625  // FIXME: these loops are candidates for further optimizations.
626  // Rather than testing everything in each cycle, it may be more
627  // efficient to do it in hunks.
628  if (comp (*pb, *pa))
629  {
630  *dest++ = *pb++;
631  ++bcount;
632  acount = 0;
633  --nb;
634  if (nb == 0)
635  goto Succeed;
636  if (bcount >= min_gallop)
637  break;
638  }
639  else
640  {
641  *dest++ = *pa++;
642  ++acount;
643  bcount = 0;
644  --na;
645  if (na == 1)
646  goto CopyB;
647  if (acount >= min_gallop)
648  break;
649  }
650  }
651 
652  /* One run is winning so consistently that galloping may
653  * be a huge win. So try that, and continue galloping until
654  * (if ever) neither run appears to be winning consistently
655  * anymore.
656  */
657  ++min_gallop;
658  do
659  {
660  min_gallop -= (min_gallop > 1);
661  m_ms->m_min_gallop = min_gallop;
662  k = gallop_right (*pb, pa, na, 0, comp);
663  acount = k;
664  if (k)
665  {
666  if (k < 0)
667  goto Fail;
668  dest = std::copy (pa, pa + k, dest);
669  pa += k;
670  na -= k;
671  if (na == 1)
672  goto CopyB;
673  /* na==0 is impossible now if the comparison
674  * function is consistent, but we can't assume
675  * that it is.
676  */
677  if (na == 0)
678  goto Succeed;
679  }
680  *dest++ = *pb++;
681  --nb;
682  if (nb == 0)
683  goto Succeed;
684 
685  k = gallop_left (*pa, pb, nb, 0, comp);
686  bcount = k;
687  if (k)
688  {
689  if (k < 0)
690  goto Fail;
691  dest = std::copy (pb, pb + k, dest);
692  pb += k;
693  nb -= k;
694  if (nb == 0)
695  goto Succeed;
696  }
697  *dest++ = *pa++;
698  --na;
699  if (na == 1)
700  goto CopyB;
701  }
702  while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP);
703 
704  ++min_gallop; /* penalize it for leaving galloping mode */
705  m_ms->m_min_gallop = min_gallop;
706  }
707 
708 Succeed:
709  result = 0;
710 
711 Fail:
712  if (na)
713  std::copy (pa, pa + na, dest);
714  return result;
715 
716 CopyB:
717  /* The last element of pa belongs at the end of the merge. */
718  std::copy (pb, pb + nb, dest);
719  dest[nb] = *pa;
720 
721  return 0;
722 }
723 
724 template <typename T>
725 template <typename Comp>
726 int
728  T *pb, octave_idx_type *ipb, octave_idx_type nb,
729  Comp comp)
730 {
731  octave_idx_type k;
732  T *dest;
733  octave_idx_type *idest;
734  int result = -1; /* guilty until proved innocent */
735  octave_idx_type min_gallop = m_ms->m_min_gallop;
736 
737  m_ms->getmemi (na);
738 
739  std::copy (pa, pa + na, m_ms->m_a);
740  std::copy (ipa, ipa + na, m_ms->m_ia);
741  dest = pa; idest = ipa;
742  pa = m_ms->m_a; ipa = m_ms->m_ia;
743 
744  *dest++ = *pb++; *idest++ = *ipb++;
745  --nb;
746  if (nb == 0)
747  goto Succeed;
748  if (na == 1)
749  goto CopyB;
750 
751  for (;;)
752  {
753  octave_idx_type acount = 0; /* # of times A won in a row */
754  octave_idx_type bcount = 0; /* # of times B won in a row */
755 
756  /* Do the straightforward thing until (if ever) one run
757  * appears to win consistently.
758  */
759  for (;;)
760  {
761 
762  if (comp (*pb, *pa))
763  {
764  *dest++ = *pb++; *idest++ = *ipb++;
765  ++bcount;
766  acount = 0;
767  --nb;
768  if (nb == 0)
769  goto Succeed;
770  if (bcount >= min_gallop)
771  break;
772  }
773  else
774  {
775  *dest++ = *pa++; *idest++ = *ipa++;
776  ++acount;
777  bcount = 0;
778  --na;
779  if (na == 1)
780  goto CopyB;
781  if (acount >= min_gallop)
782  break;
783  }
784  }
785 
786  /* One run is winning so consistently that galloping may
787  * be a huge win. So try that, and continue galloping until
788  * (if ever) neither run appears to be winning consistently
789  * anymore.
790  */
791  ++min_gallop;
792  do
793  {
794  min_gallop -= (min_gallop > 1);
795  m_ms->m_min_gallop = min_gallop;
796  k = gallop_right (*pb, pa, na, 0, comp);
797  acount = k;
798  if (k)
799  {
800  if (k < 0)
801  goto Fail;
802  dest = std::copy (pa, pa + k, dest);
803  idest = std::copy (ipa, ipa + k, idest);
804  pa += k; ipa += k;
805  na -= k;
806  if (na == 1)
807  goto CopyB;
808  /* na==0 is impossible now if the comparison
809  * function is consistent, but we can't assume
810  * that it is.
811  */
812  if (na == 0)
813  goto Succeed;
814  }
815  *dest++ = *pb++; *idest++ = *ipb++;
816  --nb;
817  if (nb == 0)
818  goto Succeed;
819 
820  k = gallop_left (*pa, pb, nb, 0, comp);
821  bcount = k;
822  if (k)
823  {
824  if (k < 0)
825  goto Fail;
826  dest = std::copy (pb, pb + k, dest);
827  idest = std::copy (ipb, ipb + k, idest);
828  pb += k; ipb += k;
829  nb -= k;
830  if (nb == 0)
831  goto Succeed;
832  }
833  *dest++ = *pa++; *idest++ = *ipa++;
834  --na;
835  if (na == 1)
836  goto CopyB;
837  }
838  while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP);
839 
840  ++min_gallop; /* penalize it for leaving galloping mode */
841  m_ms->m_min_gallop = min_gallop;
842  }
843 
844 Succeed:
845  result = 0;
846 
847 Fail:
848  if (na)
849  {
850  std::copy (pa, pa + na, dest);
851  std::copy (ipa, ipa + na, idest);
852  }
853  return result;
854 
855 CopyB:
856  /* The last element of pa belongs at the end of the merge. */
857  std::copy (pb, pb + nb, dest);
858  std::copy (ipb, ipb + nb, idest);
859  dest[nb] = *pa;
860  idest[nb] = *ipa;
861 
862  return 0;
863 }
864 
865 /* Merge the na elements starting at pa with the nb elements starting at pb
866  * in a stable way, in-place. na and nb must be > 0, and pa + na == pb.
867  * Must also have that *pb < *pa, that pa[na-1] belongs at the end of the
868  * merge, and should have na >= nb. See listsort.txt for more info.
869  * Return 0 if successful, -1 if error.
870  */
871 template <typename T>
872 template <typename Comp>
873 int
875  T *pb, octave_idx_type nb,
876  Comp comp)
877 {
878  octave_idx_type k;
879  T *dest;
880  int result = -1; /* guilty until proved innocent */
881  T *basea, *baseb;
882  octave_idx_type min_gallop = m_ms->m_min_gallop;
883 
884  m_ms->getmem (nb);
885 
886  dest = pb + nb - 1;
887  std::copy (pb, pb + nb, m_ms->m_a);
888  basea = pa;
889  baseb = m_ms->m_a;
890  pb = m_ms->m_a + nb - 1;
891  pa += na - 1;
892 
893  *dest-- = *pa--;
894  --na;
895  if (na == 0)
896  goto Succeed;
897  if (nb == 1)
898  goto CopyA;
899 
900  for (;;)
901  {
902  octave_idx_type acount = 0; /* # of times A won in a row */
903  octave_idx_type bcount = 0; /* # of times B won in a row */
904 
905  /* Do the straightforward thing until (if ever) one run
906  * appears to win consistently.
907  */
908  for (;;)
909  {
910  if (comp (*pb, *pa))
911  {
912  *dest-- = *pa--;
913  ++acount;
914  bcount = 0;
915  --na;
916  if (na == 0)
917  goto Succeed;
918  if (acount >= min_gallop)
919  break;
920  }
921  else
922  {
923  *dest-- = *pb--;
924  ++bcount;
925  acount = 0;
926  --nb;
927  if (nb == 1)
928  goto CopyA;
929  if (bcount >= min_gallop)
930  break;
931  }
932  }
933 
934  /* One run is winning so consistently that galloping may
935  * be a huge win. So try that, and continue galloping until
936  * (if ever) neither run appears to be winning consistently
937  * anymore.
938  */
939  ++min_gallop;
940  do
941  {
942  min_gallop -= (min_gallop > 1);
943  m_ms->m_min_gallop = min_gallop;
944  k = gallop_right (*pb, basea, na, na-1, comp);
945  if (k < 0)
946  goto Fail;
947  k = na - k;
948  acount = k;
949  if (k)
950  {
951  dest = std::copy_backward (pa+1 - k, pa+1, dest+1) - 1;
952  pa -= k;
953  na -= k;
954  if (na == 0)
955  goto Succeed;
956  }
957  *dest-- = *pb--;
958  --nb;
959  if (nb == 1)
960  goto CopyA;
961 
962  k = gallop_left (*pa, baseb, nb, nb-1, comp);
963  if (k < 0)
964  goto Fail;
965  k = nb - k;
966  bcount = k;
967  if (k)
968  {
969  dest -= k;
970  pb -= k;
971  std::copy (pb+1, pb+1 + k, dest+1);
972  nb -= k;
973  if (nb == 1)
974  goto CopyA;
975  /* nb==0 is impossible now if the comparison
976  * function is consistent, but we can't assume
977  * that it is.
978  */
979  if (nb == 0)
980  goto Succeed;
981  }
982  *dest-- = *pa--;
983  --na;
984  if (na == 0)
985  goto Succeed;
986  } while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP);
987  ++min_gallop; /* penalize it for leaving galloping mode */
988  m_ms->m_min_gallop = min_gallop;
989  }
990 
991 Succeed:
992  result = 0;
993 
994 Fail:
995  if (nb)
996  std::copy (baseb, baseb + nb, dest-(nb-1));
997  return result;
998 
999 CopyA:
1000  /* The first element of pb belongs at the front of the merge. */
1001  dest = std::copy_backward (pa+1 - na, pa+1, dest+1) - 1;
1002  pa -= na;
1003  *dest = *pb;
1004 
1005  return 0;
1006 }
1007 
1008 template <typename T>
1009 template <typename Comp>
1010 int
1012  T *pb, octave_idx_type *ipb, octave_idx_type nb,
1013  Comp comp)
1014 {
1015  octave_idx_type k;
1016  T *dest;
1017  octave_idx_type *idest;
1018  int result = -1; /* guilty until proved innocent */
1019  T *basea, *baseb;
1020  octave_idx_type *ibaseb;
1021  octave_idx_type min_gallop = m_ms->m_min_gallop;
1022 
1023  m_ms->getmemi (nb);
1024 
1025  dest = pb + nb - 1;
1026  idest = ipb + nb - 1;
1027  std::copy (pb, pb + nb, m_ms->m_a);
1028  std::copy (ipb, ipb + nb, m_ms->m_ia);
1029  basea = pa;
1030  baseb = m_ms->m_a; ibaseb = m_ms->m_ia;
1031  pb = m_ms->m_a + nb - 1; ipb = m_ms->m_ia + nb - 1;
1032  pa += na - 1; ipa += na - 1;
1033 
1034  *dest-- = *pa--; *idest-- = *ipa--;
1035  --na;
1036  if (na == 0)
1037  goto Succeed;
1038  if (nb == 1)
1039  goto CopyA;
1040 
1041  for (;;)
1042  {
1043  octave_idx_type acount = 0; /* # of times A won in a row */
1044  octave_idx_type bcount = 0; /* # of times B won in a row */
1045 
1046  /* Do the straightforward thing until (if ever) one run
1047  * appears to win consistently.
1048  */
1049  for (;;)
1050  {
1051  if (comp (*pb, *pa))
1052  {
1053  *dest-- = *pa--; *idest-- = *ipa--;
1054  ++acount;
1055  bcount = 0;
1056  --na;
1057  if (na == 0)
1058  goto Succeed;
1059  if (acount >= min_gallop)
1060  break;
1061  }
1062  else
1063  {
1064  *dest-- = *pb--; *idest-- = *ipb--;
1065  ++bcount;
1066  acount = 0;
1067  --nb;
1068  if (nb == 1)
1069  goto CopyA;
1070  if (bcount >= min_gallop)
1071  break;
1072  }
1073  }
1074 
1075  /* One run is winning so consistently that galloping may
1076  * be a huge win. So try that, and continue galloping until
1077  * (if ever) neither run appears to be winning consistently
1078  * anymore.
1079  */
1080  ++min_gallop;
1081  do
1082  {
1083  min_gallop -= (min_gallop > 1);
1084  m_ms->m_min_gallop = min_gallop;
1085  k = gallop_right (*pb, basea, na, na-1, comp);
1086  if (k < 0)
1087  goto Fail;
1088  k = na - k;
1089  acount = k;
1090  if (k)
1091  {
1092  dest = std::copy_backward (pa+1 - k, pa+1, dest+1) - 1;
1093  idest = std::copy_backward (ipa+1 - k, ipa+1, idest+1) - 1;
1094  pa -= k; ipa -= k;
1095  na -= k;
1096  if (na == 0)
1097  goto Succeed;
1098  }
1099  *dest-- = *pb--; *idest-- = *ipb--;
1100  --nb;
1101  if (nb == 1)
1102  goto CopyA;
1103 
1104  k = gallop_left (*pa, baseb, nb, nb-1, comp);
1105  if (k < 0)
1106  goto Fail;
1107  k = nb - k;
1108  bcount = k;
1109  if (k)
1110  {
1111  dest -= k; idest -= k;
1112  pb -= k; ipb -= k;
1113  std::copy (pb+1, pb+1 + k, dest+1);
1114  std::copy (ipb+1, ipb+1 + k, idest+1);
1115  nb -= k;
1116  if (nb == 1)
1117  goto CopyA;
1118  /* nb==0 is impossible now if the comparison
1119  * function is consistent, but we can't assume
1120  * that it is.
1121  */
1122  if (nb == 0)
1123  goto Succeed;
1124  }
1125  *dest-- = *pa--; *idest-- = *ipa--;
1126  --na;
1127  if (na == 0)
1128  goto Succeed;
1129  } while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP);
1130  ++min_gallop; /* penalize it for leaving galloping mode */
1131  m_ms->m_min_gallop = min_gallop;
1132  }
1133 
1134 Succeed:
1135  result = 0;
1136 
1137 Fail:
1138  if (nb)
1139  {
1140  std::copy (baseb, baseb + nb, dest-(nb-1));
1141  std::copy (ibaseb, ibaseb + nb, idest-(nb-1));
1142  }
1143  return result;
1144 
1145 CopyA:
1146  /* The first element of pb belongs at the front of the merge. */
1147  dest = std::copy_backward (pa+1 - na, pa+1, dest+1) - 1;
1148  idest = std::copy_backward (ipa+1 - na, ipa+1, idest+1) - 1;
1149  pa -= na; ipa -= na;
1150  *dest = *pb; *idest = *ipb;
1151 
1152  return 0;
1153 }
1154 
1155 /* Merge the two runs at stack indices i and i+1.
1156  * Returns 0 on success, -1 on error.
1157  */
1158 template <typename T>
1159 template <typename Comp>
1160 int
1162  Comp comp)
1163 {
1164  T *pa, *pb;
1165  octave_idx_type na, nb;
1166  octave_idx_type k;
1167 
1168  pa = data + m_ms->m_pending[i].m_base;
1169  na = m_ms->m_pending[i].m_len;
1170  pb = data + m_ms->m_pending[i+1].m_base;
1171  nb = m_ms->m_pending[i+1].m_len;
1172 
1173  /* Record the length of the combined runs; if i is the 3rd-last
1174  * run now, also slide over the last run (which isn't involved
1175  * in this merge). The current run i+1 goes away in any case.
1176  */
1177  m_ms->m_pending[i].m_len = na + nb;
1178  if (i == m_ms->m_n - 3)
1179  m_ms->m_pending[i+1] = m_ms->m_pending[i+2];
1180  m_ms->m_n--;
1181 
1182  /* Where does b start in a? Elements in a before that can be
1183  * ignored (already in place).
1184  */
1185  k = gallop_right (*pb, pa, na, 0, comp);
1186  if (k < 0)
1187  return -1;
1188  pa += k;
1189  na -= k;
1190  if (na == 0)
1191  return 0;
1192 
1193  /* Where does a end in b? Elements in b after that can be
1194  * ignored (already in place).
1195  */
1196  nb = gallop_left (pa[na-1], pb, nb, nb-1, comp);
1197  if (nb <= 0)
1198  return nb;
1199 
1200  /* Merge what remains of the runs, using a temp array with
1201  * min (na, nb) elements.
1202  */
1203  if (na <= nb)
1204  return merge_lo (pa, na, pb, nb, comp);
1205  else
1206  return merge_hi (pa, na, pb, nb, comp);
1207 }
1208 
1209 template <typename T>
1210 template <typename Comp>
1211 int
1213  Comp comp)
1214 {
1215  T *pa, *pb;
1216  octave_idx_type *ipa, *ipb;
1217  octave_idx_type na, nb;
1218  octave_idx_type k;
1219 
1220  pa = data + m_ms->m_pending[i].m_base;
1221  ipa = idx + m_ms->m_pending[i].m_base;
1222  na = m_ms->m_pending[i].m_len;
1223  pb = data + m_ms->m_pending[i+1].m_base;
1224  ipb = idx + m_ms->m_pending[i+1].m_base;
1225  nb = m_ms->m_pending[i+1].m_len;
1226 
1227  /* Record the length of the combined runs; if i is the 3rd-last
1228  * run now, also slide over the last run (which isn't involved
1229  * in this merge). The current run i+1 goes away in any case.
1230  */
1231  m_ms->m_pending[i].m_len = na + nb;
1232  if (i == m_ms->m_n - 3)
1233  m_ms->m_pending[i+1] = m_ms->m_pending[i+2];
1234  m_ms->m_n--;
1235 
1236  /* Where does b start in a? Elements in a before that can be
1237  * ignored (already in place).
1238  */
1239  k = gallop_right (*pb, pa, na, 0, comp);
1240  if (k < 0)
1241  return -1;
1242  pa += k; ipa += k;
1243  na -= k;
1244  if (na == 0)
1245  return 0;
1246 
1247  /* Where does a end in b? Elements in b after that can be
1248  * ignored (already in place).
1249  */
1250  nb = gallop_left (pa[na-1], pb, nb, nb-1, comp);
1251  if (nb <= 0)
1252  return nb;
1253 
1254  /* Merge what remains of the runs, using a temp array with
1255  * min (na, nb) elements.
1256  */
1257  if (na <= nb)
1258  return merge_lo (pa, ipa, na, pb, ipb, nb, comp);
1259  else
1260  return merge_hi (pa, ipa, na, pb, ipb, nb, comp);
1261 }
1262 
1263 /* Examine the stack of runs waiting to be merged, merging adjacent runs
1264  * until the stack invariants are re-established:
1265  *
1266  * 1. len[-3] > len[-2] + len[-1]
1267  * 2. len[-2] > len[-1]
1268  *
1269  * See listsort.txt for more info.
1270  *
1271  * Returns 0 on success, -1 on error.
1272  */
1273 template <typename T>
1274 template <typename Comp>
1275 int
1277 {
1278  struct s_slice *p = m_ms->m_pending;
1279 
1280  while (m_ms->m_n > 1)
1281  {
1282  octave_idx_type n = m_ms->m_n - 2;
1283  if (n > 0 && p[n-1].m_len <= p[n].m_len + p[n+1].m_len)
1284  {
1285  if (p[n-1].m_len < p[n+1].m_len)
1286  --n;
1287  if (merge_at (n, data, comp) < 0)
1288  return -1;
1289  }
1290  else if (p[n].m_len <= p[n+1].m_len)
1291  {
1292  if (merge_at (n, data, comp) < 0)
1293  return -1;
1294  }
1295  else
1296  break;
1297  }
1298 
1299  return 0;
1300 }
1301 
1302 template <typename T>
1303 template <typename Comp>
1304 int
1306 {
1307  struct s_slice *p = m_ms->m_pending;
1308 
1309  while (m_ms->m_n > 1)
1310  {
1311  octave_idx_type n = m_ms->m_n - 2;
1312  if (n > 0 && p[n-1].m_len <= p[n].m_len + p[n+1].m_len)
1313  {
1314  if (p[n-1].m_len < p[n+1].m_len)
1315  --n;
1316  if (merge_at (n, data, idx, comp) < 0)
1317  return -1;
1318  }
1319  else if (p[n].m_len <= p[n+1].m_len)
1320  {
1321  if (merge_at (n, data, idx, comp) < 0)
1322  return -1;
1323  }
1324  else
1325  break;
1326  }
1327 
1328  return 0;
1329 }
1330 
1331 /* Regardless of invariants, merge all runs on the stack until only one
1332  * remains. This is used at the end of the mergesort.
1333  *
1334  * Returns 0 on success, -1 on error.
1335  */
1336 template <typename T>
1337 template <typename Comp>
1338 int
1340 {
1341  struct s_slice *p = m_ms->m_pending;
1342 
1343  while (m_ms->m_n > 1)
1344  {
1345  octave_idx_type n = m_ms->m_n - 2;
1346  if (n > 0 && p[n-1].m_len < p[n+1].m_len)
1347  --n;
1348  if (merge_at (n, data, comp) < 0)
1349  return -1;
1350  }
1351 
1352  return 0;
1353 }
1354 
1355 template <typename T>
1356 template <typename Comp>
1357 int
1359 {
1360  struct s_slice *p = m_ms->m_pending;
1361 
1362  while (m_ms->m_n > 1)
1363  {
1364  octave_idx_type n = m_ms->m_n - 2;
1365  if (n > 0 && p[n-1].m_len < p[n+1].m_len)
1366  --n;
1367  if (merge_at (n, data, idx, comp) < 0)
1368  return -1;
1369  }
1370 
1371  return 0;
1372 }
1373 
1374 /* Compute a good value for the minimum run length; natural runs shorter
1375  * than this are boosted artificially via binary insertion.
1376  *
1377  * If n < 64, return n (it's too small to bother with fancy stuff).
1378  * Else if n is an exact power of 2, return 32.
1379  * Else return an int k, 32 <= k <= 64, such that n/k is close to, but
1380  * strictly less than, an exact power of 2.
1381  *
1382  * See listsort.txt for more info.
1383  */
1384 template <typename T>
1387 {
1388  octave_idx_type r = 0; /* becomes 1 if any 1 bits are shifted off */
1389 
1390  while (n >= 64)
1391  {
1392  r |= n & 1;
1393  n >>= 1;
1394  }
1395 
1396  return n + r;
1397 }
1398 
1399 template <typename T>
1400 template <typename Comp>
1401 void
1402 octave_sort<T>::sort (T *data, octave_idx_type nel, Comp comp)
1403 {
1404  /* Re-initialize the Mergestate as this might be the second time called */
1405  if (! m_ms) m_ms = new MergeState;
1406 
1407  m_ms->reset ();
1409 
1410  if (nel > 1)
1411  {
1412  octave_idx_type nremaining = nel;
1413  octave_idx_type lo = 0;
1414 
1415  /* March over the array once, left to right, finding natural runs,
1416  * and extending short natural runs to minrun elements.
1417  */
1418  octave_idx_type minrun = merge_compute_minrun (nremaining);
1419  do
1420  {
1421  bool descending;
1423 
1424  /* Identify next run. */
1425  n = count_run (data + lo, nremaining, descending, comp);
1426  if (n < 0)
1427  return;
1428  if (descending)
1429  std::reverse (data + lo, data + lo + n);
1430  /* If short, extend to min (minrun, nremaining). */
1431  if (n < minrun)
1432  {
1433  const octave_idx_type force = (nremaining <= minrun ? nremaining
1434  : minrun);
1435  binarysort (data + lo, force, n, comp);
1436  n = force;
1437  }
1438  /* Push run onto m_pending-runs stack, and maybe merge. */
1439  assert (m_ms->m_n < MAX_MERGE_PENDING);
1440  m_ms->m_pending[m_ms->m_n].m_base = lo;
1441  m_ms->m_pending[m_ms->m_n].m_len = n;
1442  m_ms->m_n++;
1443  if (merge_collapse (data, comp) < 0)
1444  return;
1445  /* Advance to find next run. */
1446  lo += n;
1447  nremaining -= n;
1448  }
1449  while (nremaining);
1450 
1451  merge_force_collapse (data, comp);
1452  }
1453 }
1454 
1455 template <typename T>
1456 template <typename Comp>
1457 void
1459  Comp comp)
1460 {
1461  /* Re-initialize the Mergestate as this might be the second time called */
1462  if (! m_ms) m_ms = new MergeState;
1463 
1464  m_ms->reset ();
1466 
1467  if (nel > 1)
1468  {
1469  octave_idx_type nremaining = nel;
1470  octave_idx_type lo = 0;
1471 
1472  /* March over the array once, left to right, finding natural runs,
1473  * and extending short natural runs to minrun elements.
1474  */
1475  octave_idx_type minrun = merge_compute_minrun (nremaining);
1476  do
1477  {
1478  bool descending;
1480 
1481  /* Identify next run. */
1482  n = count_run (data + lo, nremaining, descending, comp);
1483  if (n < 0)
1484  return;
1485  if (descending)
1486  {
1487  std::reverse (data + lo, data + lo + n);
1488  std::reverse (idx + lo, idx + lo + n);
1489  }
1490  /* If short, extend to min (minrun, nremaining). */
1491  if (n < minrun)
1492  {
1493  const octave_idx_type force = (nremaining <= minrun ? nremaining
1494  : minrun);
1495  binarysort (data + lo, idx + lo, force, n, comp);
1496  n = force;
1497  }
1498  /* Push run onto m_pending-runs stack, and maybe merge. */
1499  assert (m_ms->m_n < MAX_MERGE_PENDING);
1500  m_ms->m_pending[m_ms->m_n].m_base = lo;
1501  m_ms->m_pending[m_ms->m_n].m_len = n;
1502  m_ms->m_n++;
1503  if (merge_collapse (data, idx, comp) < 0)
1504  return;
1505  /* Advance to find next run. */
1506  lo += n;
1507  nremaining -= n;
1508  }
1509  while (nremaining);
1510 
1511  merge_force_collapse (data, idx, comp);
1512  }
1513 }
1514 
1515 template <typename T>
1516 void
1518 {
1519 #if defined (INLINE_ASCENDING_SORT)
1521  sort (data, nel, std::less<T> ());
1522  else
1523 #endif
1524 #if defined (INLINE_DESCENDING_SORT)
1526  sort (data, nel, std::greater<T> ());
1527  else
1528 #endif
1529  if (m_compare)
1530  sort (data, nel, m_compare);
1531 }
1532 
1533 template <typename T>
1534 void
1536 {
1537 #if defined (INLINE_ASCENDING_SORT)
1539  sort (data, idx, nel, std::less<T> ());
1540  else
1541 #endif
1542 #if defined (INLINE_DESCENDING_SORT)
1544  sort (data, idx, nel, std::greater<T> ());
1545  else
1546 #endif
1547  if (m_compare)
1548  sort (data, idx, nel, m_compare);
1549 }
1550 
1551 template <typename T>
1552 template <typename Comp>
1553 bool
1554 octave_sort<T>::issorted (const T *data, octave_idx_type nel, Comp comp)
1555 {
1556  const T *end = data + nel;
1557  if (data != end)
1558  {
1559  const T *next = data;
1560  while (++next != end)
1561  {
1562  if (comp (*next, *data))
1563  break;
1564  data = next;
1565  }
1566  data = next;
1567  }
1568 
1569  return data == end;
1570 }
1571 
1572 template <typename T>
1573 bool
1575 {
1576  bool retval = false;
1577 #if defined (INLINE_ASCENDING_SORT)
1579  retval = issorted (data, nel, std::less<T> ());
1580  else
1581 #endif
1582 #if defined (INLINE_DESCENDING_SORT)
1584  retval = issorted (data, nel, std::greater<T> ());
1585  else
1586 #endif
1587  if (m_compare)
1588  retval = issorted (data, nel, m_compare);
1589 
1590  return retval;
1591 }
1592 
1593 // FIXME: is there really no way to make this local to the following function?
1595 {
1597  : col (c), ofs (o), nel (n) { }
1599 };
1600 
1601 template <typename T>
1602 template <typename Comp>
1603 void
1605  octave_idx_type rows, octave_idx_type cols,
1606  Comp comp)
1607 {
1608  OCTAVE_LOCAL_BUFFER (T, buf, rows);
1609  for (octave_idx_type i = 0; i < rows; i++)
1610  idx[i] = i;
1611 
1612  if (cols == 0 || rows <= 1)
1613  return;
1614 
1615  // This is a breadth-first traversal.
1616  typedef sortrows_run_t run_t;
1617  std::stack<run_t> runs;
1618 
1619  runs.push (run_t (0, 0, rows));
1620 
1621  while (! runs.empty ())
1622  {
1623  octave_idx_type col = runs.top ().col;
1624  octave_idx_type ofs = runs.top ().ofs;
1625  octave_idx_type nel = runs.top ().nel;
1626  runs.pop ();
1627  assert (nel > 1);
1628 
1629  T *lbuf = buf + ofs;
1630  const T *ldata = data + rows*col;
1631  octave_idx_type *lidx = idx + ofs;
1632 
1633  // Gather.
1634  for (octave_idx_type i = 0; i < nel; i++)
1635  lbuf[i] = ldata[lidx[i]];
1636 
1637  // Sort.
1638  sort (lbuf, lidx, nel, comp);
1639 
1640  // Identify constant runs and schedule subsorts.
1641  if (col < cols-1)
1642  {
1643  octave_idx_type lst = 0;
1644  for (octave_idx_type i = 0; i < nel; i++)
1645  {
1646  if (comp (lbuf[lst], lbuf[i]))
1647  {
1648  if (i > lst + 1)
1649  runs.push (run_t (col+1, ofs + lst, i - lst));
1650  lst = i;
1651  }
1652  }
1653  if (nel > lst + 1)
1654  runs.push (run_t (col+1, ofs + lst, nel - lst));
1655  }
1656  }
1657 }
1658 
1659 template <typename T>
1660 void
1662  octave_idx_type rows, octave_idx_type cols)
1663 {
1664 #if defined (INLINE_ASCENDING_SORT)
1666  sort_rows (data, idx, rows, cols, std::less<T> ());
1667  else
1668 #endif
1669 #if defined (INLINE_DESCENDING_SORT)
1671  sort_rows (data, idx, rows, cols, std::greater<T> ());
1672  else
1673 #endif
1674  if (m_compare)
1675  sort_rows (data, idx, rows, cols, m_compare);
1676 }
1677 
1678 template <typename T>
1679 template <typename Comp>
1680 bool
1682  octave_idx_type cols, Comp comp)
1683 {
1684  if (rows <= 1 || cols == 0)
1685  return true;
1686 
1687  // This is a breadth-first traversal.
1688  const T *lastrow = data + rows*(cols - 1);
1689  typedef std::pair<const T *, octave_idx_type> run_t;
1690  std::stack<run_t> runs;
1691 
1692  bool sorted = true;
1693  runs.push (run_t (data, rows));
1694  while (sorted && ! runs.empty ())
1695  {
1696  const T *lo = runs.top ().first;
1697  octave_idx_type n = runs.top ().second;
1698  runs.pop ();
1699  if (lo < lastrow)
1700  {
1701  // Not the final column.
1702  assert (n > 1);
1703  const T *hi = lo + n;
1704  const T *lst = lo;
1705  for (lo++; lo < hi; lo++)
1706  {
1707  if (comp (*lst, *lo))
1708  {
1709  if (lo > lst + 1)
1710  runs.push (run_t (lst + rows, lo - lst));
1711  lst = lo;
1712  }
1713  else if (comp (*lo, *lst))
1714  break;
1715 
1716  }
1717  if (lo == hi)
1718  {
1719  if (lo > lst + 1)
1720  runs.push (run_t (lst + rows, lo - lst));
1721  }
1722  else
1723  {
1724  sorted = false;
1725  break;
1726  }
1727  }
1728  else
1729  // The final column - use fast code.
1730  sorted = issorted (lo, n, comp);
1731  }
1732 
1733  return sorted;
1734 }
1735 
1736 template <typename T>
1737 bool
1739  octave_idx_type cols)
1740 {
1741  bool retval = false;
1742 
1743 #if defined (INLINE_ASCENDING_SORT)
1745  retval = is_sorted_rows (data, rows, cols, std::less<T> ());
1746  else
1747 #endif
1748 #if defined (INLINE_DESCENDING_SORT)
1750  retval = is_sorted_rows (data, rows, cols, std::greater<T> ());
1751  else
1752 #endif
1753  if (m_compare)
1754  retval = is_sorted_rows (data, rows, cols, m_compare);
1755 
1756  return retval;
1757 }
1758 
1759 // The simple binary lookup.
1760 
1761 template <typename T>
1762 template <typename Comp>
1765  const T& value, Comp comp)
1766 {
1767  octave_idx_type lo = 0;
1768  octave_idx_type hi = nel;
1769 
1770  while (lo < hi)
1771  {
1772  octave_idx_type mid = lo + ((hi-lo) >> 1);
1773  if (comp (value, data[mid]))
1774  hi = mid;
1775  else
1776  lo = mid + 1;
1777  }
1778 
1779  return lo;
1780 }
1781 
1782 template <typename T>
1785  const T& value)
1786 {
1787  octave_idx_type retval = 0;
1788 
1789 #if defined (INLINE_ASCENDING_SORT)
1791  retval = lookup (data, nel, value, std::less<T> ());
1792  else
1793 #endif
1794 #if defined (INLINE_DESCENDING_SORT)
1796  retval = lookup (data, nel, value, std::greater<T> ());
1797  else
1798 #endif
1799  if (m_compare)
1800  retval = lookup (data, nel, value, std::ptr_fun (m_compare));
1801 
1802  return retval;
1803 }
1804 
1805 template <typename T>
1806 template <typename Comp>
1807 void
1809  const T *values, octave_idx_type nvalues,
1810  octave_idx_type *idx, Comp comp)
1811 {
1812  // Use a sequence of binary lookups.
1813  // FIXME: Can this be sped up generally? The sorted merge case is dealt with
1814  // elsewhere.
1815  for (octave_idx_type j = 0; j < nvalues; j++)
1816  idx[j] = lookup (data, nel, values[j], comp);
1817 }
1818 
1819 template <typename T>
1820 void
1822  const T *values, octave_idx_type nvalues,
1823  octave_idx_type *idx)
1824 {
1825 #if defined (INLINE_ASCENDING_SORT)
1827  lookup (data, nel, values, nvalues, idx, std::less<T> ());
1828  else
1829 #endif
1830 #if defined (INLINE_DESCENDING_SORT)
1832  lookup (data, nel, values, nvalues, idx, std::greater<T> ());
1833  else
1834 #endif
1835  if (m_compare)
1836  lookup (data, nel, values, nvalues, idx, std::ptr_fun (m_compare));
1837 }
1838 
1839 template <typename T>
1840 template <typename Comp>
1841 void
1843  const T *values, octave_idx_type nvalues,
1844  octave_idx_type *idx, bool rev, Comp comp)
1845 {
1846  if (rev)
1847  {
1848  octave_idx_type i = 0;
1849  octave_idx_type j = nvalues - 1;
1850 
1851  if (nvalues > 0 && nel > 0)
1852  {
1853  while (true)
1854  {
1855  if (comp (values[j], data[i]))
1856  {
1857  idx[j] = i;
1858  if (--j < 0)
1859  break;
1860  }
1861  else if (++i == nel)
1862  break;
1863  }
1864  }
1865 
1866  for (; j >= 0; j--)
1867  idx[j] = i;
1868  }
1869  else
1870  {
1871  octave_idx_type i = 0;
1872  octave_idx_type j = 0;
1873 
1874  if (nvalues > 0 && nel > 0)
1875  {
1876  while (true)
1877  {
1878  if (comp (values[j], data[i]))
1879  {
1880  idx[j] = i;
1881  if (++j == nvalues)
1882  break;
1883  }
1884  else if (++i == nel)
1885  break;
1886  }
1887  }
1888 
1889  for (; j != nvalues; j++)
1890  idx[j] = i;
1891  }
1892 }
1893 
1894 template <typename T>
1895 void
1897  const T *values, octave_idx_type nvalues,
1898  octave_idx_type *idx, bool rev)
1899 {
1900 #if defined (INLINE_ASCENDING_SORT)
1902  lookup_sorted (data, nel, values, nvalues, idx, rev, std::less<T> ());
1903  else
1904 #endif
1905 #if defined (INLINE_DESCENDING_SORT)
1907  lookup_sorted (data, nel, values, nvalues, idx, rev, std::greater<T> ());
1908  else
1909 #endif
1910  if (m_compare)
1911  lookup_sorted (data, nel, values, nvalues, idx, rev,
1912  std::ptr_fun (m_compare));
1913 }
1914 
1915 template <typename T>
1916 template <typename Comp>
1917 void
1920  Comp comp)
1921 {
1922  // Simply wrap the STL algorithms.
1923  // FIXME: this will fail if we attempt to inline <,> for Complex.
1924  if (up == lo+1)
1925  std::nth_element (data, data + lo, data + nel, comp);
1926  else if (lo == 0)
1927  std::partial_sort (data, data + up, data + nel, comp);
1928  else
1929  {
1930  std::nth_element (data, data + lo, data + nel, comp);
1931  if (up == lo + 2)
1932  {
1933  // Finding two subsequent elements.
1934  std::swap (data[lo+1],
1935  *std::min_element (data + lo + 1, data + nel, comp));
1936  }
1937  else
1938  std::partial_sort (data + lo + 1, data + up, data + nel, comp);
1939  }
1940 }
1941 
1942 template <typename T>
1943 void
1946 {
1947  if (up < 0)
1948  up = lo + 1;
1949 #if defined (INLINE_ASCENDING_SORT)
1951  nth_element (data, nel, lo, up, std::less<T> ());
1952  else
1953 #endif
1954 #if defined (INLINE_DESCENDING_SORT)
1956  nth_element (data, nel, lo, up, std::greater<T> ());
1957  else
1958 #endif
1959  if (m_compare)
1960  nth_element (data, nel, lo, up, std::ptr_fun (m_compare));
1961 }
1962 
1963 template <typename T>
1964 bool
1966  typename ref_param<T>::type y)
1967 {
1968  return x < y;
1969 }
1970 
1971 template <typename T>
1972 bool
1974  typename ref_param<T>::type y)
1975 {
1976  return x > y;
1977 }
charNDArray max(char d, const charNDArray &m)
Definition: chNDArray.cc:230
octave_sort(void)
Definition: oct-sort.cc:123
static const int MERGESTATE_TEMP_SIZE
Definition: oct-sort.h:180
void set_compare(compare_fcn_type comp)
Definition: oct-sort.h:118
int merge_collapse(T *data, Comp comp)
Definition: oct-sort.cc:1276
~octave_sort(void)
Definition: oct-sort.cc:133
void sort_rows(const T *data, octave_idx_type *idx, octave_idx_type rows, octave_idx_type cols)
Definition: oct-sort.cc:1661
octave_idx_type gallop_right(T key, T *a, octave_idx_type n, octave_idx_type hint, Comp comp)
Definition: oct-sort.cc:420
int merge_hi(T *pa, octave_idx_type na, T *pb, octave_idx_type nb, Comp comp)
Definition: oct-sort.cc:874
octave_idx_type count_run(T *lo, octave_idx_type n, bool &descending, Comp comp)
Definition: oct-sort.cc:265
compare_fcn_type m_compare
Definition: oct-sort.h:241
octave_idx_type gallop_left(T key, T *a, octave_idx_type n, octave_idx_type hint, Comp comp)
Definition: oct-sort.cc:325
static bool descending_compare(typename ref_param< T >::type, typename ref_param< T >::type)
Definition: oct-sort.cc:1973
bool issorted(const T *data, octave_idx_type nel)
Definition: oct-sort.cc:1574
static bool ascending_compare(typename ref_param< T >::type, typename ref_param< T >::type)
Definition: oct-sort.cc:1965
octave_idx_type lookup(const T *data, octave_idx_type nel, const T &value)
Definition: oct-sort.cc:1784
MergeState * m_ms
Definition: oct-sort.h:243
int merge_at(octave_idx_type i, T *data, Comp comp)
Definition: oct-sort.cc:1161
octave_idx_type merge_compute_minrun(octave_idx_type n)
Definition: oct-sort.cc:1386
void binarysort(T *data, octave_idx_type nel, octave_idx_type start, Comp comp)
Definition: oct-sort.cc:153
void sort(T *data, octave_idx_type nel)
Definition: oct-sort.cc:1517
static const int MIN_GALLOP
Definition: oct-sort.h:177
void nth_element(T *data, octave_idx_type nel, octave_idx_type lo, octave_idx_type up=-1)
Definition: oct-sort.cc:1944
bool is_sorted_rows(const T *data, octave_idx_type rows, octave_idx_type cols)
Definition: oct-sort.cc:1738
static const int MAX_MERGE_PENDING
Definition: oct-sort.h:173
int merge_force_collapse(T *data, Comp comp)
Definition: oct-sort.cc:1339
int merge_lo(T *pa, octave_idx_type na, T *pb, octave_idx_type nb, Comp comp)
Definition: oct-sort.cc:592
void lookup_sorted(const T *data, octave_idx_type nel, const T *values, octave_idx_type nvalues, octave_idx_type *idx, bool rev=false)
Definition: oct-sort.cc:1896
if_then_else< is_class_type< T >::no, T, T const & >::result type
Definition: lo-traits.h:121
F77_RET_T const F77_DBLE * x
T octave_idx_type m
Definition: mx-inlines.cc:773
octave_idx_type n
Definition: mx-inlines.cc:753
T * r
Definition: mx-inlines.cc:773
static uint32_t * next
Definition: randmtzig.cc:189
#define OCTAVE_LOCAL_BUFFER(T, buf, size)
Definition: oct-locbuf.h:44
static octave_idx_type roundupsize(size_t n)
Definition: oct-sort.cc:499
sortmode
Definition: oct-sort.h:95
@ ASCENDING
Definition: oct-sort.h:95
@ DESCENDING
Definition: oct-sort.h:95
octave_value::octave_value(const Array< char > &chm, char type) return retval
Definition: ov.cc:811
void getmem(octave_idx_type need)
Definition: oct-sort.cc:548
struct s_slice m_pending[MAX_MERGE_PENDING]
Definition: oct-sort.h:238
octave_idx_type m_n
Definition: oct-sort.h:237
octave_idx_type m_alloced
Definition: oct-sort.h:227
void getmemi(octave_idx_type need)
Definition: oct-sort.cc:566
octave_idx_type * m_ia
Definition: oct-sort.h:226
octave_idx_type m_min_gallop
Definition: oct-sort.h:221
octave_idx_type m_base
Definition: oct-sort.h:192
octave_idx_type m_len
Definition: oct-sort.h:192
octave_idx_type col
Definition: oct-sort.cc:1598
sortrows_run_t(octave_idx_type c, octave_idx_type o, octave_idx_type n)
Definition: oct-sort.cc:1596