GNU Octave  8.1.0
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab
oct-sort.cc
Go to the documentation of this file.
1 ////////////////////////////////////////////////////////////////////////
2 //
3 // Copyright (C) 2003-2023 The Octave Project Developers
4 //
5 // See the file COPYRIGHT.md in the top-level directory of this
6 // distribution or <https://octave.org/copyright/>.
7 //
8 // This file is part of Octave.
9 //
10 // Octave is free software: you can redistribute it and/or modify it
11 // under the terms of the GNU General Public License as published by
12 // the Free Software Foundation, either version 3 of the License, or
13 // (at your option) any later version.
14 //
15 // Octave is distributed in the hope that it will be useful, but
16 // WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 // GNU General Public License for more details.
19 //
20 // You should have received a copy of the GNU General Public License
21 // along with Octave; see the file COPYING. If not, see
22 // <https://www.gnu.org/licenses/>.
23 //
24 // Code stolen in large part from Python's, listobject.c, which itself had
25 // no license header. However, thanks to Tim Peters for the parts of the
26 // code I ripped-off.
27 //
28 // As required in the Python license the short description of the changes
29 // made are
30 //
31 // * convert the sorting code in listobject.cc into a generic class,
32 // replacing PyObject* with the type of the class T.
33 //
34 // * replaced usages of malloc, free, memcpy and memmove by standard C++
35 // new [], delete [] and std::copy and std::copy_backward. Note that replacing
36 // memmove by std::copy is possible if the destination starts before the source.
37 // If not, std::copy_backward needs to be used.
38 //
39 // * templatize comparison operator in most methods, provide possible dispatch
40 //
41 // * duplicate methods to avoid by-the-way indexed sorting
42 //
43 // * add methods for verifying sortedness of array
44 //
45 // * row sorting via breadth-first tree subsorting
46 //
47 // * binary lookup and sequential binary lookup optimized for dense downsampling.
48 //
49 // * NOTE: the memory management routines rely on the fact that delete [] silently
50 // ignores null pointers. Don't gripe about the missing checks - they're there.
51 //
52 //
53 // The Python license is
54 //
55 // PSF LICENSE AGREEMENT FOR PYTHON 2.3
56 // --------------------------------------
57 //
58 // 1. This LICENSE AGREEMENT is between the Python Software Foundation
59 // ("PSF"), and the Individual or Organization ("Licensee") accessing and
60 // otherwise using Python 2.3 software in source or binary form and its
61 // associated documentation.
62 //
63 // 2. Subject to the terms and conditions of this License Agreement, PSF
64 // hereby grants Licensee a nonexclusive, royalty-free, world-wide
65 // license to reproduce, analyze, test, perform and/or display publicly,
66 // prepare derivative works, distribute, and otherwise use Python 2.3
67 // alone or in any derivative version, provided, however, that PSF's
68 // License Agreement and PSF's notice of copyright, i.e., "Copyright (c)
69 // 2001, 2002, 2003 Python Software Foundation; All Rights Reserved" are
70 // retained in Python 2.3 alone or in any derivative version prepared by
71 // Licensee.
72 //
73 // 3. In the event Licensee prepares a derivative work that is based on
74 // or incorporates Python 2.3 or any part thereof, and wants to make
75 // the derivative work available to others as provided herein, then
76 // Licensee hereby agrees to include in any such work a brief summary of
77 // the changes made to Python 2.3.
78 //
79 // 4. PSF is making Python 2.3 available to Licensee on an "AS IS"
80 // basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
81 // IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND
82 // DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
83 // FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 2.3 WILL NOT
84 // INFRINGE ANY THIRD PARTY RIGHTS.
85 //
86 // 5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
87 // 2.3 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
88 // A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 2.3,
89 // OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
90 //
91 // 6. This License Agreement will automatically terminate upon a material
92 // breach of its terms and conditions.
93 //
94 // 7. Nothing in this License Agreement shall be deemed to create any
95 // relationship of agency, partnership, or joint venture between PSF and
96 // Licensee. This License Agreement does not grant permission to use PSF
97 // trademarks or trade name in a trademark sense to endorse or promote
98 // products or services of Licensee, or any third party.
99 //
100 // 8. By copying, installing or otherwise using Python 2.3, Licensee
101 // agrees to be bound by the terms and conditions of this License
102 // Agreement.
103 //
104 ////////////////////////////////////////////////////////////////////////
105 
106 // This file should not include config.h. It is only included in other
107 // C++ source files that should have included config.h before including
108 // this file.
109 
110 #include <cassert>
111 #include <algorithm>
112 #include <cstring>
113 #include <stack>
114 
115 #include "lo-error.h"
116 #include "lo-mappers.h"
117 #include "quit.h"
118 #include "oct-sort.h"
119 #include "oct-locbuf.h"
120 
121 template <typename T>
123  m_compare (ascending_compare), m_ms (nullptr)
124 { }
125 
126 template <typename T>
128  : m_compare (comp), m_ms (nullptr)
129 { }
130 
131 template <typename T>
133 {
134  delete m_ms;
135 }
136 
137 template <typename T>
138 void
140 {
141  if (mode == ASCENDING)
142  m_compare = ascending_compare;
143  else if (mode == DESCENDING)
144  m_compare = descending_compare;
145  else
146  m_compare = compare_fcn_type ();
147 }
148 
149 template <typename T>
150 template <typename Comp>
151 void
153  octave_idx_type start, Comp comp)
154 {
155  if (start == 0)
156  ++start;
157 
158  for (; start < nel; ++start)
159  {
160  /* set l to where *start belongs */
161  octave_idx_type l = 0;
162  octave_idx_type r = start;
163  T pivot = data[start];
164  /* Invariants:
165  * pivot >= all in [lo, l).
166  * pivot < all in [r, start).
167  * The second is vacuously true at the start.
168  */
169  do
170  {
171  octave_idx_type p = l + ((r - l) >> 1);
172  if (comp (pivot, data[p]))
173  r = p;
174  else
175  l = p+1;
176  }
177  while (l < r);
178  /* The invariants still hold, so pivot >= all in [lo, l) and
179  pivot < all in [l, start), so pivot belongs at l. Note
180  that if there are elements equal to pivot, l points to the
181  first slot after them -- that's why this sort is stable.
182  Slide over to make room.
183  Caution: using memmove is much slower under MSVC 5;
184  we're not usually moving many slots. */
185  // NOTE: using swap and going upwards appears to be faster.
186  for (octave_idx_type p = l; p < start; p++)
187  std::swap (pivot, data[p]);
188  data[start] = pivot;
189  }
190 
191  return;
192 }
193 
194 template <typename T>
195 template <typename Comp>
196 void
198  octave_idx_type start, Comp comp)
199 {
200  if (start == 0)
201  ++start;
202 
203  for (; start < nel; ++start)
204  {
205  /* set l to where *start belongs */
206  octave_idx_type l = 0;
207  octave_idx_type r = start;
208  T pivot = data[start];
209  /* Invariants:
210  * pivot >= all in [lo, l).
211  * pivot < all in [r, start).
212  * The second is vacuously true at the start.
213  */
214  do
215  {
216  octave_idx_type p = l + ((r - l) >> 1);
217  if (comp (pivot, data[p]))
218  r = p;
219  else
220  l = p+1;
221  }
222  while (l < r);
223  /* The invariants still hold, so pivot >= all in [lo, l) and
224  pivot < all in [l, start), so pivot belongs at l. Note
225  that if there are elements equal to pivot, l points to the
226  first slot after them -- that's why this sort is stable.
227  Slide over to make room.
228  Caution: using memmove is much slower under MSVC 5;
229  we're not usually moving many slots. */
230  // NOTE: using swap and going upwards appears to be faster.
231  for (octave_idx_type p = l; p < start; p++)
232  std::swap (pivot, data[p]);
233  data[start] = pivot;
234  octave_idx_type ipivot = idx[start];
235  for (octave_idx_type p = l; p < start; p++)
236  std::swap (ipivot, idx[p]);
237  idx[start] = ipivot;
238  }
239 
240  return;
241 }
242 
243 /*
244 Return the length of the run beginning at lo, in the slice [lo, hi). lo < hi
245 is required on entry. "A run" is the longest ascending sequence, with
246 
247  lo[0] <= lo[1] <= lo[2] <= ...
248 
249 or the longest descending sequence, with
250 
251  lo[0] > lo[1] > lo[2] > ...
252 
253 DESCENDING is set to false in the former case, or to true in the latter.
254 For its intended use in a stable mergesort, the strictness of the defn of
255 "descending" is needed so that the caller can safely reverse a descending
256 sequence without violating stability (strict > ensures there are no equal
257 elements to get out of order).
258 
259 Returns -1 in case of error.
260 */
261 template <typename T>
262 template <typename Comp>
264 octave_sort<T>::count_run (T *lo, octave_idx_type nel, bool& descending,
265  Comp comp)
266 {
268  T *hi = lo + nel;
269 
270  descending = false;
271  ++lo;
272  if (lo == hi)
273  return 1;
274 
275  n = 2;
276 
277  if (comp (*lo, *(lo-1)))
278  {
279  descending = true;
280  for (lo = lo+1; lo < hi; ++lo, ++n)
281  {
282  if (comp (*lo, *(lo-1)))
283  ;
284  else
285  break;
286  }
287  }
288  else
289  {
290  for (lo = lo+1; lo < hi; ++lo, ++n)
291  {
292  if (comp (*lo, *(lo-1)))
293  break;
294  }
295  }
296 
297  return n;
298 }
299 
300 /*
301 Locate the proper position of key in a sorted vector; if the vector contains
302 an element equal to key, return the position immediately to the left of
303 the leftmost equal element. [gallop_right() does the same except returns
304 the position to the right of the rightmost equal element (if any).]
305 
306 "a" is a sorted vector with n elements, starting at a[0]. n must be > 0.
307 
308 "hint" is an index at which to begin the search, 0 <= hint < n. The closer
309 hint is to the final result, the faster this runs.
310 
311 The return value is the int k in 0..n such that
312 
313  a[k-1] < key <= a[k]
314 
315 pretending that *(a-1) is minus infinity and a[n] is plus infinity. IOW,
316 key belongs at index k; or, IOW, the first k elements of a should precede
317 key, and the last n-k should follow key.
318 
319 Returns -1 on error. See listsort.txt for info on the method.
320 */
321 template <typename T>
322 template <typename Comp>
325  octave_idx_type hint,
326  Comp comp)
327 {
328  octave_idx_type ofs;
329  octave_idx_type lastofs;
330  octave_idx_type k;
331 
332  a += hint;
333  lastofs = 0;
334  ofs = 1;
335  if (comp (*a, key))
336  {
337  /* a[hint] < key -- gallop right, until
338  * a[hint + lastofs] < key <= a[hint + ofs]
339  */
340  const octave_idx_type maxofs = n - hint; /* &a[n-1] is highest */
341  while (ofs < maxofs)
342  {
343  if (comp (a[ofs], key))
344  {
345  lastofs = ofs;
346  ofs = (ofs << 1) + 1;
347  if (ofs <= 0) /* int overflow */
348  ofs = maxofs;
349  }
350  else /* key <= a[hint + ofs] */
351  break;
352  }
353  if (ofs > maxofs)
354  ofs = maxofs;
355  /* Translate back to offsets relative to &a[0]. */
356  lastofs += hint;
357  ofs += hint;
358  }
359  else
360  {
361  /* key <= a[hint] -- gallop left, until
362  * a[hint - ofs] < key <= a[hint - lastofs]
363  */
364  const octave_idx_type maxofs = hint + 1; /* &a[0] is lowest */
365  while (ofs < maxofs)
366  {
367  if (comp (*(a-ofs), key))
368  break;
369  /* key <= a[hint - ofs] */
370  lastofs = ofs;
371  ofs = (ofs << 1) + 1;
372  if (ofs <= 0) /* int overflow */
373  ofs = maxofs;
374  }
375  if (ofs > maxofs)
376  ofs = maxofs;
377  /* Translate back to positive offsets relative to &a[0]. */
378  k = lastofs;
379  lastofs = hint - ofs;
380  ofs = hint - k;
381  }
382  a -= hint;
383 
384  /* Now a[lastofs] < key <= a[ofs], so key belongs somewhere to the
385  * right of lastofs but no farther right than ofs. Do a binary
386  * search, with invariant a[lastofs-1] < key <= a[ofs].
387  */
388  ++lastofs;
389  while (lastofs < ofs)
390  {
391  octave_idx_type m = lastofs + ((ofs - lastofs) >> 1);
392 
393  if (comp (a[m], key))
394  lastofs = m+1; /* a[m] < key */
395  else
396  ofs = m; /* key <= a[m] */
397  }
398 
399  return ofs;
400 }
401 
402 /*
403 Exactly like gallop_left(), except that if key already exists in a[0:n],
404 finds the position immediately to the right of the rightmost equal value.
405 
406 The return value is the int k in 0..n such that
407 
408  a[k-1] <= key < a[k]
409 
410 or -1 if error.
411 
412 The code duplication is massive, but this is enough different given that
413 we're sticking to "<" comparisons that it's much harder to follow if
414 written as one routine with yet another "left or right?" flag.
415 */
416 template <typename T>
417 template <typename Comp>
420  octave_idx_type hint,
421  Comp comp)
422 {
423  octave_idx_type ofs;
424  octave_idx_type lastofs;
425  octave_idx_type k;
426 
427  a += hint;
428  lastofs = 0;
429  ofs = 1;
430  if (comp (key, *a))
431  {
432  /* key < a[hint] -- gallop left, until
433  * a[hint - ofs] <= key < a[hint - lastofs]
434  */
435  const octave_idx_type maxofs = hint + 1; /* &a[0] is lowest */
436  while (ofs < maxofs)
437  {
438  if (comp (key, *(a-ofs)))
439  {
440  lastofs = ofs;
441  ofs = (ofs << 1) + 1;
442  if (ofs <= 0) /* int overflow */
443  ofs = maxofs;
444  }
445  else /* a[hint - ofs] <= key */
446  break;
447  }
448  if (ofs > maxofs)
449  ofs = maxofs;
450  /* Translate back to positive offsets relative to &a[0]. */
451  k = lastofs;
452  lastofs = hint - ofs;
453  ofs = hint - k;
454  }
455  else
456  {
457  /* a[hint] <= key -- gallop right, until
458  * a[hint + lastofs] <= key < a[hint + ofs]
459  */
460  const octave_idx_type maxofs = n - hint; /* &a[n-1] is highest */
461  while (ofs < maxofs)
462  {
463  if (comp (key, a[ofs]))
464  break;
465  /* a[hint + ofs] <= key */
466  lastofs = ofs;
467  ofs = (ofs << 1) + 1;
468  if (ofs <= 0) /* int overflow */
469  ofs = maxofs;
470  }
471  if (ofs > maxofs)
472  ofs = maxofs;
473  /* Translate back to offsets relative to &a[0]. */
474  lastofs += hint;
475  ofs += hint;
476  }
477  a -= hint;
478 
479  /* Now a[lastofs] <= key < a[ofs], so key belongs somewhere to the
480  * right of lastofs but no farther right than ofs. Do a binary
481  * search, with invariant a[lastofs-1] <= key < a[ofs].
482  */
483  ++lastofs;
484  while (lastofs < ofs)
485  {
486  octave_idx_type m = lastofs + ((ofs - lastofs) >> 1);
487 
488  if (comp (key, a[m]))
489  ofs = m; /* key < a[m] */
490  else
491  lastofs = m+1; /* a[m] <= key */
492  }
493 
494  return ofs;
495 }
496 
497 static inline octave_idx_type
498 roundupsize (std::size_t n)
499 {
500  unsigned int nbits = 3;
501  std::size_t n2 = n >> 8;
502 
503  /* Round up:
504  * If n < 256, to a multiple of 8.
505  * If n < 2048, to a multiple of 64.
506  * If n < 16384, to a multiple of 512.
507  * If n < 131072, to a multiple of 4096.
508  * If n < 1048576, to a multiple of 32768.
509  * If n < 8388608, to a multiple of 262144.
510  * If n < 67108864, to a multiple of 2097152.
511  * If n < 536870912, to a multiple of 16777216.
512  * ...
513  * If n < 2**(5+3*i), to a multiple of 2**(3*i).
514  *
515  * This over-allocates proportional to the list size, making room
516  * for additional growth. The over-allocation is mild, but is
517  * enough to give linear-time amortized behavior over a long
518  * sequence of appends() in the presence of a poorly-performing
519  * system realloc() (which is a reality, e.g., across all flavors
520  * of Windows, with Win9x behavior being particularly bad -- and
521  * we've still got address space fragmentation problems on Win9x
522  * even with this scheme, although it requires much longer lists to
523  * provoke them than it used to).
524  */
525  while (n2)
526  {
527  n2 >>= 3;
528  nbits += 3;
529  }
530 
531  std::size_t new_size = ((n >> nbits) + 1) << nbits;
532 
533  if (new_size == 0
534  || new_size
535  > static_cast<std::size_t> (std::numeric_limits<octave_idx_type>::max ()))
536  (*current_liboctave_error_handler)
537  ("unable to allocate sufficient memory for sort");
538 
539  return static_cast<octave_idx_type> (new_size);
540 }
541 
542 /* Ensure enough temp memory for 'need' array slots is available.
543  * Returns 0 on success and -1 if the memory can't be gotten.
544  */
545 template <typename T>
546 void
548 {
549  if (need <= m_alloced)
550  return;
551 
552  need = roundupsize (need);
553  /* Don't realloc! That can cost cycles to copy the old data, but
554  * we don't care what's in the block.
555  */
556  delete [] m_a;
557  delete [] m_ia; // Must do this or fool possible next getmemi.
558  m_a = new T [need];
559  m_alloced = need;
560 
561 }
562 
563 template <typename T>
564 void
566 {
567  if (m_ia && need <= m_alloced)
568  return;
569 
570  need = roundupsize (need);
571  /* Don't realloc! That can cost cycles to copy the old data, but
572  * we don't care what's in the block.
573  */
574  delete [] m_a;
575  delete [] m_ia;
576 
577  m_a = new T [need];
578  m_ia = new octave_idx_type [need];
579  m_alloced = need;
580 }
581 
582 /* Merge the na elements starting at pa with the nb elements starting at pb
583  * in a stable way, in-place. na and nb must be > 0, and pa + na == pb.
584  * Must also have that *pb < *pa, that pa[na-1] belongs at the end of the
585  * merge, and should have na <= nb. See listsort.txt for more info.
586  * Return 0 if successful, -1 if error.
587  */
588 template <typename T>
589 template <typename Comp>
590 int
592  T *pb, octave_idx_type nb,
593  Comp comp)
594 {
595  octave_idx_type k;
596  T *dest;
597  int result = -1; /* guilty until proved innocent */
598  octave_idx_type min_gallop = m_ms->m_min_gallop;
599 
600  m_ms->getmem (na);
601 
602  std::copy (pa, pa + na, m_ms->m_a);
603  dest = pa;
604  pa = m_ms->m_a;
605 
606  *dest++ = *pb++;
607  --nb;
608  if (nb == 0)
609  goto Succeed;
610  if (na == 1)
611  goto CopyB;
612 
613  for (;;)
614  {
615  octave_idx_type acount = 0; /* # of times A won in a row */
616  octave_idx_type bcount = 0; /* # of times B won in a row */
617 
618  /* Do the straightforward thing until (if ever) one run
619  * appears to win consistently.
620  */
621  for (;;)
622  {
623 
624  // FIXME: these loops are candidates for further optimizations.
625  // Rather than testing everything in each cycle, it may be more
626  // efficient to do it in hunks.
627  if (comp (*pb, *pa))
628  {
629  *dest++ = *pb++;
630  ++bcount;
631  acount = 0;
632  --nb;
633  if (nb == 0)
634  goto Succeed;
635  if (bcount >= min_gallop)
636  break;
637  }
638  else
639  {
640  *dest++ = *pa++;
641  ++acount;
642  bcount = 0;
643  --na;
644  if (na == 1)
645  goto CopyB;
646  if (acount >= min_gallop)
647  break;
648  }
649  }
650 
651  /* One run is winning so consistently that galloping may
652  * be a huge win. So try that, and continue galloping until
653  * (if ever) neither run appears to be winning consistently
654  * anymore.
655  */
656  ++min_gallop;
657  do
658  {
659  min_gallop -= (min_gallop > 1);
660  m_ms->m_min_gallop = min_gallop;
661  k = gallop_right (*pb, pa, na, 0, comp);
662  acount = k;
663  if (k)
664  {
665  if (k < 0)
666  goto Fail;
667  dest = std::copy (pa, pa + k, dest);
668  pa += k;
669  na -= k;
670  if (na == 1)
671  goto CopyB;
672  /* na==0 is impossible now if the comparison
673  * function is consistent, but we can't assume
674  * that it is.
675  */
676  if (na == 0)
677  goto Succeed;
678  }
679  *dest++ = *pb++;
680  --nb;
681  if (nb == 0)
682  goto Succeed;
683 
684  k = gallop_left (*pa, pb, nb, 0, comp);
685  bcount = k;
686  if (k)
687  {
688  if (k < 0)
689  goto Fail;
690  dest = std::copy (pb, pb + k, dest);
691  pb += k;
692  nb -= k;
693  if (nb == 0)
694  goto Succeed;
695  }
696  *dest++ = *pa++;
697  --na;
698  if (na == 1)
699  goto CopyB;
700  }
701  while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP);
702 
703  ++min_gallop; /* penalize it for leaving galloping mode */
704  m_ms->m_min_gallop = min_gallop;
705  }
706 
707 Succeed:
708  result = 0;
709 
710 Fail:
711  if (na)
712  std::copy (pa, pa + na, dest);
713  return result;
714 
715 CopyB:
716  /* The last element of pa belongs at the end of the merge. */
717  std::copy (pb, pb + nb, dest);
718  dest[nb] = *pa;
719 
720  return 0;
721 }
722 
723 template <typename T>
724 template <typename Comp>
725 int
727  T *pb, octave_idx_type *ipb, octave_idx_type nb,
728  Comp comp)
729 {
730  octave_idx_type k;
731  T *dest;
732  octave_idx_type *idest;
733  int result = -1; /* guilty until proved innocent */
734  octave_idx_type min_gallop = m_ms->m_min_gallop;
735 
736  m_ms->getmemi (na);
737 
738  std::copy (pa, pa + na, m_ms->m_a);
739  std::copy (ipa, ipa + na, m_ms->m_ia);
740  dest = pa; idest = ipa;
741  pa = m_ms->m_a; ipa = m_ms->m_ia;
742 
743  *dest++ = *pb++; *idest++ = *ipb++;
744  --nb;
745  if (nb == 0)
746  goto Succeed;
747  if (na == 1)
748  goto CopyB;
749 
750  for (;;)
751  {
752  octave_idx_type acount = 0; /* # of times A won in a row */
753  octave_idx_type bcount = 0; /* # of times B won in a row */
754 
755  /* Do the straightforward thing until (if ever) one run
756  * appears to win consistently.
757  */
758  for (;;)
759  {
760 
761  if (comp (*pb, *pa))
762  {
763  *dest++ = *pb++; *idest++ = *ipb++;
764  ++bcount;
765  acount = 0;
766  --nb;
767  if (nb == 0)
768  goto Succeed;
769  if (bcount >= min_gallop)
770  break;
771  }
772  else
773  {
774  *dest++ = *pa++; *idest++ = *ipa++;
775  ++acount;
776  bcount = 0;
777  --na;
778  if (na == 1)
779  goto CopyB;
780  if (acount >= min_gallop)
781  break;
782  }
783  }
784 
785  /* One run is winning so consistently that galloping may
786  * be a huge win. So try that, and continue galloping until
787  * (if ever) neither run appears to be winning consistently
788  * anymore.
789  */
790  ++min_gallop;
791  do
792  {
793  min_gallop -= (min_gallop > 1);
794  m_ms->m_min_gallop = min_gallop;
795  k = gallop_right (*pb, pa, na, 0, comp);
796  acount = k;
797  if (k)
798  {
799  if (k < 0)
800  goto Fail;
801  dest = std::copy (pa, pa + k, dest);
802  idest = std::copy (ipa, ipa + k, idest);
803  pa += k; ipa += k;
804  na -= k;
805  if (na == 1)
806  goto CopyB;
807  /* na==0 is impossible now if the comparison
808  * function is consistent, but we can't assume
809  * that it is.
810  */
811  if (na == 0)
812  goto Succeed;
813  }
814  *dest++ = *pb++; *idest++ = *ipb++;
815  --nb;
816  if (nb == 0)
817  goto Succeed;
818 
819  k = gallop_left (*pa, pb, nb, 0, comp);
820  bcount = k;
821  if (k)
822  {
823  if (k < 0)
824  goto Fail;
825  dest = std::copy (pb, pb + k, dest);
826  idest = std::copy (ipb, ipb + k, idest);
827  pb += k; ipb += k;
828  nb -= k;
829  if (nb == 0)
830  goto Succeed;
831  }
832  *dest++ = *pa++; *idest++ = *ipa++;
833  --na;
834  if (na == 1)
835  goto CopyB;
836  }
837  while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP);
838 
839  ++min_gallop; /* penalize it for leaving galloping mode */
840  m_ms->m_min_gallop = min_gallop;
841  }
842 
843 Succeed:
844  result = 0;
845 
846 Fail:
847  if (na)
848  {
849  std::copy (pa, pa + na, dest);
850  std::copy (ipa, ipa + na, idest);
851  }
852  return result;
853 
854 CopyB:
855  /* The last element of pa belongs at the end of the merge. */
856  std::copy (pb, pb + nb, dest);
857  std::copy (ipb, ipb + nb, idest);
858  dest[nb] = *pa;
859  idest[nb] = *ipa;
860 
861  return 0;
862 }
863 
864 /* Merge the na elements starting at pa with the nb elements starting at pb
865  * in a stable way, in-place. na and nb must be > 0, and pa + na == pb.
866  * Must also have that *pb < *pa, that pa[na-1] belongs at the end of the
867  * merge, and should have na >= nb. See listsort.txt for more info.
868  * Return 0 if successful, -1 if error.
869  */
870 template <typename T>
871 template <typename Comp>
872 int
874  T *pb, octave_idx_type nb,
875  Comp comp)
876 {
877  octave_idx_type k;
878  T *dest;
879  int result = -1; /* guilty until proved innocent */
880  T *basea, *baseb;
881  octave_idx_type min_gallop = m_ms->m_min_gallop;
882 
883  m_ms->getmem (nb);
884 
885  dest = pb + nb - 1;
886  std::copy (pb, pb + nb, m_ms->m_a);
887  basea = pa;
888  baseb = m_ms->m_a;
889  pb = m_ms->m_a + nb - 1;
890  pa += na - 1;
891 
892  *dest-- = *pa--;
893  --na;
894  if (na == 0)
895  goto Succeed;
896  if (nb == 1)
897  goto CopyA;
898 
899  for (;;)
900  {
901  octave_idx_type acount = 0; /* # of times A won in a row */
902  octave_idx_type bcount = 0; /* # of times B won in a row */
903 
904  /* Do the straightforward thing until (if ever) one run
905  * appears to win consistently.
906  */
907  for (;;)
908  {
909  if (comp (*pb, *pa))
910  {
911  *dest-- = *pa--;
912  ++acount;
913  bcount = 0;
914  --na;
915  if (na == 0)
916  goto Succeed;
917  if (acount >= min_gallop)
918  break;
919  }
920  else
921  {
922  *dest-- = *pb--;
923  ++bcount;
924  acount = 0;
925  --nb;
926  if (nb == 1)
927  goto CopyA;
928  if (bcount >= min_gallop)
929  break;
930  }
931  }
932 
933  /* One run is winning so consistently that galloping may
934  * be a huge win. So try that, and continue galloping until
935  * (if ever) neither run appears to be winning consistently
936  * anymore.
937  */
938  ++min_gallop;
939  do
940  {
941  min_gallop -= (min_gallop > 1);
942  m_ms->m_min_gallop = min_gallop;
943  k = gallop_right (*pb, basea, na, na-1, comp);
944  if (k < 0)
945  goto Fail;
946  k = na - k;
947  acount = k;
948  if (k)
949  {
950  dest = std::copy_backward (pa+1 - k, pa+1, dest+1) - 1;
951  pa -= k;
952  na -= k;
953  if (na == 0)
954  goto Succeed;
955  }
956  *dest-- = *pb--;
957  --nb;
958  if (nb == 1)
959  goto CopyA;
960 
961  k = gallop_left (*pa, baseb, nb, nb-1, comp);
962  if (k < 0)
963  goto Fail;
964  k = nb - k;
965  bcount = k;
966  if (k)
967  {
968  dest -= k;
969  pb -= k;
970  std::copy (pb+1, pb+1 + k, dest+1);
971  nb -= k;
972  if (nb == 1)
973  goto CopyA;
974  /* nb==0 is impossible now if the comparison
975  * function is consistent, but we can't assume
976  * that it is.
977  */
978  if (nb == 0)
979  goto Succeed;
980  }
981  *dest-- = *pa--;
982  --na;
983  if (na == 0)
984  goto Succeed;
985  } while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP);
986  ++min_gallop; /* penalize it for leaving galloping mode */
987  m_ms->m_min_gallop = min_gallop;
988  }
989 
990 Succeed:
991  result = 0;
992 
993 Fail:
994  if (nb)
995  std::copy (baseb, baseb + nb, dest-(nb-1));
996  return result;
997 
998 CopyA:
999  /* The first element of pb belongs at the front of the merge. */
1000  dest = std::copy_backward (pa+1 - na, pa+1, dest+1) - 1;
1001  pa -= na;
1002  *dest = *pb;
1003 
1004  return 0;
1005 }
1006 
1007 template <typename T>
1008 template <typename Comp>
1009 int
1011  T *pb, octave_idx_type *ipb, octave_idx_type nb,
1012  Comp comp)
1013 {
1014  octave_idx_type k;
1015  T *dest;
1016  octave_idx_type *idest;
1017  int result = -1; /* guilty until proved innocent */
1018  T *basea, *baseb;
1019  octave_idx_type *ibaseb;
1020  octave_idx_type min_gallop = m_ms->m_min_gallop;
1021 
1022  m_ms->getmemi (nb);
1023 
1024  dest = pb + nb - 1;
1025  idest = ipb + nb - 1;
1026  std::copy (pb, pb + nb, m_ms->m_a);
1027  std::copy (ipb, ipb + nb, m_ms->m_ia);
1028  basea = pa;
1029  baseb = m_ms->m_a; ibaseb = m_ms->m_ia;
1030  pb = m_ms->m_a + nb - 1; ipb = m_ms->m_ia + nb - 1;
1031  pa += na - 1; ipa += na - 1;
1032 
1033  *dest-- = *pa--; *idest-- = *ipa--;
1034  --na;
1035  if (na == 0)
1036  goto Succeed;
1037  if (nb == 1)
1038  goto CopyA;
1039 
1040  for (;;)
1041  {
1042  octave_idx_type acount = 0; /* # of times A won in a row */
1043  octave_idx_type bcount = 0; /* # of times B won in a row */
1044 
1045  /* Do the straightforward thing until (if ever) one run
1046  * appears to win consistently.
1047  */
1048  for (;;)
1049  {
1050  if (comp (*pb, *pa))
1051  {
1052  *dest-- = *pa--; *idest-- = *ipa--;
1053  ++acount;
1054  bcount = 0;
1055  --na;
1056  if (na == 0)
1057  goto Succeed;
1058  if (acount >= min_gallop)
1059  break;
1060  }
1061  else
1062  {
1063  *dest-- = *pb--; *idest-- = *ipb--;
1064  ++bcount;
1065  acount = 0;
1066  --nb;
1067  if (nb == 1)
1068  goto CopyA;
1069  if (bcount >= min_gallop)
1070  break;
1071  }
1072  }
1073 
1074  /* One run is winning so consistently that galloping may
1075  * be a huge win. So try that, and continue galloping until
1076  * (if ever) neither run appears to be winning consistently
1077  * anymore.
1078  */
1079  ++min_gallop;
1080  do
1081  {
1082  min_gallop -= (min_gallop > 1);
1083  m_ms->m_min_gallop = min_gallop;
1084  k = gallop_right (*pb, basea, na, na-1, comp);
1085  if (k < 0)
1086  goto Fail;
1087  k = na - k;
1088  acount = k;
1089  if (k)
1090  {
1091  dest = std::copy_backward (pa+1 - k, pa+1, dest+1) - 1;
1092  idest = std::copy_backward (ipa+1 - k, ipa+1, idest+1) - 1;
1093  pa -= k; ipa -= k;
1094  na -= k;
1095  if (na == 0)
1096  goto Succeed;
1097  }
1098  *dest-- = *pb--; *idest-- = *ipb--;
1099  --nb;
1100  if (nb == 1)
1101  goto CopyA;
1102 
1103  k = gallop_left (*pa, baseb, nb, nb-1, comp);
1104  if (k < 0)
1105  goto Fail;
1106  k = nb - k;
1107  bcount = k;
1108  if (k)
1109  {
1110  dest -= k; idest -= k;
1111  pb -= k; ipb -= k;
1112  std::copy (pb+1, pb+1 + k, dest+1);
1113  std::copy (ipb+1, ipb+1 + k, idest+1);
1114  nb -= k;
1115  if (nb == 1)
1116  goto CopyA;
1117  /* nb==0 is impossible now if the comparison
1118  * function is consistent, but we can't assume
1119  * that it is.
1120  */
1121  if (nb == 0)
1122  goto Succeed;
1123  }
1124  *dest-- = *pa--; *idest-- = *ipa--;
1125  --na;
1126  if (na == 0)
1127  goto Succeed;
1128  } while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP);
1129  ++min_gallop; /* penalize it for leaving galloping mode */
1130  m_ms->m_min_gallop = min_gallop;
1131  }
1132 
1133 Succeed:
1134  result = 0;
1135 
1136 Fail:
1137  if (nb)
1138  {
1139  std::copy (baseb, baseb + nb, dest-(nb-1));
1140  std::copy (ibaseb, ibaseb + nb, idest-(nb-1));
1141  }
1142  return result;
1143 
1144 CopyA:
1145  /* The first element of pb belongs at the front of the merge. */
1146  dest = std::copy_backward (pa+1 - na, pa+1, dest+1) - 1;
1147  idest = std::copy_backward (ipa+1 - na, ipa+1, idest+1) - 1;
1148  pa -= na; ipa -= na;
1149  *dest = *pb; *idest = *ipb;
1150 
1151  return 0;
1152 }
1153 
1154 /* Merge the two runs at stack indices i and i+1.
1155  * Returns 0 on success, -1 on error.
1156  */
1157 template <typename T>
1158 template <typename Comp>
1159 int
1161  Comp comp)
1162 {
1163  T *pa, *pb;
1164  octave_idx_type na, nb;
1165  octave_idx_type k;
1166 
1167  pa = data + m_ms->m_pending[i].m_base;
1168  na = m_ms->m_pending[i].m_len;
1169  pb = data + m_ms->m_pending[i+1].m_base;
1170  nb = m_ms->m_pending[i+1].m_len;
1171 
1172  /* Record the length of the combined runs; if i is the 3rd-last
1173  * run now, also slide over the last run (which isn't involved
1174  * in this merge). The current run i+1 goes away in any case.
1175  */
1176  m_ms->m_pending[i].m_len = na + nb;
1177  if (i == m_ms->m_n - 3)
1178  m_ms->m_pending[i+1] = m_ms->m_pending[i+2];
1179  m_ms->m_n--;
1180 
1181  /* Where does b start in a? Elements in a before that can be
1182  * ignored (already in place).
1183  */
1184  k = gallop_right (*pb, pa, na, 0, comp);
1185  if (k < 0)
1186  return -1;
1187  pa += k;
1188  na -= k;
1189  if (na == 0)
1190  return 0;
1191 
1192  /* Where does a end in b? Elements in b after that can be
1193  * ignored (already in place).
1194  */
1195  nb = gallop_left (pa[na-1], pb, nb, nb-1, comp);
1196  if (nb <= 0)
1197  return nb;
1198 
1199  /* Merge what remains of the runs, using a temp array with
1200  * min (na, nb) elements.
1201  */
1202  if (na <= nb)
1203  return merge_lo (pa, na, pb, nb, comp);
1204  else
1205  return merge_hi (pa, na, pb, nb, comp);
1206 }
1207 
1208 template <typename T>
1209 template <typename Comp>
1210 int
1212  Comp comp)
1213 {
1214  T *pa, *pb;
1215  octave_idx_type *ipa, *ipb;
1216  octave_idx_type na, nb;
1217  octave_idx_type k;
1218 
1219  pa = data + m_ms->m_pending[i].m_base;
1220  ipa = idx + m_ms->m_pending[i].m_base;
1221  na = m_ms->m_pending[i].m_len;
1222  pb = data + m_ms->m_pending[i+1].m_base;
1223  ipb = idx + m_ms->m_pending[i+1].m_base;
1224  nb = m_ms->m_pending[i+1].m_len;
1225 
1226  /* Record the length of the combined runs; if i is the 3rd-last
1227  * run now, also slide over the last run (which isn't involved
1228  * in this merge). The current run i+1 goes away in any case.
1229  */
1230  m_ms->m_pending[i].m_len = na + nb;
1231  if (i == m_ms->m_n - 3)
1232  m_ms->m_pending[i+1] = m_ms->m_pending[i+2];
1233  m_ms->m_n--;
1234 
1235  /* Where does b start in a? Elements in a before that can be
1236  * ignored (already in place).
1237  */
1238  k = gallop_right (*pb, pa, na, 0, comp);
1239  if (k < 0)
1240  return -1;
1241  pa += k; ipa += k;
1242  na -= k;
1243  if (na == 0)
1244  return 0;
1245 
1246  /* Where does a end in b? Elements in b after that can be
1247  * ignored (already in place).
1248  */
1249  nb = gallop_left (pa[na-1], pb, nb, nb-1, comp);
1250  if (nb <= 0)
1251  return nb;
1252 
1253  /* Merge what remains of the runs, using a temp array with
1254  * min (na, nb) elements.
1255  */
1256  if (na <= nb)
1257  return merge_lo (pa, ipa, na, pb, ipb, nb, comp);
1258  else
1259  return merge_hi (pa, ipa, na, pb, ipb, nb, comp);
1260 }
1261 
1262 /* Examine the stack of runs waiting to be merged, merging adjacent runs
1263  * until the stack invariants are re-established:
1264  *
1265  * 1. len[-3] > len[-2] + len[-1]
1266  * 2. len[-2] > len[-1]
1267  *
1268  * See listsort.txt for more info.
1269  *
1270  * Returns 0 on success, -1 on error.
1271  */
1272 template <typename T>
1273 template <typename Comp>
1274 int
1276 {
1277  struct s_slice *p = m_ms->m_pending;
1278 
1279  while (m_ms->m_n > 1)
1280  {
1281  octave_idx_type n = m_ms->m_n - 2;
1282  if (n > 0 && p[n-1].m_len <= p[n].m_len + p[n+1].m_len)
1283  {
1284  if (p[n-1].m_len < p[n+1].m_len)
1285  --n;
1286  if (merge_at (n, data, comp) < 0)
1287  return -1;
1288  }
1289  else if (p[n].m_len <= p[n+1].m_len)
1290  {
1291  if (merge_at (n, data, comp) < 0)
1292  return -1;
1293  }
1294  else
1295  break;
1296  }
1297 
1298  return 0;
1299 }
1300 
1301 template <typename T>
1302 template <typename Comp>
1303 int
1305 {
1306  struct s_slice *p = m_ms->m_pending;
1307 
1308  while (m_ms->m_n > 1)
1309  {
1310  octave_idx_type n = m_ms->m_n - 2;
1311  if (n > 0 && p[n-1].m_len <= p[n].m_len + p[n+1].m_len)
1312  {
1313  if (p[n-1].m_len < p[n+1].m_len)
1314  --n;
1315  if (merge_at (n, data, idx, comp) < 0)
1316  return -1;
1317  }
1318  else if (p[n].m_len <= p[n+1].m_len)
1319  {
1320  if (merge_at (n, data, idx, comp) < 0)
1321  return -1;
1322  }
1323  else
1324  break;
1325  }
1326 
1327  return 0;
1328 }
1329 
1330 /* Regardless of invariants, merge all runs on the stack until only one
1331  * remains. This is used at the end of the mergesort.
1332  *
1333  * Returns 0 on success, -1 on error.
1334  */
1335 template <typename T>
1336 template <typename Comp>
1337 int
1339 {
1340  struct s_slice *p = m_ms->m_pending;
1341 
1342  while (m_ms->m_n > 1)
1343  {
1344  octave_idx_type n = m_ms->m_n - 2;
1345  if (n > 0 && p[n-1].m_len < p[n+1].m_len)
1346  --n;
1347  if (merge_at (n, data, comp) < 0)
1348  return -1;
1349  }
1350 
1351  return 0;
1352 }
1353 
1354 template <typename T>
1355 template <typename Comp>
1356 int
1358 {
1359  struct s_slice *p = m_ms->m_pending;
1360 
1361  while (m_ms->m_n > 1)
1362  {
1363  octave_idx_type n = m_ms->m_n - 2;
1364  if (n > 0 && p[n-1].m_len < p[n+1].m_len)
1365  --n;
1366  if (merge_at (n, data, idx, comp) < 0)
1367  return -1;
1368  }
1369 
1370  return 0;
1371 }
1372 
1373 /* Compute a good value for the minimum run length; natural runs shorter
1374  * than this are boosted artificially via binary insertion.
1375  *
1376  * If n < 64, return n (it's too small to bother with fancy stuff).
1377  * Else if n is an exact power of 2, return 32.
1378  * Else return an int k, 32 <= k <= 64, such that n/k is close to, but
1379  * strictly less than, an exact power of 2.
1380  *
1381  * See listsort.txt for more info.
1382  */
1383 template <typename T>
1386 {
1387  octave_idx_type r = 0; /* becomes 1 if any 1 bits are shifted off */
1388 
1389  while (n >= 64)
1390  {
1391  r |= n & 1;
1392  n >>= 1;
1393  }
1394 
1395  return n + r;
1396 }
1397 
1398 template <typename T>
1399 template <typename Comp>
1400 void
1401 octave_sort<T>::sort (T *data, octave_idx_type nel, Comp comp)
1402 {
1403  /* Re-initialize the Mergestate as this might be the second time called */
1404  if (! m_ms) m_ms = new MergeState;
1405 
1406  m_ms->reset ();
1408 
1409  if (nel > 1)
1410  {
1411  octave_idx_type nremaining = nel;
1412  octave_idx_type lo = 0;
1413 
1414  /* March over the array once, left to right, finding natural runs,
1415  * and extending short natural runs to minrun elements.
1416  */
1417  octave_idx_type minrun = merge_compute_minrun (nremaining);
1418  do
1419  {
1420  bool descending;
1422 
1423  /* Identify next run. */
1424  n = count_run (data + lo, nremaining, descending, comp);
1425  if (n < 0)
1426  return;
1427  if (descending)
1428  std::reverse (data + lo, data + lo + n);
1429  /* If short, extend to min (minrun, nremaining). */
1430  if (n < minrun)
1431  {
1432  const octave_idx_type force = (nremaining <= minrun ? nremaining
1433  : minrun);
1434  binarysort (data + lo, force, n, comp);
1435  n = force;
1436  }
1437  /* Push run onto m_pending-runs stack, and maybe merge. */
1438  assert (m_ms->m_n < MAX_MERGE_PENDING);
1439  m_ms->m_pending[m_ms->m_n].m_base = lo;
1440  m_ms->m_pending[m_ms->m_n].m_len = n;
1441  m_ms->m_n++;
1442  if (merge_collapse (data, comp) < 0)
1443  return;
1444  /* Advance to find next run. */
1445  lo += n;
1446  nremaining -= n;
1447  }
1448  while (nremaining);
1449 
1450  merge_force_collapse (data, comp);
1451  }
1452 }
1453 
1454 template <typename T>
1455 template <typename Comp>
1456 void
1458  Comp comp)
1459 {
1460  /* Re-initialize the Mergestate as this might be the second time called */
1461  if (! m_ms) m_ms = new MergeState;
1462 
1463  m_ms->reset ();
1465 
1466  if (nel > 1)
1467  {
1468  octave_idx_type nremaining = nel;
1469  octave_idx_type lo = 0;
1470 
1471  /* March over the array once, left to right, finding natural runs,
1472  * and extending short natural runs to minrun elements.
1473  */
1474  octave_idx_type minrun = merge_compute_minrun (nremaining);
1475  do
1476  {
1477  bool descending;
1479 
1480  /* Identify next run. */
1481  n = count_run (data + lo, nremaining, descending, comp);
1482  if (n < 0)
1483  return;
1484  if (descending)
1485  {
1486  std::reverse (data + lo, data + lo + n);
1487  std::reverse (idx + lo, idx + lo + n);
1488  }
1489  /* If short, extend to min (minrun, nremaining). */
1490  if (n < minrun)
1491  {
1492  const octave_idx_type force = (nremaining <= minrun ? nremaining
1493  : minrun);
1494  binarysort (data + lo, idx + lo, force, n, comp);
1495  n = force;
1496  }
1497  /* Push run onto m_pending-runs stack, and maybe merge. */
1498  assert (m_ms->m_n < MAX_MERGE_PENDING);
1499  m_ms->m_pending[m_ms->m_n].m_base = lo;
1500  m_ms->m_pending[m_ms->m_n].m_len = n;
1501  m_ms->m_n++;
1502  if (merge_collapse (data, idx, comp) < 0)
1503  return;
1504  /* Advance to find next run. */
1505  lo += n;
1506  nremaining -= n;
1507  }
1508  while (nremaining);
1509 
1510  merge_force_collapse (data, idx, comp);
1511  }
1512 }
1513 
1514 template <typename T>
1515 using compare_fcn_ptr = bool (*) (typename ref_param<T>::type,
1516  typename ref_param<T>::type);
1517 
1518 template <typename T>
1519 void
1521 {
1522 #if defined (INLINE_ASCENDING_SORT)
1523  if (*m_compare.template target<compare_fcn_ptr<T>> () == ascending_compare)
1524  sort (data, nel, std::less<T> ());
1525  else
1526 #endif
1527 #if defined (INLINE_DESCENDING_SORT)
1528  if (*m_compare.template target<compare_fcn_ptr<T>> () == descending_compare)
1529  sort (data, nel, std::greater<T> ());
1530  else
1531 #endif
1532  if (m_compare)
1533  sort (data, nel, m_compare);
1534 }
1535 
1536 template <typename T>
1537 void
1539 {
1540 #if defined (INLINE_ASCENDING_SORT)
1541  if (*m_compare.template target<compare_fcn_ptr<T>> () == ascending_compare)
1542  sort (data, idx, nel, std::less<T> ());
1543  else
1544 #endif
1545 #if defined (INLINE_DESCENDING_SORT)
1546  if (*m_compare.template target<compare_fcn_ptr<T>> () == descending_compare)
1547  sort (data, idx, nel, std::greater<T> ());
1548  else
1549 #endif
1550  if (m_compare)
1551  sort (data, idx, nel, m_compare);
1552 }
1553 
1554 template <typename T>
1555 template <typename Comp>
1556 bool
1557 octave_sort<T>::issorted (const T *data, octave_idx_type nel, Comp comp)
1558 {
1559  const T *end = data + nel;
1560  if (data != end)
1561  {
1562  const T *next = data;
1563  while (++next != end)
1564  {
1565  if (comp (*next, *data))
1566  break;
1567  data = next;
1568  }
1569  data = next;
1570  }
1571 
1572  return data == end;
1573 }
1574 
1575 template <typename T>
1576 bool
1578 {
1579  bool retval = false;
1580 #if defined (INLINE_ASCENDING_SORT)
1581  if (*m_compare.template target<compare_fcn_ptr<T>> () == ascending_compare)
1582  retval = issorted (data, nel, std::less<T> ());
1583  else
1584 #endif
1585 #if defined (INLINE_DESCENDING_SORT)
1586  if (*m_compare.template target<compare_fcn_ptr<T>> () == descending_compare)
1587  retval = issorted (data, nel, std::greater<T> ());
1588  else
1589 #endif
1590  if (m_compare)
1591  retval = issorted (data, nel, m_compare);
1592 
1593  return retval;
1594 }
1595 
1597 {
1598 public:
1600  : col (c), ofs (o), nel (n) { }
1601  //--------
1603 };
1604 
1605 template <typename T>
1606 template <typename Comp>
1607 void
1609  octave_idx_type rows, octave_idx_type cols,
1610  Comp comp)
1611 {
1612  OCTAVE_LOCAL_BUFFER (T, buf, rows);
1613  for (octave_idx_type i = 0; i < rows; i++)
1614  idx[i] = i;
1615 
1616  if (cols == 0 || rows <= 1)
1617  return;
1618 
1619  // This is a breadth-first traversal.
1620  typedef sortrows_run_t run_t;
1621  std::stack<run_t> runs;
1622 
1623  runs.push (run_t (0, 0, rows));
1624 
1625  while (! runs.empty ())
1626  {
1627  octave_idx_type col = runs.top ().col;
1628  octave_idx_type ofs = runs.top ().ofs;
1629  octave_idx_type nel = runs.top ().nel;
1630  runs.pop ();
1631  assert (nel > 1);
1632 
1633  T *lbuf = buf + ofs;
1634  const T *ldata = data + rows*col;
1635  octave_idx_type *lidx = idx + ofs;
1636 
1637  // Gather.
1638  for (octave_idx_type i = 0; i < nel; i++)
1639  lbuf[i] = ldata[lidx[i]];
1640 
1641  // Sort.
1642  sort (lbuf, lidx, nel, comp);
1643 
1644  // Identify constant runs and schedule subsorts.
1645  if (col < cols-1)
1646  {
1647  octave_idx_type lst = 0;
1648  for (octave_idx_type i = 0; i < nel; i++)
1649  {
1650  if (comp (lbuf[lst], lbuf[i]))
1651  {
1652  if (i > lst + 1)
1653  runs.push (run_t (col+1, ofs + lst, i - lst));
1654  lst = i;
1655  }
1656  }
1657  if (nel > lst + 1)
1658  runs.push (run_t (col+1, ofs + lst, nel - lst));
1659  }
1660  }
1661 }
1662 
1663 template <typename T>
1664 void
1666  octave_idx_type rows, octave_idx_type cols)
1667 {
1668 #if defined (INLINE_ASCENDING_SORT)
1669  if (*m_compare.template target<compare_fcn_ptr<T>> () == ascending_compare)
1670  sort_rows (data, idx, rows, cols, std::less<T> ());
1671  else
1672 #endif
1673 #if defined (INLINE_DESCENDING_SORT)
1674  if (*m_compare.template target<compare_fcn_ptr<T>> () == descending_compare)
1675  sort_rows (data, idx, rows, cols, std::greater<T> ());
1676  else
1677 #endif
1678  if (m_compare)
1679  sort_rows (data, idx, rows, cols, m_compare);
1680 }
1681 
1682 template <typename T>
1683 template <typename Comp>
1684 bool
1686  octave_idx_type cols, Comp comp)
1687 {
1688  if (rows <= 1 || cols == 0)
1689  return true;
1690 
1691  // This is a breadth-first traversal.
1692  const T *lastrow = data + rows*(cols - 1);
1693  typedef std::pair<const T *, octave_idx_type> run_t;
1694  std::stack<run_t> runs;
1695 
1696  bool sorted = true;
1697  runs.push (run_t (data, rows));
1698  while (sorted && ! runs.empty ())
1699  {
1700  const T *lo = runs.top ().first;
1701  octave_idx_type n = runs.top ().second;
1702  runs.pop ();
1703  if (lo < lastrow)
1704  {
1705  // Not the final column.
1706  assert (n > 1);
1707  const T *hi = lo + n;
1708  const T *lst = lo;
1709  for (lo++; lo < hi; lo++)
1710  {
1711  if (comp (*lst, *lo))
1712  {
1713  if (lo > lst + 1)
1714  runs.push (run_t (lst + rows, lo - lst));
1715  lst = lo;
1716  }
1717  else if (comp (*lo, *lst))
1718  break;
1719 
1720  }
1721  if (lo == hi)
1722  {
1723  if (lo > lst + 1)
1724  runs.push (run_t (lst + rows, lo - lst));
1725  }
1726  else
1727  {
1728  sorted = false;
1729  break;
1730  }
1731  }
1732  else
1733  // The final column - use fast code.
1734  sorted = issorted (lo, n, comp);
1735  }
1736 
1737  return sorted;
1738 }
1739 
1740 template <typename T>
1741 bool
1743  octave_idx_type cols)
1744 {
1745  bool retval = false;
1746 #if defined (INLINE_ASCENDING_SORT)
1747  if (*m_compare.template target<compare_fcn_ptr<T>> () == ascending_compare)
1748  retval = is_sorted_rows (data, rows, cols, std::less<T> ());
1749  else
1750 #endif
1751 #if defined (INLINE_DESCENDING_SORT)
1752  if (*m_compare.template target<compare_fcn_ptr<T>> () == descending_compare)
1753  retval = is_sorted_rows (data, rows, cols, std::greater<T> ());
1754  else
1755 #endif
1756  if (m_compare)
1757  retval = is_sorted_rows (data, rows, cols, m_compare);
1758 
1759  return retval;
1760 }
1761 
1762 // The simple binary lookup.
1763 
1764 template <typename T>
1765 template <typename Comp>
1768  const T& value, Comp comp)
1769 {
1770  octave_idx_type lo = 0;
1771  octave_idx_type hi = nel;
1772 
1773  while (lo < hi)
1774  {
1775  octave_idx_type mid = lo + ((hi-lo) >> 1);
1776  if (comp (value, data[mid]))
1777  hi = mid;
1778  else
1779  lo = mid + 1;
1780  }
1781 
1782  return lo;
1783 }
1784 
1785 template <typename T>
1788  const T& value)
1789 {
1790  octave_idx_type retval = 0;
1791 #if defined (INLINE_ASCENDING_SORT)
1792  if (*m_compare.template target<compare_fcn_ptr<T>> () == ascending_compare)
1793  retval = lookup (data, nel, value, std::less<T> ());
1794  else
1795 #endif
1796 #if defined (INLINE_DESCENDING_SORT)
1797  if (*m_compare.template target<compare_fcn_ptr<T>> () == descending_compare)
1798  retval = lookup (data, nel, value, std::greater<T> ());
1799  else
1800 #endif
1801  if (m_compare)
1802  retval = lookup (data, nel, value, m_compare);
1803 
1804  return retval;
1805 }
1806 
1807 template <typename T>
1808 template <typename Comp>
1809 void
1811  const T *values, octave_idx_type nvalues,
1812  octave_idx_type *idx, Comp comp)
1813 {
1814  // Use a sequence of binary lookups.
1815  // FIXME: Can this be sped up generally? The sorted merge case is dealt with
1816  // elsewhere.
1817  for (octave_idx_type j = 0; j < nvalues; j++)
1818  idx[j] = lookup (data, nel, values[j], comp);
1819 }
1820 
1821 template <typename T>
1822 void
1824  const T *values, octave_idx_type nvalues,
1825  octave_idx_type *idx)
1826 {
1827 #if defined (INLINE_ASCENDING_SORT)
1828  if (*m_compare.template target<compare_fcn_ptr<T>> () == ascending_compare)
1829  lookup (data, nel, values, nvalues, idx, std::less<T> ());
1830  else
1831 #endif
1832 #if defined (INLINE_DESCENDING_SORT)
1833  if (*m_compare.template target<compare_fcn_ptr<T>> () == descending_compare)
1834  lookup (data, nel, values, nvalues, idx, std::greater<T> ());
1835  else
1836 #endif
1837  if (m_compare)
1838  lookup (data, nel, values, nvalues, idx, m_compare);
1839 }
1840 
1841 template <typename T>
1842 template <typename Comp>
1843 void
1845  const T *values, octave_idx_type nvalues,
1846  octave_idx_type *idx, bool rev, Comp comp)
1847 {
1848  if (rev)
1849  {
1850  octave_idx_type i = 0;
1851  octave_idx_type j = nvalues - 1;
1852 
1853  if (nvalues > 0 && nel > 0)
1854  {
1855  while (true)
1856  {
1857  if (comp (values[j], data[i]))
1858  {
1859  idx[j] = i;
1860  if (--j < 0)
1861  break;
1862  }
1863  else if (++i == nel)
1864  break;
1865  }
1866  }
1867 
1868  for (; j >= 0; j--)
1869  idx[j] = i;
1870  }
1871  else
1872  {
1873  octave_idx_type i = 0;
1874  octave_idx_type j = 0;
1875 
1876  if (nvalues > 0 && nel > 0)
1877  {
1878  while (true)
1879  {
1880  if (comp (values[j], data[i]))
1881  {
1882  idx[j] = i;
1883  if (++j == nvalues)
1884  break;
1885  }
1886  else if (++i == nel)
1887  break;
1888  }
1889  }
1890 
1891  for (; j != nvalues; j++)
1892  idx[j] = i;
1893  }
1894 }
1895 
1896 template <typename T>
1897 void
1899  const T *values, octave_idx_type nvalues,
1900  octave_idx_type *idx, bool rev)
1901 {
1902 #if defined (INLINE_ASCENDING_SORT)
1903  if (*m_compare.template target<compare_fcn_ptr<T>> () == ascending_compare)
1904  lookup_sorted (data, nel, values, nvalues, idx, rev, std::less<T> ());
1905  else
1906 #endif
1907 #if defined (INLINE_DESCENDING_SORT)
1908  if (*m_compare.template target<compare_fcn_ptr<T>> () == descending_compare)
1909  lookup_sorted (data, nel, values, nvalues, idx, rev, std::greater<T> ());
1910  else
1911 #endif
1912  if (m_compare)
1913  lookup_sorted (data, nel, values, nvalues, idx, rev, m_compare);
1914 }
1915 
1916 template <typename T>
1917 template <typename Comp>
1918 void
1921  Comp comp)
1922 {
1923  // Simply wrap the STL algorithms.
1924  // FIXME: this will fail if we attempt to inline <,> for Complex.
1925  if (up == lo+1)
1926  std::nth_element (data, data + lo, data + nel, comp);
1927  else if (lo == 0)
1928  std::partial_sort (data, data + up, data + nel, comp);
1929  else
1930  {
1931  std::nth_element (data, data + lo, data + nel, comp);
1932  if (up == lo + 2)
1933  {
1934  // Finding two subsequent elements.
1935  std::swap (data[lo+1],
1936  *std::min_element (data + lo + 1, data + nel, comp));
1937  }
1938  else
1939  std::partial_sort (data + lo + 1, data + up, data + nel, comp);
1940  }
1941 }
1942 
1943 template <typename T>
1944 void
1947 {
1948  if (up < 0)
1949  up = lo + 1;
1950 
1951 #if defined (INLINE_ASCENDING_SORT)
1952  if (*m_compare.template target<compare_fcn_ptr<T>> () == ascending_compare)
1953  nth_element (data, nel, lo, up, std::less<T> ());
1954  else
1955 #endif
1956 #if defined (INLINE_DESCENDING_SORT)
1957  if (*m_compare.template target<compare_fcn_ptr<T>> () == descending_compare)
1958  nth_element (data, nel, lo, up, std::greater<T> ());
1959  else
1960 #endif
1961  if (m_compare)
1962  nth_element (data, nel, lo, up, m_compare);
1963 }
1964 
1965 template <typename T>
1966 bool
1968  typename ref_param<T>::type y)
1969 {
1970  return x < y;
1971 }
1972 
1973 template <typename T>
1974 bool
1976  typename ref_param<T>::type y)
1977 {
1978  return x > y;
1979 }
charNDArray max(char d, const charNDArray &m)
Definition: chNDArray.cc:230
octave_sort(void)
Definition: oct-sort.cc:122
void set_compare(const compare_fcn_type &comp)
Definition: oct-sort.h:121
static const int MERGESTATE_TEMP_SIZE
Definition: oct-sort.h:183
int merge_collapse(T *data, Comp comp)
Definition: oct-sort.cc:1275
~octave_sort(void)
Definition: oct-sort.cc:132
void sort_rows(const T *data, octave_idx_type *idx, octave_idx_type rows, octave_idx_type cols)
Definition: oct-sort.cc:1665
octave_idx_type gallop_right(T key, T *a, octave_idx_type n, octave_idx_type hint, Comp comp)
Definition: oct-sort.cc:419
int merge_hi(T *pa, octave_idx_type na, T *pb, octave_idx_type nb, Comp comp)
Definition: oct-sort.cc:873
octave_idx_type count_run(T *lo, octave_idx_type n, bool &descending, Comp comp)
Definition: oct-sort.cc:264
compare_fcn_type m_compare
Definition: oct-sort.h:247
octave_idx_type gallop_left(T key, T *a, octave_idx_type n, octave_idx_type hint, Comp comp)
Definition: oct-sort.cc:324
static bool descending_compare(typename ref_param< T >::type, typename ref_param< T >::type)
Definition: oct-sort.cc:1975
bool issorted(const T *data, octave_idx_type nel)
Definition: oct-sort.cc:1577
std::function< bool(typename ref_param< T >::type, typename ref_param< T >::type)> compare_fcn_type
Definition: oct-sort.h:107
static bool ascending_compare(typename ref_param< T >::type, typename ref_param< T >::type)
Definition: oct-sort.cc:1967
octave_idx_type lookup(const T *data, octave_idx_type nel, const T &value)
Definition: oct-sort.cc:1787
MergeState * m_ms
Definition: oct-sort.h:249
int merge_at(octave_idx_type i, T *data, Comp comp)
Definition: oct-sort.cc:1160
octave_idx_type merge_compute_minrun(octave_idx_type n)
Definition: oct-sort.cc:1385
void binarysort(T *data, octave_idx_type nel, octave_idx_type start, Comp comp)
Definition: oct-sort.cc:152
void sort(T *data, octave_idx_type nel)
Definition: oct-sort.cc:1520
static const int MIN_GALLOP
Definition: oct-sort.h:180
void nth_element(T *data, octave_idx_type nel, octave_idx_type lo, octave_idx_type up=-1)
Definition: oct-sort.cc:1945
bool is_sorted_rows(const T *data, octave_idx_type rows, octave_idx_type cols)
Definition: oct-sort.cc:1742
static const int MAX_MERGE_PENDING
Definition: oct-sort.h:176
int merge_force_collapse(T *data, Comp comp)
Definition: oct-sort.cc:1338
int merge_lo(T *pa, octave_idx_type na, T *pb, octave_idx_type nb, Comp comp)
Definition: oct-sort.cc:591
void lookup_sorted(const T *data, octave_idx_type nel, const T *values, octave_idx_type nvalues, octave_idx_type *idx, bool rev=false)
Definition: oct-sort.cc:1898
if_then_else< is_class_type< T >::no, T, T const & >::result type
Definition: lo-traits.h:121
F77_RET_T const F77_DBLE * x
T octave_idx_type m
Definition: mx-inlines.cc:773
octave_idx_type n
Definition: mx-inlines.cc:753
T * r
Definition: mx-inlines.cc:773
#define OCTAVE_LOCAL_BUFFER(T, buf, size)
Definition: oct-locbuf.h:44
bool(*)(typename ref_param< T >::type, typename ref_param< T >::type) compare_fcn_ptr
Definition: oct-sort.cc:1516
static octave_idx_type roundupsize(std::size_t n)
Definition: oct-sort.cc:498
sortmode
Definition: oct-sort.h:97
@ ASCENDING
Definition: oct-sort.h:97
@ DESCENDING
Definition: oct-sort.h:97
static uint32_t * next
Definition: randmtzig.cc:192
void getmem(octave_idx_type need)
Definition: oct-sort.cc:547
struct s_slice m_pending[MAX_MERGE_PENDING]
Definition: oct-sort.h:244
octave_idx_type m_n
Definition: oct-sort.h:243
octave_idx_type m_alloced
Definition: oct-sort.h:233
void getmemi(octave_idx_type need)
Definition: oct-sort.cc:565
octave_idx_type * m_ia
Definition: oct-sort.h:232
octave_idx_type m_min_gallop
Definition: oct-sort.h:227
octave_idx_type m_base
Definition: oct-sort.h:195
octave_idx_type m_len
Definition: oct-sort.h:195
octave_idx_type col
Definition: oct-sort.cc:1602
sortrows_run_t(octave_idx_type c, octave_idx_type o, octave_idx_type n)
Definition: oct-sort.cc:1599