GNU Octave  9.1.0
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab
oct-sort.cc
Go to the documentation of this file.
1 ////////////////////////////////////////////////////////////////////////
2 //
3 // Copyright (C) 2003-2024 The Octave Project Developers
4 //
5 // See the file COPYRIGHT.md in the top-level directory of this
6 // distribution or <https://octave.org/copyright/>.
7 //
8 // This file is part of Octave.
9 //
10 // Octave is free software: you can redistribute it and/or modify it
11 // under the terms of the GNU General Public License as published by
12 // the Free Software Foundation, either version 3 of the License, or
13 // (at your option) any later version.
14 //
15 // Octave is distributed in the hope that it will be useful, but
16 // WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 // GNU General Public License for more details.
19 //
20 // You should have received a copy of the GNU General Public License
21 // along with Octave; see the file COPYING. If not, see
22 // <https://www.gnu.org/licenses/>.
23 //
24 // Code stolen in large part from Python's, listobject.c, which itself had
25 // no license header. However, thanks to Tim Peters for the parts of the
26 // code I ripped-off.
27 //
28 // As required in the Python license the short description of the changes
29 // made are
30 //
31 // * convert the sorting code in listobject.cc into a generic class,
32 // replacing PyObject* with the type of the class T.
33 //
34 // * replaced usages of malloc, free, memcpy and memmove by standard C++
35 // new [], delete [] and std::copy and std::copy_backward. Note that replacing
36 // memmove by std::copy is possible if the destination starts before the source.
37 // If not, std::copy_backward needs to be used.
38 //
39 // * templatize comparison operator in most methods, provide possible dispatch
40 //
41 // * duplicate methods to avoid by-the-way indexed sorting
42 //
43 // * add methods for verifying sortedness of array
44 //
45 // * row sorting via breadth-first tree subsorting
46 //
47 // * binary lookup and sequential binary lookup optimized for dense downsampling.
48 //
49 // * NOTE: the memory management routines rely on the fact that delete [] silently
50 // ignores null pointers. Don't gripe about the missing checks - they're there.
51 //
52 //
53 // The Python license is
54 //
55 // PSF LICENSE AGREEMENT FOR PYTHON 2.3
56 // --------------------------------------
57 //
58 // 1. This LICENSE AGREEMENT is between the Python Software Foundation
59 // ("PSF"), and the Individual or Organization ("Licensee") accessing and
60 // otherwise using Python 2.3 software in source or binary form and its
61 // associated documentation.
62 //
63 // 2. Subject to the terms and conditions of this License Agreement, PSF
64 // hereby grants Licensee a nonexclusive, royalty-free, world-wide
65 // license to reproduce, analyze, test, perform and/or display publicly,
66 // prepare derivative works, distribute, and otherwise use Python 2.3
67 // alone or in any derivative version, provided, however, that PSF's
68 // License Agreement and PSF's notice of copyright, i.e., "Copyright (c)
69 // 2001, 2002, 2003 Python Software Foundation; All Rights Reserved" are
70 // retained in Python 2.3 alone or in any derivative version prepared by
71 // Licensee.
72 //
73 // 3. In the event Licensee prepares a derivative work that is based on
74 // or incorporates Python 2.3 or any part thereof, and wants to make
75 // the derivative work available to others as provided herein, then
76 // Licensee hereby agrees to include in any such work a brief summary of
77 // the changes made to Python 2.3.
78 //
79 // 4. PSF is making Python 2.3 available to Licensee on an "AS IS"
80 // basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
81 // IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND
82 // DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
83 // FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 2.3 WILL NOT
84 // INFRINGE ANY THIRD PARTY RIGHTS.
85 //
86 // 5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
87 // 2.3 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
88 // A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 2.3,
89 // OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
90 //
91 // 6. This License Agreement will automatically terminate upon a material
92 // breach of its terms and conditions.
93 //
94 // 7. Nothing in this License Agreement shall be deemed to create any
95 // relationship of agency, partnership, or joint venture between PSF and
96 // Licensee. This License Agreement does not grant permission to use PSF
97 // trademarks or trade name in a trademark sense to endorse or promote
98 // products or services of Licensee, or any third party.
99 //
100 // 8. By copying, installing or otherwise using Python 2.3, Licensee
101 // agrees to be bound by the terms and conditions of this License
102 // Agreement.
103 //
104 ////////////////////////////////////////////////////////////////////////
105 
106 // This file should not include config.h. It is only included in other
107 // C++ source files that should have included config.h before including
108 // this file.
109 
110 #include <cassert>
111 #include <algorithm>
112 #include <cstring>
113 #include <stack>
114 
115 #include "lo-error.h"
116 #include "lo-mappers.h"
117 #include "quit.h"
118 #include "oct-sort.h"
119 #include "oct-locbuf.h"
120 
121 template <typename T>
123  m_compare (ascending_compare), m_ms (nullptr)
124 { }
125 
126 template <typename T>
128  : m_compare (comp), m_ms (nullptr)
129 { }
130 
131 template <typename T>
133 {
134  delete m_ms;
135 }
136 
137 template <typename T>
138 void
140 {
141  if (mode == ASCENDING)
142  m_compare = ascending_compare;
143  else if (mode == DESCENDING)
144  m_compare = descending_compare;
145  else
146  m_compare = compare_fcn_type ();
147 }
148 
149 template <typename T>
150 template <typename Comp>
151 void
153  octave_idx_type start, Comp comp)
154 {
155  if (start == 0)
156  ++start;
157 
158  for (; start < nel; ++start)
159  {
160  /* set l to where *start belongs */
161  octave_idx_type l = 0;
162  octave_idx_type r = start;
163  T pivot = data[start];
164  /* Invariants:
165  * pivot >= all in [lo, l).
166  * pivot < all in [r, start).
167  * The second is vacuously true at the start.
168  */
169  do
170  {
171  octave_idx_type p = l + ((r - l) >> 1);
172  if (comp (pivot, data[p]))
173  r = p;
174  else
175  l = p+1;
176  }
177  while (l < r);
178  /* The invariants still hold, so pivot >= all in [lo, l) and
179  pivot < all in [l, start), so pivot belongs at l. Note
180  that if there are elements equal to pivot, l points to the
181  first slot after them -- that's why this sort is stable.
182  Slide over to make room.
183  Caution: using memmove is much slower under MSVC 5;
184  we're not usually moving many slots. */
185  // NOTE: using swap and going upwards appears to be faster.
186  for (octave_idx_type p = l; p < start; p++)
187  std::swap (pivot, data[p]);
188  data[start] = pivot;
189  }
190 
191  return;
192 }
193 
194 template <typename T>
195 template <typename Comp>
196 void
198  octave_idx_type start, Comp comp)
199 {
200  if (start == 0)
201  ++start;
202 
203  for (; start < nel; ++start)
204  {
205  /* set l to where *start belongs */
206  octave_idx_type l = 0;
207  octave_idx_type r = start;
208  T pivot = data[start];
209  /* Invariants:
210  * pivot >= all in [lo, l).
211  * pivot < all in [r, start).
212  * The second is vacuously true at the start.
213  */
214  do
215  {
216  octave_idx_type p = l + ((r - l) >> 1);
217  if (comp (pivot, data[p]))
218  r = p;
219  else
220  l = p+1;
221  }
222  while (l < r);
223  /* The invariants still hold, so pivot >= all in [lo, l) and
224  pivot < all in [l, start), so pivot belongs at l. Note
225  that if there are elements equal to pivot, l points to the
226  first slot after them -- that's why this sort is stable.
227  Slide over to make room.
228  Caution: using memmove is much slower under MSVC 5;
229  we're not usually moving many slots. */
230  // NOTE: using swap and going upwards appears to be faster.
231  for (octave_idx_type p = l; p < start; p++)
232  std::swap (pivot, data[p]);
233  data[start] = pivot;
234  octave_idx_type ipivot = idx[start];
235  for (octave_idx_type p = l; p < start; p++)
236  std::swap (ipivot, idx[p]);
237  idx[start] = ipivot;
238  }
239 
240  return;
241 }
242 
243 /*
244 Return the length of the run beginning at lo, in the slice [lo, hi). lo < hi
245 is required on entry. "A run" is the longest ascending sequence, with
246 
247  lo[0] <= lo[1] <= lo[2] <= ...
248 
249 or the longest descending sequence, with
250 
251  lo[0] > lo[1] > lo[2] > ...
252 
253 DESCENDING is set to false in the former case, or to true in the latter.
254 For its intended use in a stable mergesort, the strictness of the defn of
255 "descending" is needed so that the caller can safely reverse a descending
256 sequence without violating stability (strict > ensures there are no equal
257 elements to get out of order).
258 
259 Returns -1 in case of error.
260 */
261 template <typename T>
262 template <typename Comp>
264 octave_sort<T>::count_run (T *lo, octave_idx_type nel, bool& descending,
265  Comp comp)
266 {
268  T *hi = lo + nel;
269 
270  descending = false;
271  ++lo;
272  if (lo == hi)
273  return 1;
274 
275  n = 2;
276 
277  if (comp (*lo, *(lo-1)))
278  {
279  descending = true;
280  for (lo = lo+1; lo < hi; ++lo, ++n)
281  {
282  if (comp (*lo, *(lo-1)))
283  ;
284  else
285  break;
286  }
287  }
288  else
289  {
290  for (lo = lo+1; lo < hi; ++lo, ++n)
291  {
292  if (comp (*lo, *(lo-1)))
293  break;
294  }
295  }
296 
297  return n;
298 }
299 
300 /*
301 Locate the proper position of key in a sorted vector; if the vector contains
302 an element equal to key, return the position immediately to the left of
303 the leftmost equal element. [gallop_right() does the same except returns
304 the position to the right of the rightmost equal element (if any).]
305 
306 "a" is a sorted vector with n elements, starting at a[0]. n must be > 0.
307 
308 "hint" is an index at which to begin the search, 0 <= hint < n. The closer
309 hint is to the final result, the faster this runs.
310 
311 The return value is the int k in 0..n such that
312 
313  a[k-1] < key <= a[k]
314 
315 pretending that *(a-1) is minus infinity and a[n] is plus infinity. IOW,
316 key belongs at index k; or, IOW, the first k elements of a should precede
317 key, and the last n-k should follow key.
318 
319 Returns -1 on error. See listsort.txt for info on the method.
320 */
321 template <typename T>
322 template <typename Comp>
325  octave_idx_type hint,
326  Comp comp)
327 {
328  octave_idx_type ofs;
329  octave_idx_type lastofs;
330  octave_idx_type k;
331 
332  a += hint;
333  lastofs = 0;
334  ofs = 1;
335  if (comp (*a, key))
336  {
337  /* a[hint] < key -- gallop right, until
338  * a[hint + lastofs] < key <= a[hint + ofs]
339  */
340  const octave_idx_type maxofs = n - hint; /* &a[n-1] is highest */
341  while (ofs < maxofs)
342  {
343  if (comp (a[ofs], key))
344  {
345  lastofs = ofs;
346  ofs = (ofs << 1) + 1;
347  if (ofs <= 0) /* int overflow */
348  ofs = maxofs;
349  }
350  else /* key <= a[hint + ofs] */
351  break;
352  }
353  if (ofs > maxofs)
354  ofs = maxofs;
355  /* Translate back to offsets relative to &a[0]. */
356  lastofs += hint;
357  ofs += hint;
358  }
359  else
360  {
361  /* key <= a[hint] -- gallop left, until
362  * a[hint - ofs] < key <= a[hint - lastofs]
363  */
364  const octave_idx_type maxofs = hint + 1; /* &a[0] is lowest */
365  while (ofs < maxofs)
366  {
367  if (comp (*(a-ofs), key))
368  break;
369  /* key <= a[hint - ofs] */
370  lastofs = ofs;
371  ofs = (ofs << 1) + 1;
372  if (ofs <= 0) /* int overflow */
373  ofs = maxofs;
374  }
375  if (ofs > maxofs)
376  ofs = maxofs;
377  /* Translate back to positive offsets relative to &a[0]. */
378  k = lastofs;
379  lastofs = hint - ofs;
380  ofs = hint - k;
381  }
382  a -= hint;
383 
384  /* Now a[lastofs] < key <= a[ofs], so key belongs somewhere to the
385  * right of lastofs but no farther right than ofs. Do a binary
386  * search, with invariant a[lastofs-1] < key <= a[ofs].
387  */
388  ++lastofs;
389  while (lastofs < ofs)
390  {
391  octave_idx_type m = lastofs + ((ofs - lastofs) >> 1);
392 
393  if (comp (a[m], key))
394  lastofs = m+1; /* a[m] < key */
395  else
396  ofs = m; /* key <= a[m] */
397  }
398 
399  return ofs;
400 }
401 
402 /*
403 Exactly like gallop_left(), except that if key already exists in a[0:n],
404 finds the position immediately to the right of the rightmost equal value.
405 
406 The return value is the int k in 0..n such that
407 
408  a[k-1] <= key < a[k]
409 
410 or -1 if error.
411 
412 The code duplication is massive, but this is enough different given that
413 we're sticking to "<" comparisons that it's much harder to follow if
414 written as one routine with yet another "left or right?" flag.
415 */
416 template <typename T>
417 template <typename Comp>
420  octave_idx_type hint,
421  Comp comp)
422 {
423  octave_idx_type ofs;
424  octave_idx_type lastofs;
425  octave_idx_type k;
426 
427  a += hint;
428  lastofs = 0;
429  ofs = 1;
430  if (comp (key, *a))
431  {
432  /* key < a[hint] -- gallop left, until
433  * a[hint - ofs] <= key < a[hint - lastofs]
434  */
435  const octave_idx_type maxofs = hint + 1; /* &a[0] is lowest */
436  while (ofs < maxofs)
437  {
438  if (comp (key, *(a-ofs)))
439  {
440  lastofs = ofs;
441  ofs = (ofs << 1) + 1;
442  if (ofs <= 0) /* int overflow */
443  ofs = maxofs;
444  }
445  else /* a[hint - ofs] <= key */
446  break;
447  }
448  if (ofs > maxofs)
449  ofs = maxofs;
450  /* Translate back to positive offsets relative to &a[0]. */
451  k = lastofs;
452  lastofs = hint - ofs;
453  ofs = hint - k;
454  }
455  else
456  {
457  /* a[hint] <= key -- gallop right, until
458  * a[hint + lastofs] <= key < a[hint + ofs]
459  */
460  const octave_idx_type maxofs = n - hint; /* &a[n-1] is highest */
461  while (ofs < maxofs)
462  {
463  if (comp (key, a[ofs]))
464  break;
465  /* a[hint + ofs] <= key */
466  lastofs = ofs;
467  ofs = (ofs << 1) + 1;
468  if (ofs <= 0) /* int overflow */
469  ofs = maxofs;
470  }
471  if (ofs > maxofs)
472  ofs = maxofs;
473  /* Translate back to offsets relative to &a[0]. */
474  lastofs += hint;
475  ofs += hint;
476  }
477  a -= hint;
478 
479  /* Now a[lastofs] <= key < a[ofs], so key belongs somewhere to the
480  * right of lastofs but no farther right than ofs. Do a binary
481  * search, with invariant a[lastofs-1] <= key < a[ofs].
482  */
483  ++lastofs;
484  while (lastofs < ofs)
485  {
486  octave_idx_type m = lastofs + ((ofs - lastofs) >> 1);
487 
488  if (comp (key, a[m]))
489  ofs = m; /* key < a[m] */
490  else
491  lastofs = m+1; /* a[m] <= key */
492  }
493 
494  return ofs;
495 }
496 
497 static inline octave_idx_type
498 roundupsize (std::size_t n)
499 {
500  unsigned int nbits = 3;
501  std::size_t n2 = n >> 8;
502 
503  /* Round up:
504  * If n < 256, to a multiple of 8.
505  * If n < 2048, to a multiple of 64.
506  * If n < 16384, to a multiple of 512.
507  * If n < 131072, to a multiple of 4096.
508  * If n < 1048576, to a multiple of 32768.
509  * If n < 8388608, to a multiple of 262144.
510  * If n < 67108864, to a multiple of 2097152.
511  * If n < 536870912, to a multiple of 16777216.
512  * ...
513  * If n < 2**(5+3*i), to a multiple of 2**(3*i).
514  *
515  * This over-allocates proportional to the list size, making room
516  * for additional growth. The over-allocation is mild, but is
517  * enough to give linear-time amortized behavior over a long
518  * sequence of appends() in the presence of a poorly-performing
519  * system realloc() (which is a reality, e.g., across all flavors
520  * of Windows, with Win9x behavior being particularly bad -- and
521  * we've still got address space fragmentation problems on Win9x
522  * even with this scheme, although it requires much longer lists to
523  * provoke them than it used to).
524  */
525  while (n2)
526  {
527  n2 >>= 3;
528  nbits += 3;
529  }
530 
531  std::size_t new_size = ((n >> nbits) + 1) << nbits;
532 
533  if (new_size == 0
534  || new_size
535  > static_cast<std::size_t> (std::numeric_limits<octave_idx_type>::max ()))
536  (*current_liboctave_error_handler)
537  ("unable to allocate sufficient memory for sort");
538 
539  return static_cast<octave_idx_type> (new_size);
540 }
541 
542 /* Ensure enough temp memory for 'need' array slots is available.
543  * Returns 0 on success and -1 if the memory can't be gotten.
544  */
545 template <typename T>
546 void
548 {
549  if (need <= m_alloced)
550  return;
551 
552  need = roundupsize (need);
553  /* Don't realloc! That can cost cycles to copy the old data, but
554  * we don't care what's in the block.
555  */
556  delete [] m_a;
557  delete [] m_ia; // Must do this or fool possible next getmemi.
558  m_a = new T [need];
559  m_alloced = need;
560 
561 }
562 
563 template <typename T>
564 void
566 {
567  if (m_ia && need <= m_alloced)
568  return;
569 
570  need = roundupsize (need);
571  /* Don't realloc! That can cost cycles to copy the old data, but
572  * we don't care what's in the block.
573  */
574  delete [] m_a;
575  delete [] m_ia;
576 
577  m_a = new T [need];
578  m_ia = new octave_idx_type [need];
579  m_alloced = need;
580 }
581 
582 /* Merge the na elements starting at pa with the nb elements starting at pb
583  * in a stable way, in-place. na and nb must be > 0, and pa + na == pb.
584  * Must also have that *pb < *pa, that pa[na-1] belongs at the end of the
585  * merge, and should have na <= nb. See listsort.txt for more info.
586  * Return 0 if successful, -1 if error.
587  */
588 template <typename T>
589 template <typename Comp>
590 int
592  T *pb, octave_idx_type nb,
593  Comp comp)
594 {
595  octave_idx_type k;
596  T *dest;
597  int result = -1; /* guilty until proved innocent */
598  octave_idx_type min_gallop = m_ms->m_min_gallop;
599 
600  m_ms->getmem (na);
601 
602  std::copy (pa, pa + na, m_ms->m_a);
603  dest = pa;
604  pa = m_ms->m_a;
605 
606  *dest++ = *pb++;
607  --nb;
608  if (nb == 0)
609  goto Succeed;
610  if (na == 1)
611  goto CopyB;
612 
613  for (;;)
614  {
615  octave_idx_type acount = 0; /* # of times A won in a row */
616  octave_idx_type bcount = 0; /* # of times B won in a row */
617 
618  /* Do the straightforward thing until (if ever) one run
619  * appears to win consistently.
620  */
621  for (;;)
622  {
623 
624  // FIXME: these loops are candidates for further optimizations.
625  // Rather than testing everything in each cycle, it may be more
626  // efficient to do it in hunks.
627  if (comp (*pb, *pa))
628  {
629  *dest++ = *pb++;
630  ++bcount;
631  acount = 0;
632  --nb;
633  if (nb == 0)
634  goto Succeed;
635  if (bcount >= min_gallop)
636  break;
637  }
638  else
639  {
640  *dest++ = *pa++;
641  ++acount;
642  bcount = 0;
643  --na;
644  if (na == 1)
645  goto CopyB;
646  if (acount >= min_gallop)
647  break;
648  }
649  }
650 
651  /* One run is winning so consistently that galloping may
652  * be a huge win. So try that, and continue galloping until
653  * (if ever) neither run appears to be winning consistently
654  * anymore.
655  */
656  ++min_gallop;
657  do
658  {
659  min_gallop -= (min_gallop > 1);
660  m_ms->m_min_gallop = min_gallop;
661  k = gallop_right (*pb, pa, na, 0, comp);
662  acount = k;
663  if (k)
664  {
665  if (k < 0)
666  goto Fail;
667  dest = std::copy (pa, pa + k, dest);
668  pa += k;
669  na -= k;
670  if (na == 1)
671  goto CopyB;
672  /* na==0 is impossible now if the comparison
673  * function is consistent, but we can't assume
674  * that it is.
675  */
676  if (na == 0)
677  goto Succeed;
678  }
679  *dest++ = *pb++;
680  --nb;
681  if (nb == 0)
682  goto Succeed;
683 
684  k = gallop_left (*pa, pb, nb, 0, comp);
685  bcount = k;
686  if (k)
687  {
688  if (k < 0)
689  goto Fail;
690  dest = std::copy (pb, pb + k, dest);
691  pb += k;
692  nb -= k;
693  if (nb == 0)
694  goto Succeed;
695  }
696  *dest++ = *pa++;
697  --na;
698  if (na == 1)
699  goto CopyB;
700  }
701  while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP);
702 
703  ++min_gallop; /* penalize it for leaving galloping mode */
704  m_ms->m_min_gallop = min_gallop;
705  }
706 
707 Succeed:
708  result = 0;
709 
710 Fail:
711  if (na)
712  std::copy (pa, pa + na, dest);
713  return result;
714 
715 CopyB:
716  /* The last element of pa belongs at the end of the merge. */
717  std::copy (pb, pb + nb, dest);
718  dest[nb] = *pa;
719 
720  return 0;
721 }
722 
723 template <typename T>
724 template <typename Comp>
725 int
727  T *pb, octave_idx_type *ipb, octave_idx_type nb,
728  Comp comp)
729 {
730  octave_idx_type k;
731  T *dest;
732  octave_idx_type *idest;
733  int result = -1; /* guilty until proved innocent */
734  octave_idx_type min_gallop = m_ms->m_min_gallop;
735 
736  m_ms->getmemi (na);
737 
738  std::copy (pa, pa + na, m_ms->m_a);
739  std::copy (ipa, ipa + na, m_ms->m_ia);
740  dest = pa; idest = ipa;
741  pa = m_ms->m_a; ipa = m_ms->m_ia;
742 
743  *dest++ = *pb++; *idest++ = *ipb++;
744  --nb;
745  if (nb == 0)
746  goto Succeed;
747  if (na == 1)
748  goto CopyB;
749 
750  for (;;)
751  {
752  octave_idx_type acount = 0; /* # of times A won in a row */
753  octave_idx_type bcount = 0; /* # of times B won in a row */
754 
755  /* Do the straightforward thing until (if ever) one run
756  * appears to win consistently.
757  */
758  for (;;)
759  {
760 
761  if (comp (*pb, *pa))
762  {
763  *dest++ = *pb++; *idest++ = *ipb++;
764  ++bcount;
765  acount = 0;
766  --nb;
767  if (nb == 0)
768  goto Succeed;
769  if (bcount >= min_gallop)
770  break;
771  }
772  else
773  {
774  *dest++ = *pa++; *idest++ = *ipa++;
775  ++acount;
776  bcount = 0;
777  --na;
778  if (na == 1)
779  goto CopyB;
780  if (acount >= min_gallop)
781  break;
782  }
783  }
784 
785  /* One run is winning so consistently that galloping may
786  * be a huge win. So try that, and continue galloping until
787  * (if ever) neither run appears to be winning consistently
788  * anymore.
789  */
790  ++min_gallop;
791  do
792  {
793  min_gallop -= (min_gallop > 1);
794  m_ms->m_min_gallop = min_gallop;
795  k = gallop_right (*pb, pa, na, 0, comp);
796  acount = k;
797  if (k)
798  {
799  if (k < 0)
800  goto Fail;
801  dest = std::copy (pa, pa + k, dest);
802  idest = std::copy (ipa, ipa + k, idest);
803  pa += k; ipa += k;
804  na -= k;
805  if (na == 1)
806  goto CopyB;
807  /* na==0 is impossible now if the comparison
808  * function is consistent, but we can't assume
809  * that it is.
810  */
811  if (na == 0)
812  goto Succeed;
813  }
814  *dest++ = *pb++; *idest++ = *ipb++;
815  --nb;
816  if (nb == 0)
817  goto Succeed;
818 
819  k = gallop_left (*pa, pb, nb, 0, comp);
820  bcount = k;
821  if (k)
822  {
823  if (k < 0)
824  goto Fail;
825  dest = std::copy (pb, pb + k, dest);
826  idest = std::copy (ipb, ipb + k, idest);
827  pb += k; ipb += k;
828  nb -= k;
829  if (nb == 0)
830  goto Succeed;
831  }
832  *dest++ = *pa++; *idest++ = *ipa++;
833  --na;
834  if (na == 1)
835  goto CopyB;
836  }
837  while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP);
838 
839  ++min_gallop; /* penalize it for leaving galloping mode */
840  m_ms->m_min_gallop = min_gallop;
841  }
842 
843 Succeed:
844  result = 0;
845 
846 Fail:
847  if (na)
848  {
849  std::copy (pa, pa + na, dest);
850  std::copy (ipa, ipa + na, idest);
851  }
852  return result;
853 
854 CopyB:
855  /* The last element of pa belongs at the end of the merge. */
856  std::copy (pb, pb + nb, dest);
857  std::copy (ipb, ipb + nb, idest);
858  dest[nb] = *pa;
859  idest[nb] = *ipa;
860 
861  return 0;
862 }
863 
864 /* Merge the na elements starting at pa with the nb elements starting at pb
865  * in a stable way, in-place. na and nb must be > 0, and pa + na == pb.
866  * Must also have that *pb < *pa, that pa[na-1] belongs at the end of the
867  * merge, and should have na >= nb. See listsort.txt for more info.
868  * Return 0 if successful, -1 if error.
869  */
870 template <typename T>
871 template <typename Comp>
872 int
874  T *pb, octave_idx_type nb,
875  Comp comp)
876 {
877  octave_idx_type k;
878  T *dest;
879  int result = -1; /* guilty until proved innocent */
880  T *basea, *baseb;
881  octave_idx_type min_gallop = m_ms->m_min_gallop;
882 
883  m_ms->getmem (nb);
884 
885  dest = pb + nb - 1;
886  std::copy (pb, pb + nb, m_ms->m_a);
887  basea = pa;
888  baseb = m_ms->m_a;
889  pb = m_ms->m_a + nb - 1;
890  pa += na - 1;
891 
892  *dest-- = *pa--;
893  --na;
894  if (na == 0)
895  goto Succeed;
896  if (nb == 1)
897  goto CopyA;
898 
899  for (;;)
900  {
901  octave_idx_type acount = 0; /* # of times A won in a row */
902  octave_idx_type bcount = 0; /* # of times B won in a row */
903 
904  /* Do the straightforward thing until (if ever) one run
905  * appears to win consistently.
906  */
907  for (;;)
908  {
909  if (comp (*pb, *pa))
910  {
911  *dest-- = *pa--;
912  ++acount;
913  bcount = 0;
914  --na;
915  if (na == 0)
916  goto Succeed;
917  if (acount >= min_gallop)
918  break;
919  }
920  else
921  {
922  *dest-- = *pb--;
923  ++bcount;
924  acount = 0;
925  --nb;
926  if (nb == 1)
927  goto CopyA;
928  if (bcount >= min_gallop)
929  break;
930  }
931  }
932 
933  /* One run is winning so consistently that galloping may
934  * be a huge win. So try that, and continue galloping until
935  * (if ever) neither run appears to be winning consistently
936  * anymore.
937  */
938  ++min_gallop;
939  do
940  {
941  min_gallop -= (min_gallop > 1);
942  m_ms->m_min_gallop = min_gallop;
943  k = gallop_right (*pb, basea, na, na-1, comp);
944  if (k < 0)
945  goto Fail;
946  k = na - k;
947  acount = k;
948  if (k)
949  {
950  dest = std::copy_backward (pa+1 - k, pa+1, dest+1) - 1;
951  pa -= k;
952  na -= k;
953  if (na == 0)
954  goto Succeed;
955  }
956  *dest-- = *pb--;
957  --nb;
958  if (nb == 1)
959  goto CopyA;
960 
961  k = gallop_left (*pa, baseb, nb, nb-1, comp);
962  if (k < 0)
963  goto Fail;
964  k = nb - k;
965  bcount = k;
966  if (k)
967  {
968  dest -= k;
969  pb -= k;
970  std::copy (pb+1, pb+1 + k, dest+1);
971  nb -= k;
972  if (nb == 1)
973  goto CopyA;
974  /* nb==0 is impossible now if the comparison
975  * function is consistent, but we can't assume
976  * that it is.
977  */
978  if (nb == 0)
979  goto Succeed;
980  }
981  *dest-- = *pa--;
982  --na;
983  if (na == 0)
984  goto Succeed;
985  }
986  while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP);
987  ++min_gallop; /* penalize it for leaving galloping mode */
988  m_ms->m_min_gallop = min_gallop;
989  }
990 
991 Succeed:
992  result = 0;
993 
994 Fail:
995  if (nb)
996  std::copy (baseb, baseb + nb, dest-(nb-1));
997  return result;
998 
999 CopyA:
1000  /* The first element of pb belongs at the front of the merge. */
1001  dest = std::copy_backward (pa+1 - na, pa+1, dest+1) - 1;
1002  pa -= na;
1003  *dest = *pb;
1004 
1005  return 0;
1006 }
1007 
1008 template <typename T>
1009 template <typename Comp>
1010 int
1012  T *pb, octave_idx_type *ipb, octave_idx_type nb,
1013  Comp comp)
1014 {
1015  octave_idx_type k;
1016  T *dest;
1017  octave_idx_type *idest;
1018  int result = -1; /* guilty until proved innocent */
1019  T *basea, *baseb;
1020  octave_idx_type *ibaseb;
1021  octave_idx_type min_gallop = m_ms->m_min_gallop;
1022 
1023  m_ms->getmemi (nb);
1024 
1025  dest = pb + nb - 1;
1026  idest = ipb + nb - 1;
1027  std::copy (pb, pb + nb, m_ms->m_a);
1028  std::copy (ipb, ipb + nb, m_ms->m_ia);
1029  basea = pa;
1030  baseb = m_ms->m_a; ibaseb = m_ms->m_ia;
1031  pb = m_ms->m_a + nb - 1; ipb = m_ms->m_ia + nb - 1;
1032  pa += na - 1; ipa += na - 1;
1033 
1034  *dest-- = *pa--; *idest-- = *ipa--;
1035  --na;
1036  if (na == 0)
1037  goto Succeed;
1038  if (nb == 1)
1039  goto CopyA;
1040 
1041  for (;;)
1042  {
1043  octave_idx_type acount = 0; /* # of times A won in a row */
1044  octave_idx_type bcount = 0; /* # of times B won in a row */
1045 
1046  /* Do the straightforward thing until (if ever) one run
1047  * appears to win consistently.
1048  */
1049  for (;;)
1050  {
1051  if (comp (*pb, *pa))
1052  {
1053  *dest-- = *pa--; *idest-- = *ipa--;
1054  ++acount;
1055  bcount = 0;
1056  --na;
1057  if (na == 0)
1058  goto Succeed;
1059  if (acount >= min_gallop)
1060  break;
1061  }
1062  else
1063  {
1064  *dest-- = *pb--; *idest-- = *ipb--;
1065  ++bcount;
1066  acount = 0;
1067  --nb;
1068  if (nb == 1)
1069  goto CopyA;
1070  if (bcount >= min_gallop)
1071  break;
1072  }
1073  }
1074 
1075  /* One run is winning so consistently that galloping may
1076  * be a huge win. So try that, and continue galloping until
1077  * (if ever) neither run appears to be winning consistently
1078  * anymore.
1079  */
1080  ++min_gallop;
1081  do
1082  {
1083  min_gallop -= (min_gallop > 1);
1084  m_ms->m_min_gallop = min_gallop;
1085  k = gallop_right (*pb, basea, na, na-1, comp);
1086  if (k < 0)
1087  goto Fail;
1088  k = na - k;
1089  acount = k;
1090  if (k)
1091  {
1092  dest = std::copy_backward (pa+1 - k, pa+1, dest+1) - 1;
1093  idest = std::copy_backward (ipa+1 - k, ipa+1, idest+1) - 1;
1094  pa -= k; ipa -= k;
1095  na -= k;
1096  if (na == 0)
1097  goto Succeed;
1098  }
1099  *dest-- = *pb--; *idest-- = *ipb--;
1100  --nb;
1101  if (nb == 1)
1102  goto CopyA;
1103 
1104  k = gallop_left (*pa, baseb, nb, nb-1, comp);
1105  if (k < 0)
1106  goto Fail;
1107  k = nb - k;
1108  bcount = k;
1109  if (k)
1110  {
1111  dest -= k; idest -= k;
1112  pb -= k; ipb -= k;
1113  std::copy (pb+1, pb+1 + k, dest+1);
1114  std::copy (ipb+1, ipb+1 + k, idest+1);
1115  nb -= k;
1116  if (nb == 1)
1117  goto CopyA;
1118  /* nb==0 is impossible now if the comparison
1119  * function is consistent, but we can't assume
1120  * that it is.
1121  */
1122  if (nb == 0)
1123  goto Succeed;
1124  }
1125  *dest-- = *pa--; *idest-- = *ipa--;
1126  --na;
1127  if (na == 0)
1128  goto Succeed;
1129  }
1130  while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP);
1131  ++min_gallop; /* penalize it for leaving galloping mode */
1132  m_ms->m_min_gallop = min_gallop;
1133  }
1134 
1135 Succeed:
1136  result = 0;
1137 
1138 Fail:
1139  if (nb)
1140  {
1141  std::copy (baseb, baseb + nb, dest-(nb-1));
1142  std::copy (ibaseb, ibaseb + nb, idest-(nb-1));
1143  }
1144  return result;
1145 
1146 CopyA:
1147  /* The first element of pb belongs at the front of the merge. */
1148  dest = std::copy_backward (pa+1 - na, pa+1, dest+1) - 1;
1149  idest = std::copy_backward (ipa+1 - na, ipa+1, idest+1) - 1;
1150  pa -= na; ipa -= na;
1151  *dest = *pb; *idest = *ipb;
1152 
1153  return 0;
1154 }
1155 
1156 /* Merge the two runs at stack indices i and i+1.
1157  * Returns 0 on success, -1 on error.
1158  */
1159 template <typename T>
1160 template <typename Comp>
1161 int
1163  Comp comp)
1164 {
1165  T *pa, *pb;
1166  octave_idx_type na, nb;
1167  octave_idx_type k;
1168 
1169  pa = data + m_ms->m_pending[i].m_base;
1170  na = m_ms->m_pending[i].m_len;
1171  pb = data + m_ms->m_pending[i+1].m_base;
1172  nb = m_ms->m_pending[i+1].m_len;
1173 
1174  /* Record the length of the combined runs; if i is the 3rd-last
1175  * run now, also slide over the last run (which isn't involved
1176  * in this merge). The current run i+1 goes away in any case.
1177  */
1178  m_ms->m_pending[i].m_len = na + nb;
1179  if (i == m_ms->m_n - 3)
1180  m_ms->m_pending[i+1] = m_ms->m_pending[i+2];
1181  m_ms->m_n--;
1182 
1183  /* Where does b start in a? Elements in a before that can be
1184  * ignored (already in place).
1185  */
1186  k = gallop_right (*pb, pa, na, 0, comp);
1187  if (k < 0)
1188  return -1;
1189  pa += k;
1190  na -= k;
1191  if (na == 0)
1192  return 0;
1193 
1194  /* Where does a end in b? Elements in b after that can be
1195  * ignored (already in place).
1196  */
1197  nb = gallop_left (pa[na-1], pb, nb, nb-1, comp);
1198  if (nb <= 0)
1199  return nb;
1200 
1201  /* Merge what remains of the runs, using a temp array with
1202  * min (na, nb) elements.
1203  */
1204  if (na <= nb)
1205  return merge_lo (pa, na, pb, nb, comp);
1206  else
1207  return merge_hi (pa, na, pb, nb, comp);
1208 }
1209 
1210 template <typename T>
1211 template <typename Comp>
1212 int
1214  Comp comp)
1215 {
1216  T *pa, *pb;
1217  octave_idx_type *ipa, *ipb;
1218  octave_idx_type na, nb;
1219  octave_idx_type k;
1220 
1221  pa = data + m_ms->m_pending[i].m_base;
1222  ipa = idx + m_ms->m_pending[i].m_base;
1223  na = m_ms->m_pending[i].m_len;
1224  pb = data + m_ms->m_pending[i+1].m_base;
1225  ipb = idx + m_ms->m_pending[i+1].m_base;
1226  nb = m_ms->m_pending[i+1].m_len;
1227 
1228  /* Record the length of the combined runs; if i is the 3rd-last
1229  * run now, also slide over the last run (which isn't involved
1230  * in this merge). The current run i+1 goes away in any case.
1231  */
1232  m_ms->m_pending[i].m_len = na + nb;
1233  if (i == m_ms->m_n - 3)
1234  m_ms->m_pending[i+1] = m_ms->m_pending[i+2];
1235  m_ms->m_n--;
1236 
1237  /* Where does b start in a? Elements in a before that can be
1238  * ignored (already in place).
1239  */
1240  k = gallop_right (*pb, pa, na, 0, comp);
1241  if (k < 0)
1242  return -1;
1243  pa += k; ipa += k;
1244  na -= k;
1245  if (na == 0)
1246  return 0;
1247 
1248  /* Where does a end in b? Elements in b after that can be
1249  * ignored (already in place).
1250  */
1251  nb = gallop_left (pa[na-1], pb, nb, nb-1, comp);
1252  if (nb <= 0)
1253  return nb;
1254 
1255  /* Merge what remains of the runs, using a temp array with
1256  * min (na, nb) elements.
1257  */
1258  if (na <= nb)
1259  return merge_lo (pa, ipa, na, pb, ipb, nb, comp);
1260  else
1261  return merge_hi (pa, ipa, na, pb, ipb, nb, comp);
1262 }
1263 
1264 /* Examine the stack of runs waiting to be merged, merging adjacent runs
1265  * until the stack invariants are re-established:
1266  *
1267  * 1. len[-3] > len[-2] + len[-1]
1268  * 2. len[-2] > len[-1]
1269  *
1270  * See listsort.txt for more info.
1271  *
1272  * Returns 0 on success, -1 on error.
1273  */
1274 template <typename T>
1275 template <typename Comp>
1276 int
1277 octave_sort<T>::merge_collapse (T *data, Comp comp)
1278 {
1279  struct s_slice *p = m_ms->m_pending;
1280 
1281  while (m_ms->m_n > 1)
1282  {
1283  octave_idx_type n = m_ms->m_n - 2;
1284  if (n > 0 && p[n-1].m_len <= p[n].m_len + p[n+1].m_len)
1285  {
1286  if (p[n-1].m_len < p[n+1].m_len)
1287  --n;
1288  if (merge_at (n, data, comp) < 0)
1289  return -1;
1290  }
1291  else if (p[n].m_len <= p[n+1].m_len)
1292  {
1293  if (merge_at (n, data, comp) < 0)
1294  return -1;
1295  }
1296  else
1297  break;
1298  }
1299 
1300  return 0;
1301 }
1302 
1303 template <typename T>
1304 template <typename Comp>
1305 int
1306 octave_sort<T>::merge_collapse (T *data, octave_idx_type *idx, Comp comp)
1307 {
1308  struct s_slice *p = m_ms->m_pending;
1309 
1310  while (m_ms->m_n > 1)
1311  {
1312  octave_idx_type n = m_ms->m_n - 2;
1313  if (n > 0 && p[n-1].m_len <= p[n].m_len + p[n+1].m_len)
1314  {
1315  if (p[n-1].m_len < p[n+1].m_len)
1316  --n;
1317  if (merge_at (n, data, idx, comp) < 0)
1318  return -1;
1319  }
1320  else if (p[n].m_len <= p[n+1].m_len)
1321  {
1322  if (merge_at (n, data, idx, comp) < 0)
1323  return -1;
1324  }
1325  else
1326  break;
1327  }
1328 
1329  return 0;
1330 }
1331 
1332 /* Regardless of invariants, merge all runs on the stack until only one
1333  * remains. This is used at the end of the mergesort.
1334  *
1335  * Returns 0 on success, -1 on error.
1336  */
1337 template <typename T>
1338 template <typename Comp>
1339 int
1340 octave_sort<T>::merge_force_collapse (T *data, Comp comp)
1341 {
1342  struct s_slice *p = m_ms->m_pending;
1343 
1344  while (m_ms->m_n > 1)
1345  {
1346  octave_idx_type n = m_ms->m_n - 2;
1347  if (n > 0 && p[n-1].m_len < p[n+1].m_len)
1348  --n;
1349  if (merge_at (n, data, comp) < 0)
1350  return -1;
1351  }
1352 
1353  return 0;
1354 }
1355 
1356 template <typename T>
1357 template <typename Comp>
1358 int
1359 octave_sort<T>::merge_force_collapse (T *data, octave_idx_type *idx, Comp comp)
1360 {
1361  struct s_slice *p = m_ms->m_pending;
1362 
1363  while (m_ms->m_n > 1)
1364  {
1365  octave_idx_type n = m_ms->m_n - 2;
1366  if (n > 0 && p[n-1].m_len < p[n+1].m_len)
1367  --n;
1368  if (merge_at (n, data, idx, comp) < 0)
1369  return -1;
1370  }
1371 
1372  return 0;
1373 }
1374 
1375 /* Compute a good value for the minimum run length; natural runs shorter
1376  * than this are boosted artificially via binary insertion.
1377  *
1378  * If n < 64, return n (it's too small to bother with fancy stuff).
1379  * Else if n is an exact power of 2, return 32.
1380  * Else return an int k, 32 <= k <= 64, such that n/k is close to, but
1381  * strictly less than, an exact power of 2.
1382  *
1383  * See listsort.txt for more info.
1384  */
1385 template <typename T>
1388 {
1389  octave_idx_type r = 0; /* becomes 1 if any 1 bits are shifted off */
1390 
1391  while (n >= 64)
1392  {
1393  r |= n & 1;
1394  n >>= 1;
1395  }
1396 
1397  return n + r;
1398 }
1399 
1400 template <typename T>
1401 template <typename Comp>
1402 void
1403 octave_sort<T>::sort (T *data, octave_idx_type nel, Comp comp)
1404 {
1405  /* Re-initialize the Mergestate as this might be the second time called */
1406  if (! m_ms) m_ms = new MergeState;
1407 
1408  m_ms->reset ();
1409  m_ms->getmem (MERGESTATE_TEMP_SIZE);
1410 
1411  if (nel > 1)
1412  {
1413  octave_idx_type nremaining = nel;
1414  octave_idx_type lo = 0;
1415 
1416  /* March over the array once, left to right, finding natural runs,
1417  * and extending short natural runs to minrun elements.
1418  */
1419  octave_idx_type minrun = merge_compute_minrun (nremaining);
1420  do
1421  {
1422  bool descending;
1424 
1425  /* Identify next run. */
1426  n = count_run (data + lo, nremaining, descending, comp);
1427  if (n < 0)
1428  return;
1429  if (descending)
1430  std::reverse (data + lo, data + lo + n);
1431  /* If short, extend to min (minrun, nremaining). */
1432  if (n < minrun)
1433  {
1434  const octave_idx_type force = (nremaining <= minrun ? nremaining
1435  : minrun);
1436  binarysort (data + lo, force, n, comp);
1437  n = force;
1438  }
1439  /* Push run onto m_pending-runs stack, and maybe merge. */
1440  assert (m_ms->m_n < MAX_MERGE_PENDING);
1441  m_ms->m_pending[m_ms->m_n].m_base = lo;
1442  m_ms->m_pending[m_ms->m_n].m_len = n;
1443  m_ms->m_n++;
1444  if (merge_collapse (data, comp) < 0)
1445  return;
1446  /* Advance to find next run. */
1447  lo += n;
1448  nremaining -= n;
1449  }
1450  while (nremaining);
1451 
1452  merge_force_collapse (data, comp);
1453  }
1454 }
1455 
1456 template <typename T>
1457 template <typename Comp>
1458 void
1460  Comp comp)
1461 {
1462  /* Re-initialize the Mergestate as this might be the second time called */
1463  if (! m_ms) m_ms = new MergeState;
1464 
1465  m_ms->reset ();
1466  m_ms->getmemi (MERGESTATE_TEMP_SIZE);
1467 
1468  if (nel > 1)
1469  {
1470  octave_idx_type nremaining = nel;
1471  octave_idx_type lo = 0;
1472 
1473  /* March over the array once, left to right, finding natural runs,
1474  * and extending short natural runs to minrun elements.
1475  */
1476  octave_idx_type minrun = merge_compute_minrun (nremaining);
1477  do
1478  {
1479  bool descending;
1481 
1482  /* Identify next run. */
1483  n = count_run (data + lo, nremaining, descending, comp);
1484  if (n < 0)
1485  return;
1486  if (descending)
1487  {
1488  std::reverse (data + lo, data + lo + n);
1489  std::reverse (idx + lo, idx + lo + n);
1490  }
1491  /* If short, extend to min (minrun, nremaining). */
1492  if (n < minrun)
1493  {
1494  const octave_idx_type force = (nremaining <= minrun ? nremaining
1495  : minrun);
1496  binarysort (data + lo, idx + lo, force, n, comp);
1497  n = force;
1498  }
1499  /* Push run onto m_pending-runs stack, and maybe merge. */
1500  assert (m_ms->m_n < MAX_MERGE_PENDING);
1501  m_ms->m_pending[m_ms->m_n].m_base = lo;
1502  m_ms->m_pending[m_ms->m_n].m_len = n;
1503  m_ms->m_n++;
1504  if (merge_collapse (data, idx, comp) < 0)
1505  return;
1506  /* Advance to find next run. */
1507  lo += n;
1508  nremaining -= n;
1509  }
1510  while (nremaining);
1511 
1512  merge_force_collapse (data, idx, comp);
1513  }
1514 }
1515 
1516 template <typename T>
1517 using compare_fcn_ptr = bool (*) (typename ref_param<T>::type,
1518  typename ref_param<T>::type);
1519 
1520 template <typename T>
1521 void
1523 {
1524 #if defined (INLINE_ASCENDING_SORT)
1525  if (*m_compare.template target<compare_fcn_ptr<T>> () == ascending_compare)
1526  sort (data, nel, std::less<T> ());
1527  else
1528 #endif
1529 #if defined (INLINE_DESCENDING_SORT)
1530  if (*m_compare.template target<compare_fcn_ptr<T>> () == descending_compare)
1531  sort (data, nel, std::greater<T> ());
1532  else
1533 #endif
1534  if (m_compare)
1535  sort (data, nel, m_compare);
1536 }
1537 
1538 template <typename T>
1539 void
1541 {
1542 #if defined (INLINE_ASCENDING_SORT)
1543  if (*m_compare.template target<compare_fcn_ptr<T>> () == ascending_compare)
1544  sort (data, idx, nel, std::less<T> ());
1545  else
1546 #endif
1547 #if defined (INLINE_DESCENDING_SORT)
1548  if (*m_compare.template target<compare_fcn_ptr<T>> () == descending_compare)
1549  sort (data, idx, nel, std::greater<T> ());
1550  else
1551 #endif
1552  if (m_compare)
1553  sort (data, idx, nel, m_compare);
1554 }
1555 
1556 template <typename T>
1557 template <typename Comp>
1558 bool
1559 octave_sort<T>::issorted (const T *data, octave_idx_type nel, Comp comp)
1560 {
1561  const T *end = data + nel;
1562  if (data != end)
1563  {
1564  const T *next = data;
1565  while (++next != end)
1566  {
1567  if (comp (*next, *data))
1568  break;
1569  data = next;
1570  }
1571  data = next;
1572  }
1573 
1574  return data == end;
1575 }
1576 
1577 template <typename T>
1578 bool
1580 {
1581  bool retval = false;
1582 #if defined (INLINE_ASCENDING_SORT)
1583  if (*m_compare.template target<compare_fcn_ptr<T>> () == ascending_compare)
1584  retval = issorted (data, nel, std::less<T> ());
1585  else
1586 #endif
1587 #if defined (INLINE_DESCENDING_SORT)
1588  if (*m_compare.template target<compare_fcn_ptr<T>> () == descending_compare)
1589  retval = issorted (data, nel, std::greater<T> ());
1590  else
1591 #endif
1592  if (m_compare)
1593  retval = issorted (data, nel, m_compare);
1594 
1595  return retval;
1596 }
1597 
1598 struct sortrows_run_t
1599 {
1600 public:
1601  sortrows_run_t (octave_idx_type c, octave_idx_type o, octave_idx_type n)
1602  : col (c), ofs (o), nel (n) { }
1603  //--------
1604  octave_idx_type col, ofs, nel;
1605 };
1606 
1607 template <typename T>
1608 template <typename Comp>
1609 void
1610 octave_sort<T>::sort_rows (const T *data, octave_idx_type *idx,
1611  octave_idx_type rows, octave_idx_type cols,
1612  Comp comp)
1613 {
1614  OCTAVE_LOCAL_BUFFER (T, buf, rows);
1615  for (octave_idx_type i = 0; i < rows; i++)
1616  idx[i] = i;
1617 
1618  if (cols == 0 || rows <= 1)
1619  return;
1620 
1621  // This is a breadth-first traversal.
1622  typedef sortrows_run_t run_t;
1623  std::stack<run_t> runs;
1624 
1625  runs.push (run_t (0, 0, rows));
1626 
1627  while (! runs.empty ())
1628  {
1629  octave_idx_type col = runs.top ().col;
1630  octave_idx_type ofs = runs.top ().ofs;
1631  octave_idx_type nel = runs.top ().nel;
1632  runs.pop ();
1633  assert (nel > 1);
1634 
1635  T *lbuf = buf + ofs;
1636  const T *ldata = data + rows*col;
1637  octave_idx_type *lidx = idx + ofs;
1638 
1639  // Gather.
1640  for (octave_idx_type i = 0; i < nel; i++)
1641  lbuf[i] = ldata[lidx[i]];
1642 
1643  // Sort.
1644  sort (lbuf, lidx, nel, comp);
1645 
1646  // Identify constant runs and schedule subsorts.
1647  if (col < cols-1)
1648  {
1649  octave_idx_type lst = 0;
1650  for (octave_idx_type i = 0; i < nel; i++)
1651  {
1652  if (comp (lbuf[lst], lbuf[i]))
1653  {
1654  if (i > lst + 1)
1655  runs.push (run_t (col+1, ofs + lst, i - lst));
1656  lst = i;
1657  }
1658  }
1659  if (nel > lst + 1)
1660  runs.push (run_t (col+1, ofs + lst, nel - lst));
1661  }
1662  }
1663 }
1664 
1665 template <typename T>
1666 void
1668  octave_idx_type rows, octave_idx_type cols)
1669 {
1670 #if defined (INLINE_ASCENDING_SORT)
1671  if (*m_compare.template target<compare_fcn_ptr<T>> () == ascending_compare)
1672  sort_rows (data, idx, rows, cols, std::less<T> ());
1673  else
1674 #endif
1675 #if defined (INLINE_DESCENDING_SORT)
1676  if (*m_compare.template target<compare_fcn_ptr<T>> () == descending_compare)
1677  sort_rows (data, idx, rows, cols, std::greater<T> ());
1678  else
1679 #endif
1680  if (m_compare)
1681  sort_rows (data, idx, rows, cols, m_compare);
1682 }
1683 
1684 template <typename T>
1685 template <typename Comp>
1686 bool
1687 octave_sort<T>::is_sorted_rows (const T *data, octave_idx_type rows,
1688  octave_idx_type cols, Comp comp)
1689 {
1690  if (rows <= 1 || cols == 0)
1691  return true;
1692 
1693  // This is a breadth-first traversal.
1694  const T *lastrow = data + rows*(cols - 1);
1695  typedef std::pair<const T *, octave_idx_type> run_t;
1696  std::stack<run_t> runs;
1697 
1698  bool sorted = true;
1699  runs.push (run_t (data, rows));
1700  while (sorted && ! runs.empty ())
1701  {
1702  const T *lo = runs.top ().first;
1703  octave_idx_type n = runs.top ().second;
1704  runs.pop ();
1705  if (lo < lastrow)
1706  {
1707  // Not the final column.
1708  assert (n > 1);
1709  const T *hi = lo + n;
1710  const T *lst = lo;
1711  for (lo++; lo < hi; lo++)
1712  {
1713  if (comp (*lst, *lo))
1714  {
1715  if (lo > lst + 1)
1716  runs.push (run_t (lst + rows, lo - lst));
1717  lst = lo;
1718  }
1719  else if (comp (*lo, *lst))
1720  break;
1721 
1722  }
1723  if (lo == hi)
1724  {
1725  if (lo > lst + 1)
1726  runs.push (run_t (lst + rows, lo - lst));
1727  }
1728  else
1729  {
1730  sorted = false;
1731  break;
1732  }
1733  }
1734  else
1735  // The final column - use fast code.
1736  sorted = issorted (lo, n, comp);
1737  }
1738 
1739  return sorted;
1740 }
1741 
1742 template <typename T>
1743 bool
1745  octave_idx_type cols)
1746 {
1747  bool retval = false;
1748 #if defined (INLINE_ASCENDING_SORT)
1749  if (*m_compare.template target<compare_fcn_ptr<T>> () == ascending_compare)
1750  retval = is_sorted_rows (data, rows, cols, std::less<T> ());
1751  else
1752 #endif
1753 #if defined (INLINE_DESCENDING_SORT)
1754  if (*m_compare.template target<compare_fcn_ptr<T>> () == descending_compare)
1755  retval = is_sorted_rows (data, rows, cols, std::greater<T> ());
1756  else
1757 #endif
1758  if (m_compare)
1759  retval = is_sorted_rows (data, rows, cols, m_compare);
1760 
1761  return retval;
1762 }
1763 
1764 // The simple binary lookup.
1765 
1766 template <typename T>
1767 template <typename Comp>
1769 octave_sort<T>::lookup (const T *data, octave_idx_type nel,
1770  const T& value, Comp comp)
1771 {
1772  octave_idx_type lo = 0;
1773  octave_idx_type hi = nel;
1774 
1775  while (lo < hi)
1776  {
1777  octave_idx_type mid = lo + ((hi-lo) >> 1);
1778  if (comp (value, data[mid]))
1779  hi = mid;
1780  else
1781  lo = mid + 1;
1782  }
1783 
1784  return lo;
1785 }
1786 
1787 template <typename T>
1790  const T& value)
1791 {
1792  octave_idx_type retval = 0;
1793 #if defined (INLINE_ASCENDING_SORT)
1794  if (*m_compare.template target<compare_fcn_ptr<T>> () == ascending_compare)
1795  retval = lookup (data, nel, value, std::less<T> ());
1796  else
1797 #endif
1798 #if defined (INLINE_DESCENDING_SORT)
1799  if (*m_compare.template target<compare_fcn_ptr<T>> () == descending_compare)
1800  retval = lookup (data, nel, value, std::greater<T> ());
1801  else
1802 #endif
1803  if (m_compare)
1804  retval = lookup (data, nel, value, m_compare);
1805 
1806  return retval;
1807 }
1808 
1809 template <typename T>
1810 template <typename Comp>
1811 void
1812 octave_sort<T>::lookup (const T *data, octave_idx_type nel,
1813  const T *values, octave_idx_type nvalues,
1814  octave_idx_type *idx, Comp comp)
1815 {
1816  // Use a sequence of binary lookups.
1817  // FIXME: Can this be sped up generally? The sorted merge case is dealt with
1818  // elsewhere.
1819  for (octave_idx_type j = 0; j < nvalues; j++)
1820  idx[j] = lookup (data, nel, values[j], comp);
1821 }
1822 
1823 template <typename T>
1824 void
1826  const T *values, octave_idx_type nvalues,
1827  octave_idx_type *idx)
1828 {
1829 #if defined (INLINE_ASCENDING_SORT)
1830  if (*m_compare.template target<compare_fcn_ptr<T>> () == ascending_compare)
1831  lookup (data, nel, values, nvalues, idx, std::less<T> ());
1832  else
1833 #endif
1834 #if defined (INLINE_DESCENDING_SORT)
1835  if (*m_compare.template target<compare_fcn_ptr<T>> () == descending_compare)
1836  lookup (data, nel, values, nvalues, idx, std::greater<T> ());
1837  else
1838 #endif
1839  if (m_compare)
1840  lookup (data, nel, values, nvalues, idx, m_compare);
1841 }
1842 
1843 template <typename T>
1844 template <typename Comp>
1845 void
1847  const T *values, octave_idx_type nvalues,
1848  octave_idx_type *idx, bool rev, Comp comp)
1849 {
1850  if (rev)
1851  {
1852  octave_idx_type i = 0;
1853  octave_idx_type j = nvalues - 1;
1854 
1855  if (nvalues > 0 && nel > 0)
1856  {
1857  while (true)
1858  {
1859  if (comp (values[j], data[i]))
1860  {
1861  idx[j] = i;
1862  if (--j < 0)
1863  break;
1864  }
1865  else if (++i == nel)
1866  break;
1867  }
1868  }
1869 
1870  for (; j >= 0; j--)
1871  idx[j] = i;
1872  }
1873  else
1874  {
1875  octave_idx_type i = 0;
1876  octave_idx_type j = 0;
1877 
1878  if (nvalues > 0 && nel > 0)
1879  {
1880  while (true)
1881  {
1882  if (comp (values[j], data[i]))
1883  {
1884  idx[j] = i;
1885  if (++j == nvalues)
1886  break;
1887  }
1888  else if (++i == nel)
1889  break;
1890  }
1891  }
1892 
1893  for (; j != nvalues; j++)
1894  idx[j] = i;
1895  }
1896 }
1897 
1898 template <typename T>
1899 void
1901  const T *values, octave_idx_type nvalues,
1902  octave_idx_type *idx, bool rev)
1903 {
1904 #if defined (INLINE_ASCENDING_SORT)
1905  if (*m_compare.template target<compare_fcn_ptr<T>> () == ascending_compare)
1906  lookup_sorted (data, nel, values, nvalues, idx, rev, std::less<T> ());
1907  else
1908 #endif
1909 #if defined (INLINE_DESCENDING_SORT)
1910  if (*m_compare.template target<compare_fcn_ptr<T>> () == descending_compare)
1911  lookup_sorted (data, nel, values, nvalues, idx, rev, std::greater<T> ());
1912  else
1913 #endif
1914  if (m_compare)
1915  lookup_sorted (data, nel, values, nvalues, idx, rev, m_compare);
1916 }
1917 
1918 template <typename T>
1919 template <typename Comp>
1920 void
1923  Comp comp)
1924 {
1925  // Simply wrap the STL algorithms.
1926  // FIXME: this will fail if we attempt to inline <,> for Complex.
1927  if (up == lo+1)
1928  std::nth_element (data, data + lo, data + nel, comp);
1929  else if (lo == 0)
1930  std::partial_sort (data, data + up, data + nel, comp);
1931  else
1932  {
1933  std::nth_element (data, data + lo, data + nel, comp);
1934  if (up == lo + 2)
1935  {
1936  // Finding two subsequent elements.
1937  std::swap (data[lo+1],
1938  *std::min_element (data + lo + 1, data + nel, comp));
1939  }
1940  else
1941  std::partial_sort (data + lo + 1, data + up, data + nel, comp);
1942  }
1943 }
1944 
1945 template <typename T>
1946 void
1949 {
1950  if (up < 0)
1951  up = lo + 1;
1952 
1953 #if defined (INLINE_ASCENDING_SORT)
1954  if (*m_compare.template target<compare_fcn_ptr<T>> () == ascending_compare)
1955  nth_element (data, nel, lo, up, std::less<T> ());
1956  else
1957 #endif
1958 #if defined (INLINE_DESCENDING_SORT)
1959  if (*m_compare.template target<compare_fcn_ptr<T>> () == descending_compare)
1960  nth_element (data, nel, lo, up, std::greater<T> ());
1961  else
1962 #endif
1963  if (m_compare)
1964  nth_element (data, nel, lo, up, m_compare);
1965 }
1966 
1967 template <typename T>
1968 bool
1970  typename ref_param<T>::type y)
1971 {
1972  return x < y;
1973 }
1974 
1975 template <typename T>
1976 bool
1978  typename ref_param<T>::type y)
1979 {
1980  return x > y;
1981 }
octave_idx_type lookup(const T *x, octave_idx_type n, T y)
charNDArray max(char d, const charNDArray &m)
Definition: chNDArray.cc:230
void set_compare(const compare_fcn_type &comp)
Definition: oct-sort.h:117
void sort_rows(const T *data, octave_idx_type *idx, octave_idx_type rows, octave_idx_type cols)
Definition: oct-sort.cc:1667
static bool descending_compare(typename ref_param< T >::type, typename ref_param< T >::type)
Definition: oct-sort.cc:1977
bool issorted(const T *data, octave_idx_type nel)
Definition: oct-sort.cc:1579
std::function< bool(typename ref_param< T >::type, typename ref_param< T >::type)> compare_fcn_type
Definition: oct-sort.h:107
static bool ascending_compare(typename ref_param< T >::type, typename ref_param< T >::type)
Definition: oct-sort.cc:1969
octave_idx_type lookup(const T *data, octave_idx_type nel, const T &value)
Definition: oct-sort.cc:1789
void sort(T *data, octave_idx_type nel)
Definition: oct-sort.cc:1522
void nth_element(T *data, octave_idx_type nel, octave_idx_type lo, octave_idx_type up=-1)
Definition: oct-sort.cc:1947
bool is_sorted_rows(const T *data, octave_idx_type rows, octave_idx_type cols)
Definition: oct-sort.cc:1744
void lookup_sorted(const T *data, octave_idx_type nel, const T *values, octave_idx_type nvalues, octave_idx_type *idx, bool rev=false)
Definition: oct-sort.cc:1900
if_then_else< is_class_type< T >::no, T, T const & >::result type
Definition: lo-traits.h:121
F77_RET_T const F77_DBLE * x
T octave_idx_type m
Definition: mx-inlines.cc:781
octave_idx_type n
Definition: mx-inlines.cc:761
T * r
Definition: mx-inlines.cc:781
#define OCTAVE_LOCAL_BUFFER(T, buf, size)
Definition: oct-locbuf.h:44
bool(*)(typename ref_param< T >::type, typename ref_param< T >::type) compare_fcn_ptr
Definition: oct-sort.cc:1518
sortmode
Definition: oct-sort.h:97
@ ASCENDING
Definition: oct-sort.h:97
@ DESCENDING
Definition: oct-sort.h:97