GNU Octave  4.0.0
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
oct-sort.cc
Go to the documentation of this file.
1 /*
2 
3 Copyright (C) 2003-2015 David Bateman
4 Copyright (C) 2008-2009 Jaroslav Hajek
5 Copyright (C) 2009-2010 VZLU Prague
6 
7 This file is part of Octave.
8 
9 Octave is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 3 of the License, or (at your
12 option) any later version.
13 
14 Octave is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18 
19 You should have received a copy of the GNU General Public License
20 along with Octave; see the file COPYING. If not, see
21 <http://www.gnu.org/licenses/>.
22 
23 Code stolen in large part from Python's, listobject.c, which itself had
24 no license header. However, thanks to Tim Peters for the parts of the
25 code I ripped-off.
26 
27 As required in the Python license the short description of the changes
28 made are
29 
30 * convert the sorting code in listobject.cc into a generic class,
31  replacing PyObject* with the type of the class T.
32 
33 * replaced usages of malloc, free, memcpy and memmove by standard C++
34  new [], delete [] and std::copy and std::copy_backward. Note that replacing
35  memmove by std::copy is possible if the destination starts before the source.
36  If not, std::copy_backward needs to be used.
37 
38 * templatize comparison operator in most methods, provide possible dispatch
39 
40 * duplicate methods to avoid by-the-way indexed sorting
41 
42 * add methods for verifying sortedness of array
43 
44 * row sorting via breadth-first tree subsorting
45 
46 * binary lookup and sequential binary lookup optimized for dense downsampling.
47 
48 * NOTE: the memory management routines rely on the fact that delete [] silently
49  ignores null pointers. Don't gripe about the missing checks - they're there.
50 
51 
52 The Python license is
53 
54  PSF LICENSE AGREEMENT FOR PYTHON 2.3
55  --------------------------------------
56 
57  1. This LICENSE AGREEMENT is between the Python Software Foundation
58  ("PSF"), and the Individual or Organization ("Licensee") accessing and
59  otherwise using Python 2.3 software in source or binary form and its
60  associated documentation.
61 
62  2. Subject to the terms and conditions of this License Agreement, PSF
63  hereby grants Licensee a nonexclusive, royalty-free, world-wide
64  license to reproduce, analyze, test, perform and/or display publicly,
65  prepare derivative works, distribute, and otherwise use Python 2.3
66  alone or in any derivative version, provided, however, that PSF's
67  License Agreement and PSF's notice of copyright, i.e., "Copyright (c)
68  2001, 2002, 2003 Python Software Foundation; All Rights Reserved" are
69  retained in Python 2.3 alone or in any derivative version prepared by
70  Licensee.
71 
72  3. In the event Licensee prepares a derivative work that is based on
73  or incorporates Python 2.3 or any part thereof, and wants to make
74  the derivative work available to others as provided herein, then
75  Licensee hereby agrees to include in any such work a brief summary of
76  the changes made to Python 2.3.
77 
78  4. PSF is making Python 2.3 available to Licensee on an "AS IS"
79  basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
80  IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND
81  DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
82  FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 2.3 WILL NOT
83  INFRINGE ANY THIRD PARTY RIGHTS.
84 
85  5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
86  2.3 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
87  A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 2.3,
88  OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
89 
90  6. This License Agreement will automatically terminate upon a material
91  breach of its terms and conditions.
92 
93  7. Nothing in this License Agreement shall be deemed to create any
94  relationship of agency, partnership, or joint venture between PSF and
95  Licensee. This License Agreement does not grant permission to use PSF
96  trademarks or trade name in a trademark sense to endorse or promote
97  products or services of Licensee, or any third party.
98 
99  8. By copying, installing or otherwise using Python 2.3, Licensee
100  agrees to be bound by the terms and conditions of this License
101  Agreement.
102 */
103 
104 #ifdef HAVE_CONFIG_H
105 #include <config.h>
106 #endif
107 
108 #include <cassert>
109 #include <algorithm>
110 #include <functional>
111 #include <cstring>
112 #include <stack>
113 
114 #include "lo-mappers.h"
115 #include "quit.h"
116 #include "oct-sort.h"
117 #include "oct-locbuf.h"
118 
119 template <class T>
121  compare (ascending_compare), ms (0)
122 {
123 }
124 
125 template <class T>
126 octave_sort<T>::octave_sort (compare_fcn_type comp)
127  : compare (comp), ms (0)
128 {
129 }
130 
131 template <class T>
133 {
134  delete ms;
135 }
136 
137 template <class T>
138 void
140 {
141  if (mode == ASCENDING)
142  compare = ascending_compare;
143  else if (mode == DESCENDING)
144  compare = descending_compare;
145  else
146  compare = 0;
147 }
148 
149 template <class T>
150 template <class Comp>
151 void
153  octave_idx_type start, Comp comp)
154 {
155  if (start == 0)
156  ++start;
157 
158  for (; start < nel; ++start)
159  {
160  /* set l to where *start belongs */
161  octave_idx_type l = 0;
162  octave_idx_type r = start;
163  T pivot = data[start];
164  /* Invariants:
165  * pivot >= all in [lo, l).
166  * pivot < all in [r, start).
167  * The second is vacuously true at the start.
168  */
169  do
170  {
171  octave_idx_type p = l + ((r - l) >> 1);
172  if (comp (pivot, data[p]))
173  r = p;
174  else
175  l = p+1;
176  }
177  while (l < r);
178  /* The invariants still hold, so pivot >= all in [lo, l) and
179  pivot < all in [l, start), so pivot belongs at l. Note
180  that if there are elements equal to pivot, l points to the
181  first slot after them -- that's why this sort is stable.
182  Slide over to make room.
183  Caution: using memmove is much slower under MSVC 5;
184  we're not usually moving many slots. */
185  // NOTE: using swap and going upwards appears to be faster.
186  for (octave_idx_type p = l; p < start; p++)
187  std::swap (pivot, data[p]);
188  data[start] = pivot;
189  }
190 
191  return;
192 }
193 
194 template <class T>
195 template <class Comp>
196 void
198  octave_idx_type start, Comp comp)
199 {
200  if (start == 0)
201  ++start;
202 
203  for (; start < nel; ++start)
204  {
205  /* set l to where *start belongs */
206  octave_idx_type l = 0;
207  octave_idx_type r = start;
208  T pivot = data[start];
209  /* Invariants:
210  * pivot >= all in [lo, l).
211  * pivot < all in [r, start).
212  * The second is vacuously true at the start.
213  */
214  do
215  {
216  octave_idx_type p = l + ((r - l) >> 1);
217  if (comp (pivot, data[p]))
218  r = p;
219  else
220  l = p+1;
221  }
222  while (l < r);
223  /* The invariants still hold, so pivot >= all in [lo, l) and
224  pivot < all in [l, start), so pivot belongs at l. Note
225  that if there are elements equal to pivot, l points to the
226  first slot after them -- that's why this sort is stable.
227  Slide over to make room.
228  Caution: using memmove is much slower under MSVC 5;
229  we're not usually moving many slots. */
230  // NOTE: using swap and going upwards appears to be faster.
231  for (octave_idx_type p = l; p < start; p++)
232  std::swap (pivot, data[p]);
233  data[start] = pivot;
234  octave_idx_type ipivot = idx[start];
235  for (octave_idx_type p = l; p < start; p++)
236  std::swap (ipivot, idx[p]);
237  idx[start] = ipivot;
238  }
239 
240  return;
241 }
242 
243 /*
244 Return the length of the run beginning at lo, in the slice [lo, hi). lo < hi
245 is required on entry. "A run" is the longest ascending sequence, with
246 
247  lo[0] <= lo[1] <= lo[2] <= ...
248 
249 or the longest descending sequence, with
250 
251  lo[0] > lo[1] > lo[2] > ...
252 
253 DESCENDING is set to false in the former case, or to true in the latter.
254 For its intended use in a stable mergesort, the strictness of the defn of
255 "descending" is needed so that the caller can safely reverse a descending
256 sequence without violating stability (strict > ensures there are no equal
257 elements to get out of order).
258 
259 Returns -1 in case of error.
260 */
261 template <class T>
262 template <class Comp>
264 octave_sort<T>::count_run (T *lo, octave_idx_type nel, bool& descending,
265  Comp comp)
266 {
267  octave_idx_type n;
268  T *hi = lo + nel;
269 
270  descending = false;
271  ++lo;
272  if (lo == hi)
273  return 1;
274 
275  n = 2;
276 
277  if (comp (*lo, *(lo-1)))
278  {
279  descending = true;
280  for (lo = lo+1; lo < hi; ++lo, ++n)
281  {
282  if (comp (*lo, *(lo-1)))
283  ;
284  else
285  break;
286  }
287  }
288  else
289  {
290  for (lo = lo+1; lo < hi; ++lo, ++n)
291  {
292  if (comp (*lo, *(lo-1)))
293  break;
294  }
295  }
296 
297  return n;
298 }
299 
300 /*
301 Locate the proper position of key in a sorted vector; if the vector contains
302 an element equal to key, return the position immediately to the left of
303 the leftmost equal element. [gallop_right() does the same except returns
304 the position to the right of the rightmost equal element (if any).]
305 
306 "a" is a sorted vector with n elements, starting at a[0]. n must be > 0.
307 
308 "hint" is an index at which to begin the search, 0 <= hint < n. The closer
309 hint is to the final result, the faster this runs.
310 
311 The return value is the int k in 0..n such that
312 
313  a[k-1] < key <= a[k]
314 
315 pretending that *(a-1) is minus infinity and a[n] is plus infinity. IOW,
316 key belongs at index k; or, IOW, the first k elements of a should precede
317 key, and the last n-k should follow key.
318 
319 Returns -1 on error. See listsort.txt for info on the method.
320 */
321 template <class T>
322 template <class Comp>
325  octave_idx_type hint,
326  Comp comp)
327 {
328  octave_idx_type ofs;
329  octave_idx_type lastofs;
330  octave_idx_type k;
331 
332  a += hint;
333  lastofs = 0;
334  ofs = 1;
335  if (comp (*a, key))
336  {
337  /* a[hint] < key -- gallop right, until
338  * a[hint + lastofs] < key <= a[hint + ofs]
339  */
340  const octave_idx_type maxofs = n - hint; /* &a[n-1] is highest */
341  while (ofs < maxofs)
342  {
343  if (comp (a[ofs], key))
344  {
345  lastofs = ofs;
346  ofs = (ofs << 1) + 1;
347  if (ofs <= 0) /* int overflow */
348  ofs = maxofs;
349  }
350  else /* key <= a[hint + ofs] */
351  break;
352  }
353  if (ofs > maxofs)
354  ofs = maxofs;
355  /* Translate back to offsets relative to &a[0]. */
356  lastofs += hint;
357  ofs += hint;
358  }
359  else
360  {
361  /* key <= a[hint] -- gallop left, until
362  * a[hint - ofs] < key <= a[hint - lastofs]
363  */
364  const octave_idx_type maxofs = hint + 1; /* &a[0] is lowest */
365  while (ofs < maxofs)
366  {
367  if (comp (*(a-ofs), key))
368  break;
369  /* key <= a[hint - ofs] */
370  lastofs = ofs;
371  ofs = (ofs << 1) + 1;
372  if (ofs <= 0) /* int overflow */
373  ofs = maxofs;
374  }
375  if (ofs > maxofs)
376  ofs = maxofs;
377  /* Translate back to positive offsets relative to &a[0]. */
378  k = lastofs;
379  lastofs = hint - ofs;
380  ofs = hint - k;
381  }
382  a -= hint;
383 
384  /* Now a[lastofs] < key <= a[ofs], so key belongs somewhere to the
385  * right of lastofs but no farther right than ofs. Do a binary
386  * search, with invariant a[lastofs-1] < key <= a[ofs].
387  */
388  ++lastofs;
389  while (lastofs < ofs)
390  {
391  octave_idx_type m = lastofs + ((ofs - lastofs) >> 1);
392 
393  if (comp (a[m], key))
394  lastofs = m+1; /* a[m] < key */
395  else
396  ofs = m; /* key <= a[m] */
397  }
398 
399  return ofs;
400 }
401 
402 /*
403 Exactly like gallop_left(), except that if key already exists in a[0:n],
404 finds the position immediately to the right of the rightmost equal value.
405 
406 The return value is the int k in 0..n such that
407 
408  a[k-1] <= key < a[k]
409 
410 or -1 if error.
411 
412 The code duplication is massive, but this is enough different given that
413 we're sticking to "<" comparisons that it's much harder to follow if
414 written as one routine with yet another "left or right?" flag.
415 */
416 template <class T>
417 template <class Comp>
420  octave_idx_type hint,
421  Comp comp)
422 {
423  octave_idx_type ofs;
424  octave_idx_type lastofs;
425  octave_idx_type k;
426 
427  a += hint;
428  lastofs = 0;
429  ofs = 1;
430  if (comp (key, *a))
431  {
432  /* key < a[hint] -- gallop left, until
433  * a[hint - ofs] <= key < a[hint - lastofs]
434  */
435  const octave_idx_type maxofs = hint + 1; /* &a[0] is lowest */
436  while (ofs < maxofs)
437  {
438  if (comp (key, *(a-ofs)))
439  {
440  lastofs = ofs;
441  ofs = (ofs << 1) + 1;
442  if (ofs <= 0) /* int overflow */
443  ofs = maxofs;
444  }
445  else /* a[hint - ofs] <= key */
446  break;
447  }
448  if (ofs > maxofs)
449  ofs = maxofs;
450  /* Translate back to positive offsets relative to &a[0]. */
451  k = lastofs;
452  lastofs = hint - ofs;
453  ofs = hint - k;
454  }
455  else
456  {
457  /* a[hint] <= key -- gallop right, until
458  * a[hint + lastofs] <= key < a[hint + ofs]
459  */
460  const octave_idx_type maxofs = n - hint; /* &a[n-1] is highest */
461  while (ofs < maxofs)
462  {
463  if (comp (key, a[ofs]))
464  break;
465  /* a[hint + ofs] <= key */
466  lastofs = ofs;
467  ofs = (ofs << 1) + 1;
468  if (ofs <= 0) /* int overflow */
469  ofs = maxofs;
470  }
471  if (ofs > maxofs)
472  ofs = maxofs;
473  /* Translate back to offsets relative to &a[0]. */
474  lastofs += hint;
475  ofs += hint;
476  }
477  a -= hint;
478 
479  /* Now a[lastofs] <= key < a[ofs], so key belongs somewhere to the
480  * right of lastofs but no farther right than ofs. Do a binary
481  * search, with invariant a[lastofs-1] <= key < a[ofs].
482  */
483  ++lastofs;
484  while (lastofs < ofs)
485  {
486  octave_idx_type m = lastofs + ((ofs - lastofs) >> 1);
487 
488  if (comp (key, a[m]))
489  ofs = m; /* key < a[m] */
490  else
491  lastofs = m+1; /* a[m] <= key */
492  }
493 
494  return ofs;
495 }
496 
497 static inline octave_idx_type
499 {
500  unsigned int nbits = 3;
501  octave_idx_type n2 = static_cast<octave_idx_type> (n) >> 8;
502 
503  /* Round up:
504  * If n < 256, to a multiple of 8.
505  * If n < 2048, to a multiple of 64.
506  * If n < 16384, to a multiple of 512.
507  * If n < 131072, to a multiple of 4096.
508  * If n < 1048576, to a multiple of 32768.
509  * If n < 8388608, to a multiple of 262144.
510  * If n < 67108864, to a multiple of 2097152.
511  * If n < 536870912, to a multiple of 16777216.
512  * ...
513  * If n < 2**(5+3*i), to a multiple of 2**(3*i).
514  *
515  * This over-allocates proportional to the list size, making room
516  * for additional growth. The over-allocation is mild, but is
517  * enough to give linear-time amortized behavior over a long
518  * sequence of appends() in the presence of a poorly-performing
519  * system realloc() (which is a reality, e.g., across all flavors
520  * of Windows, with Win9x behavior being particularly bad -- and
521  * we've still got address space fragmentation problems on Win9x
522  * even with this scheme, although it requires much longer lists to
523  * provoke them than it used to).
524  */
525  while (n2)
526  {
527  n2 >>= 3;
528  nbits += 3;
529  }
530 
531  return ((n >> nbits) + 1) << nbits;
532 }
533 
534 /* Ensure enough temp memory for 'need' array slots is available.
535  * Returns 0 on success and -1 if the memory can't be gotten.
536  */
537 template <class T>
538 void
540 {
541  if (need <= alloced)
542  return;
543 
544  need = roundupsize (need);
545  /* Don't realloc! That can cost cycles to copy the old data, but
546  * we don't care what's in the block.
547  */
548  delete [] a;
549  delete [] ia; // Must do this or fool possible next getmemi.
550  a = new T [need];
551  alloced = need;
552 
553 }
554 
555 template <class T>
556 void
558 {
559  if (ia && need <= alloced)
560  return;
561 
562  need = roundupsize (need);
563  /* Don't realloc! That can cost cycles to copy the old data, but
564  * we don't care what's in the block.
565  */
566  delete [] a;
567  delete [] ia;
568 
569  a = new T [need];
570  ia = new octave_idx_type [need];
571  alloced = need;
572 }
573 
574 /* Merge the na elements starting at pa with the nb elements starting at pb
575  * in a stable way, in-place. na and nb must be > 0, and pa + na == pb.
576  * Must also have that *pb < *pa, that pa[na-1] belongs at the end of the
577  * merge, and should have na <= nb. See listsort.txt for more info.
578  * Return 0 if successful, -1 if error.
579  */
580 template <class T>
581 template <class Comp>
582 int
584  T *pb, octave_idx_type nb,
585  Comp comp)
586 {
587  octave_idx_type k;
588  T *dest;
589  int result = -1; /* guilty until proved innocent */
590  octave_idx_type min_gallop = ms->min_gallop;
591 
592  ms->getmem (na);
593 
594  std::copy (pa, pa + na, ms->a);
595  dest = pa;
596  pa = ms->a;
597 
598  *dest++ = *pb++;
599  --nb;
600  if (nb == 0)
601  goto Succeed;
602  if (na == 1)
603  goto CopyB;
604 
605  for (;;)
606  {
607  octave_idx_type acount = 0; /* # of times A won in a row */
608  octave_idx_type bcount = 0; /* # of times B won in a row */
609 
610  /* Do the straightforward thing until (if ever) one run
611  * appears to win consistently.
612  */
613  for (;;)
614  {
615 
616  // FIXME: these loops are candidates for further optimizations.
617  // Rather than testing everything in each cycle, it may be more
618  // efficient to do it in hunks.
619  if (comp (*pb, *pa))
620  {
621  *dest++ = *pb++;
622  ++bcount;
623  acount = 0;
624  --nb;
625  if (nb == 0)
626  goto Succeed;
627  if (bcount >= min_gallop)
628  break;
629  }
630  else
631  {
632  *dest++ = *pa++;
633  ++acount;
634  bcount = 0;
635  --na;
636  if (na == 1)
637  goto CopyB;
638  if (acount >= min_gallop)
639  break;
640  }
641  }
642 
643  /* One run is winning so consistently that galloping may
644  * be a huge win. So try that, and continue galloping until
645  * (if ever) neither run appears to be winning consistently
646  * anymore.
647  */
648  ++min_gallop;
649  do
650  {
651  min_gallop -= min_gallop > 1;
652  ms->min_gallop = min_gallop;
653  k = gallop_right (*pb, pa, na, 0, comp);
654  acount = k;
655  if (k)
656  {
657  if (k < 0)
658  goto Fail;
659  dest = std::copy (pa, pa + k, dest);
660  pa += k;
661  na -= k;
662  if (na == 1)
663  goto CopyB;
664  /* na==0 is impossible now if the comparison
665  * function is consistent, but we can't assume
666  * that it is.
667  */
668  if (na == 0)
669  goto Succeed;
670  }
671  *dest++ = *pb++;
672  --nb;
673  if (nb == 0)
674  goto Succeed;
675 
676  k = gallop_left (*pa, pb, nb, 0, comp);
677  bcount = k;
678  if (k)
679  {
680  if (k < 0)
681  goto Fail;
682  dest = std::copy (pb, pb + k, dest);
683  pb += k;
684  nb -= k;
685  if (nb == 0)
686  goto Succeed;
687  }
688  *dest++ = *pa++;
689  --na;
690  if (na == 1)
691  goto CopyB;
692  }
693  while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP);
694 
695  ++min_gallop; /* penalize it for leaving galloping mode */
696  ms->min_gallop = min_gallop;
697  }
698 
699 Succeed:
700  result = 0;
701 
702 Fail:
703  if (na)
704  std::copy (pa, pa + na, dest);
705  return result;
706 
707 CopyB:
708  /* The last element of pa belongs at the end of the merge. */
709  std::copy (pb, pb + nb, dest);
710  dest[nb] = *pa;
711 
712  return 0;
713 }
714 
715 template <class T>
716 template <class Comp>
717 int
719  T *pb, octave_idx_type *ipb, octave_idx_type nb,
720  Comp comp)
721 {
722  octave_idx_type k;
723  T *dest;
724  octave_idx_type *idest;
725  int result = -1; /* guilty until proved innocent */
726  octave_idx_type min_gallop = ms->min_gallop;
727 
728  ms->getmemi (na);
729 
730  std::copy (pa, pa + na, ms->a);
731  std::copy (ipa, ipa + na, ms->ia);
732  dest = pa; idest = ipa;
733  pa = ms->a; ipa = ms->ia;
734 
735  *dest++ = *pb++; *idest++ = *ipb++;
736  --nb;
737  if (nb == 0)
738  goto Succeed;
739  if (na == 1)
740  goto CopyB;
741 
742  for (;;)
743  {
744  octave_idx_type acount = 0; /* # of times A won in a row */
745  octave_idx_type bcount = 0; /* # of times B won in a row */
746 
747  /* Do the straightforward thing until (if ever) one run
748  * appears to win consistently.
749  */
750  for (;;)
751  {
752 
753  if (comp (*pb, *pa))
754  {
755  *dest++ = *pb++; *idest++ = *ipb++;
756  ++bcount;
757  acount = 0;
758  --nb;
759  if (nb == 0)
760  goto Succeed;
761  if (bcount >= min_gallop)
762  break;
763  }
764  else
765  {
766  *dest++ = *pa++; *idest++ = *ipa++;
767  ++acount;
768  bcount = 0;
769  --na;
770  if (na == 1)
771  goto CopyB;
772  if (acount >= min_gallop)
773  break;
774  }
775  }
776 
777  /* One run is winning so consistently that galloping may
778  * be a huge win. So try that, and continue galloping until
779  * (if ever) neither run appears to be winning consistently
780  * anymore.
781  */
782  ++min_gallop;
783  do
784  {
785  min_gallop -= min_gallop > 1;
786  ms->min_gallop = min_gallop;
787  k = gallop_right (*pb, pa, na, 0, comp);
788  acount = k;
789  if (k)
790  {
791  if (k < 0)
792  goto Fail;
793  dest = std::copy (pa, pa + k, dest);
794  idest = std::copy (ipa, ipa + k, idest);
795  pa += k; ipa += k;
796  na -= k;
797  if (na == 1)
798  goto CopyB;
799  /* na==0 is impossible now if the comparison
800  * function is consistent, but we can't assume
801  * that it is.
802  */
803  if (na == 0)
804  goto Succeed;
805  }
806  *dest++ = *pb++; *idest++ = *ipb++;
807  --nb;
808  if (nb == 0)
809  goto Succeed;
810 
811  k = gallop_left (*pa, pb, nb, 0, comp);
812  bcount = k;
813  if (k)
814  {
815  if (k < 0)
816  goto Fail;
817  dest = std::copy (pb, pb + k, dest);
818  idest = std::copy (ipb, ipb + k, idest);
819  pb += k; ipb += k;
820  nb -= k;
821  if (nb == 0)
822  goto Succeed;
823  }
824  *dest++ = *pa++; *idest++ = *ipa++;
825  --na;
826  if (na == 1)
827  goto CopyB;
828  }
829  while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP);
830 
831  ++min_gallop; /* penalize it for leaving galloping mode */
832  ms->min_gallop = min_gallop;
833  }
834 
835 Succeed:
836  result = 0;
837 
838 Fail:
839  if (na)
840  {
841  std::copy (pa, pa + na, dest);
842  std::copy (ipa, ipa + na, idest);
843  }
844  return result;
845 
846 CopyB:
847  /* The last element of pa belongs at the end of the merge. */
848  std::copy (pb, pb + nb, dest);
849  std::copy (ipb, ipb + nb, idest);
850  dest[nb] = *pa;
851  idest[nb] = *ipa;
852 
853  return 0;
854 }
855 
856 /* Merge the na elements starting at pa with the nb elements starting at pb
857  * in a stable way, in-place. na and nb must be > 0, and pa + na == pb.
858  * Must also have that *pb < *pa, that pa[na-1] belongs at the end of the
859  * merge, and should have na >= nb. See listsort.txt for more info.
860  * Return 0 if successful, -1 if error.
861  */
862 template <class T>
863 template <class Comp>
864 int
866  T *pb, octave_idx_type nb,
867  Comp comp)
868 {
869  octave_idx_type k;
870  T *dest;
871  int result = -1; /* guilty until proved innocent */
872  T *basea, *baseb;
873  octave_idx_type min_gallop = ms->min_gallop;
874 
875  ms->getmem (nb);
876 
877  dest = pb + nb - 1;
878  std::copy (pb, pb + nb, ms->a);
879  basea = pa;
880  baseb = ms->a;
881  pb = ms->a + nb - 1;
882  pa += na - 1;
883 
884  *dest-- = *pa--;
885  --na;
886  if (na == 0)
887  goto Succeed;
888  if (nb == 1)
889  goto CopyA;
890 
891  for (;;)
892  {
893  octave_idx_type acount = 0; /* # of times A won in a row */
894  octave_idx_type bcount = 0; /* # of times B won in a row */
895 
896  /* Do the straightforward thing until (if ever) one run
897  * appears to win consistently.
898  */
899  for (;;)
900  {
901  if (comp (*pb, *pa))
902  {
903  *dest-- = *pa--;
904  ++acount;
905  bcount = 0;
906  --na;
907  if (na == 0)
908  goto Succeed;
909  if (acount >= min_gallop)
910  break;
911  }
912  else
913  {
914  *dest-- = *pb--;
915  ++bcount;
916  acount = 0;
917  --nb;
918  if (nb == 1)
919  goto CopyA;
920  if (bcount >= min_gallop)
921  break;
922  }
923  }
924 
925  /* One run is winning so consistently that galloping may
926  * be a huge win. So try that, and continue galloping until
927  * (if ever) neither run appears to be winning consistently
928  * anymore.
929  */
930  ++min_gallop;
931  do
932  {
933  min_gallop -= min_gallop > 1;
934  ms->min_gallop = min_gallop;
935  k = gallop_right (*pb, basea, na, na-1, comp);
936  if (k < 0)
937  goto Fail;
938  k = na - k;
939  acount = k;
940  if (k)
941  {
942  dest = std::copy_backward (pa+1 - k, pa+1, dest+1) - 1;
943  pa -= k;
944  na -= k;
945  if (na == 0)
946  goto Succeed;
947  }
948  *dest-- = *pb--;
949  --nb;
950  if (nb == 1)
951  goto CopyA;
952 
953  k = gallop_left (*pa, baseb, nb, nb-1, comp);
954  if (k < 0)
955  goto Fail;
956  k = nb - k;
957  bcount = k;
958  if (k)
959  {
960  dest -= k;
961  pb -= k;
962  std::copy (pb+1, pb+1 + k, dest+1);
963  nb -= k;
964  if (nb == 1)
965  goto CopyA;
966  /* nb==0 is impossible now if the comparison
967  * function is consistent, but we can't assume
968  * that it is.
969  */
970  if (nb == 0)
971  goto Succeed;
972  }
973  *dest-- = *pa--;
974  --na;
975  if (na == 0)
976  goto Succeed;
977  } while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP);
978  ++min_gallop; /* penalize it for leaving galloping mode */
979  ms->min_gallop = min_gallop;
980  }
981 
982 Succeed:
983  result = 0;
984 
985 Fail:
986  if (nb)
987  std::copy (baseb, baseb + nb, dest-(nb-1));
988  return result;
989 
990 CopyA:
991  /* The first element of pb belongs at the front of the merge. */
992  dest = std::copy_backward (pa+1 - na, pa+1, dest+1) - 1;
993  pa -= na;
994  *dest = *pb;
995 
996  return 0;
997 }
998 
999 template <class T>
1000 template <class Comp>
1001 int
1003  T *pb, octave_idx_type *ipb, octave_idx_type nb,
1004  Comp comp)
1005 {
1006  octave_idx_type k;
1007  T *dest;
1008  octave_idx_type *idest;
1009  int result = -1; /* guilty until proved innocent */
1010  T *basea, *baseb;
1011  octave_idx_type *ibaseb;
1012  octave_idx_type min_gallop = ms->min_gallop;
1013 
1014  ms->getmemi (nb);
1015 
1016  dest = pb + nb - 1;
1017  idest = ipb + nb - 1;
1018  std::copy (pb, pb + nb, ms->a);
1019  std::copy (ipb, ipb + nb, ms->ia);
1020  basea = pa;
1021  baseb = ms->a; ibaseb = ms->ia;
1022  pb = ms->a + nb - 1; ipb = ms->ia + nb - 1;
1023  pa += na - 1; ipa += na - 1;
1024 
1025  *dest-- = *pa--; *idest-- = *ipa--;
1026  --na;
1027  if (na == 0)
1028  goto Succeed;
1029  if (nb == 1)
1030  goto CopyA;
1031 
1032  for (;;)
1033  {
1034  octave_idx_type acount = 0; /* # of times A won in a row */
1035  octave_idx_type bcount = 0; /* # of times B won in a row */
1036 
1037  /* Do the straightforward thing until (if ever) one run
1038  * appears to win consistently.
1039  */
1040  for (;;)
1041  {
1042  if (comp (*pb, *pa))
1043  {
1044  *dest-- = *pa--; *idest-- = *ipa--;
1045  ++acount;
1046  bcount = 0;
1047  --na;
1048  if (na == 0)
1049  goto Succeed;
1050  if (acount >= min_gallop)
1051  break;
1052  }
1053  else
1054  {
1055  *dest-- = *pb--; *idest-- = *ipb--;
1056  ++bcount;
1057  acount = 0;
1058  --nb;
1059  if (nb == 1)
1060  goto CopyA;
1061  if (bcount >= min_gallop)
1062  break;
1063  }
1064  }
1065 
1066  /* One run is winning so consistently that galloping may
1067  * be a huge win. So try that, and continue galloping until
1068  * (if ever) neither run appears to be winning consistently
1069  * anymore.
1070  */
1071  ++min_gallop;
1072  do
1073  {
1074  min_gallop -= min_gallop > 1;
1075  ms->min_gallop = min_gallop;
1076  k = gallop_right (*pb, basea, na, na-1, comp);
1077  if (k < 0)
1078  goto Fail;
1079  k = na - k;
1080  acount = k;
1081  if (k)
1082  {
1083  dest = std::copy_backward (pa+1 - k, pa+1, dest+1) - 1;
1084  idest = std::copy_backward (ipa+1 - k, ipa+1, idest+1) - 1;
1085  pa -= k; ipa -= k;
1086  na -= k;
1087  if (na == 0)
1088  goto Succeed;
1089  }
1090  *dest-- = *pb--; *idest-- = *ipb--;
1091  --nb;
1092  if (nb == 1)
1093  goto CopyA;
1094 
1095  k = gallop_left (*pa, baseb, nb, nb-1, comp);
1096  if (k < 0)
1097  goto Fail;
1098  k = nb - k;
1099  bcount = k;
1100  if (k)
1101  {
1102  dest -= k; idest -= k;
1103  pb -= k; ipb -= k;
1104  std::copy (pb+1, pb+1 + k, dest+1);
1105  std::copy (ipb+1, ipb+1 + k, idest+1);
1106  nb -= k;
1107  if (nb == 1)
1108  goto CopyA;
1109  /* nb==0 is impossible now if the comparison
1110  * function is consistent, but we can't assume
1111  * that it is.
1112  */
1113  if (nb == 0)
1114  goto Succeed;
1115  }
1116  *dest-- = *pa--; *idest-- = *ipa--;
1117  --na;
1118  if (na == 0)
1119  goto Succeed;
1120  } while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP);
1121  ++min_gallop; /* penalize it for leaving galloping mode */
1122  ms->min_gallop = min_gallop;
1123  }
1124 
1125 Succeed:
1126  result = 0;
1127 
1128 Fail:
1129  if (nb)
1130  {
1131  std::copy (baseb, baseb + nb, dest-(nb-1));
1132  std::copy (ibaseb, ibaseb + nb, idest-(nb-1));
1133  }
1134  return result;
1135 
1136 CopyA:
1137  /* The first element of pb belongs at the front of the merge. */
1138  dest = std::copy_backward (pa+1 - na, pa+1, dest+1) - 1;
1139  idest = std::copy_backward (ipa+1 - na, ipa+1, idest+1) - 1;
1140  pa -= na; ipa -= na;
1141  *dest = *pb; *idest = *ipb;
1142 
1143  return 0;
1144 }
1145 
1146 /* Merge the two runs at stack indices i and i+1.
1147  * Returns 0 on success, -1 on error.
1148  */
1149 template <class T>
1150 template <class Comp>
1151 int
1153  Comp comp)
1154 {
1155  T *pa, *pb;
1156  octave_idx_type na, nb;
1157  octave_idx_type k;
1158 
1159  pa = data + ms->pending[i].base;
1160  na = ms->pending[i].len;
1161  pb = data + ms->pending[i+1].base;
1162  nb = ms->pending[i+1].len;
1163 
1164  /* Record the length of the combined runs; if i is the 3rd-last
1165  * run now, also slide over the last run (which isn't involved
1166  * in this merge). The current run i+1 goes away in any case.
1167  */
1168  ms->pending[i].len = na + nb;
1169  if (i == ms->n - 3)
1170  ms->pending[i+1] = ms->pending[i+2];
1171  ms->n--;
1172 
1173  /* Where does b start in a? Elements in a before that can be
1174  * ignored (already in place).
1175  */
1176  k = gallop_right (*pb, pa, na, 0, comp);
1177  if (k < 0)
1178  return -1;
1179  pa += k;
1180  na -= k;
1181  if (na == 0)
1182  return 0;
1183 
1184  /* Where does a end in b? Elements in b after that can be
1185  * ignored (already in place).
1186  */
1187  nb = gallop_left (pa[na-1], pb, nb, nb-1, comp);
1188  if (nb <= 0)
1189  return nb;
1190 
1191  /* Merge what remains of the runs, using a temp array with
1192  * min (na, nb) elements.
1193  */
1194  if (na <= nb)
1195  return merge_lo (pa, na, pb, nb, comp);
1196  else
1197  return merge_hi (pa, na, pb, nb, comp);
1198 }
1199 
1200 template <class T>
1201 template <class Comp>
1202 int
1204  Comp comp)
1205 {
1206  T *pa, *pb;
1207  octave_idx_type *ipa, *ipb;
1208  octave_idx_type na, nb;
1209  octave_idx_type k;
1210 
1211  pa = data + ms->pending[i].base;
1212  ipa = idx + ms->pending[i].base;
1213  na = ms->pending[i].len;
1214  pb = data + ms->pending[i+1].base;
1215  ipb = idx + ms->pending[i+1].base;
1216  nb = ms->pending[i+1].len;
1217 
1218  /* Record the length of the combined runs; if i is the 3rd-last
1219  * run now, also slide over the last run (which isn't involved
1220  * in this merge). The current run i+1 goes away in any case.
1221  */
1222  ms->pending[i].len = na + nb;
1223  if (i == ms->n - 3)
1224  ms->pending[i+1] = ms->pending[i+2];
1225  ms->n--;
1226 
1227  /* Where does b start in a? Elements in a before that can be
1228  * ignored (already in place).
1229  */
1230  k = gallop_right (*pb, pa, na, 0, comp);
1231  if (k < 0)
1232  return -1;
1233  pa += k; ipa += k;
1234  na -= k;
1235  if (na == 0)
1236  return 0;
1237 
1238  /* Where does a end in b? Elements in b after that can be
1239  * ignored (already in place).
1240  */
1241  nb = gallop_left (pa[na-1], pb, nb, nb-1, comp);
1242  if (nb <= 0)
1243  return nb;
1244 
1245  /* Merge what remains of the runs, using a temp array with
1246  * min (na, nb) elements.
1247  */
1248  if (na <= nb)
1249  return merge_lo (pa, ipa, na, pb, ipb, nb, comp);
1250  else
1251  return merge_hi (pa, ipa, na, pb, ipb, nb, comp);
1252 }
1253 
1254 /* Examine the stack of runs waiting to be merged, merging adjacent runs
1255  * until the stack invariants are re-established:
1256  *
1257  * 1. len[-3] > len[-2] + len[-1]
1258  * 2. len[-2] > len[-1]
1259  *
1260  * See listsort.txt for more info.
1261  *
1262  * Returns 0 on success, -1 on error.
1263  */
1264 template <class T>
1265 template <class Comp>
1266 int
1268 {
1269  struct s_slice *p = ms->pending;
1270 
1271  while (ms->n > 1)
1272  {
1273  octave_idx_type n = ms->n - 2;
1274  if (n > 0 && p[n-1].len <= p[n].len + p[n+1].len)
1275  {
1276  if (p[n-1].len < p[n+1].len)
1277  --n;
1278  if (merge_at (n, data, comp) < 0)
1279  return -1;
1280  }
1281  else if (p[n].len <= p[n+1].len)
1282  {
1283  if (merge_at (n, data, comp) < 0)
1284  return -1;
1285  }
1286  else
1287  break;
1288  }
1289 
1290  return 0;
1291 }
1292 
1293 template <class T>
1294 template <class Comp>
1295 int
1297 {
1298  struct s_slice *p = ms->pending;
1299 
1300  while (ms->n > 1)
1301  {
1302  octave_idx_type n = ms->n - 2;
1303  if (n > 0 && p[n-1].len <= p[n].len + p[n+1].len)
1304  {
1305  if (p[n-1].len < p[n+1].len)
1306  --n;
1307  if (merge_at (n, data, idx, comp) < 0)
1308  return -1;
1309  }
1310  else if (p[n].len <= p[n+1].len)
1311  {
1312  if (merge_at (n, data, idx, comp) < 0)
1313  return -1;
1314  }
1315  else
1316  break;
1317  }
1318 
1319  return 0;
1320 }
1321 
1322 /* Regardless of invariants, merge all runs on the stack until only one
1323  * remains. This is used at the end of the mergesort.
1324  *
1325  * Returns 0 on success, -1 on error.
1326  */
1327 template <class T>
1328 template <class Comp>
1329 int
1331 {
1332  struct s_slice *p = ms->pending;
1333 
1334  while (ms->n > 1)
1335  {
1336  octave_idx_type n = ms->n - 2;
1337  if (n > 0 && p[n-1].len < p[n+1].len)
1338  --n;
1339  if (merge_at (n, data, comp) < 0)
1340  return -1;
1341  }
1342 
1343  return 0;
1344 }
1345 
1346 template <class T>
1347 template <class Comp>
1348 int
1350 {
1351  struct s_slice *p = ms->pending;
1352 
1353  while (ms->n > 1)
1354  {
1355  octave_idx_type n = ms->n - 2;
1356  if (n > 0 && p[n-1].len < p[n+1].len)
1357  --n;
1358  if (merge_at (n, data, idx, comp) < 0)
1359  return -1;
1360  }
1361 
1362  return 0;
1363 }
1364 
1365 /* Compute a good value for the minimum run length; natural runs shorter
1366  * than this are boosted artificially via binary insertion.
1367  *
1368  * If n < 64, return n (it's too small to bother with fancy stuff).
1369  * Else if n is an exact power of 2, return 32.
1370  * Else return an int k, 32 <= k <= 64, such that n/k is close to, but
1371  * strictly less than, an exact power of 2.
1372  *
1373  * See listsort.txt for more info.
1374  */
1375 template <class T>
1378 {
1379  octave_idx_type r = 0; /* becomes 1 if any 1 bits are shifted off */
1380 
1381  while (n >= 64)
1382  {
1383  r |= n & 1;
1384  n >>= 1;
1385  }
1386 
1387  return n + r;
1388 }
1389 
1390 template <class T>
1391 template <class Comp>
1392 void
1393 octave_sort<T>::sort (T *data, octave_idx_type nel, Comp comp)
1394 {
1395  /* Re-initialize the Mergestate as this might be the second time called */
1396  if (! ms) ms = new MergeState;
1397 
1398  ms->reset ();
1399  ms->getmem (1024);
1400 
1401  if (nel > 1)
1402  {
1403  octave_idx_type nremaining = nel;
1404  octave_idx_type lo = 0;
1405 
1406  /* March over the array once, left to right, finding natural runs,
1407  * and extending short natural runs to minrun elements.
1408  */
1409  octave_idx_type minrun = merge_compute_minrun (nremaining);
1410  do
1411  {
1412  bool descending;
1413  octave_idx_type n;
1414 
1415  /* Identify next run. */
1416  n = count_run (data + lo, nremaining, descending, comp);
1417  if (n < 0)
1418  goto fail;
1419  if (descending)
1420  std::reverse (data + lo, data + lo + n);
1421  /* If short, extend to min (minrun, nremaining). */
1422  if (n < minrun)
1423  {
1424  const octave_idx_type force = nremaining <= minrun ? nremaining
1425  : minrun;
1426  binarysort (data + lo, force, n, comp);
1427  n = force;
1428  }
1429  /* Push run onto pending-runs stack, and maybe merge. */
1430  assert (ms->n < MAX_MERGE_PENDING);
1431  ms->pending[ms->n].base = lo;
1432  ms->pending[ms->n].len = n;
1433  ms->n++;
1434  if (merge_collapse (data, comp) < 0)
1435  goto fail;
1436  /* Advance to find next run. */
1437  lo += n;
1438  nremaining -= n;
1439  }
1440  while (nremaining);
1441 
1442  merge_force_collapse (data, comp);
1443  }
1444 
1445 fail:
1446  return;
1447 }
1448 
1449 template <class T>
1450 template <class Comp>
1451 void
1453  Comp comp)
1454 {
1455  /* Re-initialize the Mergestate as this might be the second time called */
1456  if (! ms) ms = new MergeState;
1457 
1458  ms->reset ();
1459  ms->getmemi (1024);
1460 
1461  if (nel > 1)
1462  {
1463  octave_idx_type nremaining = nel;
1464  octave_idx_type lo = 0;
1465 
1466  /* March over the array once, left to right, finding natural runs,
1467  * and extending short natural runs to minrun elements.
1468  */
1469  octave_idx_type minrun = merge_compute_minrun (nremaining);
1470  do
1471  {
1472  bool descending;
1473  octave_idx_type n;
1474 
1475  /* Identify next run. */
1476  n = count_run (data + lo, nremaining, descending, comp);
1477  if (n < 0)
1478  goto fail;
1479  if (descending)
1480  {
1481  std::reverse (data + lo, data + lo + n);
1482  std::reverse (idx + lo, idx + lo + n);
1483  }
1484  /* If short, extend to min (minrun, nremaining). */
1485  if (n < minrun)
1486  {
1487  const octave_idx_type force = nremaining <= minrun ? nremaining
1488  : minrun;
1489  binarysort (data + lo, idx + lo, force, n, comp);
1490  n = force;
1491  }
1492  /* Push run onto pending-runs stack, and maybe merge. */
1493  assert (ms->n < MAX_MERGE_PENDING);
1494  ms->pending[ms->n].base = lo;
1495  ms->pending[ms->n].len = n;
1496  ms->n++;
1497  if (merge_collapse (data, idx, comp) < 0)
1498  goto fail;
1499  /* Advance to find next run. */
1500  lo += n;
1501  nremaining -= n;
1502  }
1503  while (nremaining);
1504 
1505  merge_force_collapse (data, idx, comp);
1506  }
1507 
1508 fail:
1509  return;
1510 }
1511 
1512 template <class T>
1513 void
1515 {
1516 #ifdef INLINE_ASCENDING_SORT
1517  if (compare == ascending_compare)
1518  sort (data, nel, std::less<T> ());
1519  else
1520 #endif
1521 #ifdef INLINE_DESCENDING_SORT
1522  if (compare == descending_compare)
1523  sort (data, nel, std::greater<T> ());
1524  else
1525 #endif
1526  if (compare)
1527  sort (data, nel, compare);
1528 }
1529 
1530 template <class T>
1531 void
1533 {
1534 #ifdef INLINE_ASCENDING_SORT
1535  if (compare == ascending_compare)
1536  sort (data, idx, nel, std::less<T> ());
1537  else
1538 #endif
1539 #ifdef INLINE_DESCENDING_SORT
1540  if (compare == descending_compare)
1541  sort (data, idx, nel, std::greater<T> ());
1542  else
1543 #endif
1544  if (compare)
1545  sort (data, idx, nel, compare);
1546 }
1547 
1548 template <class T> template <class Comp>
1549 bool
1550 octave_sort<T>::is_sorted (const T *data, octave_idx_type nel, Comp comp)
1551 {
1552  const T *end = data + nel;
1553  if (data != end)
1554  {
1555  const T *next = data;
1556  while (++next != end)
1557  {
1558  if (comp (*next, *data))
1559  break;
1560  data = next;
1561  }
1562  data = next;
1563  }
1564 
1565  return data == end;
1566 }
1567 
1568 template <class T>
1569 bool
1571 {
1572  bool retval = false;
1573 #ifdef INLINE_ASCENDING_SORT
1574  if (compare == ascending_compare)
1575  retval = is_sorted (data, nel, std::less<T> ());
1576  else
1577 #endif
1578 #ifdef INLINE_DESCENDING_SORT
1579  if (compare == descending_compare)
1580  retval = is_sorted (data, nel, std::greater<T> ());
1581  else
1582 #endif
1583  if (compare)
1584  retval = is_sorted (data, nel, compare);
1585 
1586  return retval;
1587 }
1588 
1589 // FIXME: is there really no way to make this local to the following function?
1591 {
1593  : col (c), ofs (o), nel (n) { }
1595 };
1596 
1597 
1598 template <class T> template <class Comp>
1599 void
1601  octave_idx_type rows, octave_idx_type cols,
1602  Comp comp)
1603 {
1604  OCTAVE_LOCAL_BUFFER (T, buf, rows);
1605  for (octave_idx_type i = 0; i < rows; i++)
1606  idx[i] = i;
1607 
1608  if (cols == 0 || rows <= 1)
1609  return;
1610 
1611 
1612  // This is a breadth-first traversal.
1613  typedef sortrows_run_t run_t;
1614  std::stack<run_t> runs;
1615 
1616  runs.push (run_t (0, 0, rows));
1617 
1618  while (! runs.empty ())
1619  {
1620  octave_idx_type col = runs.top ().col;
1621  octave_idx_type ofs = runs.top ().ofs;
1622  octave_idx_type nel = runs.top ().nel;
1623  runs.pop ();
1624  assert (nel > 1);
1625 
1626  T *lbuf = buf + ofs;
1627  const T *ldata = data + rows*col;
1628  octave_idx_type *lidx = idx + ofs;
1629 
1630  // Gather.
1631  for (octave_idx_type i = 0; i < nel; i++)
1632  lbuf[i] = ldata[lidx[i]];
1633 
1634  // Sort.
1635  sort (lbuf, lidx, nel, comp);
1636 
1637  // Identify constant runs and schedule subsorts.
1638  if (col < cols-1)
1639  {
1640  octave_idx_type lst = 0;
1641  for (octave_idx_type i = 0; i < nel; i++)
1642  {
1643  if (comp (lbuf[lst], lbuf[i]))
1644  {
1645  if (i > lst + 1)
1646  runs.push (run_t (col+1, ofs + lst, i - lst));
1647  lst = i;
1648  }
1649  }
1650  if (nel > lst + 1)
1651  runs.push (run_t (col+1, ofs + lst, nel - lst));
1652  }
1653  }
1654 }
1655 
1656 template <class T>
1657 void
1659  octave_idx_type rows, octave_idx_type cols)
1660 {
1661 #ifdef INLINE_ASCENDING_SORT
1662  if (compare == ascending_compare)
1663  sort_rows (data, idx, rows, cols, std::less<T> ());
1664  else
1665 #endif
1666 #ifdef INLINE_DESCENDING_SORT
1667  if (compare == descending_compare)
1668  sort_rows (data, idx, rows, cols, std::greater<T> ());
1669  else
1670 #endif
1671  if (compare)
1672  sort_rows (data, idx, rows, cols, compare);
1673 }
1674 
1675 template <class T> template <class Comp>
1676 bool
1678  octave_idx_type cols, Comp comp)
1679 {
1680  if (rows <= 1 || cols == 0)
1681  return true;
1682 
1683  // This is a breadth-first traversal.
1684  const T *lastrow = data + rows*(cols - 1);
1685  typedef std::pair<const T *, octave_idx_type> run_t;
1686  std::stack<run_t> runs;
1687 
1688  bool sorted = true;
1689  runs.push (run_t (data, rows));
1690  while (sorted && ! runs.empty ())
1691  {
1692  const T *lo = runs.top ().first;
1693  octave_idx_type n = runs.top ().second;
1694  runs.pop ();
1695  if (lo < lastrow)
1696  {
1697  // Not the final column.
1698  assert (n > 1);
1699  const T *hi = lo + n;
1700  const T *lst = lo;
1701  for (lo++; lo < hi; lo++)
1702  {
1703  if (comp (*lst, *lo))
1704  {
1705  if (lo > lst + 1)
1706  runs.push (run_t (lst + rows, lo - lst));
1707  lst = lo;
1708  }
1709  else if (comp (*lo, *lst))
1710  break;
1711 
1712  }
1713  if (lo == hi)
1714  {
1715  if (lo > lst + 1)
1716  runs.push (run_t (lst + rows, lo - lst));
1717  }
1718  else
1719  {
1720  sorted = false;
1721  break;
1722  }
1723  }
1724  else
1725  // The final column - use fast code.
1726  sorted = is_sorted (lo, n, comp);
1727  }
1728 
1729  return sorted;
1730 }
1731 
1732 template <class T>
1733 bool
1735  octave_idx_type cols)
1736 {
1737  bool retval = false;
1738 
1739 #ifdef INLINE_ASCENDING_SORT
1740  if (compare == ascending_compare)
1741  retval = is_sorted_rows (data, rows, cols, std::less<T> ());
1742  else
1743 #endif
1744 #ifdef INLINE_DESCENDING_SORT
1745  if (compare == descending_compare)
1746  retval = is_sorted_rows (data, rows, cols, std::greater<T> ());
1747  else
1748 #endif
1749  if (compare)
1750  retval = is_sorted_rows (data, rows, cols, compare);
1751 
1752  return retval;
1753 }
1754 
1755 // The simple binary lookup.
1756 
1757 template <class T> template <class Comp>
1760  const T& value, Comp comp)
1761 {
1762  octave_idx_type lo = 0;
1763  octave_idx_type hi = nel;
1764 
1765  while (lo < hi)
1766  {
1767  octave_idx_type mid = lo + ((hi-lo) >> 1);
1768  if (comp (value, data[mid]))
1769  hi = mid;
1770  else
1771  lo = mid + 1;
1772  }
1773 
1774  return lo;
1775 }
1776 
1777 template <class T>
1780  const T& value)
1781 {
1782  octave_idx_type retval = 0;
1783 
1784 #ifdef INLINE_ASCENDING_SORT
1785  if (compare == ascending_compare)
1786  retval = lookup (data, nel, value, std::less<T> ());
1787  else
1788 #endif
1789 #ifdef INLINE_DESCENDING_SORT
1790  if (compare == descending_compare)
1791  retval = lookup (data, nel, value, std::greater<T> ());
1792  else
1793 #endif
1794  if (compare)
1795  retval = lookup (data, nel, value, std::ptr_fun (compare));
1796 
1797  return retval;
1798 }
1799 
1800 template <class T> template <class Comp>
1801 void
1803  const T *values, octave_idx_type nvalues,
1804  octave_idx_type *idx, Comp comp)
1805 {
1806  // Use a sequence of binary lookups.
1807  // TODO: Can this be sped up generally? The sorted merge case is dealt with
1808  // elsewhere.
1809  for (octave_idx_type j = 0; j < nvalues; j++)
1810  idx[j] = lookup (data, nel, values[j], comp);
1811 }
1812 
1813 template <class T>
1814 void
1816  const T* values, octave_idx_type nvalues,
1817  octave_idx_type *idx)
1818 {
1819 #ifdef INLINE_ASCENDING_SORT
1820  if (compare == ascending_compare)
1821  lookup (data, nel, values, nvalues, idx, std::less<T> ());
1822  else
1823 #endif
1824 #ifdef INLINE_DESCENDING_SORT
1825  if (compare == descending_compare)
1826  lookup (data, nel, values, nvalues, idx, std::greater<T> ());
1827  else
1828 #endif
1829  if (compare)
1830  lookup (data, nel, values, nvalues, idx, std::ptr_fun (compare));
1831 }
1832 
1833 template <class T> template <class Comp>
1834 void
1836  const T *values, octave_idx_type nvalues,
1837  octave_idx_type *idx, bool rev, Comp comp)
1838 {
1839  if (rev)
1840  {
1841  octave_idx_type i = 0;
1842  octave_idx_type j = nvalues - 1;
1843 
1844  if (nvalues > 0 && nel > 0)
1845  {
1846  while (true)
1847  {
1848  if (comp (values[j], data[i]))
1849  {
1850  idx[j] = i;
1851  if (--j < 0)
1852  break;
1853  }
1854  else if (++i == nel)
1855  break;
1856  }
1857  }
1858 
1859  for (; j >= 0; j--)
1860  idx[j] = i;
1861  }
1862  else
1863  {
1864  octave_idx_type i = 0;
1865  octave_idx_type j = 0;
1866 
1867  if (nvalues > 0 && nel > 0)
1868  {
1869  while (true)
1870  {
1871  if (comp (values[j], data[i]))
1872  {
1873  idx[j] = i;
1874  if (++j == nvalues)
1875  break;
1876  }
1877  else if (++i == nel)
1878  break;
1879  }
1880  }
1881 
1882  for (; j != nvalues; j++)
1883  idx[j] = i;
1884  }
1885 }
1886 
1887 template <class T>
1888 void
1890  const T* values, octave_idx_type nvalues,
1891  octave_idx_type *idx, bool rev)
1892 {
1893 #ifdef INLINE_ASCENDING_SORT
1894  if (compare == ascending_compare)
1895  lookup_sorted (data, nel, values, nvalues, idx, rev, std::less<T> ());
1896  else
1897 #endif
1898 #ifdef INLINE_DESCENDING_SORT
1899  if (compare == descending_compare)
1900  lookup_sorted (data, nel, values, nvalues, idx, rev, std::greater<T> ());
1901  else
1902 #endif
1903  if (compare)
1904  lookup_sorted (data, nel, values, nvalues, idx, rev,
1905  std::ptr_fun (compare));
1906 }
1907 
1908 template <class T> template <class Comp>
1909 void
1912  Comp comp)
1913 {
1914  // Simply wrap the STL algorithms.
1915  // FIXME: this will fail if we attempt to inline <,> for Complex.
1916  if (up == lo+1)
1917  std::nth_element (data, data + lo, data + nel, comp);
1918  else if (lo == 0)
1919  std::partial_sort (data, data + up, data + nel, comp);
1920  else
1921  {
1922  std::nth_element (data, data + lo, data + nel, comp);
1923  if (up == lo + 2)
1924  {
1925  // Finding two subsequent elements.
1926  std::swap (data[lo+1],
1927  *std::min_element (data + lo + 1, data + nel, comp));
1928  }
1929  else
1930  std::partial_sort (data + lo + 1, data + up, data + nel, comp);
1931  }
1932 }
1933 
1934 template <class T>
1935 void
1938 {
1939  if (up < 0)
1940  up = lo + 1;
1941 #ifdef INLINE_ASCENDING_SORT
1942  if (compare == ascending_compare)
1943  nth_element (data, nel, lo, up, std::less<T> ());
1944  else
1945 #endif
1946 #ifdef INLINE_DESCENDING_SORT
1947  if (compare == descending_compare)
1948  nth_element (data, nel, lo, up, std::greater<T> ());
1949  else
1950 #endif
1951  if (compare)
1952  nth_element (data, nel, lo, up, std::ptr_fun (compare));
1953 }
1954 
1955 template <class T>
1956 bool
1958  typename ref_param<T>::type y)
1959 {
1960  return x < y;
1961 }
1962 
1963 template <class T>
1964 bool
1966  typename ref_param<T>::type y)
1967 {
1968  return x > y;
1969 }
int merge_at(octave_idx_type i, T *data, Comp comp)
Definition: oct-sort.cc:1152
MergeState * ms
Definition: oct-sort.h:231
octave_idx_type len
Definition: oct-sort.h:180
octave_idx_type * ia
Definition: oct-sort.h:208
void sort_rows(const T *data, octave_idx_type *idx, octave_idx_type rows, octave_idx_type cols)
Definition: oct-sort.cc:1658
octave_idx_type lookup(const T *data, octave_idx_type nel, const T &value)
Definition: oct-sort.cc:1779
octave_idx_type gallop_left(T key, T *a, octave_idx_type n, octave_idx_type hint, Comp comp)
Definition: oct-sort.cc:324
octave_idx_type ofs
Definition: oct-sort.cc:1594
sortmode
Definition: oct-sort.h:103
octave_idx_type base
Definition: oct-sort.h:180
void set_compare(compare_fcn_type comp)
Definition: oct-sort.h:120
sortrows_run_t(octave_idx_type c, octave_idx_type o, octave_idx_type n)
Definition: oct-sort.cc:1592
void binarysort(T *data, octave_idx_type nel, octave_idx_type start, Comp comp)
Definition: oct-sort.cc:152
octave_idx_type count_run(T *lo, octave_idx_type n, bool &descending, Comp comp)
Definition: oct-sort.cc:264
octave_idx_type alloced
Definition: oct-sort.h:209
int merge_hi(T *pa, octave_idx_type na, T *pb, octave_idx_type nb, Comp comp)
Definition: oct-sort.cc:865
static uint32_t * next
Definition: randmtzig.c:187
void getmemi(octave_idx_type need)
Definition: oct-sort.cc:557
compare_fcn_type compare
Definition: oct-sort.h:229
static octave_idx_type roundupsize(octave_idx_type n)
Definition: oct-sort.cc:498
static bool descending_compare(typename ref_param< T >::type, typename ref_param< T >::type)
Definition: oct-sort.cc:1965
#define MAX_MERGE_PENDING
Definition: oct-sort.h:93
bool is_sorted_rows(const T *data, octave_idx_type rows, octave_idx_type cols)
Definition: oct-sort.cc:1734
bool is_sorted(const T *data, octave_idx_type nel)
Definition: oct-sort.cc:1570
if_then_else< is_class_type< T >::no, T, T const & >::result type
Definition: lo-traits.h:116
octave_idx_type gallop_right(T key, T *a, octave_idx_type n, octave_idx_type hint, Comp comp)
Definition: oct-sort.cc:419
void sort(T *data, octave_idx_type nel)
Definition: oct-sort.cc:1514
int merge_force_collapse(T *data, Comp comp)
Definition: oct-sort.cc:1330
void lookup_sorted(const T *data, octave_idx_type nel, const T *values, octave_idx_type nvalues, octave_idx_type *idx, bool rev=false)
Definition: oct-sort.cc:1889
octave_sort(void)
Definition: oct-sort.cc:120
void getmem(octave_idx_type need)
Definition: oct-sort.cc:539
#define MIN_GALLOP
Definition: oct-sort.h:97
#define OCTAVE_LOCAL_BUFFER(T, buf, size)
Definition: oct-locbuf.h:197
~octave_sort(void)
Definition: oct-sort.cc:132
struct s_slice pending[85]
Definition: oct-sort.h:220
void nth_element(T *data, octave_idx_type nel, octave_idx_type lo, octave_idx_type up=-1)
Definition: oct-sort.cc:1936
octave_idx_type n
Definition: oct-sort.h:219
int merge_collapse(T *data, Comp comp)
Definition: oct-sort.cc:1267
octave_idx_type merge_compute_minrun(octave_idx_type n)
Definition: oct-sort.cc:1377
octave_idx_type min_gallop
Definition: oct-sort.h:203
F77_RET_T const double * x
static bool ascending_compare(typename ref_param< T >::type, typename ref_param< T >::type)
Definition: oct-sort.cc:1957
int merge_lo(T *pa, octave_idx_type na, T *pb, octave_idx_type nb, Comp comp)
Definition: oct-sort.cc:583