GNU Octave  6.2.0
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab
lookup.cc
Go to the documentation of this file.
1 ////////////////////////////////////////////////////////////////////////
2 //
3 // Copyright (C) 2008-2021 The Octave Project Developers
4 //
5 // See the file COPYRIGHT.md in the top-level directory of this
6 // distribution or <https://octave.org/copyright/>.
7 //
8 // This file is part of Octave.
9 //
10 // Octave is free software: you can redistribute it and/or modify it
11 // under the terms of the GNU General Public License as published by
12 // the Free Software Foundation, either version 3 of the License, or
13 // (at your option) any later version.
14 //
15 // Octave is distributed in the hope that it will be useful, but
16 // WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 // GNU General Public License for more details.
19 //
20 // You should have received a copy of the GNU General Public License
21 // along with Octave; see the file COPYING. If not, see
22 // <https://www.gnu.org/licenses/>.
23 //
24 ////////////////////////////////////////////////////////////////////////
25 
26 #if defined (HAVE_CONFIG_H)
27 # include "config.h"
28 #endif
29 
30 #include <cctype>
31 #include <functional>
32 #include <algorithm>
33 
34 #include "dNDArray.h"
35 #include "CNDArray.h"
36 
37 #include "Cell.h"
38 #include "defun.h"
39 #include "error.h"
40 #include "errwarn.h"
41 #include "ovl.h"
42 #include "ov.h"
43 
44 static
45 bool
46 contains_char (const std::string& str, char c)
47 {
48  return (str.find (c) != std::string::npos
49  || str.find (std::toupper (c)) != std::string::npos);
50 }
51 
52 template <typename T>
53 inline sortmode
54 get_sort_mode (const Array<T>& array,
55  typename octave_sort<T>::compare_fcn_type desc_comp
57 {
58  octave_idx_type n = array.numel ();
59  if (n > 1 && desc_comp (array (0), array (n-1)))
60  return DESCENDING;
61  else
62  return ASCENDING;
63 }
64 
65 // FIXME: perhaps there should be octave_value::lookup?
66 // The question is, how should it behave w.r.t. the second argument's type.
67 // We'd need a dispatch on two arguments. Hmmm...
68 
69 #define INT_ARRAY_LOOKUP(TYPE) \
70  (table.is_ ## TYPE ## _type () && y.is_ ## TYPE ## _type ()) \
71  retval = do_numeric_lookup (table.TYPE ## _array_value (), \
72  y.TYPE ## _array_value (), \
73  left_inf, right_inf, \
74  match_idx, match_bool);
75 template <typename ArrayT>
76 static octave_value
77 do_numeric_lookup (const ArrayT& array, const ArrayT& values,
78  bool left_inf, bool right_inf,
79  bool match_idx, bool match_bool)
80 {
82 
83  Array<octave_idx_type> idx = array.lookup (values);
84  octave_idx_type n = array.numel ();
85  octave_idx_type nval = values.numel ();
86 
87  // Post-process.
88  if (match_bool)
89  {
90  boolNDArray match (idx.dims ());
91  for (octave_idx_type i = 0; i < nval; i++)
92  {
93  octave_idx_type j = idx.xelem (i);
94  match.xelem (i) = j != 0 && values(i) == array(j-1);
95  }
96 
97  retval = match;
98  }
99  else if (match_idx || left_inf || right_inf)
100  {
101  if (match_idx)
102  {
103  NDArray ridx (idx.dims ());
104 
105  for (octave_idx_type i = 0; i < nval; i++)
106  {
107  octave_idx_type j = idx.xelem (i);
108  ridx.xelem (i) = (j != 0 && values(i) == array(j-1)) ? j : 0;
109  }
110 
111  retval = ridx;
112  }
113  else if (left_inf && right_inf)
114  {
115  // Results in valid indices. Optimize using lazy index.
116  octave_idx_type zero = 0;
117  for (octave_idx_type i = 0; i < nval; i++)
118  {
119  octave_idx_type j = idx.xelem (i) - 1;
120  idx.xelem (i) = std::max (zero, std::min (j, n-2));
121  }
122 
123  retval = idx_vector (idx);
124  }
125  else if (left_inf)
126  {
127  // Results in valid indices. Optimize using lazy index.
128  octave_idx_type zero = 0;
129  for (octave_idx_type i = 0; i < nval; i++)
130  {
131  octave_idx_type j = idx.xelem (i) - 1;
132  idx.xelem (i) = std::max (zero, j);
133  }
134 
135  retval = idx_vector (idx);
136  }
137  else if (right_inf)
138  {
139  NDArray ridx (idx.dims ());
140 
141  for (octave_idx_type i = 0; i < nval; i++)
142  {
143  octave_idx_type j = idx.xelem (i);
144  ridx.xelem (i) = std::min (j, n-1);
145  }
146 
147  retval = ridx;
148  }
149  }
150  else
151  retval = idx;
152 
153  return retval;
154 }
155 
156 DEFUN (lookup, args, ,
157  doc: /* -*- texinfo -*-
158 @deftypefn {} {@var{idx} =} lookup (@var{table}, @var{y})
159 @deftypefnx {} {@var{idx} =} lookup (@var{table}, @var{y}, @var{opt})
160 Lookup values in a @strong{sorted} table.
161 
162 This function is usually used as a prelude to interpolation.
163 
164 If table is increasing, of length N and @code{idx = lookup (table, y)}, then
165 @code{table(idx(i)) <= y(i) < table(idx(i+1))} for all @code{y(i)} within the
166 table. If @code{y(i) < table(1)} then @code{idx(i)} is 0. If
167 @code{y(i) >= table(end)} or @code{isnan (y(i))} then @code{idx(i)} is N.
168 
169 If the table is decreasing, then the tests are reversed. For non-strictly
170 monotonic tables, empty intervals are always skipped. The result is undefined
171 if @var{table} is not monotonic, or if @var{table} contains a NaN.
172 
173 The complexity of the lookup is O(M*log(N)) where M is the size of @var{y}.
174 In the special case when @var{y} is also sorted, the complexity is
175 O(min (M*log(N), M+N)).
176 
177 @var{table} and @var{y} can also be cell arrays of strings (or @var{y} can be a
178 single string). In this case, string lookup is performed using lexicographical
179 comparison.
180 
181 If @var{opts} is specified, it must be a string with letters indicating
182 additional options.
183 
184 @table @code
185 @item m
186 Match. @code{table(idx(i)) == y(i)} if @code{y(i)} occurs in table;
187 otherwise, @code{idx(i)} is zero.
188 
189 @item b
190 Boolean. @code{idx(i)} is a logical 1 or 0, indicating whether @code{y(i)}
191 is contained in table or not.
192 
193 @item l
194 Left. For numeric lookups the leftmost subinterval shall be extended to
195 minus infinity (i.e., all indices at least 1).
196 
197 @item r
198 Right. For numeric lookups the rightmost subinterval shall be extended to
199 infinity (i.e., all indices at most N-1).
200 @end table
201 
202 @strong{Note}: If @var{table} is not sorted the results from @code{lookup}
203 will be unpredictable.
204 @end deftypefn */)
205 {
206  int nargin = args.length ();
207 
208  if (nargin < 2 || nargin > 3)
209  print_usage ();
210 
211  octave_value table = args(0);
212  octave_value y = args(1);
213  if (table.ndims () > 2 || (table.columns () > 1 && table.rows () > 1))
214  warning ("lookup: table is not a vector");
215 
217 
218  bool num_case = ((table.isnumeric () && y.isnumeric ())
219  || (table.is_char_matrix () && y.is_char_matrix ()));
220  bool str_case = table.iscellstr () && (y.is_string () || y.iscellstr ());
221  bool left_inf = false;
222  bool right_inf = false;
223  bool match_idx = false;
224  bool match_bool = false;
225 
226  if (nargin == 3)
227  {
228  std::string opt = args(2).xstring_value ("lookup: OPT must be a string");
229  left_inf = contains_char (opt, 'l');
230  right_inf = contains_char (opt, 'r');
231  match_idx = contains_char (opt, 'm');
232  match_bool = contains_char (opt, 'b');
233  if (opt.find_first_not_of ("lrmb") != std::string::npos)
234  error ("lookup: unrecognized option: %c",
235  opt[opt.find_first_not_of ("lrmb")]);
236  }
237 
238  if ((match_idx || match_bool) && (left_inf || right_inf))
239  error ("lookup: m, b cannot be specified with l or r");
240  else if (match_idx && match_bool)
241  error ("lookup: only one of m or b can be specified");
242  else if (str_case && (left_inf || right_inf))
243  error ("lookup: l, r are not recognized for string lookups");
244 
245  if (num_case)
246  {
247  // In the case of a complex array, absolute values will be used for
248  // compatibility (though it's not too meaningful).
249  if (table.iscomplex ())
250  table = table.abs ();
251 
252  if (y.iscomplex ())
253  y = y.abs ();
254 
256 
257  // PS: I learned this from data.cc
258  if INT_ARRAY_LOOKUP (int8)
259  else if INT_ARRAY_LOOKUP (int16)
260  else if INT_ARRAY_LOOKUP (int32)
261  else if INT_ARRAY_LOOKUP (int64)
262  else if INT_ARRAY_LOOKUP (uint8)
263  else if INT_ARRAY_LOOKUP (uint16)
264  else if INT_ARRAY_LOOKUP (uint32)
265  else if INT_ARRAY_LOOKUP (uint64)
266  else if (table.is_char_matrix () && y.is_char_matrix ())
268  y.char_array_value (),
269  left_inf, right_inf,
270  match_idx, match_bool);
271  else if (table.is_single_type () || y.is_single_type ())
273  y.float_array_value (),
274  left_inf, right_inf,
275  match_idx, match_bool);
276  else
278  y.array_value (),
279  left_inf, right_inf,
280  match_idx, match_bool);
281  }
282  else if (str_case)
283  {
284  Array<std::string> str_table = table.cellstr_value ();
285  Array<std::string> str_y (dim_vector (1, 1));
286 
287  if (y.iscellstr ())
288  str_y = y.cellstr_value ();
289  else
290  str_y(0) = y.string_value ();
291 
292  Array<octave_idx_type> idx = str_table.lookup (str_y);
293  octave_idx_type nval = str_y.numel ();
294 
295  // Post-process.
296  if (match_bool)
297  {
298  boolNDArray match (idx.dims ());
299  for (octave_idx_type i = 0; i < nval; i++)
300  {
301  octave_idx_type j = idx.xelem (i);
302  match.xelem (i) = j != 0 && str_y(i) == str_table(j-1);
303  }
304 
305  retval = match;
306  }
307  else if (match_idx)
308  {
309  NDArray ridx (idx.dims ());
310  for (octave_idx_type i = 0; i < nval; i++)
311  {
312  octave_idx_type j = idx.xelem (i);
313  ridx.xelem (i) = (j != 0 && str_y(i) == str_table(j-1) ? j : 0);
314  }
315 
316  retval = ridx;
317  }
318  else
319  retval = idx;
320  }
321  else
322  print_usage ();
323 
324  return retval;
325 }
326 
327 /*
328 %!assert (lookup (1:3, 0.5), 0) # value before table
329 %!assert (lookup (1:3, 3.5), 3) # value after table error
330 %!assert (lookup (1:3, 1.5), 1) # value within table error
331 %!assert (lookup (1:3, [3,2,1]), [3,2,1])
332 %!assert (lookup ([1:4]', [1.2, 3.5]'), [1, 3]')
333 %!assert (lookup ([1:4], [1.2, 3.5]'), [1, 3]')
334 %!assert (lookup ([1:4]', [1.2, 3.5]), [1, 3])
335 %!assert (lookup ([1:4], [1.2, 3.5]), [1, 3])
336 %!assert (lookup (1:3, [3, 2, 1]), [3, 2, 1])
337 %!assert (lookup ([3:-1:1], [3.5, 3, 1.2, 2.5, 2.5]), [0, 1, 2, 1, 1])
338 %!assert (isempty (lookup ([1:3], [])))
339 %!assert (isempty (lookup ([1:3]', [])))
340 %!assert (lookup (1:3, [1, 2; 3, 0.5]), [1, 2; 3, 0])
341 %!assert (lookup (1:4, [1, 1.2; 3, 2.5], "m"), [1, 0; 3, 0])
342 %!assert (lookup (4:-1:1, [1, 1.2; 3, 2.5], "m"), [4, 0; 2, 0])
343 %!assert (lookup (1:4, [1, 1.2; 3, 2.5], "b"), logical ([1, 0; 3, 0]))
344 %!assert (lookup (4:-1:1, [1, 1.2; 3, 2.5], "b"), logical ([4, 0; 2, 0]))
345 %!
346 %!assert (lookup ({"apple","lemon","orange"}, {"banana","kiwi"; "ananas","mango"}), [1,1;0,2])
347 %!assert (lookup ({"apple","lemon","orange"}, "potato"), 3)
348 %!assert (lookup ({"orange","lemon","apple"}, "potato"), 0)
349 */
octave_idx_type lookup(const T *x, octave_idx_type n, T y)
charNDArray max(char d, const charNDArray &m)
Definition: chNDArray.cc:230
charNDArray min(char d, const charNDArray &m)
Definition: chNDArray.cc:207
N Dimensional Array with copy-on-write semantics.
Definition: Array.h:128
T & xelem(octave_idx_type n)
Size of the specified dimension.
Definition: Array.h:469
octave_idx_type numel(void) const
Number of elements in the array.
Definition: Array.h:377
octave_idx_type lookup(const T &value, sortmode mode=UNSORTED) const
Do a binary lookup in a sorted array.
Definition: Array.cc:2145
const dim_vector & dims(void) const
Return a const-reference so that dims ()(i) works efficiently.
Definition: Array.h:453
Vector representing the dimensions (size) of an Array.
Definition: dim-vector.h:95
bool iscellstr(void) const
Definition: ov.h:563
bool is_char_matrix(void) const
Definition: ov.h:584
octave_idx_type rows(void) const
Definition: ov.h:504
bool isnumeric(void) const
Definition: ov.h:703
bool is_string(void) const
Definition: ov.h:593
charNDArray char_array_value(bool frc_str_conv=false) const
Definition: ov.h:850
octave_idx_type columns(void) const
Definition: ov.h:506
int ndims(void) const
Definition: ov.h:510
octave_value abs(void) const
Definition: ov.h:1370
std::string string_value(bool force=false) const
Definition: ov.h:927
NDArray array_value(bool frc_str_conv=false) const
Definition: ov.h:812
bool is_single_type(void) const
Definition: ov.h:651
FloatNDArray float_array_value(bool frc_str_conv=false) const
Definition: ov.h:815
Array< std::string > cellstr_value(void) const
Definition: ov.h:935
bool iscomplex(void) const
Definition: ov.h:694
OCTINTERP_API void print_usage(void)
Definition: defun.cc:53
#define DEFUN(name, args_name, nargout_name, doc)
Macro to define a builtin function.
Definition: defun.h:56
void warning(const char *fmt,...)
Definition: error.cc:1050
void error(const char *fmt,...)
Definition: error.cc:968
#define INT_ARRAY_LOOKUP(TYPE)
Definition: lookup.cc:69
static octave_value do_numeric_lookup(const ArrayT &array, const ArrayT &values, bool left_inf, bool right_inf, bool match_idx, bool match_bool)
Definition: lookup.cc:77
static bool contains_char(const std::string &str, char c)
Definition: lookup.cc:46
sortmode get_sort_mode(const Array< T > &array, typename octave_sort< T >::compare_fcn_type desc_comp=octave_sort< T >::descending_compare)
Definition: lookup.cc:54
octave_idx_type n
Definition: mx-inlines.cc:753
sortmode
Definition: oct-sort.h:95
@ ASCENDING
Definition: oct-sort.h:95
@ DESCENDING
Definition: oct-sort.h:95
octave_value::octave_value(const Array< char > &chm, char type) return retval
Definition: ov.cc:811