GNU Octave  9.1.0
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab
lookup.cc
Go to the documentation of this file.
1 ////////////////////////////////////////////////////////////////////////
2 //
3 // Copyright (C) 2008-2024 The Octave Project Developers
4 //
5 // See the file COPYRIGHT.md in the top-level directory of this
6 // distribution or <https://octave.org/copyright/>.
7 //
8 // This file is part of Octave.
9 //
10 // Octave is free software: you can redistribute it and/or modify it
11 // under the terms of the GNU General Public License as published by
12 // the Free Software Foundation, either version 3 of the License, or
13 // (at your option) any later version.
14 //
15 // Octave is distributed in the hope that it will be useful, but
16 // WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 // GNU General Public License for more details.
19 //
20 // You should have received a copy of the GNU General Public License
21 // along with Octave; see the file COPYING. If not, see
22 // <https://www.gnu.org/licenses/>.
23 //
24 ////////////////////////////////////////////////////////////////////////
25 
26 #if defined (HAVE_CONFIG_H)
27 # include "config.h"
28 #endif
29 
30 #include <cctype>
31 #include <functional>
32 #include <algorithm>
33 
34 #include "dNDArray.h"
35 #include "CNDArray.h"
36 
37 #include "Cell.h"
38 #include "defun.h"
39 #include "error.h"
40 #include "errwarn.h"
41 #include "ovl.h"
42 #include "ov.h"
43 
45 
46 static
47 bool
48 contains_char (const std::string& str, char c)
49 {
50  return (str.find (c) != std::string::npos
51  || str.find (std::toupper (c)) != std::string::npos);
52 }
53 
54 template <typename T>
55 inline sortmode
56 get_sort_mode (const Array<T>& array,
57  typename octave_sort<T>::compare_fcn_type desc_comp
59 {
60  octave_idx_type n = array.numel ();
61  if (n > 1 && desc_comp (array (0), array (n-1)))
62  return DESCENDING;
63  else
64  return ASCENDING;
65 }
66 
67 // FIXME: perhaps there should be octave_value::lookup?
68 // The question is, how should it behave w.r.t. the second argument's type.
69 // We'd need a dispatch on two arguments. Hmmm...
70 
71 #define INT_ARRAY_LOOKUP(TYPE) \
72  (table.is_ ## TYPE ## _type () && y.is_ ## TYPE ## _type ()) \
73  retval = do_numeric_lookup (table.TYPE ## _array_value (), \
74  y.TYPE ## _array_value (), \
75  left_inf, right_inf, \
76  match_idx, match_bool);
77 template <typename ArrayT>
78 static octave_value
79 do_numeric_lookup (const ArrayT& array, const ArrayT& values,
80  bool left_inf, bool right_inf,
81  bool match_idx, bool match_bool)
82 {
83  octave_value retval;
84 
85  Array<octave_idx_type> idx = array.lookup (values);
86  octave_idx_type n = array.numel ();
87  octave_idx_type nval = values.numel ();
88 
89  // Post-process.
90  if (match_bool)
91  {
92  boolNDArray match (idx.dims ());
93  for (octave_idx_type i = 0; i < nval; i++)
94  {
95  octave_idx_type j = idx.xelem (i);
96  match.xelem (i) = j != 0 && values(i) == array(j-1);
97  }
98 
99  retval = match;
100  }
101  else if (match_idx || left_inf || right_inf)
102  {
103  if (match_idx)
104  {
105  NDArray ridx (idx.dims ());
106 
107  for (octave_idx_type i = 0; i < nval; i++)
108  {
109  octave_idx_type j = idx.xelem (i);
110  ridx.xelem (i) = (j != 0 && values(i) == array(j-1)) ? j : 0;
111  }
112 
113  retval = ridx;
114  }
115  else if (left_inf && right_inf)
116  {
117  // Results in valid indices. Optimize using lazy index.
118  octave_idx_type zero = 0;
119  for (octave_idx_type i = 0; i < nval; i++)
120  {
121  octave_idx_type j = idx.xelem (i) - 1;
122  idx.xelem (i) = std::max (zero, std::min (j, n-2));
123  }
124 
125  retval = idx_vector (idx);
126  }
127  else if (left_inf)
128  {
129  // Results in valid indices. Optimize using lazy index.
130  octave_idx_type zero = 0;
131  for (octave_idx_type i = 0; i < nval; i++)
132  {
133  octave_idx_type j = idx.xelem (i) - 1;
134  idx.xelem (i) = std::max (zero, j);
135  }
136 
137  retval = idx_vector (idx);
138  }
139  else if (right_inf)
140  {
141  NDArray ridx (idx.dims ());
142 
143  for (octave_idx_type i = 0; i < nval; i++)
144  {
145  octave_idx_type j = idx.xelem (i);
146  ridx.xelem (i) = std::min (j, n-1);
147  }
148 
149  retval = ridx;
150  }
151  }
152  else
153  retval = idx;
154 
155  return retval;
156 }
157 
158 DEFUN (lookup, args, ,
159  doc: /* -*- texinfo -*-
160 @deftypefn {} {@var{idx} =} lookup (@var{table}, @var{y})
161 @deftypefnx {} {@var{idx} =} lookup (@var{table}, @var{y}, @var{opt})
162 Lookup values in a @strong{sorted} table.
163 
164 This function is usually used as a prelude to interpolation.
165 
166 If table is increasing, of length N and @code{idx = lookup (table, y)}, then
167 @code{table(idx(i)) <= y(i) < table(idx(i+1))} for all @code{y(i)} within the
168 table. If @code{y(i) < table(1)} then @code{idx(i)} is 0. If
169 @code{y(i) >= table(end)} or @code{isnan (y(i))} then @code{idx(i)} is N.
170 
171 If the table is decreasing, then the tests are reversed. For non-strictly
172 monotonic tables, empty intervals are always skipped. The result is undefined
173 if @var{table} is not monotonic, or if @var{table} contains a NaN.
174 
175 The complexity of the lookup is O(M*log(N)) where M is the size of @var{y}.
176 In the special case when @var{y} is also sorted, the complexity is
177 O(min (M*log(N), M+N)).
178 
179 @var{table} and @var{y} can also be cell arrays of strings (or @var{y} can be a
180 single string). In this case, string lookup is performed using lexicographical
181 comparison.
182 
183 If @var{opts} is specified, it must be a string with letters indicating
184 additional options.
185 
186 @table @code
187 @item m
188 Match. @code{table(idx(i)) == y(i)} if @code{y(i)} occurs in table;
189 otherwise, @code{idx(i)} is zero.
190 
191 @item b
192 Boolean. @code{idx(i)} is a logical 1 or 0, indicating whether @code{y(i)}
193 is contained in table or not.
194 
195 @item l
196 Left. For numeric lookups the leftmost subinterval shall be extended to
197 minus infinity (i.e., all indices at least 1).
198 
199 @item r
200 Right. For numeric lookups the rightmost subinterval shall be extended to
201 infinity (i.e., all indices at most N-1).
202 @end table
203 
204 @strong{Note}: If @var{table} is not sorted the results from @code{lookup}
205 will be unpredictable.
206 @end deftypefn */)
207 {
208  int nargin = args.length ();
209 
210  if (nargin < 2 || nargin > 3)
211  print_usage ();
212 
213  octave_value table = args(0);
214  octave_value y = args(1);
215  if (table.ndims () > 2 || (table.columns () > 1 && table.rows () > 1))
216  warning ("lookup: table is not a vector");
217 
218  octave_value retval;
219 
220  bool num_case = ((table.isnumeric () && y.isnumeric ())
221  || (table.is_char_matrix () && y.is_char_matrix ()));
222  bool str_case = table.iscellstr () && (y.is_string () || y.iscellstr ());
223  bool left_inf = false;
224  bool right_inf = false;
225  bool match_idx = false;
226  bool match_bool = false;
227 
228  if (nargin == 3)
229  {
230  std::string opt = args(2).xstring_value ("lookup: OPT must be a string");
231  left_inf = contains_char (opt, 'l');
232  right_inf = contains_char (opt, 'r');
233  match_idx = contains_char (opt, 'm');
234  match_bool = contains_char (opt, 'b');
235  if (opt.find_first_not_of ("lrmb") != std::string::npos)
236  error ("lookup: unrecognized option: %c",
237  opt[opt.find_first_not_of ("lrmb")]);
238  }
239 
240  if ((match_idx || match_bool) && (left_inf || right_inf))
241  error ("lookup: m, b cannot be specified with l or r");
242  else if (match_idx && match_bool)
243  error ("lookup: only one of m or b can be specified");
244  else if (str_case && (left_inf || right_inf))
245  error ("lookup: l, r are not recognized for string lookups");
246 
247  if (num_case)
248  {
249  // In the case of a complex array, absolute values will be used for
250  // compatibility (though it's not too meaningful).
251  if (table.iscomplex ())
252  table = table.abs ();
253 
254  if (y.iscomplex ())
255  y = y.abs ();
256 
258 
259  // PS: I learned this from data.cc
260  if INT_ARRAY_LOOKUP (int8)
261  else if INT_ARRAY_LOOKUP (int16)
262  else if INT_ARRAY_LOOKUP (int32)
263  else if INT_ARRAY_LOOKUP (int64)
264  else if INT_ARRAY_LOOKUP (uint8)
265  else if INT_ARRAY_LOOKUP (uint16)
266  else if INT_ARRAY_LOOKUP (uint32)
267  else if INT_ARRAY_LOOKUP (uint64)
268  else if (table.is_char_matrix () && y.is_char_matrix ())
269  retval = do_numeric_lookup (table.char_array_value (),
270  y.char_array_value (),
271  left_inf, right_inf,
272  match_idx, match_bool);
273  else if (table.is_single_type () || y.is_single_type ())
274  retval = do_numeric_lookup (table.float_array_value (),
275  y.float_array_value (),
276  left_inf, right_inf,
277  match_idx, match_bool);
278  else
279  retval = do_numeric_lookup (table.array_value (),
280  y.array_value (),
281  left_inf, right_inf,
282  match_idx, match_bool);
283  }
284  else if (str_case)
285  {
286  Array<std::string> str_table = table.cellstr_value ();
287  Array<std::string> str_y (dim_vector (1, 1));
288 
289  if (y.iscellstr ())
290  str_y = y.cellstr_value ();
291  else
292  str_y(0) = y.string_value ();
293 
294  Array<octave_idx_type> idx = str_table.lookup (str_y);
295  octave_idx_type nval = str_y.numel ();
296 
297  // Post-process.
298  if (match_bool)
299  {
300  boolNDArray match (idx.dims ());
301  for (octave_idx_type i = 0; i < nval; i++)
302  {
303  octave_idx_type j = idx.xelem (i);
304  match.xelem (i) = j != 0 && str_y(i) == str_table(j-1);
305  }
306 
307  retval = match;
308  }
309  else if (match_idx)
310  {
311  NDArray ridx (idx.dims ());
312  for (octave_idx_type i = 0; i < nval; i++)
313  {
314  octave_idx_type j = idx.xelem (i);
315  ridx.xelem (i) = (j != 0 && str_y(i) == str_table(j-1) ? j : 0);
316  }
317 
318  retval = ridx;
319  }
320  else
321  retval = idx;
322  }
323  else
324  print_usage ();
325 
326  return retval;
327 }
328 
329 /*
330 %!assert (lookup (1:3, 0.5), 0) # value before table
331 %!assert (lookup (1:3, 3.5), 3) # value after table error
332 %!assert (lookup (1:3, 1.5), 1) # value within table error
333 %!assert (lookup (1:3, [3,2,1]), [3,2,1])
334 %!assert (lookup ([1:4]', [1.2, 3.5]'), [1, 3]')
335 %!assert (lookup ([1:4], [1.2, 3.5]'), [1, 3]')
336 %!assert (lookup ([1:4]', [1.2, 3.5]), [1, 3])
337 %!assert (lookup ([1:4], [1.2, 3.5]), [1, 3])
338 %!assert (lookup (1:3, [3, 2, 1]), [3, 2, 1])
339 %!assert (lookup ([3:-1:1], [3.5, 3, 1.2, 2.5, 2.5]), [0, 1, 2, 1, 1])
340 %!assert (isempty (lookup ([1:3], [])))
341 %!assert (isempty (lookup ([1:3]', [])))
342 %!assert (lookup (1:3, [1, 2; 3, 0.5]), [1, 2; 3, 0])
343 %!assert (lookup (1:4, [1, 1.2; 3, 2.5], "m"), [1, 0; 3, 0])
344 %!assert (lookup (4:-1:1, [1, 1.2; 3, 2.5], "m"), [4, 0; 2, 0])
345 %!assert (lookup (1:4, [1, 1.2; 3, 2.5], "b"), logical ([1, 0; 3, 0]))
346 %!assert (lookup (4:-1:1, [1, 1.2; 3, 2.5], "b"), logical ([4, 0; 2, 0]))
347 %!
348 %!assert (lookup ({"apple","lemon","orange"}, {"banana","kiwi"; "ananas","mango"}), [1,1;0,2])
349 %!assert (lookup ({"apple","lemon","orange"}, "potato"), 3)
350 %!assert (lookup ({"orange","lemon","apple"}, "potato"), 0)
351 */
352 
353 OCTAVE_END_NAMESPACE(octave)
octave_idx_type lookup(const T *x, octave_idx_type n, T y)
charNDArray max(char d, const charNDArray &m)
Definition: chNDArray.cc:230
charNDArray min(char d, const charNDArray &m)
Definition: chNDArray.cc:207
const dim_vector & dims() const
Return a const-reference so that dims ()(i) works efficiently.
Definition: Array.h:503
T & xelem(octave_idx_type n)
Size of the specified dimension.
Definition: Array.h:524
octave_idx_type lookup(const T &value, sortmode mode=UNSORTED) const
Do a binary lookup in a sorted array.
Definition: Array-base.cc:2158
octave_idx_type numel() const
Number of elements in the array.
Definition: Array.h:414
Vector representing the dimensions (size) of an Array.
Definition: dim-vector.h:94
std::function< bool(typename ref_param< T >::type, typename ref_param< T >::type)> compare_fcn_type
Definition: oct-sort.h:107
int ndims() const
Definition: ov.h:551
octave_idx_type rows() const
Definition: ov.h:545
bool is_string() const
Definition: ov.h:637
bool isnumeric() const
Definition: ov.h:750
bool is_single_type() const
Definition: ov.h:698
charNDArray char_array_value(bool frc_str_conv=false) const
Definition: ov.h:897
bool is_char_matrix() const
Definition: ov.h:628
std::string string_value(bool force=false) const
Definition: ov.h:974
bool iscomplex() const
Definition: ov.h:741
NDArray array_value(bool frc_str_conv=false) const
Definition: ov.h:859
bool iscellstr() const
Definition: ov.h:607
FloatNDArray float_array_value(bool frc_str_conv=false) const
Definition: ov.h:862
octave_idx_type columns() const
Definition: ov.h:547
octave_value abs() const
Definition: ov.h:1447
Array< std::string > cellstr_value() const
Definition: ov.h:982
OCTAVE_BEGIN_NAMESPACE(octave) static octave_value daspk_fcn
void print_usage(void)
Definition: defun-int.h:72
#define DEFUN(name, args_name, nargout_name, doc)
Macro to define a builtin function.
Definition: defun.h:56
void warning(const char *fmt,...)
Definition: error.cc:1063
void() error(const char *fmt,...)
Definition: error.cc:988
octave::idx_vector idx_vector
Definition: idx-vector.h:1022
#define INT_ARRAY_LOOKUP(TYPE)
Definition: lookup.cc:71
sortmode get_sort_mode(const Array< T > &array, typename octave_sort< T >::compare_fcn_type desc_comp=octave_sort< T >::descending_compare)
Definition: lookup.cc:56
octave_idx_type n
Definition: mx-inlines.cc:761
sortmode
Definition: oct-sort.h:97
@ ASCENDING
Definition: oct-sort.h:97
@ DESCENDING
Definition: oct-sort.h:97