GNU Octave  9.1.0
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
lookup.cc
Go to the documentation of this file.
1 ////////////////////////////////////////////////////////////////////////
2 //
3 // Copyright (C) 2008-2024 The Octave Project Developers
4 //
5 // See the file COPYRIGHT.md in the top-level directory of this
6 // distribution or <https://octave.org/copyright/>.
7 //
8 // This file is part of Octave.
9 //
10 // Octave is free software: you can redistribute it and/or modify it
11 // under the terms of the GNU General Public License as published by
12 // the Free Software Foundation, either version 3 of the License, or
13 // (at your option) any later version.
14 //
15 // Octave is distributed in the hope that it will be useful, but
16 // WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 // GNU General Public License for more details.
19 //
20 // You should have received a copy of the GNU General Public License
21 // along with Octave; see the file COPYING. If not, see
22 // <https://www.gnu.org/licenses/>.
23 //
24 ////////////////////////////////////////////////////////////////////////
25 
26 #if defined (HAVE_CONFIG_H)
27 # include "config.h"
28 #endif
29 
30 #include <cctype>
31 #include <functional>
32 #include <algorithm>
33 
34 #include "dNDArray.h"
35 #include "CNDArray.h"
36 
37 #include "Cell.h"
38 #include "defun.h"
39 #include "error.h"
40 #include "errwarn.h"
41 #include "ovl.h"
42 #include "ov.h"
43 
45 
46 static
47 bool
48 contains_char (const std::string& str, char c)
49 {
50  return (str.find (c) != std::string::npos
51  || str.find (std::toupper (c)) != std::string::npos);
52 }
53 
54 template <typename T>
55 inline sortmode
56 get_sort_mode (const Array<T>& array,
57  typename octave_sort<T>::compare_fcn_type desc_comp
59 {
60  octave_idx_type n = array.numel ();
61  if (n > 1 && desc_comp (array (0), array (n-1)))
62  return DESCENDING;
63  else
64  return ASCENDING;
65 }
66 
67 // FIXME: perhaps there should be octave_value::lookup?
68 // The question is, how should it behave w.r.t. the second argument's type.
69 // We'd need a dispatch on two arguments. Hmmm...
70 
71 #define INT_ARRAY_LOOKUP(TYPE) \
72  (table.is_ ## TYPE ## _type () && y.is_ ## TYPE ## _type ()) \
73  retval = do_numeric_lookup (table.TYPE ## _array_value (), \
74  y.TYPE ## _array_value (), \
75  left_inf, right_inf, \
76  match_idx, match_bool);
77 template <typename ArrayT>
78 static octave_value
79 do_numeric_lookup (const ArrayT& array, const ArrayT& values,
80  bool left_inf, bool right_inf,
81  bool match_idx, bool match_bool)
82 {
83  octave_value retval;
84 
85  Array<octave_idx_type> idx = array.lookup (values);
86  octave_idx_type n = array.numel ();
87  octave_idx_type nval = values.numel ();
88 
89  // Post-process.
90  if (match_bool)
91  {
92  boolNDArray match (idx.dims ());
93  for (octave_idx_type i = 0; i < nval; i++)
94  {
95  octave_idx_type j = idx.xelem (i);
96  match.xelem (i) = j != 0 && values(i) == array(j-1);
97  }
98 
99  retval = match;
100  }
101  else if (match_idx || left_inf || right_inf)
102  {
103  if (match_idx)
104  {
105  NDArray ridx (idx.dims ());
106 
107  for (octave_idx_type i = 0; i < nval; i++)
108  {
109  octave_idx_type j = idx.xelem (i);
110  ridx.xelem (i) = (j != 0 && values(i) == array(j-1)) ? j : 0;
111  }
112 
113  retval = ridx;
114  }
115  else if (left_inf && right_inf)
116  {
117  // Results in valid indices. Optimize using lazy index.
118  octave_idx_type zero = 0;
119  for (octave_idx_type i = 0; i < nval; i++)
120  {
121  octave_idx_type j = idx.xelem (i) - 1;
122  idx.xelem (i) = std::max (zero, std::min (j, n-2));
123  }
124 
125  retval = idx_vector (idx);
126  }
127  else if (left_inf)
128  {
129  // Results in valid indices. Optimize using lazy index.
130  octave_idx_type zero = 0;
131  for (octave_idx_type i = 0; i < nval; i++)
132  {
133  octave_idx_type j = idx.xelem (i) - 1;
134  idx.xelem (i) = std::max (zero, j);
135  }
136 
137  retval = idx_vector (idx);
138  }
139  else if (right_inf)
140  {
141  NDArray ridx (idx.dims ());
142 
143  for (octave_idx_type i = 0; i < nval; i++)
144  {
145  octave_idx_type j = idx.xelem (i);
146  ridx.xelem (i) = std::min (j, n-1);
147  }
148 
149  retval = ridx;
150  }
151  }
152  else
153  retval = idx;
154 
155  return retval;
156 }
157 
158 DEFUN (lookup, args, ,
159  doc: /* -*- texinfo -*-
160 @deftypefn {} {@var{idx} =} lookup (@var{table}, @var{y})
161 @deftypefnx {} {@var{idx} =} lookup (@var{table}, @var{y}, @var{opt})
162 Lookup values in a @strong{sorted} table.
163 
164 This function is usually used as a prelude to interpolation.
165 
166 If table is increasing, of length N and @code{idx = lookup (table, y)}, then
167 @code{table(idx(i)) <= y(i) < table(idx(i+1))} for all @code{y(i)} within the
168 table. If @code{y(i) < table(1)} then @code{idx(i)} is 0. If
169 @code{y(i) >= table(end)} or @code{isnan (y(i))} then @code{idx(i)} is N.
170 
171 If the table is decreasing, then the tests are reversed. For non-strictly
172 monotonic tables, empty intervals are always skipped. The result is undefined
173 if @var{table} is not monotonic, or if @var{table} contains a NaN.
174 
175 The complexity of the lookup is O(M*log(N)) where M is the size of @var{y}.
176 In the special case when @var{y} is also sorted, the complexity is
177 O(min (M*log(N), M+N)).
178 
179 @var{table} and @var{y} can also be cell arrays of strings (or @var{y} can be a
180 single string). In this case, string lookup is performed using lexicographical
181 comparison.
182 
183 If @var{opts} is specified, it must be a string with letters indicating
184 additional options.
185 
186 @table @code
187 @item m
188 Match. @code{table(idx(i)) == y(i)} if @code{y(i)} occurs in table;
189 otherwise, @code{idx(i)} is zero.
190 
191 @item b
192 Boolean. @code{idx(i)} is a logical 1 or 0, indicating whether @code{y(i)}
193 is contained in table or not.
194 
195 @item l
196 Left. For numeric lookups the leftmost subinterval shall be extended to
197 minus infinity (i.e., all indices at least 1).
198 
199 @item r
200 Right. For numeric lookups the rightmost subinterval shall be extended to
201 infinity (i.e., all indices at most N-1).
202 @end table
203 
204 @strong{Note}: If @var{table} is not sorted the results from @code{lookup}
205 will be unpredictable.
206 @end deftypefn */)
207 {
208  int nargin = args.length ();
209 
210  if (nargin < 2 || nargin > 3)
211  print_usage ();
212 
213  octave_value table = args(0);
214  octave_value y = args(1);
215  if (table.ndims () > 2 || (table.columns () > 1 && table.rows () > 1))
216  warning ("lookup: table is not a vector");
217 
218  octave_value retval;
219 
220  bool num_case = ((table.isnumeric () && y.isnumeric ())
221  || (table.is_char_matrix () && y.is_char_matrix ()));
222  bool str_case = table.iscellstr () && (y.is_string () || y.iscellstr ());
223  bool left_inf = false;
224  bool right_inf = false;
225  bool match_idx = false;
226  bool match_bool = false;
227 
228  if (nargin == 3)
229  {
230  std::string opt = args(2).xstring_value ("lookup: OPT must be a string");
231  left_inf = contains_char (opt, 'l');
232  right_inf = contains_char (opt, 'r');
233  match_idx = contains_char (opt, 'm');
234  match_bool = contains_char (opt, 'b');
235  if (opt.find_first_not_of ("lrmb") != std::string::npos)
236  error ("lookup: unrecognized option: %c",
237  opt[opt.find_first_not_of ("lrmb")]);
238  }
239 
240  if ((match_idx || match_bool) && (left_inf || right_inf))
241  error ("lookup: m, b cannot be specified with l or r");
242  else if (match_idx && match_bool)
243  error ("lookup: only one of m or b can be specified");
244  else if (str_case && (left_inf || right_inf))
245  error ("lookup: l, r are not recognized for string lookups");
246 
247  if (num_case)
248  {
249  // In the case of a complex array, absolute values will be used for
250  // compatibility (though it's not too meaningful).
251  if (table.iscomplex ())
252  table = table.abs ();
253 
254  if (y.iscomplex ())
255  y = y.abs ();
256 
258 
259  // PS: I learned this from data.cc
260  if INT_ARRAY_LOOKUP (int8)
261  else if INT_ARRAY_LOOKUP (int16)
262  else if INT_ARRAY_LOOKUP (int32)
263  else if INT_ARRAY_LOOKUP (int64)
264  else if INT_ARRAY_LOOKUP (uint8)
265  else if INT_ARRAY_LOOKUP (uint16)
266  else if INT_ARRAY_LOOKUP (uint32)
267  else if INT_ARRAY_LOOKUP (uint64)
268  else if (table.is_char_matrix () && y.is_char_matrix ())
269  retval = do_numeric_lookup (table.char_array_value (),
270  y.char_array_value (),
271  left_inf, right_inf,
272  match_idx, match_bool);
273  else if (table.is_single_type () || y.is_single_type ())
274  retval = do_numeric_lookup (table.float_array_value (),
275  y.float_array_value (),
276  left_inf, right_inf,
277  match_idx, match_bool);
278  else
279  retval = do_numeric_lookup (table.array_value (),
280  y.array_value (),
281  left_inf, right_inf,
282  match_idx, match_bool);
283  }
284  else if (str_case)
285  {
286  Array<std::string> str_table = table.cellstr_value ();
287  Array<std::string> str_y (dim_vector (1, 1));
288 
289  if (y.iscellstr ())
290  str_y = y.cellstr_value ();
291  else
292  str_y(0) = y.string_value ();
293 
294  Array<octave_idx_type> idx = str_table.lookup (str_y);
295  octave_idx_type nval = str_y.numel ();
296 
297  // Post-process.
298  if (match_bool)
299  {
300  boolNDArray match (idx.dims ());
301  for (octave_idx_type i = 0; i < nval; i++)
302  {
303  octave_idx_type j = idx.xelem (i);
304  match.xelem (i) = j != 0 && str_y(i) == str_table(j-1);
305  }
306 
307  retval = match;
308  }
309  else if (match_idx)
310  {
311  NDArray ridx (idx.dims ());
312  for (octave_idx_type i = 0; i < nval; i++)
313  {
314  octave_idx_type j = idx.xelem (i);
315  ridx.xelem (i) = (j != 0 && str_y(i) == str_table(j-1) ? j : 0);
316  }
317 
318  retval = ridx;
319  }
320  else
321  retval = idx;
322  }
323  else
324  print_usage ();
325 
326  return retval;
327 }
328 
329 /*
330 %!assert (lookup (1:3, 0.5), 0) # value before table
331 %!assert (lookup (1:3, 3.5), 3) # value after table error
332 %!assert (lookup (1:3, 1.5), 1) # value within table error
333 %!assert (lookup (1:3, [3,2,1]), [3,2,1])
334 %!assert (lookup ([1:4]', [1.2, 3.5]'), [1, 3]')
335 %!assert (lookup ([1:4], [1.2, 3.5]'), [1, 3]')
336 %!assert (lookup ([1:4]', [1.2, 3.5]), [1, 3])
337 %!assert (lookup ([1:4], [1.2, 3.5]), [1, 3])
338 %!assert (lookup (1:3, [3, 2, 1]), [3, 2, 1])
339 %!assert (lookup ([3:-1:1], [3.5, 3, 1.2, 2.5, 2.5]), [0, 1, 2, 1, 1])
340 %!assert (isempty (lookup ([1:3], [])))
341 %!assert (isempty (lookup ([1:3]', [])))
342 %!assert (lookup (1:3, [1, 2; 3, 0.5]), [1, 2; 3, 0])
343 %!assert (lookup (1:4, [1, 1.2; 3, 2.5], "m"), [1, 0; 3, 0])
344 %!assert (lookup (4:-1:1, [1, 1.2; 3, 2.5], "m"), [4, 0; 2, 0])
345 %!assert (lookup (1:4, [1, 1.2; 3, 2.5], "b"), logical ([1, 0; 3, 0]))
346 %!assert (lookup (4:-1:1, [1, 1.2; 3, 2.5], "b"), logical ([4, 0; 2, 0]))
347 %!
348 %!assert (lookup ({"apple","lemon","orange"}, {"banana","kiwi"; "ananas","mango"}), [1,1;0,2])
349 %!assert (lookup ({"apple","lemon","orange"}, "potato"), 3)
350 %!assert (lookup ({"orange","lemon","apple"}, "potato"), 0)
351 */
352 
353 OCTAVE_END_NAMESPACE(octave)
octave_idx_type lookup(const T *x, octave_idx_type n, T y)
charNDArray max(char d, const charNDArray &m)
Definition: chNDArray.cc:230
charNDArray min(char d, const charNDArray &m)
Definition: chNDArray.cc:207
const dim_vector & dims() const
Return a const-reference so that dims ()(i) works efficiently.
Definition: Array.h:503
T & xelem(octave_idx_type n)
Size of the specified dimension.
Definition: Array.h:524
octave_idx_type lookup(const T &value, sortmode mode=UNSORTED) const
Do a binary lookup in a sorted array.
Definition: Array-base.cc:2158
octave_idx_type numel() const
Number of elements in the array.
Definition: Array.h:414
Vector representing the dimensions (size) of an Array.
Definition: dim-vector.h:94
std::function< bool(typename ref_param< T >::type, typename ref_param< T >::type)> compare_fcn_type
Definition: oct-sort.h:107
int ndims() const
Definition: ov.h:551
octave_idx_type rows() const
Definition: ov.h:545
bool is_string() const
Definition: ov.h:637
bool isnumeric() const
Definition: ov.h:750
bool is_single_type() const
Definition: ov.h:698
charNDArray char_array_value(bool frc_str_conv=false) const
Definition: ov.h:897
bool is_char_matrix() const
Definition: ov.h:628
std::string string_value(bool force=false) const
Definition: ov.h:974
bool iscomplex() const
Definition: ov.h:741
NDArray array_value(bool frc_str_conv=false) const
Definition: ov.h:859
bool iscellstr() const
Definition: ov.h:607
FloatNDArray float_array_value(bool frc_str_conv=false) const
Definition: ov.h:862
octave_idx_type columns() const
Definition: ov.h:547
octave_value abs() const
Definition: ov.h:1447
Array< std::string > cellstr_value() const
Definition: ov.h:982
OCTAVE_BEGIN_NAMESPACE(octave) static octave_value daspk_fcn
void print_usage(void)
Definition: defun-int.h:72
#define DEFUN(name, args_name, nargout_name, doc)
Macro to define a builtin function.
Definition: defun.h:56
void warning(const char *fmt,...)
Definition: error.cc:1063
void() error(const char *fmt,...)
Definition: error.cc:988
octave::idx_vector idx_vector
Definition: idx-vector.h:1022
#define INT_ARRAY_LOOKUP(TYPE)
Definition: lookup.cc:71
sortmode get_sort_mode(const Array< T > &array, typename octave_sort< T >::compare_fcn_type desc_comp=octave_sort< T >::descending_compare)
Definition: lookup.cc:56
octave_idx_type n
Definition: mx-inlines.cc:761
sortmode
Definition: oct-sort.h:97
@ ASCENDING
Definition: oct-sort.h:97
@ DESCENDING
Definition: oct-sort.h:97