lookup.cc

Go to the documentation of this file.
00001 /*
00002 
00003 Copyright (C) 2008-2012 VZLU Prague a.s., Czech Republic
00004 
00005 This file is part of Octave.
00006 
00007 Octave is free software; you can redistribute it and/or modify it
00008 under the terms of the GNU General Public License as published by
00009 the Free Software Foundation; either version 3 of the License, or (at
00010 your option) any later version.
00011 
00012 Octave is distributed in the hope that it will be useful, but
00013 WITHOUT ANY WARRANTY; without even the implied warranty of
00014 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015 General Public License for more details.
00016 
00017 You should have received a copy of the GNU General Public License
00018 along with Octave; see the file COPYING.  If not, see
00019 <http://www.gnu.org/licenses/>.
00020 
00021 */
00022 
00023 // Author: Jaroslav Hajek <highegg@gmail.com>
00024 
00025 #ifdef HAVE_CONFIG_H
00026 #include <config.h>
00027 #endif
00028 
00029 #include <cctype>
00030 #include <functional>
00031 #include <algorithm>
00032 
00033 #include "dNDArray.h"
00034 #include "CNDArray.h"
00035 
00036 #include "Cell.h"
00037 #include "defun-dld.h"
00038 #include "error.h"
00039 #include "gripes.h"
00040 #include "oct-obj.h"
00041 #include "ov.h"
00042 
00043 static
00044 bool
00045 contains_char (const std::string& str, char c)
00046 {
00047   return (str.find (c) != std::string::npos
00048           || str.find (std::toupper (c)) != std::string::npos);
00049 }
00050 
00051 // case-insensitive character comparison functors
00052 struct icmp_char_lt : public std::binary_function<char, char, bool>
00053 {
00054   bool operator () (char x, char y) const
00055     { return std::toupper (x) < std::toupper (y); }
00056 };
00057 
00058 struct icmp_char_gt : public std::binary_function<char, char, bool>
00059 {
00060   bool operator () (char x, char y) const
00061     { return std::toupper (x) > std::toupper (y); }
00062 };
00063 
00064 // FIXME -- maybe these should go elsewhere?
00065 // FIXME -- are they even needed now?
00066 // case-insensitive ascending comparator
00067 #if 0
00068 static bool
00069 stri_comp_lt (const std::string& a, const std::string& b)
00070 {
00071   return std::lexicographical_compare (a.begin (), a.end (),
00072                                        b.begin (), b.end (),
00073                                        icmp_char_lt());
00074 }
00075 
00076 // case-insensitive descending comparator
00077 static bool
00078 stri_comp_gt (const std::string& a, const std::string& b)
00079 {
00080   return std::lexicographical_compare (a.begin (), a.end (),
00081                                        b.begin (), b.end (),
00082                                        icmp_char_gt());
00083 }
00084 #endif
00085 
00086 template <class T>
00087 inline sortmode
00088 get_sort_mode (const Array<T>& array,
00089                typename octave_sort<T>::compare_fcn_type desc_comp
00090                = octave_sort<T>::descending_compare)
00091 {
00092   octave_idx_type n = array.numel ();
00093   if (n > 1 && desc_comp (array (0), array (n-1)))
00094     return DESCENDING;
00095   else
00096     return ASCENDING;
00097 }
00098 
00099 // FIXME: perhaps there should be octave_value::lookup?
00100 // The question is, how should it behave w.r.t. the second argument's type.
00101 // We'd need a dispatch on two arguments. Hmmm...
00102 
00103 #define INT_ARRAY_LOOKUP(TYPE) \
00104   (table.is_ ## TYPE ## _type () && y.is_ ## TYPE ## _type ()) \
00105     retval = do_numeric_lookup (table.TYPE ## _array_value (), \
00106                                 y.TYPE ## _array_value (), \
00107                                 left_inf, right_inf, \
00108                                 match_idx, match_bool);
00109 template <class ArrayT>
00110 static octave_value
00111 do_numeric_lookup (const ArrayT& array, const ArrayT& values,
00112                    bool left_inf, bool right_inf,
00113                    bool match_idx, bool match_bool)
00114 {
00115   octave_value retval;
00116 
00117   Array<octave_idx_type> idx = array.lookup (values);
00118   octave_idx_type n = array.numel (), nval = values.numel ();
00119 
00120   // Post-process.
00121   if (match_bool)
00122     {
00123       boolNDArray match (idx.dims ());
00124       for (octave_idx_type i = 0; i < nval; i++)
00125         {
00126           octave_idx_type j = idx.xelem (i);
00127           match.xelem (i) = j != 0 && values(i) == array(j-1);
00128         }
00129 
00130       retval = match;
00131     }
00132   else if (match_idx || left_inf || right_inf)
00133     {
00134       if (match_idx)
00135         {
00136           NDArray ridx (idx.dims ());
00137 
00138           for (octave_idx_type i = 0; i < nval; i++)
00139             {
00140               octave_idx_type j = idx.xelem (i);
00141               ridx.xelem (i) = (j != 0 && values(i) == array(j-1)) ? j : 0;
00142             }
00143 
00144           retval = ridx;
00145         }
00146       else if (left_inf && right_inf)
00147         {
00148           // Results in valid indices. Optimize using lazy index.
00149           octave_idx_type zero = 0;
00150           for (octave_idx_type i = 0; i < nval; i++)
00151             {
00152               octave_idx_type j = idx.xelem (i) - 1;
00153               idx.xelem (i) = std::max (zero, std::min (j, n-2));
00154             }
00155 
00156           retval = idx_vector (idx);
00157         }
00158       else if (left_inf)
00159         {
00160           // Results in valid indices. Optimize using lazy index.
00161           octave_idx_type zero = 0;
00162           for (octave_idx_type i = 0; i < nval; i++)
00163             {
00164               octave_idx_type j = idx.xelem (i) - 1;
00165               idx.xelem (i) = std::max (zero, j);
00166             }
00167 
00168           retval = idx_vector (idx);
00169         }
00170       else if (right_inf)
00171         {
00172           NDArray ridx (idx.dims ());
00173 
00174           for (octave_idx_type i = 0; i < nval; i++)
00175             {
00176               octave_idx_type j = idx.xelem (i);
00177               ridx.xelem (i) = std::min (j, n-1);
00178             }
00179 
00180           retval = ridx;
00181         }
00182     }
00183   else
00184     retval = idx;
00185 
00186   return retval;
00187 }
00188 
00189 DEFUN_DLD (lookup, args, ,
00190   "-*- texinfo -*-\n\
00191 @deftypefn  {Loadable Function} {@var{idx} =} lookup (@var{table}, @var{y})\n\
00192 @deftypefnx {Loadable Function} {@var{idx} =} lookup (@var{table}, @var{y}, @var{opt})\n\
00193 Lookup values in a sorted table.  Usually used as a prelude to\n\
00194 interpolation.\n\
00195 \n\
00196 If table is increasing and @code{idx = lookup (table, y)}, then\n\
00197 @code{table(idx(i)) <= y(i) < table(idx(i+1))} for all @code{y(i)}\n\
00198 within the table.  If @code{y(i) < table(1)} then\n\
00199 @code{idx(i)} is 0. If @code{y(i) >= table(end)} or @code{isnan (y(i))} then\n\
00200 @code{idx(i)} is @code{n}.\n\
00201 \n\
00202 If the table is decreasing, then the tests are reversed.\n\
00203 For non-strictly monotonic tables, empty intervals are always skipped.\n\
00204 The result is undefined if @var{table} is not monotonic, or if\n\
00205 @var{table} contains a NaN.\n\
00206 \n\
00207 The complexity of the lookup is O(M*log(N)) where N is the size of\n\
00208 @var{table} and M is the size of @var{y}.  In the special case when @var{y}\n\
00209 is also sorted, the complexity is O(min(M*log(N),M+N)).\n\
00210 \n\
00211 @var{table} and @var{y} can also be cell arrays of strings\n\
00212 (or @var{y} can be a single string).  In this case, string lookup\n\
00213 is performed using lexicographical comparison.\n\
00214 \n\
00215 If @var{opts} is specified, it must be a string with letters indicating\n\
00216 additional options.\n\
00217 \n\
00218 @table @code\n\
00219 @item m\n\
00220 @code{table(idx(i)) == val(i)} if @code{val(i)}\n\
00221 occurs in table; otherwise, @code{idx(i)} is zero.\n\
00222 \n\
00223 @item b\n\
00224 @code{idx(i)} is a logical 1 or 0, indicating whether\n\
00225 @code{val(i)} is contained in table or not.\n\
00226 \n\
00227 @item l\n\
00228 For numeric lookups\n\
00229 the leftmost subinterval shall be extended to infinity (i.e., all indices\n\
00230 at least 1)\n\
00231 \n\
00232 @item r\n\
00233 For numeric lookups\n\
00234 the rightmost subinterval shall be extended to infinity (i.e., all indices\n\
00235 at most n-1).\n\
00236 @end table\n\
00237 @end deftypefn")
00238 {
00239   octave_value retval;
00240 
00241   int nargin = args.length ();
00242 
00243   if (nargin < 2 || nargin > 3 || (nargin == 3 && ! args(2).is_string ()))
00244     {
00245       print_usage ();
00246       return retval;
00247     }
00248 
00249   octave_value table = args(0), y = args(1);
00250   if (table.ndims () > 2 || (table.columns () > 1 && table.rows () > 1))
00251     warning ("lookup: table is not a vector");
00252 
00253   bool num_case = ((table.is_numeric_type () && y.is_numeric_type ())
00254                    || (table.is_char_matrix () && y.is_char_matrix ()));
00255   bool str_case = table.is_cellstr () && (y.is_string () || y.is_cellstr ());
00256   bool left_inf = false;
00257   bool right_inf = false;
00258   bool match_idx = false;
00259   bool match_bool = false;
00260 
00261   if (nargin == 3)
00262     {
00263       std::string opt = args(2).string_value ();
00264       left_inf = contains_char (opt, 'l');
00265       right_inf = contains_char (opt, 'r');
00266       match_idx = contains_char (opt, 'm');
00267       match_bool = contains_char (opt, 'b');
00268       if (opt.find_first_not_of ("lrmb") != std::string::npos)
00269         {
00270           error ("lookup: unrecognized option: %c",
00271                  opt[opt.find_first_not_of ("lrmb")]);
00272           return retval;
00273         }
00274     }
00275 
00276   if ((match_idx || match_bool) && (left_inf || right_inf))
00277     error ("lookup: m, b cannot be specified with l or r");
00278   else if (match_idx && match_bool)
00279     error ("lookup: only one of m or b can be specified");
00280   else if (str_case && (left_inf || right_inf))
00281     error ("lookup: l, r are not recognized for string lookups");
00282 
00283   if (error_state)
00284     return retval;
00285 
00286   if (num_case)
00287     {
00288 
00289       // In the case of a complex array, absolute values will be used for compatibility
00290       // (though it's not too meaningful).
00291 
00292       if (table.is_complex_type ())
00293         table = table.abs ();
00294 
00295       if (y.is_complex_type ())
00296         y = y.abs ();
00297 
00298       Array<octave_idx_type> idx;
00299 
00300       // PS: I learned this from data.cc
00301       if INT_ARRAY_LOOKUP (int8)
00302       else if INT_ARRAY_LOOKUP (int16)
00303       else if INT_ARRAY_LOOKUP (int32)
00304       else if INT_ARRAY_LOOKUP (int64)
00305       else if INT_ARRAY_LOOKUP (uint8)
00306       else if INT_ARRAY_LOOKUP (uint16)
00307       else if INT_ARRAY_LOOKUP (uint32)
00308       else if INT_ARRAY_LOOKUP (uint64)
00309       else if (table.is_char_matrix () && y.is_char_matrix ())
00310         retval = do_numeric_lookup (table.char_array_value (),
00311                                     y.char_array_value (),
00312                                     left_inf, right_inf,
00313                                     match_idx, match_bool);
00314       else if (table.is_single_type () || y.is_single_type ())
00315         retval = do_numeric_lookup (table.float_array_value (),
00316                                     y.float_array_value (),
00317                                     left_inf, right_inf,
00318                                     match_idx, match_bool);
00319       else
00320         retval = do_numeric_lookup (table.array_value (),
00321                                     y.array_value (),
00322                                     left_inf, right_inf,
00323                                     match_idx, match_bool);
00324 
00325     }
00326   else if (str_case)
00327     {
00328       Array<std::string> str_table = table.cellstr_value ();
00329       Array<std::string> str_y (dim_vector (1, 1));
00330 
00331       if (y.is_cellstr ())
00332         str_y = y.cellstr_value ();
00333       else
00334         str_y(0) = y.string_value ();
00335 
00336       Array<octave_idx_type> idx = str_table.lookup (str_y);
00337       octave_idx_type nval = str_y.numel ();
00338 
00339       // Post-process.
00340       if (match_bool)
00341         {
00342           boolNDArray match (idx.dims ());
00343           for (octave_idx_type i = 0; i < nval; i++)
00344             {
00345               octave_idx_type j = idx.xelem (i);
00346               match.xelem (i) = j != 0 && str_y(i) == str_table(j-1);
00347             }
00348 
00349           retval = match;
00350         }
00351       else if (match_idx)
00352         {
00353           NDArray ridx (idx.dims ());
00354           if (match_idx)
00355             {
00356               for (octave_idx_type i = 0; i < nval; i++)
00357                 {
00358                   octave_idx_type j = idx.xelem (i);
00359                   ridx.xelem (i) = (j != 0 && str_y(i) == str_table(j-1)) ? j : 0;
00360                 }
00361             }
00362 
00363           retval = ridx;
00364         }
00365       else
00366         retval = idx;
00367     }
00368   else
00369     print_usage ();
00370 
00371   return retval;
00372 
00373 }
00374 
00375 /*
00376 %!assert (lookup(1:3, 0.5), 0)     # value before table
00377 %!assert (lookup(1:3, 3.5), 3)     # value after table error
00378 %!assert (lookup(1:3, 1.5), 1)     # value within table error
00379 %!assert (lookup(1:3, [3,2,1]), [3,2,1])
00380 %!assert (lookup([1:4]', [1.2, 3.5]'), [1, 3]');
00381 %!assert (lookup([1:4], [1.2, 3.5]'), [1, 3]');
00382 %!assert (lookup([1:4]', [1.2, 3.5]), [1, 3]);
00383 %!assert (lookup([1:4], [1.2, 3.5]), [1, 3]);
00384 %!assert (lookup(1:3, [3, 2, 1]), [3, 2, 1]);
00385 %!assert (lookup([3:-1:1], [3.5, 3, 1.2, 2.5, 2.5]), [0, 1, 2, 1, 1])
00386 %!assert (isempty(lookup([1:3], [])))
00387 %!assert (isempty(lookup([1:3]', [])))
00388 %!assert (lookup(1:3, [1, 2; 3, 0.5]), [1, 2; 3, 0]);
00389 %!assert (lookup(1:4, [1, 1.2; 3, 2.5], "m"), [1, 0; 3, 0]);
00390 %!assert (lookup(4:-1:1, [1, 1.2; 3, 2.5], "m"), [4, 0; 2, 0]);
00391 %!assert (lookup(1:4, [1, 1.2; 3, 2.5], "b"), logical ([1, 0; 3, 0]));
00392 %!assert (lookup(4:-1:1, [1, 1.2; 3, 2.5], "b"), logical ([4, 0; 2, 0]));
00393 %!
00394 %!assert (lookup({"apple","lemon","orange"}, {"banana","kiwi"; "ananas","mango"}), [1,1;0,2])
00395 %!assert (lookup({"apple","lemon","orange"}, "potato"), 3)
00396 %!assert (lookup({"orange","lemon","apple"}, "potato"), 0)
00397 */
 All Classes Files Functions Variables Typedefs Enumerations Enumerator Friends Defines