00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #ifdef HAVE_CONFIG_H
00025 #include <config.h>
00026 #endif
00027
00028 #include <string>
00029 #include <climits>
00030 #include <algorithm>
00031 #include <deque>
00032
00033 #include "Cell.h"
00034 #include "ov.h"
00035 #include "defun-dld.h"
00036 #include "unwind-prot.h"
00037 #include "gripes.h"
00038 #include "utils.h"
00039
00040
00041 #define ORD(ch) static_cast<unsigned char>(ch)
00042 #define TABSIZE (UCHAR_MAX + 1)
00043
00044
00045
00046 static void
00047 qs_preprocess (const Array<char>& needle,
00048 octave_idx_type table[TABSIZE])
00049 {
00050 const char *x = needle.data ();
00051 octave_idx_type m = needle.numel ();
00052
00053 for (octave_idx_type i = 0; i < TABSIZE; i++)
00054 table[i] = m + 1;
00055 for (octave_idx_type i = 0; i < m; i++)
00056 table[ORD(x[i])] = m - i;
00057 }
00058
00059
00060 static Array<octave_idx_type>
00061 qs_search (const Array<char>& needle,
00062 const Array<char>& haystack,
00063 const octave_idx_type table[TABSIZE],
00064 bool overlaps = true)
00065 {
00066 const char *x = needle.data ();
00067 octave_idx_type m = needle.numel ();
00068 const char *y = haystack.data ();
00069 octave_idx_type n = haystack.numel ();
00070
00071
00072
00073 std::deque<octave_idx_type> accum;
00074 if (m == 1)
00075 {
00076
00077 for (octave_idx_type i = 0; i < n; i++)
00078 {
00079 if (y[i] == x[0])
00080 accum.push_back (i);
00081 }
00082 }
00083 else if (m == 2)
00084 {
00085
00086 if (overlaps)
00087 {
00088 for (octave_idx_type i = 0; i < n-1; i++)
00089 {
00090 if (y[i] == x[0] && y[i+1] == x[1])
00091 accum.push_back (i);
00092 }
00093 }
00094 else
00095 {
00096 for (octave_idx_type i = 0; i < n-1; i++)
00097 {
00098 if (y[i] == x[0] && y[i+1] == x[1])
00099 accum.push_back (i++);
00100 }
00101 }
00102 }
00103 else if (n >= m)
00104 {
00105
00106 octave_idx_type j = 0;
00107
00108 if (overlaps)
00109 {
00110 while (j < n - m)
00111 {
00112 if (std::equal (x, x + m, y + j))
00113 accum.push_back (j);
00114 j += table[ORD(y[j + m])];
00115 }
00116 }
00117 else
00118 {
00119 while (j < n - m)
00120 {
00121 if (std::equal (x, x + m, y + j))
00122 {
00123 accum.push_back (j);
00124 j += m;
00125 }
00126 else
00127 j += table[ORD(y[j + m])];
00128 }
00129 }
00130
00131 if (j == n - m && std::equal (x, x + m, y + j))
00132 accum.push_back (j);
00133 }
00134
00135 octave_idx_type nmatch = accum.size ();
00136 octave_idx_type one = 1;
00137 Array<octave_idx_type> result (dim_vector (std::min (one, nmatch), nmatch));
00138 octave_idx_type k = 0;
00139 for (std::deque<octave_idx_type>::const_iterator iter = accum.begin ();
00140 iter != accum.end (); iter++)
00141 {
00142 result.xelem (k++) = *iter;
00143 }
00144
00145 return result;
00146 }
00147
00148 DEFUN_DLD (strfind, args, ,
00149 "-*- texinfo -*-\n\
00150 @deftypefn {Loadable Function} {@var{idx} =} strfind (@var{str}, @var{pattern})\n\
00151 @deftypefnx {Loadable Function} {@var{idx} =} strfind (@var{cellstr}, @var{pattern})\n\
00152 Search for @var{pattern} in the string @var{str} and return the\n\
00153 starting index of every such occurrence in the vector @var{idx}.\n\
00154 If there is no such occurrence, or if @var{pattern} is longer\n\
00155 than @var{str}, then @var{idx} is the empty array @code{[]}.\n\
00156 \n\
00157 If a cell array of strings @var{cellstr} is specified\n\
00158 then @var{idx} is a cell array of vectors, as specified\n\
00159 above. Examples:\n\
00160 \n\
00161 @example\n\
00162 @group\n\
00163 strfind (\"abababa\", \"aba\")\n\
00164 @result{} [1, 3, 5]\n\
00165 \n\
00166 strfind (@{\"abababa\", \"bebebe\", \"ab\"@}, \"aba\")\n\
00167 @result{} ans =\n\
00168 @{\n\
00169 [1,1] =\n\
00170 \n\
00171 1 3 5\n\
00172 \n\
00173 [1,2] = [](1x0)\n\
00174 [1,3] = [](1x0)\n\
00175 @}\n\
00176 @end group\n\
00177 @end example\n\
00178 @seealso{findstr, strmatch, regexp, regexpi, find}\n\
00179 @end deftypefn")
00180 {
00181 octave_value retval;
00182 int nargin = args.length ();
00183 bool overlaps = true;
00184
00185 if (nargin == 4 && args(2).is_string () && args(3).is_scalar_type ())
00186 {
00187 std::string opt = args(2).string_value ();
00188 if (opt == "overlaps")
00189 {
00190 overlaps = args(3).bool_value ();
00191 nargin = 2;
00192 }
00193 else
00194 {
00195 error ("strfind: unknown option: %s", opt.c_str ());
00196 return retval;
00197 }
00198 }
00199
00200 if (nargin == 2)
00201 {
00202 octave_value argstr = args(0), argpat = args(1);
00203 if (argpat.is_string ())
00204 {
00205 Array<char> needle = argpat.char_array_value ();
00206 OCTAVE_LOCAL_BUFFER (octave_idx_type, table, TABSIZE);
00207 qs_preprocess (needle, table);
00208
00209 if (argstr.is_string ())
00210 retval = octave_value (qs_search (needle, argstr.char_array_value (),
00211 table, overlaps),
00212 true, true);
00213 else if (argstr.is_cell ())
00214 {
00215 const Cell argsc = argstr.cell_value ();
00216 Cell retc (argsc.dims ());
00217 octave_idx_type ns = argsc.numel ();
00218
00219 for (octave_idx_type i = 0; i < ns; i++)
00220 {
00221 octave_value argse = argsc(i);
00222 if (argse.is_string ())
00223 retc(i) = octave_value (qs_search (needle, argse.char_array_value (),
00224 table, overlaps),
00225 true, true);
00226 else
00227 {
00228 error ("strfind: each element of CELLSTR must be a string");
00229 break;
00230 }
00231 }
00232
00233 retval = retc;
00234 }
00235 else
00236 error ("strfind: first argument must be a string or cell array of strings");
00237 }
00238 else if (argpat.is_cell ())
00239 retval = do_simple_cellfun (Fstrfind, "strfind", args);
00240 else
00241 error ("strfind: PATTERN must be a string or cell array of strings");
00242 }
00243 else
00244 print_usage ();
00245
00246 return retval;
00247 }
00248
00249
00250
00251
00252
00253
00254
00255
00256
00257
00258
00259
00260
00261
00262
00263 static Array<char>
00264 qs_replace (const Array<char>& str, const Array<char>& pat,
00265 const Array<char>& rep,
00266 const octave_idx_type table[TABSIZE],
00267 bool overlaps = true)
00268 {
00269 Array<char> ret = str;
00270
00271 octave_idx_type siz = str.numel (), psiz = pat.numel (), rsiz = rep.numel ();
00272
00273 if (psiz != 0)
00274 {
00275
00276 const Array<octave_idx_type> idx = qs_search (pat, str, table, overlaps);
00277 octave_idx_type nidx = idx.numel ();
00278
00279 if (nidx)
00280 {
00281
00282 octave_idx_type retsiz;
00283 if (overlaps)
00284 {
00285 retsiz = 0;
00286
00287
00288 octave_idx_type k = 0;
00289 for (octave_idx_type i = 0; i < nidx; i++)
00290 {
00291 octave_idx_type j = idx(i);
00292 if (j >= k)
00293 retsiz += j - k;
00294 retsiz += rsiz;
00295 k = j + psiz;
00296 }
00297
00298 retsiz += siz - k;
00299 }
00300 else
00301 retsiz = siz + nidx * (rsiz - psiz);
00302
00303 ret.clear (dim_vector (1, retsiz));
00304 const char *src = str.data (), *reps = rep.data ();
00305 char *dest = ret.fortran_vec ();
00306
00307 octave_idx_type k = 0;
00308 for (octave_idx_type i = 0; i < nidx; i++)
00309 {
00310 octave_idx_type j = idx(i);
00311 if (j >= k)
00312 dest = std::copy (src + k, src + j, dest);
00313 dest = std::copy (reps, reps + rsiz, dest);
00314 k = j + psiz;
00315 }
00316
00317 std::copy (src + k, src + siz, dest);
00318 }
00319 }
00320
00321 return ret;
00322 }
00323
00324 DEFUN_DLD (strrep, args, ,
00325 "-*- texinfo -*-\n\
00326 @deftypefn {Loadable Function} {} strrep (@var{s}, @var{ptn}, @var{rep})\n\
00327 @deftypefnx {Loadable Function} {} strrep (@var{s}, @var{ptn}, @var{rep}, \"overlaps\", @var{o})\n\
00328 Replace all occurrences of the substring @var{ptn} in the string @var{s}\n\
00329 with the string @var{rep} and return the result. For example:\n\
00330 \n\
00331 @example\n\
00332 @group\n\
00333 strrep (\"This is a test string\", \"is\", \"&%$\")\n\
00334 @result{} \"Th&%$ &%$ a test string\"\n\
00335 @end group\n\
00336 @end example\n\
00337 \n\
00338 @var{s} may also be a cell array of strings, in which case the replacement is\n\
00339 done for each element and a cell array is returned.\n\
00340 @seealso{regexprep, strfind, findstr}\n\
00341 @end deftypefn")
00342 {
00343 octave_value retval;
00344 int nargin = args.length ();
00345 bool overlaps = true;
00346
00347 if (nargin == 5 && args(3).is_string () && args(4).is_scalar_type ())
00348 {
00349 std::string opt = args(3).string_value ();
00350 if (opt == "overlaps")
00351 {
00352 overlaps = args(4).bool_value ();
00353 nargin = 3;
00354 }
00355 else
00356 {
00357 error ("strrep: unknown option: %s", opt.c_str ());
00358 return retval;
00359 }
00360 }
00361
00362 if (nargin == 3)
00363 {
00364 octave_value argstr = args(0), argpat = args(1), argrep = args(2);
00365 if (argpat.is_string () && argrep.is_string ())
00366 {
00367 const Array<char> pat = argpat.char_array_value ();
00368 const Array<char> rep = argrep.char_array_value ();
00369
00370 OCTAVE_LOCAL_BUFFER (octave_idx_type, table, TABSIZE);
00371 qs_preprocess (pat, table);
00372
00373 if (argstr.is_string ())
00374 retval = qs_replace (argstr.char_array_value (), pat, rep, table, overlaps);
00375 else if (argstr.is_cell ())
00376 {
00377 const Cell argsc = argstr.cell_value ();
00378 Cell retc (argsc.dims ());
00379 octave_idx_type ns = argsc.numel ();
00380
00381 for (octave_idx_type i = 0; i < ns; i++)
00382 {
00383 octave_value argse = argsc(i);
00384 if (argse.is_string ())
00385 retc(i) = qs_replace (argse.char_array_value (), pat, rep, table, overlaps);
00386 else
00387 {
00388 error ("strrep: each element of S must be a string");
00389 break;
00390 }
00391 }
00392
00393 retval = retc;
00394 }
00395 else
00396 error ("strrep: S must be a string or cell array of strings");
00397 }
00398 else if (argpat.is_cell () || argrep.is_cell ())
00399 retval = do_simple_cellfun (Fstrrep, "strrep", args);
00400 else
00401 error ("strrep: PTN and REP arguments must be strings or cell arrays of strings");
00402 }
00403 else
00404 print_usage ();
00405
00406 return retval;
00407 }
00408
00409
00410
00411
00412
00413
00414
00415
00416
00417
00418
00419
00420