GNU Octave 10.1.0
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab
 
Loading...
Searching...
No Matches
strfind.cc
Go to the documentation of this file.
1////////////////////////////////////////////////////////////////////////
2//
3// Copyright (C) 2009-2025 The Octave Project Developers
4//
5// See the file COPYRIGHT.md in the top-level directory of this
6// distribution or <https://octave.org/copyright/>.
7//
8// This file is part of Octave.
9//
10// Octave is free software: you can redistribute it and/or modify it
11// under the terms of the GNU General Public License as published by
12// the Free Software Foundation, either version 3 of the License, or
13// (at your option) any later version.
14//
15// Octave is distributed in the hope that it will be useful, but
16// WITHOUT ANY WARRANTY; without even the implied warranty of
17// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18// GNU General Public License for more details.
19//
20// You should have received a copy of the GNU General Public License
21// along with Octave; see the file COPYING. If not, see
22// <https://www.gnu.org/licenses/>.
23//
24////////////////////////////////////////////////////////////////////////
25
26#if defined (HAVE_CONFIG_H)
27# include "config.h"
28#endif
29
30#include <algorithm>
31#include <deque>
32#include <limits>
33#include <string>
34
35#include "oct-locbuf.h"
36
37#include "Cell.h"
38#include "builtin-defun-decls.h"
39#include "defun.h"
40#include "errwarn.h"
41#include "error.h"
42#include "ov.h"
43#include "unwind-prot.h"
44#include "utils.h"
45
47
48// This allows safe indexing with char.
49// In C++, char may be (and often is) signed!
50#define ORD(ch) static_cast<unsigned char> (ch)
51#define TABSIZE (std::numeric_limits<unsigned char>::max () + 1)
52
53// This is the quick search algorithm, as described at
54// http://www-igm.univ-mlv.fr/~lecroq/string/node19.html
55static void
56qs_preprocess (const Array<char>& needle,
57 octave_idx_type *table)
58{
59 const char *x = needle.data ();
60 octave_idx_type m = needle.numel ();
61
62 for (octave_idx_type i = 0; i < TABSIZE; i++)
63 table[i] = m + 1;
64 for (octave_idx_type i = 0; i < m; i++)
65 table[ORD(x[i])] = m - i;
66}
67
69qs_search (const Array<char>& needle,
70 const Array<char>& haystack,
71 const octave_idx_type *table,
72 bool overlaps = true)
73{
74 const char *x = needle.data ();
75 octave_idx_type m = needle.numel ();
76 const char *y = haystack.data ();
77 octave_idx_type n = haystack.numel ();
78
79 // We'll use deque because it typically has the most favorable properties for
80 // the operation we need.
81 std::deque<octave_idx_type> accum;
82 if (m == 1)
83 {
84 // Looking for a single character.
85 for (octave_idx_type i = 0; i < n; i++)
86 {
87 if (y[i] == x[0])
88 accum.push_back (i);
89 }
90 }
91 else if (m == 2)
92 {
93 // Two characters.
94 if (overlaps)
95 {
96 for (octave_idx_type i = 0; i < n-1; i++)
97 {
98 if (y[i] == x[0] && y[i+1] == x[1])
99 accum.push_back (i);
100 }
101 }
102 else
103 {
104 for (octave_idx_type i = 0; i < n-1; i++)
105 {
106 if (y[i] == x[0] && y[i+1] == x[1])
107 accum.push_back (i++);
108 }
109 }
110 }
111 else if (n >= m)
112 {
113 // General case.
114 octave_idx_type j = 0;
115
116 if (overlaps)
117 {
118 while (j < n - m)
119 {
120 if (std::equal (x, x + m, y + j))
121 accum.push_back (j);
122 j += table[ORD(y[j + m])];
123 }
124 }
125 else
126 {
127 while (j < n - m)
128 {
129 if (std::equal (x, x + m, y + j))
130 {
131 accum.push_back (j);
132 j += m;
133 }
134 else
135 j += table[ORD(y[j + m])];
136 }
137 }
138
139 if (j == n - m && std::equal (x, x + m, y + j))
140 accum.push_back (j);
141 }
142
143 octave_idx_type nmatch = accum.size ();
144 octave_idx_type one = 1;
145 Array<octave_idx_type> result (dim_vector (std::min (one, nmatch), nmatch));
146 octave_idx_type k = 0;
147 for (const auto& idx : accum)
148 result.xelem (k++) = idx;
149
150 return result;
151}
152
153DEFUN (strfind, args, ,
154 doc: /* -*- texinfo -*-
155@deftypefn {} {@var{idx} =} strfind (@var{str}, @var{pattern})
156@deftypefnx {} {@var{idx} =} strfind (@var{cellstr}, @var{pattern})
157@deftypefnx {} {@var{idx} =} strfind (@dots{}, "overlaps", @var{val})
158@deftypefnx {} {@var{idx} =} strfind (@dots{}, "forcecelloutput", @var{val})
159Search for @var{pattern} in the string @var{str} and return the starting
160index of every such occurrence in the vector @var{idx}.
161
162If there is no such occurrence, or if @var{pattern} is longer than
163@var{str}, or if @var{pattern} itself is empty, then @var{idx} is the empty
164array @code{[]}.
165
166The optional argument @qcode{"overlaps"} determines whether the pattern
167can match at every position in @var{str} (true), or only for unique
168occurrences of the complete pattern (false). The default is true.
169
170If a cell array of strings @var{cellstr} is specified then @var{idx} is a
171cell array of vectors, as specified above.
172
173The optional argument @qcode{"forcecelloutput"} forces @var{idx} to be
174returned as a cell array of vectors. The default is false.
175
176Examples:
177
178@example
179@group
180strfind ("abababa", "aba")
181 @result{} [1, 3, 5]
182@end group
183
184@group
185strfind ("abababa", "aba", "overlaps", false)
186 @result{} [1, 5]
187@end group
188
189@group
190strfind (@{"abababa", "bebebe", "ab"@}, "aba")
191 @result{}
192 @{
193 [1,1] =
194
195 1 3 5
196
197 [1,2] = [](1x0)
198 [1,3] = [](1x0)
199 @}
200@end group
201
202@group
203strfind ("abababa", "aba", "forcecelloutput", true)
204 @result{}
205 @{
206 [1,1] =
207
208 1 3 5
209 @}
210@end group
211@end example
212@seealso{regexp, regexpi, find}
213@end deftypefn */)
214{
215 int nargin = args.length ();
216
217 if (nargin != 4 && nargin != 2)
218 print_usage ();
219
220 bool overlaps = true;
221 bool forcecelloutput = false;
222 if (nargin == 4)
223 {
224 if (! args(2).is_string () || ! args(3).is_scalar_type ())
225 error ("strfind: invalid optional arguments");
226
227 std::string opt = args(2).string_value ();
228 std::transform (opt.begin (), opt.end (), opt.begin (), tolower);
229
230 if (opt == "overlaps")
231 overlaps = args(3).bool_value ();
232 else if (opt == "forcecelloutput")
233 forcecelloutput = args(3).bool_value ();
234 else
235 error ("strfind: unknown option: %s", opt.c_str ());
236 }
237
238 octave_value retval;
239
240 octave_value argstr = args(0);
241 octave_value argpat = args(1);
242
243 if (argpat.is_string ())
244 {
245 Array<char> needle = argpat.char_array_value ();
247 qs_preprocess (needle, table);
248
249 if (argstr.is_string ())
250 {
251 if (argpat.isempty ())
252 // Return a null matrix for null pattern for MW compatibility
253 retval = Matrix ();
254 else
255 retval = octave_value (qs_search (needle,
256 argstr.char_array_value (),
257 table, overlaps),
258 true, true);
259 if (forcecelloutput)
260 retval = Cell (retval);
261 }
262 else if (argstr.iscell ())
263 {
264 const Cell argsc = argstr.cell_value ();
265 Cell retc (argsc.dims ());
266 octave_idx_type ns = argsc.numel ();
267
268 for (octave_idx_type i = 0; i < ns; i++)
269 {
270 octave_value argse = argsc(i);
271 if (! argse.is_string ())
272 error ("strfind: each element of CELLSTR must be a string");
273
274 if (argpat.isempty ())
275 retc(i) = Matrix ();
276 else
277 retc(i) = octave_value (qs_search (needle,
278 argse.char_array_value (),
279 table, overlaps),
280 true, true);
281 }
282
283 retval = retc;
284 }
285 else
286 error ("strfind: first argument must be a string or cell array of strings");
287 }
288 else if (argpat.iscell ())
289 retval = do_simple_cellfun (Fstrfind, "strfind", args);
290 else
291 error ("strfind: PATTERN must be a string or cell array of strings");
292
293 return retval;
294}
295
296/*
297%!assert (strfind ("abababa", "aba"), [1, 3, 5])
298%!assert (strfind ("abababa", "aba", "overlaps", false), [1, 5])
299%!assert (strfind ("abababa", "aba", "forcecelloutput", false), [1, 3, 5])
300%!assert (strfind ("abababa", "aba", "forcecelloutput", true), {[1, 3, 5]})
301%!assert (strfind ({"abababa", "bla", "bla"}, "a"), {[1, 3, 5, 7], 3, 3})
302%!assert (strfind ({"abababa", "bla", "bla"}, "a", "forcecelloutput", false),
303%! {[1, 3, 5, 7], 3, 3})
304%!assert (strfind ({"abababa", "bla", "bla"}, "a", "forcecelloutput", true),
305%! {[1, 3, 5, 7], 3, 3})
306%!assert (strfind ("Linux _is_ user-friendly. It just isn't ignorant-friendly or idiot-friendly.", "friendly"), [17, 50, 68])
307%!assert (strfind ("abc", ""), [])
308%!assert (strfind ("abc", {"", "b", ""}), {[], 2, []})
309%!assert (strfind ({"abc", "def"}, ""), {[], []})
310
311%!error strfind ()
312%!error strfind ("foo", "bar", 1)
313%!error <unknown option: foobar> strfind ("foo", 100, "foobar", 1)
314%!error <each element of CELLSTR must be a string> strfind ({"A", 1}, "foo")
315%!error <first argument must be a string> strfind (100, "foo")
316%!error <PATTERN must be a string> strfind ("foo", 100)
317*/
318
319static Array<char>
320qs_replace (const Array<char>& str, const Array<char>& pat,
321 const Array<char>& rep,
322 const octave_idx_type *table,
323 bool overlaps = true)
324{
325 Array<char> ret = str;
326
327 octave_idx_type siz = str.numel ();
328 octave_idx_type psiz = pat.numel ();
329 octave_idx_type rsiz = rep.numel ();
330
331 if (psiz != 0)
332 {
333 // Look up matches, without overlaps.
334 const Array<octave_idx_type> idx = qs_search (pat, str, table, overlaps);
335 octave_idx_type nidx = idx.numel ();
336
337 if (nidx)
338 {
339 // Compute result size.
340 octave_idx_type retsiz;
341 if (overlaps)
342 {
343 retsiz = 0;
344 // OMG. Is this the "right answer" MW always looks for, or
345 // someone was just lazy?
346 octave_idx_type k = 0;
347 for (octave_idx_type i = 0; i < nidx; i++)
348 {
349 octave_idx_type j = idx(i);
350 if (j >= k)
351 retsiz += j - k;
352 retsiz += rsiz;
353 k = j + psiz;
354 }
355
356 retsiz += siz - k;
357 }
358 else
359 retsiz = siz + nidx * (rsiz - psiz);
360
361 if (retsiz == 0)
362 ret.clear (dim_vector (0, 0));
363 else
364 {
365 ret.clear (dim_vector (1, retsiz));
366 const char *src = str.data ();
367 const char *reps = rep.data ();
368 char *dest = ret.rwdata ();
369
370 octave_idx_type k = 0;
371 for (octave_idx_type i = 0; i < nidx; i++)
372 {
373 octave_idx_type j = idx(i);
374 if (j >= k)
375 dest = std::copy (src + k, src + j, dest);
376 dest = std::copy (reps, reps + rsiz, dest);
377 k = j + psiz;
378 }
379
380 std::copy (src + k, src + siz, dest);
381 }
382 }
383 }
384
385 return ret;
386}
387
388DEFUN (strrep, args, ,
389 doc: /* -*- texinfo -*-
390@deftypefn {} {@var{newstr} =} strrep (@var{str}, @var{ptn}, @var{rep})
391@deftypefnx {} {@var{newstr} =} strrep (@var{cellstr}, @var{ptn}, @var{rep})
392@deftypefnx {} {@var{newstr} =} strrep (@dots{}, "overlaps", @var{val})
393Replace all occurrences of the pattern @var{ptn} in the string @var{str}
394with the string @var{rep} and return the result.
395
396The optional argument @qcode{"overlaps"} determines whether the pattern
397can match at every position in @var{str} (true), or only for unique
398occurrences of the complete pattern (false). The default is true.
399
400@var{s} may also be a cell array of strings, in which case the replacement
401is done for each element and a cell array is returned.
402
403Example:
404
405@example
406@group
407strrep ("This is a test string", "is", "&%$")
408 @result{} "Th&%$ &%$ a test string"
409@end group
410@end example
411
412@seealso{regexprep, strfind}
413@end deftypefn */)
414{
415 int nargin = args.length ();
416
417 if (nargin != 3 && nargin != 5)
418 print_usage ();
419
420 bool overlaps = true;
421
422 if (nargin == 5)
423 {
424 if (! args(3).is_string () || ! args(4).is_scalar_type ())
425 error ("strrep: invalid optional argument");
426
427 std::string opt = args(3).string_value ();
428 if (opt != "overlaps")
429 error ("strrep: unknown option: %s", opt.c_str ());
430
431 overlaps = args(4).bool_value ();
432 }
433
434 octave_value retval;
435
436 // Aliasing for better code readability
437 octave_value argstr = args(0);
438 octave_value argpat = args(1);
439 octave_value argrep = args(2);
440
441 if (argpat.is_string () && argrep.is_string ())
442 {
443 const Array<char> pat = argpat.char_array_value ();
444 const Array<char> rep = argrep.char_array_value ();
445
447 qs_preprocess (pat, table);
448
449 if (argstr.is_string ())
450 if (argstr.rows () == 1) // most common case of a single string
451 retval = qs_replace (argstr.char_array_value (), pat, rep,
452 table, overlaps);
453 else
454 {
455 const charMatrix argchm = argstr.char_matrix_value ();
456 octave_idx_type nel = argchm.rows ();
457 octave_idx_type nc = argchm.columns ();
458 charMatrix retchm (nel, 0);
459
460 for (octave_idx_type i = 0; i < nel; i++)
461 {
462 charMatrix rowchm;
463 rowchm = qs_replace (argchm.extract (i, 0, i, nc-1),
464 pat, rep, table, overlaps);
465 retchm.insert (rowchm, i, 0);
466 }
467
468 retval = retchm;
469 }
470 else if (argstr.iscell ())
471 {
472 const Cell argcell = argstr.cell_value ();
473 if (! argcell.iscellstr ())
474 error ("strrep: each element of S must be a string");
475
476 Cell retcell (argcell.dims ());
477 octave_idx_type nel = argcell.numel ();
478
479 for (octave_idx_type i = 0; i < nel; i++)
480 {
481 retcell(i) = qs_replace (argcell(i).char_array_value (),
482 pat, rep, table, overlaps);
483 }
484
485 retval = retcell;
486 }
487 else
488 error ("strrep: S must be a string or cell array of strings");
489 }
490 else if (argpat.iscell () || argrep.iscell ())
491 retval = do_simple_cellfun (Fstrrep, "strrep", args);
492 else
493 error ("strrep: PTN and REP arguments must be strings or cell arrays of strings");
494
495 return retval;
496}
497
498/*
499%!assert (strrep ("This is a test string", "is", "&%$"),
500%! "Th&%$ &%$ a test string")
501%!assert (strrep ("abababc", "abab", "xyz"), "xyzxyzc")
502%!assert (strrep ("abababc", "abab", "xyz", "overlaps", false), "xyzabc")
503%!assert (strrep ({"Hello World"; "Goodbye World"}, "World", "Jane"),
504%! {"Hello Jane"; "Goodbye Jane"})
505%!assert (strrep (char ("Hello World", "Goodbye World"), "World", "Jane"),
506%! char ("Hello Jane", "Goodbye Jane"))
507
508%!assert (size (strrep ("a", "a", "")), [0 0])
509
510%!error strrep ()
511%!error strrep ("A")
512%!error strrep ("A", "B")
513%!error strrep ("A", "B", "C", "D")
514%!error strrep ("A", "B", "C", "D", "E", "F")
515%!error <invalid optional argument> strrep ("A", "B", "C", 3, true)
516%!error <invalid optional argument> strrep ("A", "B", "C", "str", ones (2,2))
517%!error <unknown option: foobar> strrep ("A", "B", "C", "foobar", true)
518%!error <each element of S must be a string> strrep ({"A", 1.0}, "B", "C")
519%!error <S must be a string or cell array of strings> strrep (1.0, "B", "C")
520%!error <PTN and REP arguments must be strings> strrep ("A", 1.0, "C")
521%!error <PTN and REP arguments must be strings> strrep ("A", "B", 1.0)
522*/
523
524OCTAVE_END_NAMESPACE(octave)
N Dimensional Array with copy-on-write semantics.
Definition Array.h:130
const dim_vector & dims() const
Return a const-reference so that dims ()(i) works efficiently.
Definition Array.h:507
void clear()
octave_idx_type rows() const
Definition Array.h:463
octave_idx_type columns() const
Definition Array.h:475
const T * data() const
Size of the specified dimension.
Definition Array.h:665
T * rwdata()
Size of the specified dimension.
octave_idx_type numel() const
Number of elements in the array.
Definition Array.h:418
Definition Cell.h:41
bool iscellstr() const
Definition Cell.cc:126
charMatrix extract(octave_idx_type r1, octave_idx_type c1, octave_idx_type r2, octave_idx_type c2) const
Definition chMatrix.cc:116
charMatrix & insert(const char *s, octave_idx_type r, octave_idx_type c)
Definition chMatrix.cc:59
Vector representing the dimensions (size) of an Array.
Definition dim-vector.h:90
charMatrix char_matrix_value(bool frc_str_conv=false) const
Definition ov.h:903
Cell cell_value() const
octave_idx_type rows() const
Definition ov.h:545
bool is_string() const
Definition ov.h:637
charNDArray char_array_value(bool frc_str_conv=false) const
Definition ov.h:906
bool isempty() const
Definition ov.h:601
bool iscell() const
Definition ov.h:604
OCTAVE_BEGIN_NAMESPACE(octave) static octave_value daspk_fcn
void print_usage()
Definition defun-int.h:72
#define DEFUN(name, args_name, nargout_name, doc)
Macro to define a builtin function.
Definition defun.h:56
void error(const char *fmt,...)
Definition error.cc:1003
F77_RET_T const F77_DBLE * x
#define OCTAVE_LOCAL_BUFFER(T, buf, size)
Definition oct-locbuf.h:44
octave_value_list Fstrfind(const octave_value_list &args, int)
Definition strfind.cc:213
#define TABSIZE
Definition strfind.cc:51
octave_value_list Fstrrep(const octave_value_list &args, int)
Definition strfind.cc:413
#define ORD(ch)
Definition strfind.cc:50
octave_value_list do_simple_cellfun(octave_value_list(*fcn)(const octave_value_list &, int), const char *fcn_name, const octave_value_list &args, int nargout)
Definition utils.cc:1708