GNU Octave  3.8.0
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
regexp.cc
Go to the documentation of this file.
1 /*
2 
3 Copyright (C) 2005-2013 David Bateman
4 Copyright (C) 2002-2005 Paul Kienzle
5 
6 This file is part of Octave.
7 
8 Octave is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12 
13 Octave is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with Octave; see the file COPYING. If not, see
20 <http://www.gnu.org/licenses/>.
21 
22 */
23 
24 #ifdef HAVE_CONFIG_H
25 #include <config.h>
26 #endif
27 
28 #include <list>
29 #include <sstream>
30 
31 #include <pcre.h>
32 
33 #include "base-list.h"
34 #include "oct-locbuf.h"
35 #include "quit.h"
36 #include "lo-regexp.h"
37 #include "str-vec.h"
38 
39 #include "defun.h"
40 #include "Cell.h"
41 #include "error.h"
42 #include "gripes.h"
43 #include "oct-map.h"
44 #include "oct-obj.h"
45 #include "utils.h"
46 
47 // Replace backslash escapes in a string with the real values. We need
48 // two special functions instead of the one in utils.cc because the set
49 // of escape sequences used for regexp patterns and replacement strings
50 // is different from those used in the *printf functions.
51 
// Rewrite the backslash escapes of a regexp pattern S and return the
// result.
//
//   \b        becomes a literal backspace character
//   \< and \> both become the two characters "\b" (PCRE word boundary)
//   \X        any other escape is passed through unchanged
//
// A backslash that is the last character of S has nothing to escape
// and is copied like an ordinary character.
//
// FIXME: To be complete, we need to handle \oN, \o{N}.  The PCRE
// library already handles \N where N is an octal number.  New code
// needs to merely replace \oN or \o{N} with \N.
static std::string
do_regexp_ptn_string_escapes (const std::string& s)
{
  const size_t n = s.length ();

  // The output is never longer than the input: every two-character
  // escape produces at most two characters.
  std::string out;
  out.reserve (n);

  for (size_t pos = 0; pos < n; pos++)
    {
      const char c = s[pos];

      if (c != '\\' || pos + 1 >= n)
        {
          out.push_back (c);
          continue;
        }

      // Consume the character that follows the backslash.
      const char esc = s[++pos];

      if (esc == 'b')
        out.push_back ('\b');
      else if (esc == '<' || esc == '>')
        {
          out.push_back ('\\');
          out.push_back ('b');
        }
      else
        {
          out.push_back ('\\');
          out.push_back (esc);
        }
    }

  return out;
}
107 
// Rewrite the backslash escapes of a regexp replacement string S and
// return the result.
//
// The escapes \a \b \f \n \r \t \v are replaced by the single control
// character they name.  Every other escape is passed through
// unchanged (backslash included), and a backslash that is the last
// character of S is copied like an ordinary character.
//
// FIXME: to be complete, we need to handle \oN, \o{N}, \xN, and
// \x{N}.  Hex digits may be upper or lower case.  Brackets are
// optional, so \x5Bz is the same as \x{5B}z.
static std::string
do_regexp_rep_string_escapes (const std::string& s)
{
  // Escape letters and, at the same index, the character each one
  // stands for.
  static const std::string letters ("abfnrtv");
  static const char values[] = { '\a', '\b', '\f', '\n', '\r', '\t', '\v' };

  const size_t n = s.length ();

  // The output is never longer than the input.
  std::string out;
  out.reserve (n);

  for (size_t pos = 0; pos < n; pos++)
    {
      const char c = s[pos];

      if (c != '\\' || pos + 1 >= n)
        {
          out.push_back (c);
          continue;
        }

      // Consume the character that follows the backslash.
      const char esc = s[++pos];

      const size_t which = letters.find (esc);

      if (which != std::string::npos)
        out.push_back (values[which]);
      else
        {
          out.push_back ('\\');
          out.push_back (esc);
        }
    }

  return out;
}
181 
182 static void
184  const std::string& who, int skip, bool& extra_args)
185 {
186  int nargin = args.length ();
187 
188  extra_args = false;
189 
190  for (int i = skip; i < nargin; i++)
191  {
192  std::string str = args(i).string_value ();
193 
194  if (error_state)
195  {
196  error ("%s: optional arguments must be character strings",
197  who.c_str ());
198  break;
199  }
200 
201  std::transform (str.begin (), str.end (), str.begin (), tolower);
202 
203  if (str.find ("once", 0) == 0)
204  options.once (true);
205  else if (str.find ("matchcase", 0) == 0)
206  options.case_insensitive (false);
207  else if (str.find ("ignorecase", 0) == 0)
208  options.case_insensitive (true);
209  else if (str.find ("dotall", 0) == 0)
210  options.dotexceptnewline (false);
211  else if (str.find ("stringanchors", 0) == 0)
212  options.lineanchors (false);
213  else if (str.find ("literalspacing", 0) == 0)
214  options.freespacing (false);
215  else if (str.find ("noemptymatch", 0) == 0)
216  options.emptymatch (false);
217  else if (str.find ("dotexceptnewline", 0) == 0)
218  options.dotexceptnewline (true);
219  else if (str.find ("lineanchors", 0) == 0)
220  options.lineanchors (true);
221  else if (str.find ("freespacing", 0) == 0)
222  options.freespacing (true);
223  else if (str.find ("emptymatch", 0) == 0)
224  options.emptymatch (true);
225  else if (str.find ("start", 0) == 0
226  || str.find ("end", 0) == 0
227  || str.find ("tokenextents", 0) == 0
228  || str.find ("match", 0) == 0
229  || str.find ("tokens", 0) == 0
230  || str.find ("names", 0) == 0
231  || str.find ("split", 0) == 0)
232  extra_args = true;
233  else
234  error ("%s: unrecognized option", who.c_str ());
235  }
236 }
237 
238 static octave_value_list
239 octregexp (const octave_value_list &args, int nargout,
240  const std::string &who, bool case_insensitive = false)
241 {
242  octave_value_list retval;
243 
244  int nargin = args.length ();
245 
246  // Make sure we have string, pattern
247  const std::string buffer = args(0).string_value ();
248  if (error_state)
249  return retval;
250 
251  std::string pattern = args(1).string_value ();
252  if (error_state)
253  return retval;
254  // Matlab compatibility.
255  if (args(1).is_sq_string ())
256  pattern = do_regexp_ptn_string_escapes (pattern);
257 
258  regexp::opts options;
259  options.case_insensitive (case_insensitive);
260  bool extra_options = false;
261  parse_options (options, args, who, 2, extra_options);
262  if (error_state)
263  return retval;
264 
265  regexp::match_data rx_lst = regexp_match (pattern, buffer, options, who);
266 
267  string_vector named_pats = rx_lst.named_patterns ();
268 
269  size_t sz = rx_lst.size ();
270 
271  if (! error_state)
272  {
273  // Converted the linked list in the correct form for the return values
274 
275  octave_idx_type i = 0;
276  octave_scalar_map nmap;
277 
278  retval.resize (7);
279 
280  if (sz == 1)
281  {
282  string_vector named_tokens = rx_lst.begin ()->named_tokens ();
283 
284  for (int j = 0; j < named_pats.length (); j++)
285  nmap.assign (named_pats(j), named_tokens(j));
286 
287  retval(5) = nmap;
288  }
289  else
290  {
291  for (int j = 0; j < named_pats.length (); j++)
292  {
293  Cell tmp (dim_vector (1, sz));
294 
295  i = 0;
296  for (regexp::match_data::const_iterator p = rx_lst.begin ();
297  p != rx_lst.end (); p++)
298  {
299  string_vector named_tokens = p->named_tokens ();
300 
301  tmp(i++) = named_tokens(j);
302  }
303 
304  nmap.assign (named_pats(j), octave_value (tmp));
305  }
306 
307  retval(5) = nmap;
308  }
309 
310  if (options.once ())
311  {
313 
314  retval(4) = sz ? p->tokens () : Cell ();
315  retval(3) = sz ? p->match_string () : std::string ();
316  retval(2) = sz ? p->token_extents () : Matrix ();
317 
318  if (sz)
319  {
320  double start = p->start ();
321  double end = p->end ();
322 
323  Cell split (dim_vector (1, 2));
324  split(0) = buffer.substr (0, start-1);
325  split(1) = buffer.substr (end);
326 
327  retval(6) = split;
328  retval(1) = end;
329  retval(0) = start;
330  }
331  else
332  {
333  retval(6) = buffer;
334  retval(1) = Matrix ();
335  retval(0) = Matrix ();
336  }
337  }
338  else
339  {
340  Cell tokens (dim_vector (1, sz));
341  Cell match_string (dim_vector (1, sz));
342  Cell token_extents (dim_vector (1, sz));
343  NDArray end (dim_vector (1, sz));
344  NDArray start (dim_vector (1, sz));
345  Cell split (dim_vector (1, sz+1));
346  size_t sp_start = 0;
347 
348  i = 0;
349  for (regexp::match_data::const_iterator p = rx_lst.begin ();
350  p != rx_lst.end (); p++)
351  {
352  double s = p->start ();
353  double e = p->end ();
354 
355  string_vector tmp = p->tokens ();
356  tokens(i) = Cell (dim_vector (1, tmp.length ()), tmp);
357  match_string(i) = p->match_string ();
358  token_extents(i) = p->token_extents ();
359  end(i) = e;
360  start(i) = s;
361  split(i) = buffer.substr (sp_start, s-sp_start-1);
362  sp_start = e;
363  i++;
364  }
365 
366  split(i) = buffer.substr (sp_start);
367 
368  retval(6) = split;
369  retval(4) = tokens;
370  retval(3) = match_string;
371  retval(2) = token_extents;
372  retval(1) = end;
373  retval(0) = start;
374  }
375 
376  // Alter the order of the output arguments
377 
378  if (extra_options)
379  {
380  int n = 0;
381  octave_value_list new_retval;
382  new_retval.resize (nargout);
383 
384  OCTAVE_LOCAL_BUFFER (int, arg_used, 6);
385  for (int j = 0; j < 6; j++)
386  arg_used[j] = false;
387 
388  for (int j = 2; j < nargin; j++)
389  {
390  int k = 0;
391  std::string str = args(j).string_value ();
392  std::transform (str.begin (), str.end (), str.begin (), tolower);
393 
394  if (str.find ("once", 0) == 0
395  || str.find ("stringanchors", 0) == 0
396  || str.find ("lineanchors", 0) == 0
397  || str.find ("matchcase", 0) == 0
398  || str.find ("ignorecase", 0) == 0
399  || str.find ("dotall", 0) == 0
400  || str.find ("dotexceptnewline", 0) == 0
401  || str.find ("literalspacing", 0) == 0
402  || str.find ("freespacing", 0) == 0
403  || str.find ("noemptymatch", 0) == 0
404  || str.find ("emptymatch", 0) == 0)
405  continue;
406  else if (str.find ("start", 0) == 0)
407  k = 0;
408  else if (str.find ("end", 0) == 0)
409  k = 1;
410  else if (str.find ("tokenextents", 0) == 0)
411  k = 2;
412  else if (str.find ("match", 0) == 0)
413  k = 3;
414  else if (str.find ("tokens", 0) == 0)
415  k = 4;
416  else if (str.find ("names", 0) == 0)
417  k = 5;
418  else if (str.find ("split", 0) == 0)
419  k = 6;
420 
421  new_retval(n++) = retval(k);
422  arg_used[k] = true;
423 
424  if (n == nargout)
425  break;
426  }
427 
428  // Fill in the rest of the arguments
429  if (n < nargout)
430  {
431  for (int j = 0; j < 6; j++)
432  {
433  if (! arg_used[j])
434  new_retval(n++) = retval(j);
435  }
436  }
437 
438  retval = new_retval;
439  }
440  }
441 
442  return retval;
443 }
444 
445 static octave_value_list
446 octcellregexp (const octave_value_list &args, int nargout,
447  const std::string &who, bool case_insensitive = false)
448 {
449  octave_value_list retval;
450 
451  if (args(0).is_cell ())
452  {
453  OCTAVE_LOCAL_BUFFER (Cell, newretval, nargout);
454  octave_value_list new_args = args;
455  Cell cellstr = args(0).cell_value ();
456  if (args(1).is_cell ())
457  {
458  Cell cellpat = args(1).cell_value ();
459 
460  if (cellpat.numel () == 1)
461  {
462  for (int j = 0; j < nargout; j++)
463  newretval[j].resize (cellstr.dims ());
464 
465  new_args(1) = cellpat(0);
466 
467  for (octave_idx_type i = 0; i < cellstr.numel (); i++)
468  {
469  new_args(0) = cellstr(i);
470  octave_value_list tmp = octregexp (new_args, nargout, who,
471  case_insensitive);
472 
473  if (error_state)
474  break;
475 
476  for (int j = 0; j < nargout; j++)
477  newretval[j](i) = tmp(j);
478  }
479  }
480  else if (cellstr.numel () == 1)
481  {
482  for (int j = 0; j < nargout; j++)
483  newretval[j].resize (cellpat.dims ());
484 
485  new_args(0) = cellstr(0);
486 
487  for (octave_idx_type i = 0; i < cellpat.numel (); i++)
488  {
489  new_args(1) = cellpat(i);
490  octave_value_list tmp = octregexp (new_args, nargout, who,
491  case_insensitive);
492 
493  if (error_state)
494  break;
495 
496  for (int j = 0; j < nargout; j++)
497  newretval[j](i) = tmp(j);
498  }
499  }
500  else if (cellstr.numel () == cellpat.numel ())
501  {
502 
503  if (cellstr.dims () != cellpat.dims ())
504  error ("%s: inconsistent cell array dimensions", who.c_str ());
505  else
506  {
507  for (int j = 0; j < nargout; j++)
508  newretval[j].resize (cellstr.dims ());
509 
510  for (octave_idx_type i = 0; i < cellstr.numel (); i++)
511  {
512  new_args(0) = cellstr(i);
513  new_args(1) = cellpat(i);
514 
515  octave_value_list tmp = octregexp (new_args, nargout, who,
516  case_insensitive);
517 
518  if (error_state)
519  break;
520 
521  for (int j = 0; j < nargout; j++)
522  newretval[j](i) = tmp(j);
523  }
524  }
525  }
526  else
527  error ("regexp: cell array arguments must be scalar or equal size");
528  }
529  else
530  {
531  for (int j = 0; j < nargout; j++)
532  newretval[j].resize (cellstr.dims ());
533 
534  for (octave_idx_type i = 0; i < cellstr.numel (); i++)
535  {
536  new_args(0) = cellstr(i);
537  octave_value_list tmp = octregexp (new_args, nargout, who,
538  case_insensitive);
539 
540  if (error_state)
541  break;
542 
543  for (int j = 0; j < nargout; j++)
544  newretval[j](i) = tmp(j);
545  }
546  }
547 
548  if (!error_state)
549  for (int j = 0; j < nargout; j++)
550  retval(j) = octave_value (newretval[j]);
551  }
552  else if (args(1).is_cell ())
553  {
554  OCTAVE_LOCAL_BUFFER (Cell, newretval, nargout);
555  octave_value_list new_args = args;
556  Cell cellpat = args(1).cell_value ();
557 
558  for (int j = 0; j < nargout; j++)
559  newretval[j].resize (cellpat.dims ());
560 
561  for (octave_idx_type i = 0; i < cellpat.numel (); i++)
562  {
563  new_args(1) = cellpat(i);
564  octave_value_list tmp = octregexp (new_args, nargout, who,
565  case_insensitive);
566 
567  if (error_state)
568  break;
569 
570  for (int j = 0; j < nargout; j++)
571  newretval[j](i) = tmp(j);
572  }
573 
574  if (!error_state)
575  {
576  for (int j = 0; j < nargout; j++)
577  retval(j) = octave_value (newretval[j]);
578  }
579  }
580  else
581  retval = octregexp (args, nargout, who, case_insensitive);
582 
583  return retval;
584 
585 }
586 
587 DEFUN (regexp, args, nargout,
588  "-*- texinfo -*-\n\
589 @deftypefn {Built-in Function} {[@var{s}, @var{e}, @var{te}, @var{m}, @var{t}, @var{nm}, @var{sp}] =} regexp (@var{str}, @var{pat})\n\
590 @deftypefnx {Built-in Function} {[@dots{}] =} regexp (@var{str}, @var{pat}, \"@var{opt1}\", @dots{})\n\
591 Regular expression string matching. Search for @var{pat} in @var{str} and\n\
592 return the positions and substrings of any matches, or empty values if there\n\
593 are none.\n\
594 \n\
595 The matched pattern @var{pat} can include any of the standard regex\n\
596 operators, including:\n\
597 \n\
598 @table @code\n\
599 @item .\n\
600 Match any character\n\
601 \n\
602 @item * + ? @{@}\n\
603 Repetition operators, representing\n\
604 \n\
605 @table @code\n\
606 @item *\n\
607 Match zero or more times\n\
608 \n\
609 @item +\n\
610 Match one or more times\n\
611 \n\
612 @item ?\n\
613 Match zero or one times\n\
614 \n\
615 @item @{@var{n}@}\n\
616 Match exactly @var{n} times\n\
617 \n\
618 @item @{@var{n},@}\n\
619 Match @var{n} or more times\n\
620 \n\
621 @item @{@var{m},@var{n}@}\n\
622 Match between @var{m} and @var{n} times\n\
623 @end table\n\
624 \n\
625 @item [@dots{}] [^@dots{}]\n\
626 \n\
627 List operators. The pattern will match any character listed between \"[\"\n\
628 and \"]\". If the first character is \"^\" then the pattern is inverted and\n\
629 any character except those listed between brackets will match.\n\
630 \n\
631 Escape sequences defined below can also be used inside list\n\
632 operators. For example, a template for a floating point number might be\n\
633 @code{[-+.\\d]+}.\n\
634 \n\
635 @item () (?:)\n\
636 Grouping operator. The first form, parentheses only, also creates a token.\n\
637 \n\
638 @item |\n\
639 Alternation operator. Match one of a choice of regular expressions. The\n\
640 alternatives must be delimited by the grouping operator @code{()} above.\n\
641 \n\
642 @item ^ $\n\
643 Anchoring operators. Requires pattern to occur at the start (@code{^}) or\n\
644 end (@code{$}) of the string.\n\
645 @end table\n\
646 \n\
647 In addition, the following escaped characters have special meaning.\n\
648 \n\
649 @table @code\n\
650 \n\
651 @item \\d\n\
652 Match any digit\n\
653 \n\
654 @item \\D\n\
655 Match any non-digit\n\
656 \n\
657 @item \\s\n\
658 Match any whitespace character\n\
659 \n\
660 @item \\S\n\
661 Match any non-whitespace character\n\
662 \n\
663 @item \\w\n\
664 Match any word character\n\
665 \n\
666 @item \\W\n\
667 Match any non-word character\n\
668 \n\
669 @item \\<\n\
670 Match the beginning of a word\n\
671 \n\
672 @item \\>\n\
673 Match the end of a word\n\
674 \n\
675 @item \\B\n\
676 Match within a word\n\
677 @end table\n\
678 \n\
679 Implementation Note: For compatibility with @sc{matlab}, ordinary escape\n\
680 sequences (e.g., @qcode{\"\\n\"} => newline) are processed in @var{pat}\n\
681 regardless of whether @var{pat} has been defined within single quotes. Use\n\
682 a second backslash to stop interpolation of the escape sequence (e.g.,\n\
683 \"\\\\n\") or use the @code{regexptranslate} function.\n\
684 \n\
685 The outputs of @code{regexp} default to the order given below\n\
686 \n\
687 @table @var\n\
688 @item s\n\
689 The start indices of each matching substring\n\
690 \n\
691 @item e\n\
692 The end indices of each matching substring\n\
693 \n\
694 @item te\n\
695 The extents of each matched token surrounded by @code{(@dots{})} in\n\
696 @var{pat}\n\
697 \n\
698 @item m\n\
699 A cell array of the text of each match\n\
700 \n\
701 @item t\n\
702 A cell array of the text of each token matched\n\
703 \n\
704 @item nm\n\
705 A structure containing the text of each matched named token, with the name\n\
706 being used as the fieldname. A named token is denoted by\n\
707 @code{(?<name>@dots{})}.\n\
708 \n\
709 @item sp\n\
710 A cell array of the text not returned by match, i.e., what remains if you\n\
711 split the string based on @var{pat}.\n\
712 @end table\n\
713 \n\
714 Particular output arguments, or the order of the output arguments, can be\n\
715 selected by additional @var{opt} arguments. These are strings and the\n\
716 correspondence between the output arguments and the optional argument\n\
717 are\n\
718 \n\
719 @multitable @columnfractions 0.2 0.3 0.3 0.2\n\
720 @item @tab @qcode{'start'} @tab @var{s} @tab\n\
721 @item @tab @qcode{'end'} @tab @var{e} @tab\n\
722 @item @tab @qcode{'tokenExtents'} @tab @var{te} @tab\n\
723 @item @tab @qcode{'match'} @tab @var{m} @tab\n\
724 @item @tab @qcode{'tokens'} @tab @var{t} @tab\n\
725 @item @tab @qcode{'names'} @tab @var{nm} @tab\n\
726 @item @tab @qcode{'split'} @tab @var{sp} @tab\n\
727 @end multitable\n\
728 \n\
729 Additional arguments are summarized below.\n\
730 \n\
731 @table @samp\n\
732 @item once\n\
733 Return only the first occurrence of the pattern.\n\
734 \n\
735 @item matchcase\n\
736 Make the matching case sensitive. (default)\n\
737 \n\
738 Alternatively, use (?-i) in the pattern.\n\
739 \n\
740 @item ignorecase\n\
741 Ignore case when matching the pattern to the string.\n\
742 \n\
743 Alternatively, use (?i) in the pattern.\n\
744 \n\
745 @item stringanchors\n\
746 Match the anchor characters at the beginning and end of the string.\n\
747 (default)\n\
748 \n\
749 Alternatively, use (?-m) in the pattern.\n\
750 \n\
751 @item lineanchors\n\
752 Match the anchor characters at the beginning and end of the line.\n\
753 \n\
754 Alternatively, use (?m) in the pattern.\n\
755 \n\
756 @item dotall\n\
757 The pattern @code{.} matches all characters including the newline character.\n\
758  (default)\n\
759 \n\
760 Alternatively, use (?s) in the pattern.\n\
761 \n\
762 @item dotexceptnewline\n\
763 The pattern @code{.} matches all characters except the newline character.\n\
764 \n\
765 Alternatively, use (?-s) in the pattern.\n\
766 \n\
767 @item literalspacing\n\
768 All characters in the pattern, including whitespace, are significant and are\n\
769 used in pattern matching. (default)\n\
770 \n\
771 Alternatively, use (?-x) in the pattern.\n\
772 \n\
773 @item freespacing\n\
774 The pattern may include arbitrary whitespace and also comments beginning with\n\
775 the character @samp{#}.\n\
776 \n\
777 Alternatively, use (?x) in the pattern.\n\
778 \n\
779 @item noemptymatch\n\
780 Zero-length matches are not returned. (default)\n\
781 \n\
782 @item emptymatch\n\
783 Return zero-length matches.\n\
784 \n\
785 @code{regexp ('a', 'b*', 'emptymatch')} returns @code{[1 2]} because there\n\
786 are zero or more @qcode{'b'} characters at positions 1 and end-of-string.\n\
787 \n\
788 @end table\n\
789 @seealso{regexpi, strfind, regexprep}\n\
790 @end deftypefn")
791 {
792  octave_value_list retval;
793 
794  int nargin = args.length ();
795 
796  if (nargin < 2)
797  print_usage ();
798  else if (args(0).is_cell () || args(1).is_cell ())
799  retval = octcellregexp (args, (nargout > 0 ? nargout : 1), "regexp");
800  else
801  retval = octregexp (args, nargout, "regexp");
802 
803  return retval;
804 }
805 
806 /*
807 ## PCRE_ERROR_MATCHLIMIT test
808 %!test
809 %! s = sprintf ('\t4\n0000\t-0.00\t-0.0000\t4\t-0.00\t-0.0000\t4\n0000\t-0.00\t-0.0000\t0\t-0.00\t-');
810 %! ws = warning ("query");
811 %! unwind_protect
812 %! warning ("off");
813 %! regexp (s, '(\s*-*\d+[.]*\d*\s*)+\n');
814 %! unwind_protect_cleanup
815 %! warning (ws);
816 %! end_unwind_protect
817 
818 ## segfault test
819 %!assert (regexp ("abcde", "."), [1,2,3,4,5])
820 ## Infinite loop test
821 %!assert (isempty (regexp ("abcde", "")))
822 
823 ## Check that anchoring of pattern works correctly
824 %!assert (regexp ('abcabc', '^abc'), 1)
825 %!assert (regexp ('abcabc', 'abc$'), 4)
826 %!assert (regexp ('abcabc', '^abc$'), zeros (1,0))
827 
828 %!test
829 %! [s, e, te, m, t] = regexp (' No Match ', 'f(.*)uck');
830 %! assert (s, zeros (1,0));
831 %! assert (e, zeros (1,0));
832 %! assert (te, cell (1,0));
833 %! assert (m, cell (1,0));
834 %! assert (t, cell (1,0));
835 
836 %!test
837 %! [s, e, te, m, t] = regexp (' FiRetrUck ', 'f(.*)uck');
838 %! assert (s, zeros (1,0));
839 %! assert (e, zeros (1,0));
840 %! assert (te, cell (1,0));
841 %! assert (m, cell (1,0));
842 %! assert (t, cell (1,0));
843 
844 %!test
845 %! [s, e, te, m, t] = regexp (' firetruck ', 'f(.*)uck');
846 %! assert (s, 2);
847 %! assert (e, 10);
848 %! assert (te{1}, [3, 7]);
849 %! assert (m{1}, 'firetruck');
850 %! assert (t{1}{1}, 'iretr');
851 
852 %!test
853 %! [s, e, te, m, t] = regexp ('short test string', '\w*r\w*');
854 %! assert (s, [1, 12]);
855 %! assert (e, [5, 17]);
856 %! assert (size (te), [1, 2]);
857 %! assert (isempty (te{1}));
858 %! assert (isempty (te{2}));
859 %! assert (m{1}, 'short');
860 %! assert (m{2}, 'string');
861 %! assert (size (t), [1, 2]);
862 %! assert (isempty (t{1}));
863 %! assert (isempty (t{2}));
864 
865 %!test
866 %! [s, e, te, m, t] = regexp ('short test string', '\w*r\w*', 'once');
867 %! assert (s, 1);
868 %! assert (e, 5);
869 %! assert (isempty (te));
870 %! assert (m, 'short');
871 %! assert (isempty (t));
872 
873 %!test
874 %! [m, te, e, s, t] = regexp ('short test string', '\w*r\w*', 'once', 'match', 'tokenExtents', 'end', 'start', 'tokens');
875 %! assert (s, 1);
876 %! assert (e, 5);
877 %! assert (isempty (te));
878 %! assert (m, 'short');
879 %! assert (isempty (t));
880 
881 %!test
882 %! [s, e, te, m, t, nm] = regexp ('short test string', '(?<word1>\w*t)\s*(?<word2>\w*t)');
883 %! assert (s, 1);
884 %! assert (e, 10);
885 %! assert (size (te), [1, 1]);
886 %! assert (te{1}, [1,5; 7,10]);
887 %! assert (m{1}, 'short test');
888 %! assert (size (t), [1, 1]);
889 %! assert (t{1}{1}, 'short');
890 %! assert (t{1}{2}, 'test');
891 %! assert (size (nm), [1, 1]);
892 %! assert (! isempty (fieldnames (nm)));
893 %! assert (sort (fieldnames (nm)), {'word1';'word2'});
894 %! assert (nm.word1, 'short');
895 %! assert (nm.word2, 'test');
896 
897 %!test
898 %! [nm, m, te, e, s, t] = regexp ('short test string', '(?<word1>\w*t)\s*(?<word2>\w*t)', 'names', 'match', 'tokenExtents', 'end', 'start', 'tokens');
899 %! assert (s, 1);
900 %! assert (e, 10);
901 %! assert (size (te), [1, 1]);
902 %! assert (te{1}, [1,5; 7,10]);
903 %! assert (m{1}, 'short test');
904 %! assert (size (t), [1, 1]);
905 %! assert (t{1}{1}, 'short');
906 %! assert (t{1}{2}, 'test');
907 %! assert (size (nm), [1, 1]);
908 %! assert (!isempty (fieldnames (nm)));
909 %! assert (sort (fieldnames (nm)), {'word1';'word2'});
910 %! assert (nm.word1, 'short');
911 %! assert (nm.word2, 'test');
912 
913 %!test
914 %! [t, nm] = regexp ("John Davis\nRogers, James", '(?<first>\w+)\s+(?<last>\w+)|(?<last>\w+),\s+(?<first>\w+)', 'tokens', 'names');
915 %! assert (size (t), [1, 2]);
916 %! assert (t{1}{1}, 'John');
917 %! assert (t{1}{2}, 'Davis');
918 %! assert (t{2}{1}, 'Rogers');
919 %! assert (t{2}{2}, 'James');
920 %! assert (size (nm), [1, 1]);
921 %! assert (nm.first{1}, 'John');
922 %! assert (nm.first{2}, 'James');
923 %! assert (nm.last{1}, 'Davis');
924 %! assert (nm.last{2}, 'Rogers');
925 
926 ## Tests for named tokens
927 %!test
928 %! ## Parenthesis in named token (ie (int)) causes a problem
929 %! assert (regexp ('qwe int asd', ['(?<typestr>(int))'], 'names'), struct ('typestr', 'int'));
930 
931 %!test
932 %! ## Mix of named and unnamed tokens can cause segfault (bug #35683)
933 %! str = "abcde";
934 %! ptn = '(?<T1>a)(\w+)(?<T2>d\w+)';
935 %! tokens = regexp (str, ptn, "names");
936 %! assert (isstruct (tokens) && numel (tokens) == 1);
937 %! assert (tokens.T1, "a");
938 %! assert (tokens.T2, "de");
939 
940 %!assert (regexp ("abc\nabc", '.'), [1:7])
941 %!assert (regexp ("abc\nabc", '.', 'dotall'), [1:7])
942 %!test
943 %! assert (regexp ("abc\nabc", '(?s).'), [1:7]);
944 %! assert (regexp ("abc\nabc", '.', 'dotexceptnewline'), [1,2,3,5,6,7]);
945 %! assert (regexp ("abc\nabc", '(?-s).'), [1,2,3,5,6,7]);
946 
947 %!assert (regexp ("caseCaSe", 'case'), 1)
948 %!assert (regexp ("caseCaSe", 'case', "matchcase"), 1)
949 %!assert (regexp ("caseCaSe", 'case', "ignorecase"), [1,5])
950 %!test
951 %! assert (regexp ("caseCaSe", '(?-i)case'), 1);
952 %! assert (regexp ("caseCaSe", '(?i)case'), [1, 5]);
953 
954 %!assert (regexp ("abc\nabc", 'c$'), 7)
955 %!assert (regexp ("abc\nabc", 'c$', "stringanchors"), 7)
956 %!test
957 %! assert (regexp ("abc\nabc", '(?-m)c$'), 7);
958 %! assert (regexp ("abc\nabc", 'c$',"lineanchors"), [3, 7]);
959 %! assert (regexp ("abc\nabc", '(?m)c$'), [3,7]);
960 
961 %!assert (regexp ("this word", 's w'), 4)
962 %!assert (regexp ("this word", 's w', 'literalspacing'), 4)
963 %!test
964 %! assert (regexp ("this word", '(?-x)s w', 'literalspacing'), 4);
965 %! assert (regexp ("this word", 's w', 'freespacing'), zeros (1,0));
966 %! assert (regexp ("this word", '(?x)s w'), zeros (1,0));
967 
968 %!test
969 %! [s, e, te, m, t, nm, sp] = regexp ('OCTAVE', '[VOCT]*', 'noemptymatch');
970 %! assert (s, [1 5]);
971 %! assert (e, [3 5]);
972 %! assert (te, { zeros(0,2), zeros(0,2) });
973 %! assert (m, { "OCT", "V" });
974 %! assert (t, { cell(1,0), cell(1,0) });
975 %! assert (isempty (fieldnames (nm)));
976 %! assert (sp, { "", "A", "E" });
977 
978 %!test
979 %! [s, e, te, m, t, nm, sp] = regexp ('OCTAVE', '([VOCT]*)', 'noemptymatch');
980 %! assert (s, [1 5]);
981 %! assert (e, [3 5]);
982 %! assert (te, { [1 3], [5 5] });
983 %! assert (m, { "OCT", "V" });
984 %! assert (t, { {"OCT"}, {"V"} });
985 %! assert (isempty (fieldnames (nm)));
986 %! assert (sp, { "", "A", "E" });
987 
988 %!test
989 %! [s, e, te, m, t, nm, sp] = regexp ('OCTAVE', '[VOCT]*', 'emptymatch');
990 %! assert (s, [1 4 5 6 7]);
991 %! assert (e, [3 3 5 5 6]);
992 %! assert (te, repmat ({zeros(0,2)}, [1, 5]));
993 %! assert (m, { "OCT", "", "V", "", "" });
994 %! assert (t, repmat({cell(1,0)}, [1, 5]));
995 %! assert (isempty (fieldnames (nm)));
996 %! assert (sp, { "", "", "A", "", "E", "" });
997 
998 %!test
999 %! [s, e, te, m, t, nm, sp] = regexp ('OCTAVE', '([VOCT]*)', 'emptymatch');
1000 %! assert (s, [1 4 5 6 7]);
1001 %! assert (e, [3 3 5 5 6]);
1002 %! assert (te, { [1 3], [4 3], [5 5], [6 5], [7 6] });
1003 %! assert (m, { "OCT", "", "V", "", "" });
1004 %! assert (t, { {"OCT"}, {""}, {"V"}, {""}, {""} });
1005 %! assert (isempty (fieldnames (nm)));
1006 %! assert (sp, { "", "", "A", "", "E", "" });
1007 
1008 %!error regexp ('string', 'tri', 'BadArg')
1009 %!error regexp ('string')
1010 
1011 %!assert (regexp ({'asdfg-dfd';'-dfd-dfd-';'qasfdfdaq'}, '-'), {6;[1,5,9];zeros(1,0)})
1012 %!assert (regexp ({'asdfg-dfd';'-dfd-dfd-';'qasfdfdaq'}, {'-';'f';'q'}), {6;[3,7];[1,9]})
1013 %!assert (regexp ('Strings', {'t','s'}), {2, 7})
1014 
1015 ## Test case for lookaround operators
1016 %!test
1017 %! assert (regexp ('Iraq', 'q(?!u)'), 4);
1018 %! assert (regexp ('quit', 'q(?!u)'), zeros (1, 0));
1019 %! assert (regexp ('quit', 'q(?=u)' , 'match'), {'q'});
1020 %! assert (regexp ("quit", 'q(?=u+)', 'match'), {'q'});
1021 %! assert (regexp ("qit", 'q(?=u+)', 'match'), cell (1, 0));
1022 %! assert (regexp ("qit", 'q(?=u*)', 'match'), {'q'});
1023 %! assert (regexp ('thingamabob', '(?<=a)b'), 9);
1024 
1025 ## Tests for split option.
1026 %!shared str
1027 %! str = "foo bar foo";
1028 %!test
1029 %! [a, b] = regexp (str, "f..", "match", "split");
1030 %! assert (a, {"foo", "foo"});
1031 %! assert (b, {"", " bar ", ""});
1032 %!test
1033 %! [a, b] = regexp (str, "f..", "match", "split", "once");
1034 %! assert (a, "foo");
1035 %! assert (b, {"", " bar foo"});
1036 %!test
1037 %! [a, b] = regexp (str, "fx.", "match", "split");
1038 %! assert (a, cell (1, 0));
1039 %! assert (b, {"foo bar foo"});
1040 %!test
1041 %! [a, b] = regexp (str, "fx.", "match", "split", "once");
1042 %! assert (a, "");
1043 %! assert (b, "foo bar foo");
1044 
1045 %!shared str
1046 %! str = "foo bar";
1047 %!test
1048 %! [a, b] = regexp (str, "f..", "match", "split");
1049 %! assert (a, {"foo"});
1050 %! assert (b, {"", " bar"});
1051 %!test
1052 %! [a, b] = regexp (str, "b..", "match", "split");
1053 %! assert (a, {"bar"});
1054 %! assert (b, {"foo ", ""});
1055 %!test
1056 %! [a, b] = regexp (str, "x", "match", "split");
1057 %! assert (a, cell (1, 0));
1058 %! assert (b, {"foo bar"});
1059 %!test
1060 %! [a, b] = regexp (str, "[o]+", "match", "split");
1061 %! assert (a, {"oo"});
1062 %! assert (b, {"f", " bar"});
1063 
1064 %!assert (regexp ("\n", '\n'), 1);
1065 %!assert (regexp ("\n", "\n"), 1);
1066 */
1067 
1068 DEFUN (regexpi, args, nargout,
1069  "-*- texinfo -*-\n\
1070 @deftypefn {Built-in Function} {[@var{s}, @var{e}, @var{te}, @var{m}, @var{t}, @var{nm}, @var{sp}] =} regexpi (@var{str}, @var{pat})\n\
1071 @deftypefnx {Built-in Function} {[@dots{}] =} regexpi (@var{str}, @var{pat}, \"@var{opt1}\", @dots{})\n\
1072 \n\
1073 Case insensitive regular expression string matching. Search for @var{pat} in\n\
1074 @var{str} and return the positions and substrings of any matches, or empty\n\
1075 values if there are none. @xref{XREFregexp,,regexp}, for details on the\n\
1076 syntax of the search pattern.\n\
1077 @seealso{regexp}\n\
1078 @end deftypefn")
1079 {
1080  octave_value_list retval;
1081 
1082  int nargin = args.length ();
1083 
1084  if (nargin < 2)
1085  print_usage ();
1086  else if (args(0).is_cell () || args(1).is_cell ())
1087  retval = octcellregexp (args, (nargout > 0 ? nargout : 1), "regexpi", true);
1088  else
1089  retval = octregexp (args, nargout, "regexpi", true);
1090 
1091  return retval;
1092 }
1093 
1094 /*
1095 ## segfault test
1096 %!assert (regexpi ("abcde", "."), [1,2,3,4,5])
1097 
1098 ## Check that anchoring of pattern works correctly
1099 %!assert (regexpi ('abcabc', '^ABC'), 1)
1100 %!assert (regexpi ('abcabc', 'ABC$'), 4)
1101 %!assert (regexpi ('abcabc', '^ABC$'), zeros (1,0))
1102 
1103 %!test
1104 %! [s, e, te, m, t] = regexpi (' No Match ', 'f(.*)uck');
1105 %! assert (s, zeros (1,0));
1106 %! assert (e, zeros (1,0));
1107 %! assert (te, cell (1,0));
1108 %! assert (m, cell (1,0));
1109 %! assert (t, cell (1,0));
1110 
1111 %!test
1112 %! [s, e, te, m, t] = regexpi (' FiRetrUck ', 'f(.*)uck');
1113 %! assert (s, 2);
1114 %! assert (e, 10);
1115 %! assert (te{1}, [3, 7]);
1116 %! assert (m{1}, 'FiRetrUck');
1117 %! assert (t{1}{1}, 'iRetr');
1118 
1119 %!test
1120 %! [s, e, te, m, t] = regexpi (' firetruck ', 'f(.*)uck');
1121 %! assert (s, 2);
1122 %! assert (e, 10);
1123 %! assert (te{1}, [3, 7]);
1124 %! assert (m{1}, 'firetruck');
1125 %! assert (t{1}{1}, 'iretr');
1126 
1127 %!test
1128 %! [s, e, te, m, t] = regexpi ('ShoRt Test String', '\w*r\w*');
1129 %! assert (s, [1, 12]);
1130 %! assert (e, [5, 17]);
1131 %! assert (size (te), [1, 2]);
1132 %! assert (isempty (te{1}));
1133 %! assert (isempty (te{2}));
1134 %! assert (m{1}, 'ShoRt');
1135 %! assert (m{2}, 'String');
1136 %! assert (size (t), [1, 2]);
1137 %! assert (isempty (t{1}));
1138 %! assert (isempty (t{2}));
1139 
1140 %!test
1141 %! [s, e, te, m, t] = regexpi ('ShoRt Test String', '\w*r\w*', 'once');
1142 %! assert (s, 1);
1143 %! assert (e, 5);
1144 %! assert (isempty (te));
1145 %! assert (m, 'ShoRt');
1146 %! assert (isempty (t));
1147 
1148 %!test
1149 %! [m, te, e, s, t] = regexpi ('ShoRt Test String', '\w*r\w*', 'once', 'match', 'tokenExtents', 'end', 'start', 'tokens');
1150 %! assert (s, 1);
1151 %! assert (e, 5);
1152 %! assert (isempty (te));
1153 %! assert (m, 'ShoRt');
1154 %! assert (isempty (t));
1155 
1156 %!test
1157 %! [s, e, te, m, t, nm] = regexpi ('ShoRt Test String', '(?<word1>\w*t)\s*(?<word2>\w*t)');
1158 %! assert (s, 1);
1159 %! assert (e, 10);
1160 %! assert (size (te), [1, 1]);
1161 %! assert (te{1}, [1,5; 7,10]);
1162 %! assert (m{1}, 'ShoRt Test');
1163 %! assert (size (t), [1, 1]);
1164 %! assert (t{1}{1}, 'ShoRt');
1165 %! assert (t{1}{2}, 'Test');
1166 %! assert (size (nm), [1, 1]);
1167 %! assert (! isempty (fieldnames (nm)));
1168 %! assert (sort (fieldnames (nm)), {'word1';'word2'});
1169 %! assert (nm.word1, 'ShoRt');
1170 %! assert (nm.word2, 'Test');
1171 
1172 %!test
1173 %! [nm, m, te, e, s, t] = regexpi ('ShoRt Test String', '(?<word1>\w*t)\s*(?<word2>\w*t)', 'names', 'match', 'tokenExtents', 'end', 'start', 'tokens');
1174 %! assert (s, 1);
1175 %! assert (e, 10);
1176 %! assert (size (te), [1, 1]);
1177 %! assert (te{1}, [1,5; 7,10]);
1178 %! assert (m{1}, 'ShoRt Test');
1179 %! assert (size (t), [1, 1]);
1180 %! assert (t{1}{1}, 'ShoRt');
1181 %! assert (t{1}{2}, 'Test');
1182 %! assert (size (nm), [1, 1]);
1183 %! assert (!isempty (fieldnames (nm)));
1184 %! assert (sort (fieldnames (nm)), {'word1';'word2'});
1185 %! assert (nm.word1, 'ShoRt');
1186 %! assert (nm.word2, 'Test');
1187 
1188 %!assert (regexpi ("abc\nabc", '.'), [1:7])
1189 %!assert (regexpi ("abc\nabc", '.', 'dotall'), [1:7])
1190 %!test
1191 %! assert (regexpi ("abc\nabc", '(?s).'), [1:7]);
1192 %! assert (regexpi ("abc\nabc", '.', 'dotexceptnewline'), [1,2,3,5,6,7]);
1193 %! assert (regexpi ("abc\nabc", '(?-s).'), [1,2,3,5,6,7]);
1194 
1195 %!assert (regexpi ("caseCaSe", 'case'), [1, 5])
1196 %!assert (regexpi ("caseCaSe", 'case', "matchcase"), 1)
1197 %!assert (regexpi ("caseCaSe", 'case', "ignorecase"), [1, 5])
1198 %!test
1199 %! assert (regexpi ("caseCaSe", '(?-i)case'), 1);
1200 %! assert (regexpi ("caseCaSe", '(?i)case'), [1, 5]);
1201 
1202 %!assert (regexpi ("abc\nabc", 'C$'), 7)
1203 %!assert (regexpi ("abc\nabc", 'C$', "stringanchors"), 7)
1204 %!test
1205 %! assert (regexpi ("abc\nabc", '(?-m)C$'), 7);
1206 %! assert (regexpi ("abc\nabc", 'C$', "lineanchors"), [3, 7]);
1207 %! assert (regexpi ("abc\nabc", '(?m)C$'), [3, 7]);
1208 
1209 %!assert (regexpi ("this word", 'S w'), 4)
1210 %!assert (regexpi ("this word", 'S w', 'literalspacing'), 4)
1211 %!test
1212 %! assert (regexpi ("this word", '(?-x)S w', 'literalspacing'), 4);
1213 %! assert (regexpi ("this word", 'S w', 'freespacing'), zeros (1,0));
1214 %! assert (regexpi ("this word", '(?x)S w'), zeros (1,0));
1215 
1216 %!error regexpi ('string', 'tri', 'BadArg')
1217 %!error regexpi ('string')
1218 
1219 %!assert (regexpi ({'asdfg-dfd';'-dfd-dfd-';'qasfdfdaq'}, '-'), {6;[1,5,9];zeros(1, 0)})
1220 %!assert (regexpi ({'asdfg-dfd', '-dfd-dfd-', 'qasfdfdaq'}, '-'), {6, [1,5,9], zeros(1,0)})
1221 %!assert (regexpi ({'asdfg-dfd';'-dfd-dfd-';'qasfdfdaq'}, {'-';'f';'q'}), {6;[3,7];[1,9]})
1222 %!assert (regexpi ('Strings', {'t', 's'}), {2, [1, 7]})
1223 
1224 %!assert (regexpi ("\n", '\n'), 1);
1225 %!assert (regexpi ("\n", "\n"), 1);
1226 */
1227 
1228 static octave_value
1229 octregexprep (const octave_value_list &args, const std::string &who)
1230 {
1231  octave_value retval;
1232 
1233  int nargin = args.length ();
1234 
1235  // Make sure we have string, pattern, replacement
1236  const std::string buffer = args(0).string_value ();
1237  if (error_state)
1238  return retval;
1239 
1240  std::string pattern = args(1).string_value ();
1241  if (error_state)
1242  return retval;
1243  // Matlab compatibility.
1244  if (args(1).is_sq_string ())
1245  pattern = do_regexp_ptn_string_escapes (pattern);
1246 
1247  std::string replacement = args(2).string_value ();
1248  if (error_state)
1249  return retval;
1250  // Matlab compatibility.
1251  if (args(2).is_sq_string ())
1252  replacement = do_regexp_rep_string_escapes (replacement);
1253 
1254  // Pack options excluding 'tokenize' and various output
1255  // reordering strings into regexp arg list
1256  octave_value_list regexpargs (nargin-3, octave_value ());
1257 
1258  int len = 0;
1259  for (int i = 3; i < nargin; i++)
1260  {
1261  const std::string opt = args(i).string_value ();
1262  if (opt != "tokenize" && opt != "start" && opt != "end"
1263  && opt != "tokenextents" && opt != "match" && opt != "tokens"
1264  && opt != "names" && opt != "split" && opt != "warnings")
1265  {
1266  regexpargs(len++) = args(i);
1267  }
1268  }
1269  regexpargs.resize (len);
1270 
1271  regexp::opts options;
1272  bool extra_args = false;
1273  parse_options (options, regexpargs, who, 0, extra_args);
1274  if (error_state)
1275  return retval;
1276 
1277  return regexp_replace (pattern, buffer, replacement, options, who);
1278 }
1279 
1280 DEFUN (regexprep, args, ,
1281  "-*- texinfo -*-\n\
1282 @deftypefn {Built-in Function} {@var{outstr} =} regexprep (@var{string}, @var{pat}, @var{repstr})\n\
1283 @deftypefnx {Built-in Function} {@var{outstr} =} regexprep (@var{string}, @var{pat}, @var{repstr}, \"@var{opt1}\", @dots{})\n\
1284 Replace occurrences of pattern @var{pat} in @var{string} with @var{repstr}.\n\
1285 \n\
1286 The pattern is a regular expression as documented for @code{regexp}.\n\
1287 @xref{XREFregexp,,regexp}.\n\
1288 \n\
1289 The replacement string may contain @code{$i}, which substitutes\n\
1290 for the ith set of parentheses in the match string. For example,\n\
1291 \n\
1292 @example\n\
1293 regexprep (\"Bill Dunn\", '(\\w+) (\\w+)', '$2, $1')\n\
1294 @end example\n\
1295 \n\
1296 @noindent\n\
1297 returns \"Dunn, Bill\"\n\
1298 \n\
1299 Options in addition to those of @code{regexp} are\n\
1300 \n\
1301 @table @samp\n\
1302 \n\
1303 @item once\n\
1304 Replace only the first occurrence of @var{pat} in the result.\n\
1305 \n\
1306 @item warnings\n\
1307 This option is present for compatibility but is ignored.\n\
1308 \n\
1309 @end table\n\
1310 \n\
1311 Implementation Note: For compatibility with @sc{matlab}, ordinary escape\n\
1312 sequences (e.g., @qcode{\"\\n\"} => newline) are processed in both @var{pat}\n\
1313 and @var{repstr} regardless of whether they were defined within single\n\
1314 quotes. Use a second backslash to stop interpolation of the escape sequence\n\
1315 (e.g., \"\\\\n\") or use the @code{regexptranslate} function.\n\
1316 @seealso{regexp, regexpi, strrep}\n\
1317 @end deftypefn")
1318 {
1319  octave_value_list retval;
1320  int nargin = args.length ();
1321 
1322  if (nargin < 3)
1323  {
1324  print_usage ();
1325  return retval;
1326  }
1327 
1328  if (args(0).is_cell () || args(1).is_cell () || args(2).is_cell ())
1329  {
1330  Cell str;
1331  Cell pat;
1332  Cell rep;
1333  dim_vector dv0;
1334  dim_vector dv1 (1, 1);
1335 
1336  if (args(0).is_cell ())
1337  str = args(0).cell_value ();
1338  else
1339  str = Cell (args(0));
1340 
1341  if (args(1).is_cell ())
1342  pat = args(1).cell_value ();
1343  else
1344  pat = Cell (args(1));
1345 
1346  if (args(2).is_cell ())
1347  rep = args(2).cell_value ();
1348  else
1349  rep = Cell (args(2));
1350 
1351  dv0 = str.dims ();
1352  if (pat.numel () != 1)
1353  {
1354  dv1 = pat.dims ();
1355  if (rep.numel () != 1 && dv1 != rep.dims ())
1356  error ("regexprep: inconsistent cell array dimensions");
1357  }
1358  else if (rep.numel () != 1)
1359  dv1 = rep.dims ();
1360 
1361  if (!error_state)
1362  {
1363  Cell ret (dv0);
1364  octave_value_list new_args = args;
1365 
1366  for (octave_idx_type i = 0; i < dv0.numel (); i++)
1367  {
1368  new_args(0) = str(i);
1369  if (pat.numel () == 1)
1370  new_args(1) = pat(0);
1371  if (rep.numel () == 1)
1372  new_args(2) = rep(0);
1373 
1374  for (octave_idx_type j = 0; j < dv1.numel (); j++)
1375  {
1376  if (pat.numel () != 1)
1377  new_args(1) = pat(j);
1378  if (rep.numel () != 1)
1379  new_args(2) = rep(j);
1380  new_args(0) = octregexprep (new_args, "regexprep");
1381 
1382  if (error_state)
1383  break;
1384  }
1385 
1386  if (error_state)
1387  break;
1388 
1389  ret(i) = new_args(0);
1390  }
1391 
1392  if (!error_state)
1393  retval = args(0).is_cell () ? octave_value (ret)
1394  : octave_value (ret(0));
1395  }
1396  }
1397  else
1398  retval = octregexprep (args, "regexprep");
1399 
1400  return retval;
1401 }
1402 
1403 /*
1404 %!test # Replace with empty
1405 %! xml = '<!-- This is some XML --> <tag v="hello">some stuff<!-- sample tag--></tag>';
1406 %! t = regexprep (xml, '<[!?][^>]*>', '');
1407 %! assert (t, ' <tag v="hello">some stuff</tag>');
1408 
1409 %!test # Replace with non-empty
1410 %! xml = '<!-- This is some XML --> <tag v="hello">some stuff<!-- sample tag--></tag>';
1411 %! t = regexprep (xml, '<[!?][^>]*>', '?');
1412 %! assert (t, '? <tag v="hello">some stuff?</tag>');
1413 
1414 %!test # Check that 'tokenize' is ignored
1415 %! xml = '<!-- This is some XML --> <tag v="hello">some stuff<!-- sample tag--></tag>';
1416 %! t = regexprep (xml, '<[!?][^>]*>', '', 'tokenize');
1417 %! assert (t, ' <tag v="hello">some stuff</tag>');
1418 
1419 ## Test capture replacement
1420 %!test
1421 %! data = "Bob Smith\nDavid Hollerith\nSam Jenkins";
1422 %! result = "Smith, Bob\nHollerith, David\nJenkins, Sam";
1423 %! t = regexprep (data, '(?m)^(\w+)\s+(\w+)$', '$2, $1');
1424 %! assert (t, result);
1425 
1426 ## Return the original if no match
1427 %!assert (regexprep ('hello', 'world', 'earth'), 'hello')
1428 
1429 ## Test emptymatch
1430 %!assert (regexprep ('World', '^', 'Hello '), 'World')
1431 %!assert (regexprep ('World', '^', 'Hello ', 'emptymatch'), 'Hello World')
1432 
1433 ## Test a general replacement
1434 %!assert (regexprep ("a[b]c{d}e-f=g", "[^A-Za-z0-9_]", "_"), "a_b_c_d_e_f_g")
1435 
1436 ## Make sure it works at the beginning and end
1437 %!assert (regexprep ("a[b]c{d}e-f=g", "a", "_"), "_[b]c{d}e-f=g")
1438 %!assert (regexprep ("a[b]c{d}e-f=g", "g", "_"), "a[b]c{d}e-f=_")
1439 
1440 ## Options
1441 %!assert (regexprep ("a[b]c{d}e-f=g", "[^A-Za-z0-9_]", "_", "once"), "a_b]c{d}e-f=g")
1442 %!assert (regexprep ("a[b]c{d}e-f=g", "[^A-Z0-9_]", "_", "ignorecase"), "a_b_c_d_e_f_g")
1443 
1444 ## Option combinations
1445 %!assert (regexprep ("a[b]c{d}e-f=g", "[^A-Z0-9_]", "_", "once", "ignorecase"), "a_b]c{d}e-f=g")
1446 
1447 ## End conditions on replacement
1448 %!assert (regexprep ("abc", "(b)", ".$1"), "a.bc");
1449 %!assert (regexprep ("abc", "(b)", "$1"), "abc");
1450 %!assert (regexprep ("abc", "(b)", "$1."), "ab.c");
1451 %!assert (regexprep ("abc", "(b)", "$1.."), "ab..c");
1452 
1453 ## Test cell array arguments
1454 %!assert (regexprep ("abc", {"b","a"}, "?"), "??c")
1455 %!assert (regexprep ({"abc","cba"}, "b", "?"), {"a?c","c?a"})
1456 %!assert (regexprep ({"abc","cba"}, {"b","a"}, {"?","!"}), {"!?c","c?!"})
1457 
1458 # Nasty lookbehind expression
1459 %!assert (regexprep ('x^(-1)+y(-1)+z(-1)=0', '(?<=[a-z]+)\(\-[1-9]*\)', '_minus1'),'x^(-1)+y_minus1+z_minus1=0')
1460 
1461 %!assert (regexprep ("\n", '\n', "X"), "X");
1462 %!assert (regexprep ("\n", "\n", "X"), "X");
1463 */