59 size_t len = s.length ();
65 if (s[j] ==
'\\' && j+1 < len)
115 size_t len = s.length ();
121 if (s[j] ==
'\\' && j+1 < len)
184 const std::string& who,
int skip,
bool& extra_args)
186 int nargin = args.
length ();
190 for (
int i = skip; i < nargin; i++)
192 std::string str = args(i).string_value ();
196 error (
"%s: optional arguments must be character strings",
203 if (str.find (
"once", 0) == 0)
205 else if (str.find (
"matchcase", 0) == 0)
207 else if (str.find (
"ignorecase", 0) == 0)
209 else if (str.find (
"dotall", 0) == 0)
211 else if (str.find (
"stringanchors", 0) == 0)
213 else if (str.find (
"literalspacing", 0) == 0)
215 else if (str.find (
"noemptymatch", 0) == 0)
217 else if (str.find (
"dotexceptnewline", 0) == 0)
219 else if (str.find (
"lineanchors", 0) == 0)
221 else if (str.find (
"freespacing", 0) == 0)
223 else if (str.find (
"emptymatch", 0) == 0)
225 else if (str.find (
"start", 0) == 0
226 || str.find (
"end", 0) == 0
227 || str.find (
"tokenextents", 0) == 0
228 || str.find (
"match", 0) == 0
229 || str.find (
"tokens", 0) == 0
230 || str.find (
"names", 0) == 0
231 || str.find (
"split", 0) == 0)
234 error (
"%s: unrecognized option", who.c_str ());
240 const std::string &who,
bool case_insensitive =
false)
244 int nargin = args.
length ();
247 const std::string buffer = args(0).string_value ();
251 std::string pattern = args(1).string_value ();
255 if (args(1).is_sq_string ())
260 bool extra_options =
false;
269 size_t sz = rx_lst.
size ();
284 for (
int j = 0; j < named_pats.
length (); j++)
285 nmap.
assign (named_pats(j), named_tokens(j));
291 for (
int j = 0; j < named_pats.
length (); j++)
297 p != rx_lst.
end (); p++)
301 tmp(i++) = named_tokens(j);
314 retval(4) = sz ? p->tokens () :
Cell ();
315 retval(3) = sz ? p->match_string () : std::string ();
316 retval(2) = sz ? p->token_extents () :
Matrix ();
320 double start = p->start ();
321 double end = p->end ();
324 split(0) = buffer.substr (0, start-1);
325 split(1) = buffer.substr (end);
350 p != rx_lst.
end (); p++)
352 double s = p->start ();
353 double e = p->end ();
357 match_string(i) = p->match_string ();
358 token_extents(i) = p->token_extents ();
361 split(i) = buffer.substr (sp_start, s-sp_start-1);
366 split(i) = buffer.substr (sp_start);
370 retval(3) = match_string;
371 retval(2) = token_extents;
382 new_retval.
resize (nargout);
385 for (
int j = 0; j < 6; j++)
388 for (
int j = 2; j < nargin; j++)
391 std::string str = args(j).string_value ();
394 if (str.find (
"once", 0) == 0
395 || str.find (
"stringanchors", 0) == 0
396 || str.find (
"lineanchors", 0) == 0
397 || str.find (
"matchcase", 0) == 0
398 || str.find (
"ignorecase", 0) == 0
399 || str.find (
"dotall", 0) == 0
400 || str.find (
"dotexceptnewline", 0) == 0
401 || str.find (
"literalspacing", 0) == 0
402 || str.find (
"freespacing", 0) == 0
403 || str.find (
"noemptymatch", 0) == 0
404 || str.find (
"emptymatch", 0) == 0)
406 else if (str.find (
"start", 0) == 0)
408 else if (str.find (
"end", 0) == 0)
410 else if (str.find (
"tokenextents", 0) == 0)
412 else if (str.find (
"match", 0) == 0)
414 else if (str.find (
"tokens", 0) == 0)
416 else if (str.find (
"names", 0) == 0)
418 else if (str.find (
"split", 0) == 0)
421 new_retval(n++) = retval(k);
431 for (
int j = 0; j < 6; j++)
434 new_retval(n++) = retval(j);
447 const std::string &who,
bool case_insensitive =
false)
451 if (args(0).is_cell ())
456 if (args(1).is_cell ())
460 if (cellpat.
numel () == 1)
462 for (
int j = 0; j < nargout; j++)
463 newretval[j].resize (cellstr.
dims ());
465 new_args(1) = cellpat(0);
469 new_args(0) = cellstr(i);
476 for (
int j = 0; j < nargout; j++)
477 newretval[j](i) = tmp(j);
480 else if (cellstr.
numel () == 1)
482 for (
int j = 0; j < nargout; j++)
483 newretval[j].resize (cellpat.
dims ());
485 new_args(0) = cellstr(0);
489 new_args(1) = cellpat(i);
496 for (
int j = 0; j < nargout; j++)
497 newretval[j](i) = tmp(j);
500 else if (cellstr.
numel () == cellpat.
numel ())
503 if (cellstr.
dims () != cellpat.
dims ())
504 error (
"%s: inconsistent cell array dimensions", who.c_str ());
507 for (
int j = 0; j < nargout; j++)
508 newretval[j].resize (cellstr.
dims ());
512 new_args(0) = cellstr(i);
513 new_args(1) = cellpat(i);
521 for (
int j = 0; j < nargout; j++)
522 newretval[j](i) = tmp(j);
// Report the failure under the calling function's name (WHO is "regexp" or
// "regexpi"), consistent with the dimension-mismatch error above, instead of
// always blaming "regexp".
527 error (
"%s: cell array arguments must be scalar or equal size", who.c_str ());
531 for (
int j = 0; j < nargout; j++)
532 newretval[j].resize (cellstr.
dims ());
536 new_args(0) = cellstr(i);
543 for (
int j = 0; j < nargout; j++)
544 newretval[j](i) = tmp(j);
549 for (
int j = 0; j < nargout; j++)
552 else if (args(1).is_cell ())
558 for (
int j = 0; j < nargout; j++)
559 newretval[j].resize (cellpat.
dims ());
563 new_args(1) = cellpat(i);
570 for (
int j = 0; j < nargout; j++)
571 newretval[j](i) = tmp(j);
576 for (
int j = 0; j < nargout; j++)
581 retval =
octregexp (args, nargout, who, case_insensitive);
589 @deftypefn {Built-in Function} {[@var{s}, @var{e}, @var{te}, @var{m}, @var{t}, @var{nm}, @var{sp}] =} regexp (@var{str}, @var{pat})\n\
590 @deftypefnx {Built-in Function} {[@dots{}] =} regexp (@var{str}, @var{pat}, \"@var{opt1}\", @dots{})\n\
591 Regular expression string matching. Search for @var{pat} in @var{str} and\n\
592 return the positions and substrings of any matches, or empty values if there\n\
595 The matched pattern @var{pat} can include any of the standard regex\n\
596 operators, including:\n\
600 Match any character\n\
603 Repetition operators, representing\n\
607 Match zero or more times\n\
610 Match one or more times\n\
613 Match zero or one times\n\
616 Match exactly @var{n} times\n\
618 @item @{@var{n},@}\n\
619 Match @var{n} or more times\n\
621 @item @{@var{m},@var{n}@}\n\
622 Match between @var{m} and @var{n} times\n\
625 @item [@dots{}] [^@dots{}]\n\
627 List operators. The pattern will match any character listed between \"[\"\n\
628 and \"]\". If the first character is \"^\" then the pattern is inverted and\n\
629 any character except those listed between brackets will match.\n\
631 Escape sequences defined below can also be used inside list\n\
632 operators. For example, a template for a floating point number might be\n\
636 Grouping operator. The first form, parentheses only, also creates a token.\n\
639 Alternation operator. Match one of a choice of regular expressions. The\n\
640 alternatives must be delimited by the grouping operator @code{()} above.\n\
643 Anchoring operators. Requires pattern to occur at the start (@code{^}) or\n\
644 end (@code{$}) of the string.\n\
647 In addition, the following escaped characters have special meaning.\n\
655 Match any non-digit\n\
658 Match any whitespace character\n\
661 Match any non-whitespace character\n\
664 Match any word character\n\
667 Match any non-word character\n\
670 Match the beginning of a word\n\
673 Match the end of a word\n\
676 Match within a word\n\
679 Implementation Note: For compatibility with @sc{matlab}, ordinary escape\n\
680 sequences (e.g., @qcode{\"\\n\"} => newline) are processed in @var{pat}\n\
681 regardless of whether @var{pat} has been defined within single quotes. Use\n\
682 a second backslash to stop interpolation of the escape sequence (e.g.,\n\
683 \"\\\\n\") or use the @code{regexptranslate} function.\n\
685 The outputs of @code{regexp} default to the order given below\n\
689 The start indices of each matching substring\n\
692 The end indices of each matching substring\n\
695 The extents of each matched token surrounded by @code{(@dots{})} in\n\
699 A cell array of the text of each match\n\
702 A cell array of the text of each token matched\n\
705 A structure containing the text of each matched named token, with the name\n\
706 being used as the fieldname. A named token is denoted by\n\
707 @code{(?<name>@dots{})}.\n\
710 A cell array of the text not returned by match, i.e., what remains if you\n\
711 split the string based on @var{pat}.\n\
714 Particular output arguments, or the order of the output arguments, can be\n\
715 selected by additional @var{opt} arguments. These are strings and the\n\
716 correspondence between the output arguments and the optional argument\n\
719 @multitable @columnfractions 0.2 0.3 0.3 0.2\n\
720 @item @tab @qcode{'start'} @tab @var{s} @tab\n\
721 @item @tab @qcode{'end'} @tab @var{e} @tab\n\
722 @item @tab @qcode{'tokenExtents'} @tab @var{te} @tab\n\
723 @item @tab @qcode{'match'} @tab @var{m} @tab\n\
724 @item @tab @qcode{'tokens'} @tab @var{t} @tab\n\
725 @item @tab @qcode{'names'} @tab @var{nm} @tab\n\
726 @item @tab @qcode{'split'} @tab @var{sp} @tab\n\
729 Additional arguments are summarized below.\n\
733 Return only the first occurrence of the pattern.\n\
736 Make the matching case sensitive. (default)\n\
738 Alternatively, use (?-i) in the pattern.\n\
741 Ignore case when matching the pattern to the string.\n\
743 Alternatively, use (?i) in the pattern.\n\
745 @item stringanchors\n\
746 Match the anchor characters at the beginning and end of the string.\n\
749 Alternatively, use (?-m) in the pattern.\n\
752 Match the anchor characters at the beginning and end of the line.\n\
754 Alternatively, use (?m) in the pattern.\n\
757 The pattern @code{.} matches all characters including the newline character.\n\
760 Alternatively, use (?s) in the pattern.\n\
762 @item dotexceptnewline\n\
763 The pattern @code{.} matches all characters except the newline character.\n\
765 Alternatively, use (?-s) in the pattern.\n\
767 @item literalspacing\n\
768 All characters in the pattern, including whitespace, are significant and are\n\
769 used in pattern matching. (default)\n\
771 Alternatively, use (?-x) in the pattern.\n\
774 The pattern may include arbitrary whitespace and also comments beginning with\n\
775 the character @samp{#}.\n\
777 Alternatively, use (?x) in the pattern.\n\
779 @item noemptymatch\n\
780 Zero-length matches are not returned. (default)\n\
783 Return zero-length matches.\n\
785 @code{regexp ('a', 'b*', 'emptymatch')} returns @code{[1 2]} because there\n\
786 are zero or more @qcode{'b'} characters at positions 1 and end-of-string.\n\
789 @seealso{regexpi, strfind, regexprep}\n\
794 int nargin = args.
length ();
798 else if (args(0).is_cell () || args(1).is_cell ())
799 retval =
octcellregexp (args, (nargout > 0 ? nargout : 1),
"regexp");
801 retval =
octregexp (args, nargout,
"regexp");
1068 DEFUN (regexpi, args, nargout,
1070 @deftypefn {Built-in Function} {[@var{s}, @var{e}, @var{te}, @var{m}, @var{t}, @var{nm}, @var{sp}] =} regexpi (@var{str}, @var{pat})\n\
1071 @deftypefnx {Built-in Function} {[@dots{}] =} regexpi (@var{str}, @var{pat}, \"@var{opt1}\", @dots{})\n\
1073 Case insensitive regular expression string matching. Search for @var{pat} in\n\
1074 @var{str} and return the positions and substrings of any matches, or empty\n\
1075 values if there are none. @xref{XREFregexp,,regexp}, for details on the\n\
1076 syntax of the search pattern.\n\
1082 int nargin = args.
length ();
1086 else if (args(0).is_cell () || args(1).is_cell ())
1087 retval =
octcellregexp (args, (nargout > 0 ? nargout : 1),
"regexpi",
true);
1089 retval =
octregexp (args, nargout,
"regexpi",
true);
1233 int nargin = args.
length ();
1236 const std::string buffer = args(0).string_value ();
1240 std::string pattern = args(1).string_value ();
1244 if (args(1).is_sq_string ())
1247 std::string replacement = args(2).string_value ();
1251 if (args(2).is_sq_string ())
1259 for (
int i = 3; i < nargin; i++)
1261 const std::string opt = args(i).string_value ();
1262 if (opt !=
"tokenize" && opt !=
"start" && opt !=
"end"
1263 && opt !=
"tokenextents" && opt !=
"match" && opt !=
"tokens"
1264 && opt !=
"names" && opt !=
"split" && opt !=
"warnings")
1266 regexpargs(len++) = args(i);
1272 bool extra_args =
false;
1277 return regexp_replace (pattern, buffer, replacement, options, who);
1280 DEFUN (regexprep, args, ,
1282 @deftypefn {Built-in Function} {@var{outstr} =} regexprep (@var{string}, @var{pat}, @var{repstr})\n\
1283 @deftypefnx {Built-in Function} {@var{outstr} =} regexprep (@var{string}, @var{pat}, @var{repstr}, \"@var{opt1}\", @dots{})\n\
1284 Replace occurrences of pattern @var{pat} in @var{string} with @var{repstr}.\n\
1286 The pattern is a regular expression as documented for @code{regexp}.\n\
1287 @xref{XREFregexp,,regexp}.\n\
1289 The replacement string may contain @code{$i}, which substitutes\n\
1290 for the ith set of parentheses in the match string. For example,\n\
1293 regexprep (\"Bill Dunn\", '(\\w+) (\\w+)', '$2, $1')\n\
1297 returns \"Dunn, Bill\"\n\
1299 Options in addition to those of @code{regexp} are\n\
1304 Replace only the first occurrence of @var{pat} in the result.\n\
1307 This option is present for compatibility but is ignored.\n\
1311 Implementation Note: For compatibility with @sc{matlab}, ordinary escape\n\
1312 sequences (e.g., @qcode{\"\\n\"} => newline) are processed in both @var{pat}\n\
1313 and @var{repstr} regardless of whether they were defined within single\n\
1314 quotes. Use a second backslash to stop interpolation of the escape sequence\n\
1315 (e.g., \"\\\\n\") or use the @code{regexptranslate} function.\n\
1316 @seealso{regexp, regexpi, strrep}\n\
1320 int nargin = args.
length ();
1328 if (args(0).is_cell () || args(1).is_cell () || args(2).is_cell ())
1336 if (args(0).is_cell ())
1337 str = args(0).cell_value ();
1339 str =
Cell (args(0));
1341 if (args(1).is_cell ())
1342 pat = args(1).cell_value ();
1344 pat =
Cell (args(1));
1346 if (args(2).is_cell ())
1347 rep = args(2).cell_value ();
1349 rep =
Cell (args(2));
1352 if (pat.
numel () != 1)
1355 if (rep.
numel () != 1 && dv1 != rep.
dims ())
1356 error (
"regexprep: inconsistent cell array dimensions");
1358 else if (rep.
numel () != 1)
1368 new_args(0) = str(i);
1369 if (pat.
numel () == 1)
1370 new_args(1) = pat(0);
1371 if (rep.
numel () == 1)
1372 new_args(2) = rep(0);
1376 if (pat.
numel () != 1)
1377 new_args(1) = pat(j);
1378 if (rep.
numel () != 1)
1379 new_args(2) = rep(j);
1389 ret(i) = new_args(0);