00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #ifdef HAVE_CONFIG_H
00025 #include <config.h>
00026 #endif
00027
00028 #include <list>
00029 #include <sstream>
00030
00031 #include <pcre.h>
00032
00033 #include "base-list.h"
00034 #include "oct-locbuf.h"
00035 #include "quit.h"
00036 #include "regexp.h"
00037 #include "str-vec.h"
00038
00039 #include "defun-dld.h"
00040 #include "Cell.h"
00041 #include "error.h"
00042 #include "gripes.h"
00043 #include "oct-map.h"
00044 #include "oct-obj.h"
00045 #include "utils.h"
00046
00047 static void
00048 parse_options (regexp::opts& options, const octave_value_list& args,
00049 const std::string& who, int skip, bool& extra_args)
00050 {
00051 int nargin = args.length ();
00052
00053 extra_args = false;
00054
00055 for (int i = skip; i < nargin; i++)
00056 {
00057 std::string str = args(i).string_value ();
00058
00059 if (error_state)
00060 {
00061 error ("%s: optional arguments must be character strings",
00062 who.c_str ());
00063 break;
00064 }
00065
00066 std::transform (str.begin (), str.end (), str.begin (), tolower);
00067
00068 if (str.find ("once", 0) == 0)
00069 options.once (true);
00070 else if (str.find ("matchcase", 0) == 0)
00071 options.case_insensitive (false);
00072 else if (str.find ("ignorecase", 0) == 0)
00073 options.case_insensitive (true);
00074 else if (str.find ("dotall", 0) == 0)
00075 options.dotexceptnewline (false);
00076 else if (str.find ("stringanchors", 0) == 0)
00077 options.lineanchors (false);
00078 else if (str.find ("literalspacing", 0) == 0)
00079 options.freespacing (false);
00080 else if (str.find ("dotexceptnewline", 0) == 0)
00081 options.dotexceptnewline (true);
00082 else if (str.find ("lineanchors", 0) == 0)
00083 options.lineanchors (true);
00084 else if (str.find ("freespacing", 0) == 0)
00085 options.freespacing (true);
00086 else if (str.find ("start", 0) == 0
00087 || str.find ("end", 0) == 0
00088 || str.find ("tokenextents", 0) == 0
00089 || str.find ("match", 0) == 0
00090 || str.find ("tokens", 0) == 0
00091 || str.find ("names", 0) == 0
00092 || str.find ("split", 0) == 0)
00093 extra_args = true;
00094 else
00095 error ("%s: unrecognized option", who.c_str ());
00096 }
00097 }
00098
00099 static octave_value_list
00100 octregexp (const octave_value_list &args, int nargout,
00101 const std::string &who, bool case_insensitive = false)
00102 {
00103 octave_value_list retval;
00104
00105 int nargin = args.length ();
00106
00107
00108 const std::string buffer = args(0).string_value ();
00109 if (error_state)
00110 return retval;
00111
00112 const std::string pattern = args(1).string_value ();
00113 if (error_state)
00114 return retval;
00115
00116 regexp::opts options;
00117 options.case_insensitive (case_insensitive);
00118 bool extra_options = false;
00119 parse_options (options, args, who, 2, extra_options);
00120 if (error_state)
00121 return retval;
00122
00123 regexp::match_data rx_lst = regexp_match (pattern, buffer, options, who);
00124
00125 string_vector named_pats = rx_lst.named_patterns ();
00126
00127 size_t sz = rx_lst.size ();
00128
00129 if (! error_state)
00130 {
00131
00132
00133 octave_idx_type i = 0;
00134 octave_scalar_map nmap;
00135
00136 retval.resize (7);
00137
00138 if (sz == 1)
00139 {
00140 string_vector named_tokens = rx_lst.begin()->named_tokens ();
00141
00142 for (int j = 0; j < named_pats.length (); j++)
00143 nmap.assign (named_pats(j), named_tokens(j));
00144
00145 retval(5) = nmap;
00146 }
00147 else
00148 {
00149 for (int j = 0; j < named_pats.length (); j++)
00150 {
00151 Cell tmp (dim_vector (1, sz));
00152
00153 i = 0;
00154 for (regexp::match_data::const_iterator p = rx_lst.begin ();
00155 p != rx_lst.end (); p++)
00156 {
00157 string_vector named_tokens = p->named_tokens ();
00158
00159 tmp(i++) = named_tokens(j);
00160 }
00161
00162 nmap.assign (named_pats(j), octave_value (tmp));
00163 }
00164
00165 retval(5) = nmap;
00166 }
00167
00168 if (options.once ())
00169 {
00170 regexp::match_data::const_iterator p = rx_lst.begin ();
00171
00172 retval(4) = sz ? p->tokens () : Cell ();
00173 retval(3) = sz ? p->match_string () : std::string ();
00174 retval(2) = sz ? p->token_extents () : Matrix ();
00175
00176 if (sz)
00177 {
00178 double start = p->start ();
00179 double end = p->end ();
00180
00181 Cell split (dim_vector (1, 2));
00182 split(0) = buffer.substr (0, start-1);
00183 split(1) = buffer.substr (end);
00184
00185 retval(6) = split;
00186 retval(1) = end;
00187 retval(0) = start;
00188 }
00189 else
00190 {
00191 retval(6) = buffer;
00192 retval(1) = Matrix ();
00193 retval(0) = Matrix ();
00194 }
00195 }
00196 else
00197 {
00198 Cell tokens (dim_vector (1, sz));
00199 Cell match_string (dim_vector (1, sz));
00200 Cell token_extents (dim_vector (1, sz));
00201 NDArray end (dim_vector (1, sz));
00202 NDArray start (dim_vector (1, sz));
00203 Cell split (dim_vector (1, sz+1));
00204 size_t sp_start = 0;
00205
00206 i = 0;
00207 for (regexp::match_data::const_iterator p = rx_lst.begin ();
00208 p != rx_lst.end (); p++)
00209 {
00210 double s = p->start ();
00211 double e = p->end ();
00212
00213 string_vector tmp = p->tokens ();
00214 tokens(i) = Cell (dim_vector (1, tmp.length ()), tmp);
00215 match_string(i) = p->match_string ();
00216 token_extents(i) = p->token_extents ();
00217 end(i) = e;
00218 start(i) = s;
00219 split(i) = buffer.substr (sp_start, s-sp_start-1);
00220 sp_start = e;
00221 i++;
00222 }
00223
00224 split(i) = buffer.substr (sp_start);
00225
00226 retval(6) = split;
00227 retval(4) = tokens;
00228 retval(3) = match_string;
00229 retval(2) = token_extents;
00230 retval(1) = end;
00231 retval(0) = start;
00232 }
00233
00234
00235
00236 if (extra_options)
00237 {
00238 int n = 0;
00239 octave_value_list new_retval;
00240 new_retval.resize (nargout);
00241
00242 OCTAVE_LOCAL_BUFFER (int, arg_used, 6);
00243 for (int j = 0; j < 6; j++)
00244 arg_used[j] = false;
00245
00246 for (int j = 2; j < nargin; j++)
00247 {
00248 int k = 0;
00249 std::string str = args(j).string_value ();
00250 std::transform (str.begin (), str.end (), str.begin (), tolower);
00251
00252 if (str.find ("once", 0) == 0
00253 || str.find ("stringanchors", 0) == 0
00254 || str.find ("lineanchors", 0) == 0
00255 || str.find ("matchcase", 0) == 0
00256 || str.find ("ignorecase", 0) == 0
00257 || str.find ("dotall", 0) == 0
00258 || str.find ("dotexceptnewline", 0) == 0
00259 || str.find ("literalspacing", 0) == 0
00260 || str.find ("freespacing", 0) == 0)
00261 continue;
00262 else if (str.find ("start", 0) == 0)
00263 k = 0;
00264 else if (str.find ("end", 0) == 0)
00265 k = 1;
00266 else if (str.find ("tokenextents", 0) == 0)
00267 k = 2;
00268 else if (str.find ("match", 0) == 0)
00269 k = 3;
00270 else if (str.find ("tokens", 0) == 0)
00271 k = 4;
00272 else if (str.find ("names", 0) == 0)
00273 k = 5;
00274 else if (str.find ("split", 0) == 0)
00275 k = 6;
00276
00277 new_retval(n++) = retval(k);
00278 arg_used[k] = true;
00279
00280 if (n == nargout)
00281 break;
00282 }
00283
00284
00285 if (n < nargout)
00286 {
00287 for (int j = 0; j < 6; j++)
00288 {
00289 if (! arg_used[j])
00290 new_retval(n++) = retval(j);
00291 }
00292 }
00293
00294 retval = new_retval;
00295 }
00296 }
00297
00298 return retval;
00299 }
00300
00301 static octave_value_list
00302 octcellregexp (const octave_value_list &args, int nargout,
00303 const std::string &who, bool case_insensitive = false)
00304 {
00305 octave_value_list retval;
00306
00307 if (args(0).is_cell ())
00308 {
00309 OCTAVE_LOCAL_BUFFER (Cell, newretval, nargout);
00310 octave_value_list new_args = args;
00311 Cell cellstr = args(0).cell_value ();
00312 if (args(1).is_cell ())
00313 {
00314 Cell cellpat = args(1).cell_value ();
00315
00316 if (cellpat.numel () == 1)
00317 {
00318 for (int j = 0; j < nargout; j++)
00319 newretval[j].resize (cellstr.dims ());
00320
00321 new_args(1) = cellpat(0);
00322
00323 for (octave_idx_type i = 0; i < cellstr.numel (); i++)
00324 {
00325 new_args(0) = cellstr(i);
00326 octave_value_list tmp = octregexp (new_args, nargout, who,
00327 case_insensitive);
00328
00329 if (error_state)
00330 break;
00331
00332 for (int j = 0; j < nargout; j++)
00333 newretval[j](i) = tmp(j);
00334 }
00335 }
00336 else if (cellstr.numel () == 1)
00337 {
00338 for (int j = 0; j < nargout; j++)
00339 newretval[j].resize (cellpat.dims ());
00340
00341 new_args(0) = cellstr(0);
00342
00343 for (octave_idx_type i = 0; i < cellpat.numel (); i++)
00344 {
00345 new_args(1) = cellpat(i);
00346 octave_value_list tmp = octregexp (new_args, nargout, who,
00347 case_insensitive);
00348
00349 if (error_state)
00350 break;
00351
00352 for (int j = 0; j < nargout; j++)
00353 newretval[j](i) = tmp(j);
00354 }
00355 }
00356 else if (cellstr.numel () == cellpat.numel ())
00357 {
00358
00359 if (cellstr.dims () != cellpat.dims ())
00360 error ("%s: inconsistent cell array dimensions", who.c_str ());
00361 else
00362 {
00363 for (int j = 0; j < nargout; j++)
00364 newretval[j].resize (cellstr.dims ());
00365
00366 for (octave_idx_type i = 0; i < cellstr.numel (); i++)
00367 {
00368 new_args(0) = cellstr(i);
00369 new_args(1) = cellpat(i);
00370
00371 octave_value_list tmp = octregexp (new_args, nargout, who,
00372 case_insensitive);
00373
00374 if (error_state)
00375 break;
00376
00377 for (int j = 0; j < nargout; j++)
00378 newretval[j](i) = tmp(j);
00379 }
00380 }
00381 }
00382 else
00383 error ("regexp: cell array arguments must be scalar or equal size");
00384 }
00385 else
00386 {
00387 for (int j = 0; j < nargout; j++)
00388 newretval[j].resize (cellstr.dims ());
00389
00390 for (octave_idx_type i = 0; i < cellstr.numel (); i++)
00391 {
00392 new_args(0) = cellstr(i);
00393 octave_value_list tmp = octregexp (new_args, nargout, who,
00394 case_insensitive);
00395
00396 if (error_state)
00397 break;
00398
00399 for (int j = 0; j < nargout; j++)
00400 newretval[j](i) = tmp(j);
00401 }
00402 }
00403
00404 if (!error_state)
00405 for (int j = 0; j < nargout; j++)
00406 retval(j) = octave_value (newretval[j]);
00407 }
00408 else if (args(1).is_cell ())
00409 {
00410 OCTAVE_LOCAL_BUFFER (Cell, newretval, nargout);
00411 octave_value_list new_args = args;
00412 Cell cellpat = args(1).cell_value ();
00413
00414 for (int j = 0; j < nargout; j++)
00415 newretval[j].resize(cellpat.dims ());
00416
00417 for (octave_idx_type i = 0; i < cellpat.numel (); i++)
00418 {
00419 new_args(1) = cellpat(i);
00420 octave_value_list tmp = octregexp (new_args, nargout, who,
00421 case_insensitive);
00422
00423 if (error_state)
00424 break;
00425
00426 for (int j = 0; j < nargout; j++)
00427 newretval[j](i) = tmp(j);
00428 }
00429
00430 if (!error_state)
00431 {
00432 for (int j = 0; j < nargout; j++)
00433 retval(j) = octave_value (newretval[j]);
00434 }
00435 }
00436 else
00437 retval = octregexp (args, nargout, who, case_insensitive);
00438
00439 return retval;
00440
00441 }
00442
00443 DEFUN_DLD (regexp, args, nargout,
00444 "-*- texinfo -*-\n\
00445 @deftypefn {Loadable Function} {[@var{s}, @var{e}, @var{te}, @var{m}, @var{t}, @var{nm}] =} regexp (@var{str}, @var{pat})\n\
00446 @deftypefnx {Loadable Function} {[@dots{}] =} regexp (@var{str}, @var{pat}, \"@var{opt1}\", @dots{})\n\
00447 Regular expression string matching. Search for @var{pat} in @var{str} and\n\
00448 return the positions and substrings of any matches, or empty values if there\n\
00449 are none.\n\
00450 \n\
00451 The matched pattern @var{pat} can include any of the standard regex\n\
00452 operators, including:\n\
00453 \n\
00454 @table @code\n\
00455 @item .\n\
00456 Match any character\n\
00457 \n\
00458 @item * + ? @{@}\n\
00459 Repetition operators, representing\n\
00460 @table @code\n\
00461 @item *\n\
00462 Match zero or more times\n\
00463 \n\
00464 @item +\n\
00465 Match one or more times\n\
00466 \n\
00467 @item ?\n\
00468 Match zero or one times\n\
00469 \n\
00470 @item @{@var{n}@}\n\
00471 Match exactly @var{n} times\n\
00472 \n\
00473 @item @{@var{n},@}\n\
00474 Match @var{n} or more times\n\
00475 \n\
00476 @item @{@var{m},@var{n}@}\n\
00477 Match between @var{m} and @var{n} times\n\
00478 @end table\n\
00479 \n\
00480 @item [@dots{}] [^@dots{}]\n\
00481 \n\
00482 List operators. The pattern will match any character listed between \"[\"\n\
00483 and \"]\". If the first character is \"^\" then the pattern is inverted and\n\
00484 any character except those listed between brackets will match.\n\
00485 \n\
00486 Escape sequences defined below can also be used inside list\n\
00487 operators. For example, a template for a floating point number might be\n\
00488 @code{[-+.\\d]+}.\n\
00489 \n\
00490 @item ()\n\
00491 Grouping operator\n\
00492 \n\
00493 @item |\n\
00494 Alternation operator. Match one of a choice of regular expressions. The\n\
00495 alternatives must be delimited by the grouping operator @code{()} above.\n\
00496 \n\
00497 @item ^ $\n\
00498 Anchoring operators. Requires pattern to occur at the start (@code{^}) or\n\
00499 end (@code{$}) of the string.\n\
00500 @end table\n\
00501 \n\
00502 In addition, the following escaped characters have special meaning. Note,\n\
00503 it is recommended to quote @var{pat} in single quotes, rather than double\n\
00504 quotes, to avoid the escape sequences being interpreted by Octave before\n\
00505 being passed to @code{regexp}.\n\
00506 \n\
00507 @table @code\n\
00508 @item \\b\n\
00509 Match a word boundary\n\
00510 \n\
00511 @item \\B\n\
00512 Match within a word\n\
00513 \n\
00514 @item \\w\n\
00515 Match any word character\n\
00516 \n\
00517 @item \\W\n\
00518 Match any non-word character\n\
00519 \n\
00520 @item \\<\n\
00521 Match the beginning of a word\n\
00522 \n\
00523 @item \\>\n\
00524 Match the end of a word\n\
00525 \n\
00526 @item \\s\n\
00527 Match any whitespace character\n\
00528 \n\
00529 @item \\S\n\
00530 Match any non-whitespace character\n\
00531 \n\
00532 @item \\d\n\
00533 Match any digit\n\
00534 \n\
00535 @item \\D\n\
00536 Match any non-digit\n\
00537 @end table\n\
00538 \n\
00539 The outputs of @code{regexp} default to the order given below\n\
00540 \n\
00541 @table @var\n\
00542 @item s\n\
00543 The start indices of each matching substring\n\
00544 \n\
00545 @item e\n\
00546 The end indices of each matching substring\n\
00547 \n\
00548 @item te\n\
00549 The extents of each matched token surrounded by @code{(@dots{})} in\n\
00550 @var{pat}\n\
00551 \n\
00552 @item m\n\
00553 A cell array of the text of each match\n\
00554 \n\
00555 @item t\n\
00556 A cell array of the text of each token matched\n\
00557 \n\
00558 @item nm\n\
00559 A structure containing the text of each matched named token, with the name\n\
00560 being used as the fieldname. A named token is denoted by\n\
00561 @code{(?<name>@dots{})}.\n\
00562 \n\
00563 @item sp\n\
00564 A cell array of the text not returned by match.\n\
00565 @end table\n\
00566 \n\
00567 Particular output arguments, or the order of the output arguments, can be\n\
00568 selected by additional @var{opt} arguments. These are strings and the\n\
00569 correspondence between the output arguments and the optional argument\n\
00570 are\n\
00571 \n\
00572 @multitable @columnfractions 0.2 0.3 0.3 0.2\n\
00573 @item @tab 'start' @tab @var{s} @tab\n\
00574 @item @tab 'end' @tab @var{e} @tab\n\
00575 @item @tab 'tokenExtents' @tab @var{te} @tab\n\
00576 @item @tab 'match' @tab @var{m} @tab\n\
00577 @item @tab 'tokens' @tab @var{t} @tab\n\
00578 @item @tab 'names' @tab @var{nm} @tab\n\
00579 @item @tab 'split' @tab @var{sp} @tab\n\
00580 @end multitable\n\
00581 \n\
00582 Additional arguments are summarized below.\n\
00583 \n\
00584 @table @samp\n\
00585 @item once\n\
00586 Return only the first occurrence of the pattern.\n\
00587 \n\
00588 @item matchcase\n\
00589 Make the matching case sensitive. (default)\n\
00590 \n\
00591 Alternatively, use (?-i) in the pattern.\n\
00592 \n\
00593 @item ignorecase\n\
00594 Ignore case when matching the pattern to the string.\n\
00595 \n\
00596 Alternatively, use (?i) in the pattern.\n\
00597 \n\
00598 @item stringanchors\n\
00599 Match the anchor characters at the beginning and end of the string.\n\
00600 (default)\n\
00601 \n\
00602 Alternatively, use (?-m) in the pattern.\n\
00603 \n\
00604 @item lineanchors\n\
00605 Match the anchor characters at the beginning and end of the line.\n\
00606 \n\
00607 Alternatively, use (?m) in the pattern.\n\
00608 \n\
00609 @item dotall\n\
00610 The pattern @code{.} matches all characters including the newline character.\n\
00611 (default)\n\
00612 \n\
00613 Alternatively, use (?s) in the pattern.\n\
00614 \n\
00615 @item dotexceptnewline\n\
00616 The pattern @code{.} matches all characters except the newline character.\n\
00617 \n\
00618 Alternatively, use (?-s) in the pattern.\n\
00619 \n\
00620 @item literalspacing\n\
00621 All characters in the pattern, including whitespace, are significant and are\n\
00622 used in pattern matching. (default)\n\
00623 \n\
00624 Alternatively, use (?-x) in the pattern.\n\
00625 \n\
00626 @item freespacing\n\
00627 The pattern may include arbitrary whitespace and also comments beginning with\n\
00628 the character @samp{#}.\n\
00629 \n\
00630 Alternatively, use (?x) in the pattern.\n\
00631 \n\
00632 @end table\n\
00633 @seealso{regexpi, strfind, regexprep}\n\
00634 @end deftypefn")
00635 {
00636 octave_value_list retval;
00637
00638 int nargin = args.length ();
00639
00640 if (nargin < 2)
00641 print_usage ();
00642 else if (args(0).is_cell () || args(1).is_cell ())
00643 retval = octcellregexp (args, nargout, "regexp");
00644 else
00645 retval = octregexp (args, nargout, "regexp");
00646
00647 return retval;
00648 }
00649
00650
00651
00652
00653
00654
00655
00656
00657
00658
00659
00660
00661
00662
00663
00664
00665
00666
00667
00668
00669
00670
00671
00672
00673
00674
00675
00676
00677
00678
00679
00680
00681
00682
00683
00684
00685
00686
00687
00688
00689
00690
00691
00692
00693
00694
00695
00696
00697
00698
00699
00700
00701
00702
00703
00704
00705
00706
00707
00708
00709
00710
00711
00712
00713
00714
00715
00716
00717
00718
00719
00720
00721
00722
00723
00724
00725
00726
00727
00728
00729
00730
00731
00732
00733
00734
00735
00736
00737
00738
00739
00740
00741
00742
00743
00744
00745
00746
00747
00748
00749
00750
00751
00752
00753
00754
00755
00756
00757
00758
00759
00760
00761
00762
00763
00764
00765
00766
00767
00768
00769
00770
00771
00772
00773
00774
00775
00776
00777
00778
00779
00780
00781
00782
00783
00784
00785
00786
00787
00788
00789
00790
00791
00792
00793
00794
00795
00796
00797
00798
00799
00800
00801
00802
00803
00804
00805
00806
00807
00808
00809
00810
00811
00812
00813
00814
00815
00816
00817
00818
00819
00820
00821
00822
00823
00824
00825
00826
00827
00828
00829
00830
00831
00832
00833
00834
00835
00836
00837
00838
00839
00840
00841
00842
00843
00844
00845
00846
00847
00848
00849
00850
00851
00852
00853
00854
00855
00856
00857
00858
00859
00860
00861
00862
00863
00864
00865
00866
00867
00868
00869
00870
00871
00872 DEFUN_DLD (regexpi, args, nargout,
00873 "-*- texinfo -*-\n\
00874 @deftypefn {Loadable Function} {[@var{s}, @var{e}, @var{te}, @var{m}, @var{t}, @var{nm}] =} regexpi (@var{str}, @var{pat})\n\
00875 @deftypefnx {Loadable Function} {[@dots{}] =} regexpi (@var{str}, @var{pat}, \"@var{opt1}\", @dots{})\n\
00876 \n\
00877 Case insensitive regular expression string matching. Search for @var{pat} in\n\
00878 @var{str} and return the positions and substrings of any matches, or empty\n\
00879 values if there are none. @xref{doc-regexp,,regexp}, for details on the\n\
00880 syntax of the search pattern.\n\
00881 @seealso{regexp}\n\
00882 @end deftypefn")
00883 {
00884 octave_value_list retval;
00885
00886 int nargin = args.length ();
00887
00888 if (nargin < 2)
00889 print_usage ();
00890 else if (args(0).is_cell () || args(1).is_cell ())
00891 retval = octcellregexp (args, nargout, "regexpi", true);
00892 else
00893 retval = octregexp (args, nargout, "regexpi", true);
00894
00895 return retval;
00896 }
00897
00898
00899
00900
00901
00902
00903
00904
00905
00906
00907
00908
00909
00910
00911
00912
00913
00914
00915
00916
00917
00918
00919
00920
00921
00922
00923
00924
00925
00926
00927
00928
00929
00930
00931
00932
00933
00934
00935
00936
00937
00938
00939
00940
00941
00942
00943
00944
00945
00946
00947
00948
00949
00950
00951
00952
00953
00954
00955
00956
00957
00958
00959
00960
00961
00962
00963
00964
00965
00966
00967
00968
00969
00970
00971
00972
00973
00974
00975
00976
00977
00978
00979
00980
00981
00982
00983
00984
00985
00986
00987
00988
00989
00990
00991
00992
00993
00994
00995
00996
00997
00998
00999
01000
01001
01002
01003
01004
01005
01006
01007
01008
01009
01010
01011
01012
01013
01014
01015
01016
01017
01018
01019
01020
01021
01022
01023
01024
01025
01026
01027
01028
01029
01030
01031
01032 static octave_value
01033 octregexprep (const octave_value_list &args, const std::string &who)
01034 {
01035 octave_value retval;
01036
01037 int nargin = args.length ();
01038
01039
01040 const std::string buffer = args(0).string_value ();
01041 if (error_state)
01042 return retval;
01043
01044 const std::string pattern = args(1).string_value ();
01045 if (error_state)
01046 return retval;
01047
01048 const std::string replacement = args(2).string_value ();
01049 if (error_state)
01050 return retval;
01051
01052
01053
01054 octave_value_list regexpargs (nargin-3, octave_value ());
01055
01056 int len = 0;
01057 for (int i = 3; i < nargin; i++)
01058 {
01059 const std::string opt = args(i).string_value ();
01060 if (opt != "tokenize" && opt != "start" && opt != "end"
01061 && opt != "tokenextents" && opt != "match" && opt != "tokens"
01062 && opt != "names" && opt != "split" && opt != "warnings")
01063 {
01064 regexpargs(len++) = args(i);
01065 }
01066 }
01067 regexpargs.resize (len);
01068
01069 regexp::opts options;
01070 bool extra_args = false;
01071 parse_options (options, regexpargs, who, 0, extra_args);
01072 if (error_state)
01073 return retval;
01074
01075 return regexp_replace (pattern, buffer, replacement, options, who);
01076 }
01077
01078 DEFUN_DLD (regexprep, args, ,
01079 "-*- texinfo -*-\n\
01080 @deftypefn {Loadable Function} {@var{outstr} =} regexprep (@var{string}, @var{pat}, @var{repstr})\n\
01081 @deftypefnx {Loadable Function} {@var{outstr} =} regexprep (@var{string}, @var{pat}, @var{repstr}, \"@var{opt1}\", @dots{})\n\
01082 Replace occurrences of pattern @var{pat} in @var{string} with @var{repstr}.\n\
01083 \n\
01084 The pattern is a regular expression as documented for @code{regexp}.\n\
01085 @xref{doc-regexp,,regexp}.\n\
01086 \n\
01087 The replacement string may contain @code{$i}, which substitutes\n\
01088 for the ith set of parentheses in the match string. For example,\n\
01089 \n\
01090 @example\n\
01091 regexprep(\"Bill Dunn\",'(\\w+) (\\w+)','$2, $1')\n\
01092 @end example\n\
01093 \n\
01094 @noindent\n\
01095 returns \"Dunn, Bill\"\n\
01096 \n\
01097 Options in addition to those of @code{regexp} are\n\
01098 \n\
01099 @table @samp\n\
01100 \n\
01101 @item once\n\
01102 Replace only the first occurrence of @var{pat} in the result.\n\
01103 \n\
01104 @item warnings\n\
01105 This option is present for compatibility but is ignored.\n\
01106 \n\
01107 @end table\n\
01108 @seealso{regexp, regexpi, strrep}\n\
01109 @end deftypefn")
01110 {
01111 octave_value_list retval;
01112 int nargin = args.length ();
01113
01114 if (nargin < 3)
01115 {
01116 print_usage ();
01117 return retval;
01118 }
01119
01120 if (args(0).is_cell () || args(1).is_cell () || args(2).is_cell ())
01121 {
01122 Cell str;
01123 Cell pat;
01124 Cell rep;
01125 dim_vector dv0;
01126 dim_vector dv1 (1, 1);
01127
01128 if (args(0).is_cell ())
01129 str = args(0).cell_value ();
01130 else
01131 str = Cell (args(0));
01132
01133 if (args(1).is_cell ())
01134 pat = args(1).cell_value ();
01135 else
01136 pat = Cell (args(1));
01137
01138 if (args(2).is_cell ())
01139 rep = args(2).cell_value ();
01140 else
01141 rep = Cell (args(2));
01142
01143 dv0 = str.dims ();
01144 if (pat.numel () != 1)
01145 {
01146 dv1 = pat.dims ();
01147 if (rep.numel () != 1 && dv1 != rep.dims ())
01148 error ("regexprep: inconsistent cell array dimensions");
01149 }
01150 else if (rep.numel () != 1)
01151 dv1 = rep.dims ();
01152
01153 if (!error_state)
01154 {
01155 Cell ret (dv0);
01156 octave_value_list new_args = args;
01157
01158 for (octave_idx_type i = 0; i < dv0.numel (); i++)
01159 {
01160 new_args(0) = str(i);
01161 if (pat.numel() == 1)
01162 new_args(1) = pat(0);
01163 if (rep.numel() == 1)
01164 new_args(2) = rep(0);
01165
01166 for (octave_idx_type j = 0; j < dv1.numel (); j++)
01167 {
01168 if (pat.numel () != 1)
01169 new_args(1) = pat(j);
01170 if (rep.numel () != 1)
01171 new_args(2) = rep(j);
01172 new_args(0) = octregexprep (new_args, "regexprep");
01173
01174 if (error_state)
01175 break;
01176 }
01177
01178 if (error_state)
01179 break;
01180
01181 ret(i) = new_args(0);
01182 }
01183
01184 if (!error_state)
01185 retval = args(0).is_cell ()
01186 ? octave_value (ret) : octave_value (ret(0));
01187 }
01188 }
01189 else
01190 retval = octregexprep (args, "regexprep");
01191
01192 return retval;
01193 }
01194
01195
01196
01197
01198
01199
01200
01201
01202
01203
01204
01205
01206
01207
01208
01209
01210
01211
01212
01213
01214
01215
01216
01217
01218
01219
01220
01221
01222
01223
01224
01225
01226
01227
01228
01229
01230
01231
01232
01233
01234
01235
01236
01237
01238
01239
01240
01241
01242
01243
01244
01245
01246
01247
01248
01249
01250