GNU Octave: src/DLD-FUNCTIONS/regexp.cc Source File

Go to the documentation of this file.
00001 /*
00002 
00003 Copyright (C) 2005-2012 David Bateman
00004 Copyright (C) 2002-2005 Paul Kienzle
00005 
00006 This file is part of Octave.
00007 
00008 Octave is free software; you can redistribute it and/or modify it
00009 under the terms of the GNU General Public License as published by the
00010 Free Software Foundation; either version 3 of the License, or (at your
00011 option) any later version.
00012 
00013 Octave is distributed in the hope that it will be useful, but WITHOUT
00014 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
00015 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
00016 for more details.
00017 
00018 You should have received a copy of the GNU General Public License
00019 along with Octave; see the file COPYING.  If not, see
00020 <http://www.gnu.org/licenses/>.
00021 
00022 */
00023 
00024 #ifdef HAVE_CONFIG_H
00025 #include <config.h>
00026 #endif
00027 
00028 #include <list>
00029 #include <sstream>
00030 
00031 #include <pcre.h>
00032 
00033 #include "base-list.h"
00034 #include "oct-locbuf.h"
00035 #include "quit.h"
00036 #include "regexp.h"
00037 #include "str-vec.h"
00038 
00039 #include "defun-dld.h"
00040 #include "Cell.h"
00041 #include "error.h"
00042 #include "gripes.h"
00043 #include "oct-map.h"
00044 #include "oct-obj.h"
00045 #include "utils.h"
00046 
00047 static void
00048 parse_options (regexp::opts& options, const octave_value_list& args,
00049                const std::string& who, int skip, bool& extra_args)
00050 {
00051   int nargin = args.length ();
00052 
00053   extra_args = false;
00054 
00055   for (int i = skip; i < nargin; i++)
00056     {
00057       std::string str = args(i).string_value ();
00058 
00059       if (error_state)
00060         {
00061           error ("%s: optional arguments must be character strings",
00062                  who.c_str ());
00063           break;
00064         }
00065 
00066       std::transform (str.begin (), str.end (), str.begin (), tolower);
00067 
00068       if (str.find ("once", 0) == 0)
00069         options.once (true);
00070       else if (str.find ("matchcase", 0) == 0)
00071         options.case_insensitive (false);
00072       else if (str.find ("ignorecase", 0) == 0)
00073         options.case_insensitive (true);
00074       else if (str.find ("dotall", 0) == 0)
00075         options.dotexceptnewline (false);
00076       else if (str.find ("stringanchors", 0) == 0)
00077         options.lineanchors (false);
00078       else if (str.find ("literalspacing", 0) == 0)
00079         options.freespacing (false);
00080       else if (str.find ("dotexceptnewline", 0) == 0)
00081         options.dotexceptnewline (true);
00082       else if (str.find ("lineanchors", 0) == 0)
00083         options.lineanchors (true);
00084       else if (str.find ("freespacing", 0) == 0)
00085         options.freespacing (true);
00086       else if (str.find ("start", 0) == 0
00087                || str.find ("end", 0) == 0
00088                || str.find ("tokenextents", 0) == 0
00089                || str.find ("match", 0) == 0
00090                || str.find ("tokens", 0) == 0
00091                || str.find ("names", 0) == 0
00092                || str.find ("split", 0) == 0)
00093         extra_args = true;
00094       else
00095         error ("%s: unrecognized option", who.c_str ());
00096     }
00097 }
00098 
00099 static octave_value_list
00100 octregexp (const octave_value_list &args, int nargout,
00101            const std::string &who, bool case_insensitive = false)
00102 {
00103   octave_value_list retval;
00104 
00105   int nargin = args.length ();
00106 
00107   // Make sure we have string, pattern
00108   const std::string buffer = args(0).string_value ();
00109   if (error_state)
00110     return retval;
00111 
00112   const std::string pattern = args(1).string_value ();
00113   if (error_state)
00114     return retval;
00115 
00116   regexp::opts options;
00117   options.case_insensitive (case_insensitive);
00118   bool extra_options = false;
00119   parse_options (options, args, who, 2, extra_options);
00120   if (error_state)
00121     return retval;
00122 
00123   regexp::match_data rx_lst = regexp_match (pattern, buffer, options, who);
00124 
00125   string_vector named_pats = rx_lst.named_patterns ();
00126 
00127   size_t sz = rx_lst.size ();
00128 
00129   if (! error_state)
00130     {
00131       // Converted the linked list in the correct form for the return values
00132 
00133       octave_idx_type i = 0;
00134       octave_scalar_map nmap;
00135 
00136       retval.resize (7);
00137 
00138       if (sz == 1)
00139         {
00140           string_vector named_tokens = rx_lst.begin()->named_tokens ();
00141 
00142           for (int j = 0; j < named_pats.length (); j++)
00143             nmap.assign (named_pats(j), named_tokens(j));
00144 
00145           retval(5) = nmap;
00146         }
00147       else
00148         {
00149           for (int j = 0; j < named_pats.length (); j++)
00150             {
00151               Cell tmp (dim_vector (1, sz));
00152 
00153               i = 0;
00154               for (regexp::match_data::const_iterator p = rx_lst.begin ();
00155                    p != rx_lst.end (); p++)
00156                 {
00157                   string_vector named_tokens = p->named_tokens ();
00158 
00159                   tmp(i++) = named_tokens(j);
00160                 }
00161 
00162               nmap.assign (named_pats(j), octave_value (tmp));
00163             }
00164 
00165           retval(5) = nmap;
00166         }
00167 
00168       if (options.once ())
00169         {
00170           regexp::match_data::const_iterator p = rx_lst.begin ();
00171 
00172           retval(4) = sz ? p->tokens () : Cell ();
00173           retval(3) = sz ? p->match_string () : std::string ();
00174           retval(2) = sz ? p->token_extents () : Matrix ();
00175 
00176           if (sz)
00177             {
00178               double start = p->start ();
00179               double end = p->end ();
00180 
00181               Cell split (dim_vector (1, 2));
00182               split(0) = buffer.substr (0, start-1);
00183               split(1) = buffer.substr (end);
00184 
00185               retval(6) = split;
00186               retval(1) = end;
00187               retval(0) = start;
00188             }
00189           else
00190             {
00191               retval(6) = buffer;
00192               retval(1) = Matrix ();
00193               retval(0) = Matrix ();
00194             }
00195         }
00196       else
00197         {
00198           Cell tokens (dim_vector (1, sz));
00199           Cell match_string (dim_vector (1, sz));
00200           Cell token_extents (dim_vector (1, sz));
00201           NDArray end (dim_vector (1, sz));
00202           NDArray start (dim_vector (1, sz));
00203           Cell split (dim_vector (1, sz+1));
00204           size_t sp_start = 0;
00205 
00206           i = 0;
00207           for (regexp::match_data::const_iterator p = rx_lst.begin ();
00208                p != rx_lst.end (); p++)
00209             {
00210               double s = p->start ();
00211               double e = p->end ();
00212 
00213               string_vector tmp = p->tokens ();
00214               tokens(i) = Cell (dim_vector (1, tmp.length ()), tmp);
00215               match_string(i) = p->match_string ();
00216               token_extents(i) = p->token_extents ();
00217               end(i) = e;
00218               start(i) = s;
00219               split(i) = buffer.substr (sp_start, s-sp_start-1);
00220               sp_start = e;
00221               i++;
00222             }
00223 
00224           split(i) = buffer.substr (sp_start);
00225 
00226           retval(6) = split;
00227           retval(4) = tokens;
00228           retval(3) = match_string;
00229           retval(2) = token_extents;
00230           retval(1) = end;
00231           retval(0) = start;
00232         }
00233 
00234       // Alter the order of the output arguments
00235 
00236       if (extra_options)
00237         {
00238           int n = 0;
00239           octave_value_list new_retval;
00240           new_retval.resize (nargout);
00241 
00242           OCTAVE_LOCAL_BUFFER (int, arg_used, 6);
00243           for (int j = 0; j < 6; j++)
00244             arg_used[j] = false;
00245 
00246           for (int j = 2; j < nargin; j++)
00247             {
00248               int k = 0;
00249               std::string str = args(j).string_value ();
00250               std::transform (str.begin (), str.end (), str.begin (), tolower);
00251 
00252               if (str.find ("once", 0) == 0
00253                   || str.find ("stringanchors", 0) == 0
00254                   || str.find ("lineanchors", 0) == 0
00255                   || str.find ("matchcase", 0) == 0
00256                   || str.find ("ignorecase", 0) == 0
00257                   || str.find ("dotall", 0) == 0
00258                   || str.find ("dotexceptnewline", 0) == 0
00259                   || str.find ("literalspacing", 0) == 0
00260                   || str.find ("freespacing", 0) == 0)
00261                 continue;
00262               else if (str.find ("start", 0) == 0)
00263                 k = 0;
00264               else if (str.find ("end", 0) == 0)
00265                 k = 1;
00266               else if (str.find ("tokenextents", 0) == 0)
00267                 k = 2;
00268               else if (str.find ("match", 0) == 0)
00269                 k = 3;
00270               else if (str.find ("tokens", 0) == 0)
00271                 k = 4;
00272               else if (str.find ("names", 0) == 0)
00273                 k = 5;
00274               else if (str.find ("split", 0) == 0)
00275                 k = 6;
00276 
00277               new_retval(n++) = retval(k);
00278               arg_used[k] = true;
00279 
00280               if (n == nargout)
00281                 break;
00282             }
00283 
00284           // Fill in the rest of the arguments
00285           if (n < nargout)
00286             {
00287               for (int j = 0; j < 6; j++)
00288                 {
00289                   if (! arg_used[j])
00290                     new_retval(n++) = retval(j);
00291                 }
00292             }
00293 
00294           retval = new_retval;
00295         }
00296     }
00297 
00298   return retval;
00299 }
00300 
00301 static octave_value_list
00302 octcellregexp (const octave_value_list &args, int nargout,
00303                const std::string &who, bool case_insensitive = false)
00304 {
00305   octave_value_list retval;
00306 
00307   if (args(0).is_cell ())
00308     {
00309       OCTAVE_LOCAL_BUFFER (Cell, newretval, nargout);
00310       octave_value_list new_args = args;
00311       Cell cellstr = args(0).cell_value ();
00312       if (args(1).is_cell ())
00313         {
00314           Cell cellpat = args(1).cell_value ();
00315 
00316           if (cellpat.numel () == 1)
00317             {
00318               for (int j = 0; j < nargout; j++)
00319                 newretval[j].resize (cellstr.dims ());
00320 
00321               new_args(1) = cellpat(0);
00322 
00323               for (octave_idx_type i = 0; i < cellstr.numel (); i++)
00324                 {
00325                   new_args(0) = cellstr(i);
00326                   octave_value_list tmp = octregexp (new_args, nargout, who,
00327                                                      case_insensitive);
00328 
00329                   if (error_state)
00330                     break;
00331 
00332                   for (int j = 0; j < nargout; j++)
00333                     newretval[j](i) = tmp(j);
00334                 }
00335             }
00336           else if (cellstr.numel () == 1)
00337             {
00338               for (int j = 0; j < nargout; j++)
00339                 newretval[j].resize (cellpat.dims ());
00340 
00341               new_args(0) = cellstr(0);
00342 
00343               for (octave_idx_type i = 0; i < cellpat.numel (); i++)
00344                 {
00345                   new_args(1) = cellpat(i);
00346                   octave_value_list tmp = octregexp (new_args, nargout, who,
00347                                                      case_insensitive);
00348 
00349                   if (error_state)
00350                     break;
00351 
00352                   for (int j = 0; j < nargout; j++)
00353                     newretval[j](i) = tmp(j);
00354                 }
00355             }
00356           else if (cellstr.numel () == cellpat.numel ())
00357             {
00358 
00359               if (cellstr.dims () != cellpat.dims ())
00360                 error ("%s: inconsistent cell array dimensions", who.c_str ());
00361               else
00362                 {
00363                   for (int j = 0; j < nargout; j++)
00364                     newretval[j].resize (cellstr.dims ());
00365 
00366                   for (octave_idx_type i = 0; i < cellstr.numel (); i++)
00367                     {
00368                       new_args(0) = cellstr(i);
00369                       new_args(1) = cellpat(i);
00370 
00371                       octave_value_list tmp = octregexp (new_args, nargout, who,
00372                                                          case_insensitive);
00373 
00374                       if (error_state)
00375                         break;
00376 
00377                       for (int j = 0; j < nargout; j++)
00378                         newretval[j](i) = tmp(j);
00379                     }
00380                 }
00381             }
00382           else
00383             error ("regexp: cell array arguments must be scalar or equal size");
00384         }
00385       else
00386         {
00387           for (int j = 0; j < nargout; j++)
00388             newretval[j].resize (cellstr.dims ());
00389 
00390           for (octave_idx_type i = 0; i < cellstr.numel (); i++)
00391             {
00392               new_args(0) = cellstr(i);
00393               octave_value_list tmp = octregexp (new_args, nargout, who,
00394                                                  case_insensitive);
00395 
00396               if (error_state)
00397                 break;
00398 
00399               for (int j = 0; j < nargout; j++)
00400                 newretval[j](i) = tmp(j);
00401             }
00402         }
00403 
00404       if (!error_state)
00405         for (int j = 0; j < nargout; j++)
00406           retval(j) = octave_value (newretval[j]);
00407     }
00408   else if (args(1).is_cell ())
00409     {
00410       OCTAVE_LOCAL_BUFFER (Cell, newretval, nargout);
00411       octave_value_list new_args = args;
00412       Cell cellpat = args(1).cell_value ();
00413 
00414       for (int j = 0; j < nargout; j++)
00415         newretval[j].resize(cellpat.dims ());
00416 
00417       for (octave_idx_type i = 0; i < cellpat.numel (); i++)
00418         {
00419           new_args(1) = cellpat(i);
00420           octave_value_list tmp = octregexp (new_args, nargout, who,
00421                                              case_insensitive);
00422 
00423           if (error_state)
00424             break;
00425 
00426           for (int j = 0; j < nargout; j++)
00427             newretval[j](i) = tmp(j);
00428         }
00429 
00430       if (!error_state)
00431         {
00432           for (int j = 0; j < nargout; j++)
00433             retval(j) = octave_value (newretval[j]);
00434         }
00435     }
00436   else
00437     retval = octregexp (args, nargout, who, case_insensitive);
00438 
00439   return retval;
00440 
00441 }
00442 
00443 DEFUN_DLD (regexp, args, nargout,
00444   "-*- texinfo -*-\n\
00445 @deftypefn  {Loadable Function} {[@var{s}, @var{e}, @var{te}, @var{m}, @var{t}, @var{nm}] =} regexp (@var{str}, @var{pat})\n\
00446 @deftypefnx {Loadable Function} {[@dots{}] =} regexp (@var{str}, @var{pat}, \"@var{opt1}\", @dots{})\n\
00447 Regular expression string matching.  Search for @var{pat} in @var{str} and\n\
00448 return the positions and substrings of any matches, or empty values if there\n\
00449 are none.\n\
00450 \n\
00451 The matched pattern @var{pat} can include any of the standard regex\n\
00452 operators, including:\n\
00453 \n\
00454 @table @code\n\
00455 @item .\n\
00456 Match any character\n\
00457 \n\
00458 @item * + ? @{@}\n\
00459 Repetition operators, representing\n\
00460 @table @code\n\
00461 @item *\n\
00462 Match zero or more times\n\
00463 \n\
00464 @item +\n\
00465 Match one or more times\n\
00466 \n\
00467 @item ?\n\
00468 Match zero or one times\n\
00469 \n\
00470 @item @{@var{n}@}\n\
00471 Match exactly @var{n} times\n\
00472 \n\
00473 @item @{@var{n},@}\n\
00474 Match @var{n} or more times\n\
00475 \n\
00476 @item @{@var{m},@var{n}@}\n\
00477 Match between @var{m} and @var{n} times\n\
00478 @end table\n\
00479 \n\
00480 @item [@dots{}] [^@dots{}]\n\
00481 \n\
00482 List operators.  The pattern will match any character listed between \"[\"\n\
00483 and \"]\".  If the first character is \"^\" then the pattern is inverted and\n\
00484 any character except those listed between brackets will match.\n\
00485 \n\
00486 Escape sequences defined below can also be used inside list\n\
00487 operators.  For example, a template for a floating point number might be\n\
00488 @code{[-+.\\d]+}.\n\
00489 \n\
00490 @item ()\n\
00491 Grouping operator\n\
00492 \n\
00493 @item |\n\
00494 Alternation operator.  Match one of a choice of regular expressions.  The\n\
00495 alternatives must be delimited by the grouping operator @code{()} above.\n\
00496 \n\
00497 @item ^ $\n\
00498 Anchoring operators.  Requires pattern to occur at the start (@code{^}) or\n\
00499 end (@code{$}) of the string.\n\
00500 @end table\n\
00501 \n\
00502 In addition, the following escaped characters have special meaning.  Note,\n\
00503 it is recommended to quote @var{pat} in single quotes, rather than double\n\
00504 quotes, to avoid the escape sequences being interpreted by Octave before\n\
00505 being passed to @code{regexp}.\n\
00506 \n\
00507 @table @code\n\
00508 @item \\b\n\
00509 Match a word boundary\n\
00510 \n\
00511 @item \\B\n\
00512 Match within a word\n\
00513 \n\
00514 @item \\w\n\
00515 Match any word character\n\
00516 \n\
00517 @item \\W\n\
00518 Match any non-word character\n\
00519 \n\
00520 @item \\<\n\
00521 Match the beginning of a word\n\
00522 \n\
00523 @item \\>\n\
00524 Match the end of a word\n\
00525 \n\
00526 @item \\s\n\
00527 Match any whitespace character\n\
00528 \n\
00529 @item \\S\n\
00530 Match any non-whitespace character\n\
00531 \n\
00532 @item \\d\n\
00533 Match any digit\n\
00534 \n\
00535 @item \\D\n\
00536 Match any non-digit\n\
00537 @end table\n\
00538 \n\
00539 The outputs of @code{regexp} default to the order given below\n\
00540 \n\
00541 @table @var\n\
00542 @item s\n\
00543 The start indices of each matching substring\n\
00544 \n\
00545 @item e\n\
00546 The end indices of each matching substring\n\
00547 \n\
00548 @item te\n\
00549 The extents of each matched token surrounded by @code{(@dots{})} in\n\
00550 @var{pat}\n\
00551 \n\
00552 @item m\n\
00553 A cell array of the text of each match\n\
00554 \n\
00555 @item t\n\
00556 A cell array of the text of each token matched\n\
00557 \n\
00558 @item nm\n\
00559 A structure containing the text of each matched named token, with the name\n\
00560 being used as the fieldname.  A named token is denoted by\n\
00561 @code{(?<name>@dots{})}.\n\
00562 \n\
00563 @item sp\n\
00564 A cell array of the text not returned by match.\n\
00565 @end table\n\
00566 \n\
00567 Particular output arguments, or the order of the output arguments, can be\n\
00568 selected by additional @var{opt} arguments.  These are strings and the\n\
00569 correspondence between the output arguments and the optional argument\n\
00570 are\n\
00571 \n\
00572 @multitable @columnfractions 0.2 0.3 0.3 0.2\n\
00573 @item @tab 'start'        @tab @var{s}  @tab\n\
00574 @item @tab 'end'          @tab @var{e}  @tab\n\
00575 @item @tab 'tokenExtents' @tab @var{te} @tab\n\
00576 @item @tab 'match'        @tab @var{m}  @tab\n\
00577 @item @tab 'tokens'       @tab @var{t}  @tab\n\
00578 @item @tab 'names'        @tab @var{nm} @tab\n\
00579 @item @tab 'split'        @tab @var{sp} @tab\n\
00580 @end multitable\n\
00581 \n\
00582 Additional arguments are summarized below.\n\
00583 \n\
00584 @table @samp\n\
00585 @item once\n\
00586 Return only the first occurrence of the pattern.\n\
00587 \n\
00588 @item matchcase\n\
00589 Make the matching case sensitive.  (default)\n\
00590 \n\
00591 Alternatively, use (?-i) in the pattern.\n\
00592 \n\
00593 @item ignorecase\n\
00594 Ignore case when matching the pattern to the string.\n\
00595 \n\
00596 Alternatively, use (?i) in the pattern.\n\
00597 \n\
00598 @item stringanchors\n\
00599 Match the anchor characters at the beginning and end of the string.\n\
00600 (default)\n\
00601 \n\
00602 Alternatively, use (?-m) in the pattern.\n\
00603 \n\
00604 @item lineanchors\n\
00605 Match the anchor characters at the beginning and end of the line.\n\
00606 \n\
00607 Alternatively, use (?m) in the pattern.\n\
00608 \n\
00609 @item dotall\n\
00610 The pattern @code{.} matches all characters including the newline character.\n\
00611  (default)\n\
00612 \n\
00613 Alternatively, use (?s) in the pattern.\n\
00614 \n\
00615 @item dotexceptnewline\n\
00616 The pattern @code{.} matches all characters except the newline character.\n\
00617 \n\
00618 Alternatively, use (?-s) in the pattern.\n\
00619 \n\
00620 @item literalspacing\n\
00621 All characters in the pattern, including whitespace, are significant and are\n\
00622 used in pattern matching.  (default)\n\
00623 \n\
00624 Alternatively, use (?-x) in the pattern.\n\
00625 \n\
00626 @item freespacing\n\
00627 The pattern may include arbitrary whitespace and also comments beginning with\n\
00628 the character @samp{#}.\n\
00629 \n\
00630 Alternatively, use (?x) in the pattern.\n\
00631 \n\
00632 @end table\n\
00633 @seealso{regexpi, strfind, regexprep}\n\
00634 @end deftypefn")
00635 {
00636   octave_value_list retval;
00637 
00638   int nargin = args.length ();
00639 
00640   if (nargin < 2)
00641     print_usage ();
00642   else if (args(0).is_cell () || args(1).is_cell ())
00643     retval = octcellregexp (args, nargout, "regexp");
00644   else
00645     retval = octregexp (args, nargout, "regexp");
00646 
00647   return retval;
00648 }
00649 
00650 /*
00651 
00652 ## PCRE_ERROR_MATCHLIMIT test
00653 %!test
00654 %! s=sprintf('\t4\n0000\t-0.00\t-0.0000\t4\t-0.00\t-0.0000\t4\n0000\t-0.00\t-0.0000\t0\t-0.00\t-');
00655 %! ws = warning("query");
00656 %! unwind_protect
00657 %!   warning("off");
00658 %!   regexp(s, '(\s*-*\d+[.]*\d*\s*)+\n');
00659 %! unwind_protect_cleanup
00660 %!   warning(ws);
00661 %! end_unwind_protect
00662 
00663 ## seg-fault test
00664 %!assert(regexp("abcde","."),[1,2,3,4,5])
00665 ## Infinite loop test
00666 %!assert (isempty (regexp("abcde", "")))
00667 
00668 ## Check that anchoring of pattern works correctly
00669 %!assert(regexp('abcabc','^abc'),1);
00670 %!assert(regexp('abcabc','abc$'),4);
00671 %!assert(regexp('abcabc','^abc$'),zeros(1,0));
00672 
00673 %!test
00674 %! [s, e, te, m, t] = regexp(' No Match ', 'f(.*)uck');
00675 %! assert (s,zeros(1,0))
00676 %! assert (e,zeros(1,0))
00677 %! assert (te,cell(1,0))
00678 %! assert (m, cell(1,0))
00679 %! assert (t, cell(1,0))
00680 
00681 %!test
00682 %! [s, e, te, m, t] = regexp(' FiRetrUck ', 'f(.*)uck');
00683 %! assert (s,zeros(1,0))
00684 %! assert (e,zeros(1,0))
00685 %! assert (te,cell(1,0))
00686 %! assert (m, cell(1,0))
00687 %! assert (t, cell(1,0))
00688 
00689 %!test
00690 %! [s, e, te, m, t] = regexp(' firetruck ', 'f(.*)uck');
00691 %! assert (s,2)
00692 %! assert (e,10)
00693 %! assert (te{1},[3,7])
00694 %! assert (m{1}, 'firetruck')
00695 %! assert (t{1}{1}, 'iretr')
00696 
00697 %!test
00698 %! [s, e, te, m, t] = regexp('short test string','\w*r\w*');
00699 %! assert (s,[1,12])
00700 %! assert (e,[5,17])
00701 %! assert (size(te), [1,2])
00702 %! assert (isempty(te{1}))
00703 %! assert (isempty(te{2}))
00704 %! assert (m{1},'short')
00705 %! assert (m{2},'string')
00706 %! assert (size(t), [1,2])
00707 %! assert (isempty(t{1}))
00708 %! assert (isempty(t{2}))
00709 
00710 %!test
00711 %! [s, e, te, m, t] = regexp('short test string','\w*r\w*','once');
00712 %! assert (s,1)
00713 %! assert (e,5)
00714 %! assert (isempty(te))
00715 %! assert (m,'short')
00716 %! assert (isempty(t))
00717 
00718 %!test
00719 %! [m, te, e, s, t] = regexp('short test string','\w*r\w*','once', 'match', 'tokenExtents', 'end', 'start', 'tokens');
00720 %! assert (s,1)
00721 %! assert (e,5)
00722 %! assert (isempty(te))
00723 %! assert (m,'short')
00724 %! assert (isempty(t))
00725 
00726 %!test
00727 %! [s, e, te, m, t, nm] = regexp('short test string','(?<word1>\w*t)\s*(?<word2>\w*t)');
00728 %! assert (s,1)
00729 %! assert (e,10)
00730 %! assert (size(te), [1,1])
00731 %! assert (te{1}, [1 5; 7, 10])
00732 %! assert (m{1},'short test')
00733 %! assert (size(t),[1,1])
00734 %! assert (t{1}{1},'short')
00735 %! assert (t{1}{2},'test')
00736 %! assert (size(nm), [1,1])
00737 %! assert (!isempty(fieldnames(nm)))
00738 %! assert (sort(fieldnames(nm)),{'word1';'word2'})
00739 %! assert (nm.word1,'short')
00740 %! assert (nm.word2,'test')
00741 
00742 %!test
00743 %! [nm, m, te, e, s, t] = regexp('short test string','(?<word1>\w*t)\s*(?<word2>\w*t)', 'names', 'match', 'tokenExtents', 'end', 'start', 'tokens');
00744 %! assert (s,1)
00745 %! assert (e,10)
00746 %! assert (size(te), [1,1])
00747 %! assert (te{1}, [1 5; 7, 10])
00748 %! assert (m{1},'short test')
00749 %! assert (size(t),[1,1])
00750 %! assert (t{1}{1},'short')
00751 %! assert (t{1}{2},'test')
00752 %! assert (size(nm), [1,1])
00753 %! assert (!isempty(fieldnames(nm)))
00754 %! assert (sort(fieldnames(nm)),{'word1';'word2'})
00755 %! assert (nm.word1,'short')
00756 %! assert (nm.word2,'test')
00757 
00758 %!test
00759 %! [t, nm] = regexp("John Davis\nRogers, James",'(?<first>\w+)\s+(?<last>\w+)|(?<last>\w+),\s+(?<first>\w+)','tokens','names');
00760 %! assert (size(t), [1,2]);
00761 %! assert (t{1}{1},'John');
00762 %! assert (t{1}{2},'Davis');
00763 %! assert (t{2}{1},'Rogers');
00764 %! assert (t{2}{2},'James');
00765 %! assert (size(nm), [1,1]);
00766 %! assert (nm.first{1},'John');
00767 %! assert (nm.first{2},'James');
00768 %! assert (nm.last{1},'Davis');
00769 %! assert (nm.last{2},'Rogers');
00770 
00771 ## Tests for named tokens
00772 %!test
00773 %! # Parenthesis in named token (ie (int)) causes a problem
00774 %! assert (regexp('qwe int asd', ['(?<typestr>(int))'], 'names'), struct ('typestr', 'int'));
00775 
00776 %!test
00777 %! ## Mix of named and unnamed tokens can cause segfault (bug #35683)
00778 %! str = "abcde";
00779 %! ptn = '(?<T1>a)(\w+)(?<T2>d\w+)';
00780 %! tokens = regexp (str, ptn, "names");
00781 %! assert (isstruct (tokens) && numel (tokens) == 1);
00782 %! assert (tokens.T1, "a");
00783 %! assert (tokens.T2, "de");
00784 
00785 %!assert(regexp("abc\nabc",'.'),[1:7])
00786 %!assert(regexp("abc\nabc",'.','dotall'),[1:7])
00787 %!test
00788 %! assert(regexp("abc\nabc",'(?s).'),[1:7])
00789 %! assert(regexp("abc\nabc",'.','dotexceptnewline'),[1,2,3,5,6,7])
00790 %! assert(regexp("abc\nabc",'(?-s).'),[1,2,3,5,6,7])
00791 
00792 %!assert(regexp("caseCaSe",'case'),1)
00793 %!assert(regexp("caseCaSe",'case',"matchcase"),1)
00794 %!assert(regexp("caseCaSe",'case',"ignorecase"),[1,5])
00795 %!test
00796 %! assert(regexp("caseCaSe",'(?-i)case'),1)
00797 %! assert(regexp("caseCaSe",'(?i)case'),[1,5])
00798 
00799 %!assert (regexp("abc\nabc",'c$'),7)
00800 %!assert (regexp("abc\nabc",'c$',"stringanchors"),7)
00801 %!test
00802 %! assert (regexp("abc\nabc",'(?-m)c$'),7)
00803 %! assert (regexp("abc\nabc",'c$',"lineanchors"),[3,7])
00804 %! assert (regexp("abc\nabc",'(?m)c$'),[3,7])
00805 
00806 %!assert (regexp("this word",'s w'),4)
00807 %!assert (regexp("this word",'s w','literalspacing'),4)
00808 %!test
00809 %! assert (regexp("this word",'(?-x)s w','literalspacing'),4)
00810 %! assert (regexp("this word",'s w','freespacing'),zeros(1,0))
00811 %! assert (regexp("this word",'(?x)s w'),zeros(1,0))
00812 
00813 %!error regexp('string', 'tri', 'BadArg');
00814 %!error regexp('string');
00815 
00816 %!assert(regexp({'asdfg-dfd';'-dfd-dfd-';'qasfdfdaq'},'-'),{6;[1,5,9];zeros(1,0)})
00817 %!assert(regexp({'asdfg-dfd','-dfd-dfd-','qasfdfdaq'},'-'),{6,[1,5,9],zeros(1,0)})
00818 %!assert(regexp({'asdfg-dfd';'-dfd-dfd-';'qasfdfdaq'},{'-';'f';'q'}),{6;[3,7];[1,9]})
00819 %!assert(regexp('Strings',{'t','s'}),{2,7})
00820 
00821 ## Test case for lookaround operators
00822 %!test
00823 %! assert(regexp('Iraq','q(?!u)'),4)
00824 %! assert(regexp('quit','q(?!u)'), zeros(1,0))
00825 %! assert(regexp('quit','q(?=u)','match'), {'q'})
00826 %! assert(regexp("quit",'q(?=u+)','match'), {'q'})
00827 %! assert(regexp("qit",'q(?=u+)','match'), cell(1,0))
00828 %! assert(regexp("qit",'q(?=u*)','match'), {'q'})
00829 %! assert(regexp('thingamabob','(?<=a)b'), 9)
00830 
00831 ## Tests for split option.
00832 %!shared str
00833 %! str = "foo bar foo";
00834 %!test
00835 %! [a, b] = regexp (str, "f..", "match", "split");
00836 %! assert (a, {"foo", "foo"});
00837 %! assert (b, {"", " bar ", ""});
00838 %!test
00839 %! [a, b] = regexp (str, "f..", "match", "split", "once");
00840 %! assert (a, "foo");
00841 %! assert (b, {"", " bar foo"});
00842 %!test
00843 %! [a, b] = regexp (str, "fx.", "match", "split");
00844 %! assert (a, cell (1, 0));
00845 %! assert (b, {"foo bar foo"});
00846 %!test
00847 %! [a, b] = regexp (str, "fx.", "match", "split", "once");
00848 %! assert (a, "");
00849 %! assert (b, "foo bar foo")
00850 
00851 %!shared str
00852 %! str = "foo bar";
00853 %!test
00854 %! [a, b] = regexp (str, "f..", "match", "split");
00855 %! assert (a, {"foo"});
00856 %! assert (b, {"", " bar"});
00857 %!test
00858 %! [a, b] = regexp (str, "b..", "match", "split");
00859 %! assert (a, {"bar"});
00860 %! assert (b, {"foo ", ""});
00861 %!test
00862 %! [a, b] = regexp (str, "x", "match", "split");
00863 %! assert (a, cell (1, 0));
00864 %! assert (b, {"foo bar"});
00865 %!test
00866 %! [a, b] = regexp (str, "[o]+", "match", "split");
00867 %! assert (a, {"oo"});
00868 %! assert (b, {"f", " bar"});
00869 
00870 */
00871 
00872 DEFUN_DLD (regexpi, args, nargout,
00873   "-*- texinfo -*-\n\
00874 @deftypefn  {Loadable Function} {[@var{s}, @var{e}, @var{te}, @var{m}, @var{t}, @var{nm}] =} regexpi (@var{str}, @var{pat})\n\
00875 @deftypefnx {Loadable Function} {[@dots{}] =} regexpi (@var{str}, @var{pat}, \"@var{opt1}\", @dots{})\n\
00876 \n\
00877 Case insensitive regular expression string matching.  Search for @var{pat} in\n\
00878 @var{str} and return the positions and substrings of any matches, or empty\n\
00879 values if there are none.  @xref{doc-regexp,,regexp}, for details on the\n\
00880 syntax of the search pattern.\n\
00881 @seealso{regexp}\n\
00882 @end deftypefn")
00883 {
00884   octave_value_list retval;
00885 
00886   int nargin = args.length ();
00887 
00888   if (nargin < 2)
00889     print_usage ();
00890   else if (args(0).is_cell () || args(1).is_cell ())
00891     retval = octcellregexp (args, nargout, "regexpi", true);
00892   else
00893     retval = octregexp (args, nargout, "regexpi", true);
00894 
00895   return retval;
00896 }
00897 
00898 /*
00899 
00900 ## seg-fault test
00901 %!assert(regexpi("abcde","."),[1,2,3,4,5])
00902 
00903 ## Check that anchoring of pattern works correctly
00904 %!assert(regexpi('abcabc','^ABC'),1);
00905 %!assert(regexpi('abcabc','ABC$'),4);
00906 %!assert(regexpi('abcabc','^ABC$'),zeros(1,0));
00907 
00908 %!test
00909 %! [s, e, te, m, t] = regexpi(' No Match ', 'f(.*)uck');
00910 %! assert (s,zeros(1,0))
00911 %! assert (e,zeros(1,0))
00912 %! assert (te,cell(1,0))
00913 %! assert (m, cell(1,0))
00914 %! assert (t, cell(1,0))
00915 
00916 %!test
00917 %! [s, e, te, m, t] = regexpi(' FiRetrUck ', 'f(.*)uck');
00918 %! assert (s,2)
00919 %! assert (e,10)
00920 %! assert (te{1},[3,7])
00921 %! assert (m{1}, 'FiRetrUck')
00922 %! assert (t{1}{1}, 'iRetr')
00923 
00924 %!test
00925 %! [s, e, te, m, t] = regexpi(' firetruck ', 'f(.*)uck');
00926 %! assert (s,2)
00927 %! assert (e,10)
00928 %! assert (te{1},[3,7])
00929 %! assert (m{1}, 'firetruck')
00930 %! assert (t{1}{1}, 'iretr')
00931 
00932 %!test
00933 %! [s, e, te, m, t] = regexpi('ShoRt Test String','\w*r\w*');
00934 %! assert (s,[1,12])
00935 %! assert (e,[5,17])
00936 %! assert (size(te), [1,2])
00937 %! assert (isempty(te{1}))
00938 %! assert (isempty(te{2}))
00939 %! assert (m{1},'ShoRt')
00940 %! assert (m{2},'String')
00941 %! assert (size(t), [1,2])
00942 %! assert (isempty(t{1}))
00943 %! assert (isempty(t{2}))
00944 
00945 %!test
00946 %! [s, e, te, m, t] = regexpi('ShoRt Test String','\w*r\w*','once');
00947 %! assert (s,1)
00948 %! assert (e,5)
00949 %! assert (isempty(te))
00950 %! assert (m,'ShoRt')
00951 %! assert (isempty(t))
00952 
00953 %!test
00954 %! [m, te, e, s, t] = regexpi('ShoRt Test String','\w*r\w*','once', 'match', 'tokenExtents', 'end', 'start', 'tokens');
00955 %! assert (s,1)
00956 %! assert (e,5)
00957 %! assert (isempty(te))
00958 %! assert (m,'ShoRt')
00959 %! assert (isempty(t))
00960 
00961 %!test
00962 %! [s, e, te, m, t, nm] = regexpi('ShoRt Test String','(?<word1>\w*t)\s*(?<word2>\w*t)');
00963 %! assert (s,1)
00964 %! assert (e,10)
00965 %! assert (size(te), [1,1])
00966 %! assert (te{1}, [1 5; 7, 10])
00967 %! assert (m{1},'ShoRt Test')
00968 %! assert (size(t),[1,1])
00969 %! assert (t{1}{1},'ShoRt')
00970 %! assert (t{1}{2},'Test')
00971 %! assert (size(nm), [1,1])
00972 %! assert (!isempty(fieldnames(nm)))
00973 %! assert (sort(fieldnames(nm)),{'word1';'word2'})
00974 %! assert (nm.word1,'ShoRt')
00975 %! assert (nm.word2,'Test')
00976 
00977 %!test
00978 %! [nm, m, te, e, s, t] = regexpi('ShoRt Test String','(?<word1>\w*t)\s*(?<word2>\w*t)', 'names', 'match', 'tokenExtents', 'end', 'start', 'tokens');
00979 %! assert (s,1)
00980 %! assert (e,10)
00981 %! assert (size(te), [1,1])
00982 %! assert (te{1}, [1 5; 7, 10])
00983 %! assert (m{1},'ShoRt Test')
00984 %! assert (size(t),[1,1])
00985 %! assert (t{1}{1},'ShoRt')
00986 %! assert (t{1}{2},'Test')
00987 %! assert (size(nm), [1,1])
00988 %! assert (!isempty(fieldnames(nm)))
00989 %! assert (sort(fieldnames(nm)),{'word1';'word2'})
00990 %! assert (nm.word1,'ShoRt')
00991 %! assert (nm.word2,'Test')
00992 
00993 %!assert(regexpi("abc\nabc",'.'),[1:7])
00994 %!assert(regexpi("abc\nabc",'.','dotall'),[1:7])
00995 %!test
00996 %! assert(regexpi("abc\nabc",'(?s).'),[1:7])
00997 %! assert(regexpi("abc\nabc",'.','dotexceptnewline'),[1,2,3,5,6,7])
00998 %! assert(regexpi("abc\nabc",'(?-s).'),[1,2,3,5,6,7])
00999 
01000 %!assert(regexpi("caseCaSe",'case'),[1,5])
01001 %!assert(regexpi("caseCaSe",'case',"matchcase"),1)
01002 %!assert(regexpi("caseCaSe",'case',"ignorecase"),[1,5])
01003 %!test
01004 %! assert(regexpi("caseCaSe",'(?-i)case'),1)
01005 %! assert(regexpi("caseCaSe",'(?i)case'),[1,5])
01006 
01007 %!assert (regexpi("abc\nabc",'C$'),7)
01008 %!assert (regexpi("abc\nabc",'C$',"stringanchors"),7)
01009 %!test
01010 %! assert (regexpi("abc\nabc",'(?-m)C$'),7)
01011 %! assert (regexpi("abc\nabc",'C$',"lineanchors"),[3,7])
01012 %! assert (regexpi("abc\nabc",'(?m)C$'),[3,7])
01013 
01014 %!assert (regexpi("this word",'S w'),4)
01015 %!assert (regexpi("this word",'S w','literalspacing'),4)
01016 %!test
01017 %! assert (regexpi("this word",'(?-x)S w','literalspacing'),4)
01018 %! assert (regexpi("this word",'S w','freespacing'),zeros(1,0))
01019 %! assert (regexpi("this word",'(?x)S w'),zeros(1,0))
01020 
01021 %!error regexpi('string', 'tri', 'BadArg');
01022 %!error regexpi('string');
01023 
01024 %!assert(regexpi({'asdfg-dfd';'-dfd-dfd-';'qasfdfdaq'},'-'),{6;[1,5,9];zeros(1,0)})
01025 %!assert(regexpi({'asdfg-dfd','-dfd-dfd-','qasfdfdaq'},'-'),{6,[1,5,9],zeros(1,0)})
01026 %!assert(regexpi({'asdfg-dfd';'-dfd-dfd-';'qasfdfdaq'},{'-';'f';'q'}),{6;[3,7];[1,9]})
01027 %!assert(regexpi('Strings',{'t','s'}),{2,[1,7]})
01028 
01029 */
01030 
01031 
01032 static octave_value
01033 octregexprep (const octave_value_list &args, const std::string &who)
01034 {
01035   octave_value retval;
01036 
01037   int nargin = args.length ();
01038 
01039   // Make sure we have string, pattern, replacement
01040   const std::string buffer = args(0).string_value ();
01041   if (error_state)
01042     return retval;
01043 
01044   const std::string pattern = args(1).string_value ();
01045   if (error_state)
01046     return retval;
01047 
01048   const std::string replacement = args(2).string_value ();
01049   if (error_state)
01050     return retval;
01051 
01052   // Pack options excluding 'tokenize' and various output
01053   // reordering strings into regexp arg list
01054   octave_value_list regexpargs (nargin-3, octave_value ());
01055 
01056   int len = 0;
01057   for (int i = 3; i < nargin; i++)
01058     {
01059       const std::string opt = args(i).string_value ();
01060       if (opt != "tokenize" && opt != "start" && opt != "end"
01061           && opt != "tokenextents" && opt != "match" && opt != "tokens"
01062           && opt != "names"  && opt != "split" && opt != "warnings")
01063         {
01064           regexpargs(len++) = args(i);
01065         }
01066     }
01067   regexpargs.resize (len);
01068 
01069   regexp::opts options;
01070   bool extra_args = false;
01071   parse_options (options, regexpargs, who, 0, extra_args);
01072   if (error_state)
01073     return retval;
01074 
01075   return regexp_replace (pattern, buffer, replacement, options, who);
01076 }
01077 
01078 DEFUN_DLD (regexprep, args, ,
01079   "-*- texinfo -*-\n\
01080 @deftypefn  {Loadable Function} {@var{outstr} =} regexprep (@var{string}, @var{pat}, @var{repstr})\n\
01081 @deftypefnx {Loadable Function} {@var{outstr} =} regexprep (@var{string}, @var{pat}, @var{repstr}, \"@var{opt1}\", @dots{})\n\
01082 Replace occurrences of pattern @var{pat} in @var{string} with @var{repstr}.\n\
01083 \n\
01084 The pattern is a regular expression as documented for @code{regexp}.\n\
01085 @xref{doc-regexp,,regexp}.\n\
01086 \n\
01087 The replacement string may contain @code{$i}, which substitutes\n\
01088 for the ith set of parentheses in the match string.  For example,\n\
01089 \n\
01090 @example\n\
01091 regexprep(\"Bill Dunn\",'(\\w+) (\\w+)','$2, $1')\n\
01092 @end example\n\
01093 \n\
01094 @noindent\n\
01095 returns \"Dunn, Bill\"\n\
01096 \n\
01097 Options in addition to those of @code{regexp} are\n\
01098 \n\
01099 @table @samp\n\
01100 \n\
01101 @item once\n\
01102 Replace only the first occurrence of @var{pat} in the result.\n\
01103 \n\
01104 @item warnings\n\
01105 This option is present for compatibility but is ignored.\n\
01106 \n\
01107 @end table\n\
01108 @seealso{regexp, regexpi, strrep}\n\
01109 @end deftypefn")
01110 {
01111   octave_value_list retval;
01112   int nargin = args.length ();
01113 
01114   if (nargin < 3)
01115     {
01116       print_usage ();
01117       return retval;
01118     }
01119 
01120   if (args(0).is_cell () || args(1).is_cell () || args(2).is_cell ())
01121     {
01122       Cell str;
01123       Cell pat;
01124       Cell rep;
01125       dim_vector dv0;
01126       dim_vector dv1 (1, 1);
01127 
01128       if (args(0).is_cell ())
01129         str = args(0).cell_value ();
01130       else
01131         str = Cell (args(0));
01132 
01133       if (args(1).is_cell ())
01134         pat = args(1).cell_value ();
01135       else
01136         pat = Cell (args(1));
01137 
01138       if (args(2).is_cell ())
01139         rep = args(2).cell_value ();
01140       else
01141         rep = Cell (args(2));
01142 
01143       dv0 = str.dims ();
01144       if (pat.numel () != 1)
01145         {
01146           dv1 = pat.dims ();
01147           if (rep.numel () != 1 && dv1 != rep.dims ())
01148             error ("regexprep: inconsistent cell array dimensions");
01149         }
01150       else if (rep.numel () != 1)
01151         dv1 = rep.dims ();
01152 
01153       if (!error_state)
01154         {
01155           Cell ret (dv0);
01156           octave_value_list new_args = args;
01157 
01158           for (octave_idx_type i = 0; i < dv0.numel (); i++)
01159             {
01160               new_args(0) = str(i);
01161               if (pat.numel() == 1)
01162                 new_args(1) = pat(0);
01163               if (rep.numel() == 1)
01164                 new_args(2) = rep(0);
01165 
01166               for (octave_idx_type j = 0; j < dv1.numel (); j++)
01167                 {
01168                   if (pat.numel () != 1)
01169                     new_args(1) = pat(j);
01170                   if (rep.numel () != 1)
01171                     new_args(2) = rep(j);
01172                   new_args(0) = octregexprep (new_args, "regexprep");
01173 
01174                   if (error_state)
01175                     break;
01176                 }
01177 
01178               if (error_state)
01179                 break;
01180 
01181               ret(i) = new_args(0);
01182             }
01183 
01184           if (!error_state)
01185             retval = args(0).is_cell ()
01186               ? octave_value (ret) : octave_value (ret(0));
01187         }
01188     }
01189   else
01190     retval = octregexprep (args, "regexprep");
01191 
01192   return retval;
01193 }
01194 
01195 /*
01196 %!test  # Replace with empty
01197 %! xml = '<!-- This is some XML --> <tag v="hello">some stuff<!-- sample tag--></tag>';
01198 %! t = regexprep(xml,'<[!?][^>]*>','');
01199 %! assert(t,' <tag v="hello">some stuff</tag>')
01200 
01201 %!test  # Replace with non-empty
01202 %! xml = '<!-- This is some XML --> <tag v="hello">some stuff<!-- sample tag--></tag>';
01203 %! t = regexprep(xml,'<[!?][^>]*>','?');
01204 %! assert(t,'? <tag v="hello">some stuff?</tag>')
01205 
01206 %!test  # Check that 'tokenize' is ignored
01207 %! xml = '<!-- This is some XML --> <tag v="hello">some stuff<!-- sample tag--></tag>';
01208 %! t = regexprep(xml,'<[!?][^>]*>','','tokenize');
01209 %! assert(t,' <tag v="hello">some stuff</tag>')
01210 
01211 ## Test capture replacement
01212 %!test
01213 %! data = "Bob Smith\nDavid Hollerith\nSam Jenkins";
01214 %! result = "Smith, Bob\nHollerith, David\nJenkins, Sam";
01215 %! t = regexprep(data,'(?m)^(\w+)\s+(\w+)$','$2, $1');
01216 %! assert(t,result)
01217 
01218 ## Return the original if no match
01219 %!assert(regexprep('hello','world','earth'),'hello')
01220 
01221 ## Test a general replacement
01222 %!assert(regexprep("a[b]c{d}e-f=g", "[^A-Za-z0-9_]", "_"), "a_b_c_d_e_f_g");
01223 
01224 ## Make sure it works at the beginning and end
01225 %!assert(regexprep("a[b]c{d}e-f=g", "a", "_"), "_[b]c{d}e-f=g");
01226 %!assert(regexprep("a[b]c{d}e-f=g", "g", "_"), "a[b]c{d}e-f=_");
01227 
01228 ## Options
01229 %!assert(regexprep("a[b]c{d}e-f=g", "[^A-Za-z0-9_]", "_", "once"), "a_b]c{d}e-f=g");
01230 %!assert(regexprep("a[b]c{d}e-f=g", "[^A-Z0-9_]", "_", "ignorecase"), "a_b_c_d_e_f_g");
01231 
01232 ## Option combinations
01233 %!assert(regexprep("a[b]c{d}e-f=g", "[^A-Z0-9_]", "_", "once", "ignorecase"), "a_b]c{d}e-f=g");
01234 
01235 ## End conditions on replacement
01236 %!assert(regexprep("abc","(b)",".$1"),"a.bc");
01237 %!assert(regexprep("abc","(b)","$1"),"abc");
01238 %!assert(regexprep("abc","(b)","$1."),"ab.c");
01239 %!assert(regexprep("abc","(b)","$1.."),"ab..c");
01240 
01241 ## Test cell array arguments
01242 %!assert(regexprep("abc",{"b","a"},"?"),"??c")
01243 %!assert(regexprep({"abc","cba"},"b","?"),{"a?c","c?a"})
01244 %!assert(regexprep({"abc","cba"},{"b","a"},{"?","!"}),{"!?c","c?!"})
01245 
01246 # Nasty lookbehind expression
01247 %!test
01248 %! assert(regexprep('x^(-1)+y(-1)+z(-1)=0','(?<=[a-z]+)\(\-[1-9]*\)','_minus1'),'x^(-1)+y_minus1+z_minus1=0')
01249 
01250 */