34 #if defined (HAVE_PCRE_H)
36 #elif defined (HAVE_PCRE_PCRE_H)
37 #include <pcre/pcre.h>
50 #define PCRE_MATCHLIMIT_MAX 10
53 #define MAXLOOKBEHIND 10
64 pcre_free (static_cast<pcre *> (
data));
78 std::ostringstream buf;
80 while ((new_pos =
pattern.find (
"(?", pos)) != std::string::npos)
82 if (
pattern.at (new_pos + 2) ==
'<'
83 && !(
pattern.at (new_pos + 3) ==
'='
84 ||
pattern.at (new_pos + 3) ==
'!'))
95 size_t tmp_pos =
pattern.find_first_of (
'>', new_pos);
97 if (tmp_pos == std::string::npos)
99 (*current_liboctave_error_handler)
100 (
"regexp: syntax error in pattern");
104 std::string tmp_name =
105 pattern.substr (new_pos+3, tmp_pos-new_pos-3);
109 for (
int i = 0; i <
nnames; i++)
128 if (new_pos - pos > 0)
129 buf <<
pattern.substr (pos, new_pos-pos);
131 buf <<
"(?P<n00" << inames++;
132 else if (inames < 100)
133 buf <<
"(?P<n0" << inames++;
135 buf <<
"(?P<n" << inames++;
139 else if (
pattern.at (new_pos + 2) ==
'<')
148 size_t tmp_pos1 = new_pos + 2;
149 size_t tmp_pos2 = tmp_pos1;
151 while (tmp_pos1 <
pattern.length () && brackets > 0)
153 char ch =
pattern.at (tmp_pos1);
170 buf <<
pattern.substr (pos, new_pos - pos) <<
"(?";
175 size_t tmp_pos3 =
pattern.find_first_of (
"*+", tmp_pos2);
177 if (tmp_pos3 != std::string::npos && tmp_pos3 < tmp_pos1)
179 if (!lookbehind_warned)
181 lookbehind_warned =
true;
182 (*current_liboctave_warning_handler)
183 (
"%s: arbitrary length lookbehind patterns are only supported up to length %d",
187 buf <<
pattern.substr (pos, new_pos - pos) <<
"(";
191 if (
pattern.at (tmp_pos3) ==
'*')
196 for (; i < max_length + 1; i++)
198 buf <<
pattern.substr (new_pos, tmp_pos3 - new_pos)
200 buf <<
pattern.substr (tmp_pos3 + 1,
201 tmp_pos1 - tmp_pos3 - 1);
208 buf <<
pattern.substr (pos, tmp_pos1 - pos);
215 buf <<
pattern.substr (pos, new_pos - pos) <<
"(?";
225 std::string buf_str = buf.str ();
233 data = pcre_compile (buf_str.c_str (), pcre_options, &err, &erroffset, 0);
236 (*current_liboctave_error_handler)
237 (
"%s: %s at position %d of expression",
who.c_str (),
246 std::list<regexp::match_element> lst;
254 pcre *re = static_cast <pcre *> (
data);
256 pcre_fullinfo (re, 0, PCRE_INFO_CAPTURECOUNT, &subpatterns);
257 pcre_fullinfo (re, 0, PCRE_INFO_NAMECOUNT, &namecount);
258 pcre_fullinfo (re, 0, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
259 pcre_fullinfo (re, 0, PCRE_INFO_NAMETABLE, &nametable);
264 for (
int i = 0; i < namecount; i++)
268 nidx[i] = (
static_cast<int> (nametable[i*nameentrysize])) << 8
269 |
static_cast<int> (nametable[i*nameentrysize+1]);
276 int matches = pcre_exec (re, 0, buffer.c_str (),
277 buffer.length (), idx,
278 (idx ? PCRE_NOTBOL : 0),
279 ovector, (subpatterns+1)*3);
281 if (matches == PCRE_ERROR_MATCHLIMIT)
285 (*current_liboctave_warning_handler)
286 (
"your pattern caused PCRE to hit its MATCH_LIMIT; trying harder now, but this will be slow");
290 pcre_config (PCRE_CONFIG_MATCH_LIMIT,
291 static_cast <void *> (&pe.match_limit));
293 pe.flags = PCRE_EXTRA_MATCH_LIMIT;
296 while (matches == PCRE_ERROR_MATCHLIMIT
301 pe.match_limit *= 10;
302 matches = pcre_exec (re, &pe, buffer.c_str (),
303 buffer.length (), idx,
304 (idx ? PCRE_NOTBOL : 0),
305 ovector, (subpatterns+1)*3);
309 if (matches < 0 && matches != PCRE_ERROR_NOMATCH)
311 (*current_liboctave_error_handler)
312 (
"%s: internal error calling pcre_exec; error code from pcre_exec is %i",
313 who.c_str (), matches);
316 else if (matches == PCRE_ERROR_NOMATCH)
321 idx = ovector[0] + 1;
322 if (idx < buffer.length ())
330 Matrix token_extents (matches-1, 2);
332 for (
int i = 1; i < matches; i++)
334 if (ovector[2*i] >= 0 && ovector[2*i+1] > 0
335 && (i == 1 || ovector[2*i] != ovector[2*i-2]
336 || ovector[2*i-1] != ovector[2*i+1]))
338 token_extents(pos_match,0) =
double (ovector[2*i]+1);
339 token_extents(pos_match++,1) =
double (ovector[2*i+1]);
343 token_extents.
resize (pos_match, 2);
345 double start =
double (ovector[0]+1);
346 double end =
double (ovector[1]);
348 const char **listptr;
349 int status = pcre_get_substring_list (buffer.c_str (), ovector,
352 if (status == PCRE_ERROR_NOMEMORY)
354 (*current_liboctave_error_handler)
355 (
"%s: cannot allocate memory in pcre_get_substring_list",
365 for (
int i = 1; i < matches; i++)
367 if (ovector[2*i] >= 0 && ovector[2*i+1] > 0)
369 if (i == 1 || ovector[2*i] != ovector[2*i-2]
370 || ovector[2*i-1] != ovector[2*i+1])
378 for (
int j = 0; j < namecount; j++)
383 std::string (*(listptr+i-pos_offset));
389 tokens(pos_match++) = std::string (*(listptr+i));
396 std::string match_string = std::string (*listptr);
398 pcre_free_substring_list (listptr);
401 token_extents, start, end);
402 lst.push_back (new_elem);
404 if (ovector[1] <= ovector[0])
407 idx = ovector[0] + 1;
408 if (idx <= buffer.length ())
429 return rx_lst.
size () > 0;
460 size_t num_matches = rx_lst.
size ();
462 if (num_matches == 0)
476 std::string repstr = replacement;
477 std::vector<rep_token_t> tokens;
480 for (
size_t i=0; i < repstr.size (); i++)
482 if (repstr[i] ==
'\\')
484 if (i < repstr.size () - 1 && repstr[i+1] ==
'$')
490 if (i < repstr.size () - 1 && repstr[i+1] ==
'\\')
496 else if (repstr[i] ==
'$')
498 if (i < repstr.size () - 1 && isdigit (repstr[i+1]))
503 tmp_token.
num = repstr[i+1]-
'0';
504 tokens.push_back (tmp_token);
510 int num_tokens = tokens.size ();
515 const size_t replen = repstr.size () - 2*num_tokens;
518 for (
size_t i = 0; i < num_matches; i++)
522 double start = p->start ();
523 double end = p->end ();
525 const Matrix pairs (p->token_extents ());
527 for (
int j = 0; j < num_tokens; j++)
529 if (tokens[j].num == 0)
530 pairlen +=
static_cast<size_t> (end - start) + 1;
531 else if (tokens[j].num <= pairs.rows ())
532 pairlen += static_cast<size_t> (pairs(tokens[j].num-1,1)
533 - pairs(tokens[j].num-1,0)) + 1;
535 delta += (
static_cast<int> (replen + pairlen)
536 - static_cast<int> (end - start + 1));
541 rep.reserve (buffer.size () + delta);
544 for (
size_t i = 0; i < num_matches; i++)
548 double start = p->start ();
549 double end = p->end ();
551 const Matrix pairs (p->token_extents ());
552 rep.
append (&buffer[from], static_cast<size_t> (start - 1) - from);
553 from =
static_cast<size_t> (end);
557 for (
int j = 0; j < num_tokens; j++)
559 rep.
append (&repstr[cur_pos], (tokens[j].pos) - cur_pos);
560 cur_pos = tokens[j].pos+2;
562 int k = tokens[j].num;
566 rep.append (&buffer[static_cast<size_t> (end - 1)],
567 static_cast<size_t> (end - start) + 1);
569 else if (k <= pairs.rows ())
572 rep.append (&buffer[static_cast<size_t> (pairs(k-1,0)-1)],
573 static_cast<size_t> (pairs(k-1,1)
574 - pairs(k-1,0)) + 1);
581 if (cur_pos < repstr.size ())
582 rep.append (&repstr[cur_pos], repstr.size () - cur_pos);
586 rep.append (&buffer[from], buffer.size () - from);
591 const size_t replen = repstr.size ();
594 for (
size_t i = 0; i < num_matches; i++)
597 delta +=
static_cast<int> (replen)
598 - static_cast<int> (p->end () - p->start () + 1);
603 rep.reserve (buffer.size () + delta);
606 for (
size_t i = 0; i < num_matches; i++)
609 rep.append (&buffer[from],
610 static_cast<size_t> (p->start () - 1) - from);
611 from =
static_cast<size_t> (p->end ());
615 rep.append (&buffer[from], buffer.size () - from);