34 #if defined (HAVE_PCRE_H)
36 #elif defined (HAVE_PCRE_PCRE_H)
37 #include <pcre/pcre.h>
50 #define PCRE_MATCHLIMIT_MAX 10
53 #define MAXLOOKBEHIND 10
64 pcre_free (static_cast<pcre *> (
data));
78 std::ostringstream buf;
80 while ((new_pos =
pattern.find (
"(?", pos)) != std::string::npos)
82 if (
pattern.at (new_pos + 2) ==
'<'
83 && !(
pattern.at (new_pos + 3) ==
'='
84 ||
pattern.at (new_pos + 3) ==
'!'))
95 size_t tmp_pos =
pattern.find_first_of (
'>', new_pos);
97 if (tmp_pos == std::string::npos)
99 (*current_liboctave_error_handler)
100 (
"regexp: syntax error in pattern");
104 std::string tmp_name =
105 pattern.substr (new_pos+3, tmp_pos-new_pos-3);
109 for (
int i = 0; i <
nnames; i++)
128 if (new_pos - pos > 0)
129 buf <<
pattern.substr (pos, new_pos-pos);
131 buf <<
"(?P<n00" << inames++;
132 else if (inames < 100)
133 buf <<
"(?P<n0" << inames++;
135 buf <<
"(?P<n" << inames++;
139 else if (
pattern.at (new_pos + 2) ==
'<')
148 size_t tmp_pos1 = new_pos + 2;
149 size_t tmp_pos2 = tmp_pos1;
151 while (tmp_pos1 <
pattern.length () && brackets > 0)
153 char ch =
pattern.at (tmp_pos1);
170 buf <<
pattern.substr (pos, new_pos - pos) <<
"(?";
175 size_t tmp_pos3 =
pattern.find_first_of (
"*+", tmp_pos2);
177 if (tmp_pos3 != std::string::npos && tmp_pos3 < tmp_pos1)
182 (*current_liboctave_warning_with_id_handler)
183 (
"Octave:regexp-lookbehind-limit",
184 "%s: arbitrary length lookbehind patterns are only supported up to length %d",
188 buf <<
pattern.substr (pos, new_pos - pos) <<
"(";
192 if (
pattern.at (tmp_pos3) ==
'*')
197 for (; i < max_length + 1; i++)
199 buf <<
pattern.substr (new_pos, tmp_pos3 - new_pos)
201 buf <<
pattern.substr (tmp_pos3 + 1,
202 tmp_pos1 - tmp_pos3 - 1);
209 buf <<
pattern.substr (pos, tmp_pos1 - pos);
216 buf <<
pattern.substr (pos, new_pos - pos) <<
"(?";
226 std::string buf_str = buf.str ();
234 data = pcre_compile (buf_str.c_str (), pcre_options, &err, &erroffset, 0);
237 (*current_liboctave_error_handler)
238 (
"%s: %s at position %d of expression",
who.c_str (),
247 std::list<regexp::match_element> lst;
255 pcre *re =
static_cast<pcre *
> (
data);
257 pcre_fullinfo (re, 0, PCRE_INFO_CAPTURECOUNT, &subpatterns);
258 pcre_fullinfo (re, 0, PCRE_INFO_NAMECOUNT, &namecount);
259 pcre_fullinfo (re, 0, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
260 pcre_fullinfo (re, 0, PCRE_INFO_NAMETABLE, &nametable);
265 for (
int i = 0; i < namecount; i++)
269 nidx[i] = (
static_cast<int> (nametable[i*nameentrysize])) << 8
270 |
static_cast<int> (nametable[i*nameentrysize+1]);
277 int matches = pcre_exec (re, 0, buffer.c_str (),
278 buffer.length (), idx,
279 (idx ? PCRE_NOTBOL : 0),
280 ovector, (subpatterns+1)*3);
282 if (matches == PCRE_ERROR_MATCHLIMIT)
286 (*current_liboctave_warning_with_id_handler)
287 (
"Octave:regexp-match-limit",
288 "your pattern caused PCRE to hit its MATCH_LIMIT; trying harder now, but this will be slow");
292 pcre_config (PCRE_CONFIG_MATCH_LIMIT,
293 static_cast<void *> (&pe.match_limit));
295 pe.flags = PCRE_EXTRA_MATCH_LIMIT;
298 while (matches == PCRE_ERROR_MATCHLIMIT
303 pe.match_limit *= 10;
304 matches = pcre_exec (re, &pe, buffer.c_str (),
305 buffer.length (), idx,
306 (idx ? PCRE_NOTBOL : 0),
307 ovector, (subpatterns+1)*3);
311 if (matches < 0 && matches != PCRE_ERROR_NOMATCH)
313 (*current_liboctave_error_handler)
314 (
"%s: internal error calling pcre_exec; error code from pcre_exec is %i",
315 who.c_str (), matches);
318 else if (matches == PCRE_ERROR_NOMATCH)
323 idx = ovector[0] + 1;
324 if (idx < buffer.length ())
332 Matrix token_extents (matches-1, 2);
334 for (
int i = 1; i < matches; i++)
336 if (ovector[2*i] >= 0 && ovector[2*i+1] > 0
337 && (i == 1 || ovector[2*i] != ovector[2*i-2]
338 || ovector[2*i-1] != ovector[2*i+1]))
340 token_extents(pos_match,0) =
double (ovector[2*i]+1);
341 token_extents(pos_match++,1) =
double (ovector[2*i+1]);
345 token_extents.
resize (pos_match, 2);
347 double start =
double (ovector[0]+1);
348 double end =
double (ovector[1]);
350 const char **listptr;
351 int status = pcre_get_substring_list (buffer.c_str (), ovector,
354 if (status == PCRE_ERROR_NOMEMORY)
356 (*current_liboctave_error_handler)
357 (
"%s: cannot allocate memory in pcre_get_substring_list",
367 for (
int i = 1; i < matches; i++)
369 if (ovector[2*i] >= 0 && ovector[2*i+1] > 0)
371 if (i == 1 || ovector[2*i] != ovector[2*i-2]
372 || ovector[2*i-1] != ovector[2*i+1])
380 for (
int j = 0; j < namecount; j++)
385 std::string (*(listptr+i-pos_offset));
391 tokens(pos_match++) = std::string (*(listptr+i));
398 std::string match_string = std::string (*listptr);
400 pcre_free_substring_list (listptr);
403 token_extents, start, end);
404 lst.push_back (new_elem);
406 if (ovector[1] <= ovector[0])
409 idx = ovector[0] + 1;
410 if (idx <= buffer.length ())
431 return rx_lst.
size () > 0;
462 size_t num_matches = rx_lst.
size ();
464 if (num_matches == 0)
478 std::string repstr = replacement;
479 std::vector<rep_token_t> tokens;
482 for (
size_t i=0; i < repstr.size (); i++)
484 if (repstr[i] ==
'\\')
486 if (i < repstr.size () - 1 && repstr[i+1] ==
'$')
492 if (i < repstr.size () - 1 && repstr[i+1] ==
'\\')
498 else if (repstr[i] ==
'$')
500 if (i < repstr.size () - 1 && isdigit (repstr[i+1]))
505 tmp_token.
num = repstr[i+1]-
'0';
506 tokens.push_back (tmp_token);
512 int num_tokens = tokens.size ();
517 const size_t replen = repstr.size () - 2*num_tokens;
520 for (
size_t i = 0; i < num_matches; i++)
524 double start = p->start ();
525 double end = p->end ();
527 const Matrix pairs (p->token_extents ());
529 for (
int j = 0; j < num_tokens; j++)
531 if (tokens[j].num == 0)
532 pairlen +=
static_cast<size_t> (end - start) + 1;
533 else if (tokens[j].num <= pairs.rows ())
534 pairlen += static_cast<size_t> (pairs(tokens[j].num-1,1)
535 - pairs(tokens[j].num-1,0)) + 1;
537 delta += (
static_cast<int> (replen + pairlen)
538 - static_cast<int> (end - start + 1));
543 rep.reserve (buffer.size () + delta);
546 for (
size_t i = 0; i < num_matches; i++)
550 double start = p->start ();
551 double end = p->end ();
553 const Matrix pairs (p->token_extents ());
554 rep.
append (&buffer[from], static_cast<size_t> (start - 1) - from);
555 from =
static_cast<size_t> (end);
559 for (
int j = 0; j < num_tokens; j++)
561 rep.
append (&repstr[cur_pos], (tokens[j].pos) - cur_pos);
562 cur_pos = tokens[j].pos+2;
564 int k = tokens[j].num;
568 rep.append (&buffer[static_cast<size_t> (end - 1)],
569 static_cast<size_t> (end - start) + 1);
571 else if (k <= pairs.rows ())
574 rep.append (&buffer[static_cast<size_t> (pairs(k-1,0)-1)],
575 static_cast<size_t> (pairs(k-1,1)
576 - pairs(k-1,0)) + 1);
583 if (cur_pos < repstr.size ())
584 rep.append (&repstr[cur_pos], repstr.size () - cur_pos);
588 rep.append (&buffer[from], buffer.size () - from);
593 const size_t replen = repstr.size ();
596 for (
size_t i = 0; i < num_matches; i++)
599 delta +=
static_cast<int> (replen)
600 - static_cast<int> (p->end () - p->start () + 1);
605 rep.reserve (buffer.size () + delta);
608 for (
size_t i = 0; i < num_matches; i++)
611 rep.append (&buffer[from],
612 static_cast<size_t> (p->start () - 1) - from);
613 from =
static_cast<size_t> (p->end ());
617 rep.append (&buffer[from], buffer.size () - from);
void emptymatch(bool val)
void resize(octave_idx_type nr, octave_idx_type nc, double rfv=0)
void dotexceptnewline(bool val)
void freespacing(bool val)
void compile_internal(void)
string_vector & append(const std::string &s)
void resize(const dim_vector &dv, const T &rfv)
std::list< match_element >::const_iterator const_iterator
octave_idx_type length(void) const
Number of elements in the array.
std::string replace(const std::string &buffer, const std::string &replacement)
void case_insensitive(bool val)
match_data match(const std::string &buffer)
#define OCTAVE_LOCAL_BUFFER(T, buf, size)
void lineanchors(bool val)
#define PCRE_MATCHLIMIT_MAX
static bool lookbehind_warned
bool is_match(const std::string &buffer)
Matrix append(const Matrix &a) const