26 #if defined (HAVE_CONFIG_H)
35 #if defined (HAVE_PCRE_H)
37 #elif defined (HAVE_PCRE_PCRE_H)
38 # include <pcre/pcre.h>
54 #define PCRE_MATCHLIMIT_MAX 10
57 #define MAXLOOKBEHIND 10
68 pcre_free (
static_cast<pcre *
> (
m_data));
82 std::ostringstream buf;
84 while ((new_pos =
m_pattern.find (
"(?", pos)) != std::string::npos)
99 size_t tmp_pos =
m_pattern.find_first_of (
'>', new_pos);
101 if (tmp_pos == std::string::npos)
102 (*current_liboctave_error_handler)
103 (
"regexp: syntax error in pattern");
106 =
m_pattern.substr (new_pos+3, tmp_pos-new_pos-3);
110 for (
int i = 0; i <
m_names; i++)
129 if (new_pos - pos > 0)
130 buf <<
m_pattern.substr (pos, new_pos-pos);
132 buf <<
"(?P<n00" << inames++;
133 else if (inames < 100)
134 buf <<
"(?P<n0" << inames++;
136 buf <<
"(?P<n" << inames++;
140 else if (
m_pattern.at (new_pos + 2) ==
'<')
149 size_t tmp_pos1 = new_pos + 2;
150 size_t tmp_pos2 = tmp_pos1;
152 while (tmp_pos1 <
m_pattern.length () && brackets > 0)
171 buf <<
m_pattern.substr (pos, new_pos - pos) <<
"(?";
176 size_t tmp_pos3 =
m_pattern.find_first_of (
"*+", tmp_pos2);
178 if (tmp_pos3 != std::string::npos && tmp_pos3 < tmp_pos1)
183 (*current_liboctave_warning_with_id_handler)
184 (
"Octave:regexp-lookbehind-limit",
185 "%s: arbitrary length lookbehind patterns are only supported up to length %d",
189 buf <<
m_pattern.substr (pos, new_pos - pos) <<
'(';
198 for (; i < max_length + 1; i++)
200 buf <<
m_pattern.substr (new_pos, tmp_pos3 - new_pos)
203 tmp_pos1 - tmp_pos3 - 1);
210 buf <<
m_pattern.substr (pos, tmp_pos1 - pos);
217 buf <<
m_pattern.substr (pos, new_pos - pos) <<
"(?";
227 std::string buf_str = buf.str ();
228 while ((pos = buf_str.find (
'\0')) != std::string::npos)
229 buf_str.replace (pos, 1,
"\\000");
241 m_data = pcre_compile (buf_str.c_str (), pcre_options,
242 &err, &erroffset,
nullptr);
245 (*current_liboctave_error_handler)
246 (
"%s: %s at position %d of expression",
m_who.c_str (), err, erroffset);
253 const uint8_t *buf_str =
reinterpret_cast<const uint8_t *
> (buffer.c_str ());
256 (
"%s: the input string is invalid UTF-8",
m_who.c_str ());
260 std::list<regexp::match_element> lst;
268 pcre *re =
static_cast<pcre *
> (
m_data);
270 pcre_fullinfo (re,
nullptr, PCRE_INFO_CAPTURECOUNT, &subpatterns);
271 pcre_fullinfo (re,
nullptr, PCRE_INFO_NAMECOUNT, &namecount);
272 pcre_fullinfo (re,
nullptr, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
273 pcre_fullinfo (re,
nullptr, PCRE_INFO_NAMETABLE, &nametable);
278 for (
int i = 0; i < namecount; i++)
282 nidx[i] = (
static_cast<int> (nametable[i*nameentrysize])) << 8
283 |
static_cast<int> (nametable[i*nameentrysize+1]);
290 int matches = pcre_exec (re,
nullptr, buffer.c_str (),
291 buffer.length (), idx,
292 PCRE_NO_UTF8_CHECK | (idx ? PCRE_NOTBOL : 0),
293 ovector, (subpatterns+1)*3);
295 if (matches == PCRE_ERROR_MATCHLIMIT)
299 (*current_liboctave_warning_with_id_handler)
300 (
"Octave:regexp-match-limit",
301 "your pattern caused PCRE to hit its MATCH_LIMIT; trying harder now, but this will be slow");
305 pcre_config (PCRE_CONFIG_MATCH_LIMIT,
306 static_cast<void *
> (&pe.match_limit));
308 pe.flags = PCRE_EXTRA_MATCH_LIMIT;
311 while (matches == PCRE_ERROR_MATCHLIMIT
316 pe.match_limit *= 10;
317 matches = pcre_exec (re, &pe, buffer.c_str (),
318 buffer.length (), idx,
320 | (idx ? PCRE_NOTBOL : 0),
321 ovector, (subpatterns+1)*3);
325 if (matches < 0 && matches != PCRE_ERROR_NOMATCH)
326 (*current_liboctave_error_handler)
327 (
"%s: internal error calling pcre_exec; "
328 "error code from pcre_exec is %i",
m_who.c_str (), matches);
330 if (matches == PCRE_ERROR_NOMATCH)
335 idx = ovector[0] + 1;
336 if (idx < buffer.length ())
344 Matrix token_extents (matches-1, 2);
346 for (
int i = 1; i < matches; i++)
348 if (ovector[2*i] >= 0 && ovector[2*i+1] > 0
349 && (i == 1 || ovector[2*i] != ovector[2*i-2]
350 || ovector[2*i-1] != ovector[2*i+1]))
352 token_extents(pos_match,0) = double (ovector[2*i]+1);
353 token_extents(pos_match++,1) = double (ovector[2*i+1]);
357 token_extents.
resize (pos_match, 2);
359 double start = double (ovector[0]+1);
360 double end = double (ovector[1]);
362 const char **listptr;
363 int status = pcre_get_substring_list (buffer.c_str (), ovector,
366 if (status == PCRE_ERROR_NOMEMORY)
367 (*current_liboctave_error_handler)
368 (
"%s: cannot allocate memory in pcre_get_substring_list",
372 std::string match_string = std::string (*listptr, end - start + 1);
379 for (
int i = 1; i < matches; i++)
381 if (ovector[2*i] >= 0 && ovector[2*i+1] > 0)
383 if (i == 1 || ovector[2*i] != ovector[2*i-2]
384 || ovector[2*i-1] != ovector[2*i+1])
392 for (
int j = 0; j < namecount; j++)
396 size_t len = ovector[2*i+1] - ovector[2*i];
398 = std::string (*(listptr+i-pos_offset),
405 size_t len = ovector[2*i+1] - ovector[2*i];
406 tokens(pos_match++) = std::string (*(listptr+i),
len);
413 pcre_free_substring_list (listptr);
416 token_extents, start, end);
417 lst.push_back (new_elem);
419 if (ovector[1] <= ovector[0])
422 idx = ovector[0] + 1;
423 if (idx <= buffer.length ())
444 return rx_lst.
size () > 0;
474 size_t num_matches = rx_lst.
size ();
476 if (num_matches == 0)
490 std::string repstr = replacement;
491 std::vector<rep_token_t> tokens;
494 for (
size_t i=0; i < repstr.size (); i++)
496 if (repstr[i] ==
'\\')
498 if (i < repstr.size () - 1 && repstr[i+1] ==
'$')
504 if (i < repstr.size () - 1 && repstr[i+1] ==
'\\')
510 else if (repstr[i] ==
'$')
512 if (i < repstr.size () - 1 && isdigit (repstr[i+1]))
517 tmp_token.
num = repstr[i+1]-
'0';
518 tokens.push_back (tmp_token);
524 int num_tokens = tokens.size ();
529 const size_t replen = repstr.size () - 2*num_tokens;
531 auto p = rx_lst.
begin ();
532 for (
size_t i = 0; i < num_matches; i++)
536 double start = p->start ();
537 double end = p->end ();
539 const Matrix pairs (p->token_extents ());
541 for (
int j = 0; j < num_tokens; j++)
543 if (tokens[j].num == 0)
544 pairlen +=
static_cast<size_t> (end - start + 1);
545 else if (tokens[j].num <= pairs.
rows ())
546 pairlen +=
static_cast<size_t> (pairs(tokens[j].num-1,1)
547 - pairs(tokens[j].num-1,0)
550 delta += (
static_cast<int> (replen + pairlen)
551 -
static_cast<int> (end - start + 1));
556 rep.reserve (buffer.size () + delta);
559 for (
size_t i = 0; i < num_matches; i++)
563 double start = p->start ();
564 double end = p->end ();
566 const Matrix pairs (p->token_extents ());
567 rep.append (&buffer[from],
static_cast<size_t> (start - 1 - from));
568 from =
static_cast<size_t> (end);
572 for (
int j = 0; j < num_tokens; j++)
574 rep.append (&repstr[cur_pos], (tokens[j].pos) - cur_pos);
575 cur_pos = tokens[j].pos+2;
577 int k = tokens[j].num;
581 rep.append (&buffer[
static_cast<size_t> (end - 1)],
582 static_cast<size_t> (end - start + 1));
584 else if (k <= pairs.
rows ())
587 rep.append (&buffer[
static_cast<size_t> (pairs(k-1,0)-1)],
588 static_cast<size_t> (pairs(k-1,1)
589 - pairs(k-1,0) + 1));
596 if (cur_pos < repstr.size ())
597 rep.append (&repstr[cur_pos], repstr.size () - cur_pos);
601 rep.append (&buffer[from], buffer.size () - from);
606 const size_t replen = repstr.size ();
608 auto p = rx_lst.
begin ();
609 for (
size_t i = 0; i < num_matches; i++)
613 delta +=
static_cast<int> (replen)
614 -
static_cast<int> (p->end () - p->start () + 1);
619 rep.reserve (buffer.size () + delta);
622 for (
size_t i = 0; i < num_matches; i++)
626 rep.append (&buffer[from],
627 static_cast<size_t> (p->start () - 1 - from));
628 from =
static_cast<size_t> (p->end ());
632 rep.append (&buffer[from], buffer.size () - from);
void resize(const dim_vector &dv, const T &rfv)
Size of the specified dimension.
octave_idx_type rows(void) const
void resize(octave_idx_type nr, octave_idx_type nc, double rfv=0)
Vector representing the dimensions (size) of an Array.
void lineanchors(bool val)
void case_insensitive(bool val)
void emptymatch(bool val)
void freespacing(bool val)
void dotexceptnewline(bool val)
match_data match(const std::string &buffer)
bool is_match(const std::string &buffer)
string_vector m_named_pats
std::string replace(const std::string &buffer, const std::string &replacement)
void compile_internal(void)
string_vector & append(const std::string &s)
octave_idx_type numel(void) const
OCTAVE_NORETURN liboctave_error_handler current_liboctave_error_handler
#define PCRE_MATCHLIMIT_MAX
static bool lookbehind_warned
#define OCTAVE_LOCAL_BUFFER(T, buf, size)
octave_value::octave_value(const Array< char > &chm, char type) return retval
const uint8_t * octave_u8_check_wrapper(const uint8_t *src, size_t n)