26 #if defined (HAVE_CONFIG_H)
35 #if defined (HAVE_PCRE2_H) || defined (HAVE_PCRE2_PCRE2_H)
36 # define PCRE2_CODE_UNIT_WIDTH 8
37 # if defined (HAVE_PCRE2_H)
39 # elif defined (HAVE_PCRE2_PCRE2_H)
40 # include <pcre2/pcre2.h>
42 #elif defined (HAVE_PCRE_H) || defined (HAVE_PCRE_PCRE_H)
43 # if defined (HAVE_PCRE_H)
45 # elif defined (HAVE_PCRE_PCRE_H)
46 # include <pcre/pcre.h>
60 #if defined (HAVE_PCRE2)
61 typedef pcre2_code octave_pcre_code;
62 typedef PCRE2_SIZE OCTAVE_PCRE_SIZE;
63 void (*octave_pcre_code_free) (octave_pcre_code *) = pcre2_code_free;
64 # define OCTAVE_PCRE_CASELESS PCRE2_CASELESS
65 # define OCTAVE_PCRE_DOTALL PCRE2_DOTALL
66 # define OCTAVE_PCRE_MULTILINE PCRE2_MULTILINE
67 # define OCTAVE_PCRE_EXTENDED PCRE2_EXTENDED
68 # define OCTAVE_PCRE_UTF PCRE2_UTF
69 # define OCTAVE_PCRE_INFO_CAPTURECOUNT PCRE2_INFO_CAPTURECOUNT
70 # define OCTAVE_PCRE_INFO_NAMECOUNT PCRE2_INFO_NAMECOUNT
71 # define OCTAVE_PCRE_INFO_NAMEENTRYSIZE PCRE2_INFO_NAMEENTRYSIZE
72 # define OCTAVE_PCRE_INFO_NAMETABLE PCRE2_INFO_NAMETABLE
73 #elif defined (HAVE_PCRE)
74 typedef pcre octave_pcre_code;
75 typedef int OCTAVE_PCRE_SIZE;
76 void (*octave_pcre_code_free) (
void *) = pcre_free;
77 # define OCTAVE_PCRE_CASELESS PCRE_CASELESS
78 # define OCTAVE_PCRE_DOTALL PCRE_DOTALL
79 # define OCTAVE_PCRE_MULTILINE PCRE_MULTILINE
80 # define OCTAVE_PCRE_EXTENDED PCRE_EXTENDED
81 # define OCTAVE_PCRE_UTF PCRE_UTF8
82 # define OCTAVE_PCRE_INFO_CAPTURECOUNT PCRE_INFO_CAPTURECOUNT
83 # define OCTAVE_PCRE_INFO_NAMECOUNT PCRE_INFO_NAMECOUNT
84 # define OCTAVE_PCRE_INFO_NAMEENTRYSIZE PCRE_INFO_NAMEENTRYSIZE
85 # define OCTAVE_PCRE_INFO_NAMETABLE PCRE_INFO_NAMETABLE
87 # error "PCRE2 or PCRE library is required to build Octave"
93 #if defined (HAVE_PCRE2)
94 return pcre2_pattern_info (code, what, where);
96 return pcre_fullinfo (code,
nullptr, what, where);
104 #define PCRE_MATCHLIMIT_MAX 10
107 #define MAXLOOKBEHIND 10
117 octave_pcre_code_free (
static_cast<octave_pcre_code *
> (
m_code));
131 std::ostringstream buf;
133 while ((new_pos =
m_pattern.find (
"(?", pos)) != std::string::npos)
141 && (tmp_pos =
m_pattern.find_first_of (
'>', new_pos))
143 &&
m_pattern.find_first_of (
')', tmp_pos) != std::string::npos)
155 =
m_pattern.substr (new_pos+3, tmp_pos-new_pos-3);
159 for (
int i = 0; i <
m_names; i++)
178 if (new_pos - pos > 0)
179 buf <<
m_pattern.substr (pos, new_pos-pos);
181 buf <<
"(?P<n00" << inames++;
182 else if (inames < 100)
183 buf <<
"(?P<n0" << inames++;
185 buf <<
"(?P<n" << inames++;
199 std::size_t tmp_pos1 = new_pos + 2;
200 std::size_t tmp_pos2 = tmp_pos1;
202 while (tmp_pos1 <
m_pattern.length () && brackets > 0)
221 buf <<
m_pattern.substr (pos, new_pos - pos) <<
"(?";
226 std::size_t tmp_pos3 =
m_pattern.find_first_of (
"*+", tmp_pos2);
228 if (tmp_pos3 != std::string::npos && tmp_pos3 < tmp_pos1)
233 (*current_liboctave_warning_with_id_handler)
234 (
"Octave:regexp-lookbehind-limit",
235 "%s: arbitrary length lookbehind patterns are only supported up to length %d",
239 buf <<
m_pattern.substr (pos, new_pos - pos) <<
'(';
248 for (; i < max_length + 1; i++)
250 buf <<
m_pattern.substr (new_pos, tmp_pos3 - new_pos)
253 tmp_pos1 - tmp_pos3 - 1);
260 buf <<
m_pattern.substr (pos, tmp_pos1 - pos);
267 buf <<
m_pattern.substr (pos, new_pos - pos) <<
"(?";
277 std::string buf_str = buf.str ();
278 while ((pos = buf_str.find (
'\0')) != std::string::npos)
279 buf_str.replace (pos, 1,
"\\000");
288 #if defined (HAVE_PCRE2)
289 PCRE2_SIZE erroffset;
292 m_code = pcre2_compile (
reinterpret_cast<PCRE2_SPTR
> (buf_str.c_str ()),
293 PCRE2_ZERO_TERMINATED, pcre_options,
294 &errnumber, &erroffset,
nullptr);
308 PCRE2_UCHAR err [256];
309 pcre2_get_error_message (errnumber, err,
sizeof (err));
310 (*current_liboctave_error_handler)
311 (
"%s: %s at position %zu of expression",
m_who.c_str (), err,
318 m_code = pcre_compile (buf_str.c_str (), pcre_options,
319 &err, &erroffset,
nullptr);
322 (*current_liboctave_error_handler)
323 (
"%s: %s at position %d of expression",
m_who.c_str (), err, erroffset);
331 const uint8_t *buf_str =
reinterpret_cast<const uint8_t *
> (buffer.c_str ());
334 (
"%s: the input string is invalid UTF-8",
m_who.c_str ());
338 std::list<regexp::match_element> lst;
346 octave_pcre_code *re =
static_cast<octave_pcre_code *
> (
m_code);
353 #if defined (HAVE_PCRE)
359 for (
int i = 0; i < namecount; i++)
363 nidx[i] = (
static_cast<int> (nametable[i*nameentrysize])) << 8
364 |
static_cast<int> (nametable[i*nameentrysize+1]);
371 #if defined (HAVE_PCRE2)
372 pcre2_match_data *m_data
373 = pcre2_match_data_create_from_pattern (re,
nullptr);
376 ([=] () { pcre2_match_data_free (m_data); });
378 int matches = pcre2_match (re,
reinterpret_cast<PCRE2_SPTR
> (buffer.c_str ()),
379 buffer.length (), idx,
380 PCRE2_NO_UTF_CHECK | (idx ? PCRE2_NOTBOL : 0),
383 if (matches < 0 && matches != PCRE2_ERROR_NOMATCH)
384 (*current_liboctave_error_handler)
385 (
"%s: internal error calling pcre2_match; "
386 "error code from pcre2_match is %i",
m_who.c_str (), matches);
388 if (matches == PCRE2_ERROR_NOMATCH)
391 OCTAVE_PCRE_SIZE *ovector = pcre2_get_ovector_pointer (m_data);
393 int matches = pcre_exec (re,
nullptr, buffer.c_str (),
394 buffer.length (), idx,
395 PCRE_NO_UTF8_CHECK | (idx ? PCRE_NOTBOL : 0),
396 ovector, (subpatterns+1)*3);
398 if (matches == PCRE_ERROR_MATCHLIMIT)
402 (*current_liboctave_warning_with_id_handler)
403 (
"Octave:regexp-match-limit",
404 "your pattern caused PCRE to hit its MATCH_LIMIT; trying harder now, but this will be slow");
408 pcre_config (PCRE_CONFIG_MATCH_LIMIT,
409 static_cast<void *
> (&pe.match_limit));
411 pe.flags = PCRE_EXTRA_MATCH_LIMIT;
414 while (matches == PCRE_ERROR_MATCHLIMIT
419 pe.match_limit *= 10;
420 matches = pcre_exec (re, &pe, buffer.c_str (),
421 buffer.length (), idx,
423 | (idx ? PCRE_NOTBOL : 0),
424 ovector, (subpatterns+1)*3);
428 if (matches < 0 && matches != PCRE_ERROR_NOMATCH)
429 (*current_liboctave_error_handler)
430 (
"%s: internal error calling pcre_exec; "
431 "error code from pcre_exec is %i",
m_who.c_str (), matches);
433 if (matches == PCRE_ERROR_NOMATCH)
439 idx = ovector[0] + 1;
440 if (idx < buffer.length ())
448 Matrix token_extents (matches-1, 2);
450 for (
int i = 1; i < matches; i++)
452 #if defined (HAVE_PCRE2)
453 if (ovector[2*i] != PCRE2_SIZE_MAX
455 if (ovector[2*i] >= 0
457 && ovector[2*i+1] > 0
458 && (i == 1 || ovector[2*i] != ovector[2*i-2]
459 || ovector[2*i-1] != ovector[2*i+1]))
461 token_extents(pos_match, 0) = double (ovector[2*i]+1);
462 token_extents(pos_match++, 1) = double (ovector[2*i+1]);
466 token_extents.
resize (pos_match, 2);
468 OCTAVE_PCRE_SIZE start = ovector[0] + 1;
469 OCTAVE_PCRE_SIZE end = ovector[1];
471 #if defined (HAVE_PCRE2)
473 std::string match_string = std::string (buffer.c_str() + start - 1,
476 const char **listptr;
477 int status = pcre_get_substring_list (buffer.c_str (), ovector,
480 if (status == PCRE_ERROR_NOMEMORY)
481 (*current_liboctave_error_handler)
482 (
"%s: cannot allocate memory in pcre_get_substring_list",
486 std::string match_string = std::string (*listptr, end - start + 1);
494 for (
int i = 1; i < matches; i++)
496 #if defined (HAVE_PCRE2)
497 if (ovector[2*i] != PCRE2_SIZE_MAX
499 if (ovector[2*i] >= 0
501 && ovector[2*i+1] > 0)
503 if (i == 1 || ovector[2*i] != ovector[2*i-2]
504 || ovector[2*i-1] != ovector[2*i+1])
512 for (
int j = 0; j < namecount; j++)
516 std::size_t
len = ovector[2*i+1] - ovector[2*i];
518 #if defined (HAVE_PCRE2)
519 = std::string (buffer.c_str () + ovector[2*i],
len);
521 = std::string (*(listptr+i-pos_offset),
len);
528 std::size_t
len = ovector[2*i+1] - ovector[2*i];
529 #if defined (HAVE_PCRE2)
530 tokens(pos_match++) = std::string (buffer.c_str() + ovector[2*i],
len);
532 tokens(pos_match++) = std::string (*(listptr+i),
len);
540 #if ! defined (HAVE_PCRE2)
541 pcre_free_substring_list (listptr);
550 double dstart =
static_cast<double> (start);
551 double dend =
static_cast<double> (end);
557 lst.push_back (new_elem);
559 if (ovector[1] <= ovector[0])
562 idx = ovector[0] + 1;
563 if (idx <= buffer.length ())
584 return rx_lst.
size () > 0;
609 const std::string& replacement)
const
615 std::size_t num_matches = rx_lst.
size ();
617 if (num_matches == 0)
631 std::string repstr = replacement;
632 std::vector<rep_token_t> tokens;
635 for (std::size_t i=0; i < repstr.size (); i++)
637 if (repstr[i] ==
'\\')
639 if (i < repstr.size () - 1 && repstr[i+1] ==
'$')
645 if (i < repstr.size () - 1 && repstr[i+1] ==
'\\')
651 else if (repstr[i] ==
'$')
653 if (i < repstr.size () - 1 && isdigit (repstr[i+1]))
658 tmp_token.
num = repstr[i+1]-
'0';
659 tokens.push_back (tmp_token);
665 int num_tokens = tokens.size ();
670 const std::size_t replen = repstr.size () - 2*num_tokens;
672 auto p = rx_lst.
begin ();
673 for (std::size_t i = 0; i < num_matches; i++)
677 double start = p->start ();
678 double end = p->end ();
680 const Matrix pairs (p->token_extents ());
681 std::size_t pairlen = 0;
682 for (
int j = 0; j < num_tokens; j++)
684 if (tokens[j].num == 0)
685 pairlen +=
static_cast<std::size_t
> (end - start + 1);
686 else if (tokens[j].num <= pairs.
rows ())
687 pairlen +=
static_cast<std::size_t
> (pairs(tokens[j].num-1, 1)
688 - pairs(tokens[j].num-1, 0)
691 delta += (
static_cast<int> (replen + pairlen)
692 -
static_cast<int> (end - start + 1));
697 rep.reserve (buffer.size () + delta);
698 std::size_t from = 0;
700 for (std::size_t i = 0; i < num_matches; i++)
704 double start = p->start ();
705 double end = p->end ();
707 const Matrix pairs (p->token_extents ());
708 rep.append (&buffer[from],
static_cast<std::size_t
> (start - 1 - from));
709 from =
static_cast<std::size_t
> (end);
711 std::size_t cur_pos = 0;
713 for (
int j = 0; j < num_tokens; j++)
715 rep.append (&repstr[cur_pos], (tokens[j].pos) - cur_pos);
716 cur_pos = tokens[j].pos+2;
718 int k = tokens[j].num;
722 rep.append (&buffer[
static_cast<std::size_t
> (end - 1)],
723 static_cast<std::size_t
> (end - start + 1));
725 else if (k <= pairs.
rows ())
728 rep.append (&buffer[
static_cast<std::size_t
> (pairs(k-1, 0)-1)],
729 static_cast<std::size_t
> (pairs(k-1, 1)
730 - pairs(k-1, 0) + 1));
737 if (cur_pos < repstr.size ())
738 rep.append (&repstr[cur_pos], repstr.size () - cur_pos);
742 rep.append (&buffer[from], buffer.size () - from);
747 const std::size_t replen = repstr.size ();
749 auto p = rx_lst.
begin ();
750 for (std::size_t i = 0; i < num_matches; i++)
754 delta +=
static_cast<int> (replen)
755 -
static_cast<int> (p->end () - p->start () + 1);
760 rep.reserve (buffer.size () + delta);
761 std::size_t from = 0;
763 for (std::size_t i = 0; i < num_matches; i++)
767 rep.append (&buffer[from],
768 static_cast<std::size_t
> (p->start () - 1 - from));
769 from =
static_cast<std::size_t
> (p->end ());
773 rep.append (&buffer[from], buffer.size () - from);
OCTARRAY_API void resize(const dim_vector &dv, const T &rfv)
Size of the specified dimension.
OCTARRAY_OVERRIDABLE_FUNC_API octave_idx_type rows(void) const
void resize(octave_idx_type nr, octave_idx_type nc, double rfv=0)
std::size_t size(void) const
Vector representing the dimensions (size) of an Array.
void dotexceptnewline(bool val)
void lineanchors(bool val)
void case_insensitive(bool val)
void freespacing(bool val)
void emptymatch(bool val)
std::string replace(const std::string &buffer, const std::string &replacement) const
string_vector m_named_pats
bool is_match(const std::string &buffer) const
match_data match(const std::string &buffer) const
void compile_internal(void)
string_vector & append(const std::string &s)
octave_idx_type numel(void) const
OCTAVE_BEGIN_NAMESPACE(octave) static octave_value daspk_fcn
OCTAVE_NORETURN liboctave_error_handler current_liboctave_error_handler
static int octave_pcre_pattern_info(const octave_pcre_code *code, int what, void *where)
#define PCRE_MATCHLIMIT_MAX
static bool lookbehind_warned
#define OCTAVE_LOCAL_BUFFER(T, buf, size)
const uint8_t * octave_u8_check_wrapper(const uint8_t *src, size_t n)