26 #if defined (HAVE_CONFIG_H)
35 #if defined (HAVE_PCRE2)
36 # define PCRE2_CODE_UNIT_WIDTH 8
37 # if defined (HAVE_PCRE2_H)
39 # elif defined (HAVE_PCRE2_PCRE2_H)
40 # include <pcre2/pcre2.h>
42 #elif defined (HAVE_PCRE)
43 # if defined (HAVE_PCRE_H)
45 # elif defined (HAVE_PCRE_PCRE_H)
46 # include <pcre/pcre.h>
60 #if defined (HAVE_PCRE2)
61 typedef pcre2_code octave_pcre_code;
62 typedef PCRE2_SIZE OCTAVE_PCRE_SIZE;
63 void (*octave_pcre_code_free) (octave_pcre_code *) = pcre2_code_free;
64 # define OCTAVE_PCRE_CASELESS PCRE2_CASELESS
65 # define OCTAVE_PCRE_DOTALL PCRE2_DOTALL
66 # define OCTAVE_PCRE_MULTILINE PCRE2_MULTILINE
67 # define OCTAVE_PCRE_EXTENDED PCRE2_EXTENDED
68 # define OCTAVE_PCRE_UTF PCRE2_UTF
69 # define OCTAVE_PCRE_INFO_CAPTURECOUNT PCRE2_INFO_CAPTURECOUNT
70 # define OCTAVE_PCRE_INFO_NAMECOUNT PCRE2_INFO_NAMECOUNT
71 # define OCTAVE_PCRE_INFO_NAMEENTRYSIZE PCRE2_INFO_NAMEENTRYSIZE
72 # define OCTAVE_PCRE_INFO_NAMETABLE PCRE2_INFO_NAMETABLE
73 #elif defined (HAVE_PCRE)
74 typedef pcre octave_pcre_code;
75 typedef int OCTAVE_PCRE_SIZE;
76 void (*octave_pcre_code_free) (
void *) = pcre_free;
77 # define OCTAVE_PCRE_CASELESS PCRE_CASELESS
78 # define OCTAVE_PCRE_DOTALL PCRE_DOTALL
79 # define OCTAVE_PCRE_MULTILINE PCRE_MULTILINE
80 # define OCTAVE_PCRE_EXTENDED PCRE_EXTENDED
81 # define OCTAVE_PCRE_UTF PCRE_UTF8
82 # define OCTAVE_PCRE_INFO_CAPTURECOUNT PCRE_INFO_CAPTURECOUNT
83 # define OCTAVE_PCRE_INFO_NAMECOUNT PCRE_INFO_NAMECOUNT
84 # define OCTAVE_PCRE_INFO_NAMEENTRYSIZE PCRE_INFO_NAMEENTRYSIZE
85 # define OCTAVE_PCRE_INFO_NAMETABLE PCRE_INFO_NAMETABLE
87 # error "PCRE2 or PCRE library is required to build Octave"
91 octave_pcre_pattern_info (
const octave_pcre_code *code,
int what,
void *where)
93 #if defined (HAVE_PCRE2)
94 return pcre2_pattern_info (code, what, where);
96 return pcre_fullinfo (code,
nullptr, what, where);
104 #define PCRE_MATCHLIMIT_MAX 10
107 #define MAXLOOKBEHIND 10
109 static bool lookbehind_warned =
false;
117 octave_pcre_code_free (
static_cast<octave_pcre_code *
> (m_code));
121 regexp::compile_internal ()
131 std::ostringstream buf;
133 while ((new_pos = m_pattern.find (
"(?", pos)) != std::string::npos)
136 if (m_pattern.size () > new_pos + 2
137 && m_pattern.at (new_pos + 2) ==
'<'
138 && ! (m_pattern.size () > new_pos + 3
139 && (m_pattern.at (new_pos + 3) ==
'='
140 || m_pattern.at (new_pos + 3) ==
'!'))
141 && (tmp_pos = m_pattern.find_first_of (
'>', new_pos))
143 && m_pattern.find_first_of (
')', tmp_pos) != std::string::npos)
155 = m_pattern.substr (new_pos+3, tmp_pos-new_pos-3);
159 for (
int i = 0; i < m_names; i++)
161 if (m_named_pats(i) == tmp_name)
164 m_named_idx(inames) = i;
173 m_named_idx(inames) = m_names;
174 m_named_pats.
append (tmp_name);
178 if (new_pos - pos > 0)
179 buf << m_pattern.substr (pos, new_pos-pos);
181 buf <<
"(?P<n00" << inames++;
182 else if (inames < 100)
183 buf <<
"(?P<n0" << inames++;
185 buf <<
"(?P<n" << inames++;
189 else if (m_pattern.size () > new_pos + 2
190 && m_pattern.at (new_pos + 2) ==
'<')
199 std::size_t tmp_pos1 = new_pos + 2;
200 std::size_t tmp_pos2 = tmp_pos1;
202 while (tmp_pos1 < m_pattern.length () && brackets > 0)
204 char ch = m_pattern.at (tmp_pos1);
221 buf << m_pattern.substr (pos, new_pos - pos) <<
"(?";
226 std::size_t tmp_pos3 = m_pattern.find_first_of (
"*+", tmp_pos2);
228 if (tmp_pos3 != std::string::npos && tmp_pos3 < tmp_pos1)
230 if (! lookbehind_warned)
232 lookbehind_warned =
true;
233 (*current_liboctave_warning_with_id_handler)
234 (
"Octave:regexp-lookbehind-limit",
235 "%s: arbitrary length lookbehind patterns are only supported up to length %d",
239 buf << m_pattern.substr (pos, new_pos - pos) <<
'(';
243 if (m_pattern.at (tmp_pos3) ==
'*')
248 for (; i < max_length + 1; i++)
250 buf << m_pattern.substr (new_pos, tmp_pos3 - new_pos)
252 buf << m_pattern.substr (tmp_pos3 + 1,
253 tmp_pos1 - tmp_pos3 - 1);
260 buf << m_pattern.substr (pos, tmp_pos1 - pos);
267 buf << m_pattern.substr (pos, new_pos - pos) <<
"(?";
273 buf << m_pattern.substr (pos);
277 std::string buf_str = buf.str ();
278 while ((pos = buf_str.find (
'\0')) != std::string::npos)
279 buf_str.replace (pos, 1,
"\\000");
284 | (m_options.
lineanchors () ? OCTAVE_PCRE_MULTILINE : 0)
285 | (m_options.
freespacing () ? OCTAVE_PCRE_EXTENDED : 0)
288 #if defined (HAVE_PCRE2)
289 PCRE2_SIZE erroffset;
292 m_code = pcre2_compile (
reinterpret_cast<PCRE2_SPTR
> (buf_str.c_str ()),
293 PCRE2_ZERO_TERMINATED, pcre_options,
294 &errnumber, &erroffset,
nullptr);
308 PCRE2_UCHAR err [256];
309 pcre2_get_error_message (errnumber, err,
sizeof (err));
310 (*current_liboctave_error_handler)
311 (
"%s: %s at position %zu of expression", m_who.c_str (), err,
318 m_code = pcre_compile (buf_str.c_str (), pcre_options,
319 &err, &erroffset,
nullptr);
322 (*current_liboctave_error_handler)
323 (
"%s: %s at position %d of expression", m_who.c_str (), err, erroffset);
331 const uint8_t *buf_str =
reinterpret_cast<const uint8_t *
> (buffer.c_str ());
334 (
"%s: the input string is invalid UTF-8", m_who.c_str ());
338 std::list<regexp::match_element> lst;
346 octave_pcre_code *re =
static_cast<octave_pcre_code *
> (m_code);
348 octave_pcre_pattern_info (re, OCTAVE_PCRE_INFO_CAPTURECOUNT, &subpatterns);
349 octave_pcre_pattern_info (re, OCTAVE_PCRE_INFO_NAMECOUNT, &namecount);
350 octave_pcre_pattern_info (re, OCTAVE_PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
351 octave_pcre_pattern_info (re, OCTAVE_PCRE_INFO_NAMETABLE, &nametable);
353 #if defined (HAVE_PCRE)
359 for (
int i = 0; i < namecount; i++)
363 nidx[i] = (
static_cast<int> (nametable[i*nameentrysize])) << 8
364 |
static_cast<int> (nametable[i*nameentrysize+1]);
371 #if defined (HAVE_PCRE2)
372 pcre2_match_data *m_data
373 = pcre2_match_data_create_from_pattern (re,
nullptr);
376 ([=] () { pcre2_match_data_free (m_data); });
378 int matches = pcre2_match (re,
reinterpret_cast<PCRE2_SPTR
> (buffer.c_str ()),
379 buffer.length (), idx,
380 PCRE2_NO_UTF_CHECK | (idx ? PCRE2_NOTBOL : 0),
383 if (matches < 0 && matches != PCRE2_ERROR_NOMATCH)
384 (*current_liboctave_error_handler)
385 (
"%s: internal error calling pcre2_match; "
386 "error code from pcre2_match is %i", m_who.c_str (), matches);
388 if (matches == PCRE2_ERROR_NOMATCH)
391 OCTAVE_PCRE_SIZE *ovector = pcre2_get_ovector_pointer (m_data);
393 int matches = pcre_exec (re,
nullptr, buffer.c_str (),
394 buffer.length (), idx,
395 PCRE_NO_UTF8_CHECK | (idx ? PCRE_NOTBOL : 0),
396 ovector, (subpatterns+1)*3);
398 if (matches == PCRE_ERROR_MATCHLIMIT)
402 (*current_liboctave_warning_with_id_handler)
403 (
"Octave:regexp-match-limit",
404 "your pattern caused PCRE to hit its MATCH_LIMIT; trying harder now, but this will be slow");
408 pcre_config (PCRE_CONFIG_MATCH_LIMIT,
409 static_cast<void *
> (&pe.match_limit));
411 pe.flags = PCRE_EXTRA_MATCH_LIMIT;
414 while (matches == PCRE_ERROR_MATCHLIMIT
419 pe.match_limit *= 10;
420 matches = pcre_exec (re, &pe, buffer.c_str (),
421 buffer.length (), idx,
423 | (idx ? PCRE_NOTBOL : 0),
424 ovector, (subpatterns+1)*3);
428 if (matches < 0 && matches != PCRE_ERROR_NOMATCH)
429 (*current_liboctave_error_handler)
430 (
"%s: internal error calling pcre_exec; "
431 "error code from pcre_exec is %i", m_who.c_str (), matches);
433 if (matches == PCRE_ERROR_NOMATCH)
436 if (ovector[0] >= ovector[1] && ! m_options.
emptymatch ())
439 idx = ovector[0] + 1;
440 if (idx < buffer.length ())
448 Matrix token_extents (matches-1, 2);
450 for (
int i = 1; i < matches; i++)
452 #if defined (HAVE_PCRE2)
453 if (ovector[2*i] != PCRE2_SIZE_MAX
455 if (ovector[2*i] >= 0
457 && ovector[2*i+1] > 0
458 && (i == 1 || ovector[2*i] != ovector[2*i-2]
459 || ovector[2*i-1] != ovector[2*i+1]))
461 token_extents(pos_match, 0) = double (ovector[2*i]+1);
462 token_extents(pos_match++, 1) = double (ovector[2*i+1]);
466 token_extents.
resize (pos_match, 2);
468 OCTAVE_PCRE_SIZE start = ovector[0] + 1;
469 OCTAVE_PCRE_SIZE end = ovector[1];
471 #if defined (HAVE_PCRE2)
473 std::string match_string = std::string (buffer.c_str() + start - 1,
476 const char **listptr;
477 int status = pcre_get_substring_list (buffer.c_str (), ovector,
480 if (status == PCRE_ERROR_NOMEMORY)
481 (*current_liboctave_error_handler)
482 (
"%s: cannot allocate memory in pcre_get_substring_list",
486 std::string match_string = std::string (*listptr, end - start + 1);
491 #if ! defined (HAVE_PCRE2)
496 for (
int i = 1; i < matches; i++)
498 #if defined (HAVE_PCRE2)
499 if (ovector[2*i] != PCRE2_SIZE_MAX
501 if (ovector[2*i] >= 0
503 && ovector[2*i+1] > 0)
505 if (i == 1 || ovector[2*i] != ovector[2*i-2]
506 || ovector[2*i-1] != ovector[2*i+1])
514 for (
int j = 0; j < namecount; j++)
518 std::size_t
len = ovector[2*i+1] - ovector[2*i];
519 named_tokens(m_named_idx(j))
520 #if defined (HAVE_PCRE2)
521 = std::string (buffer.c_str () + ovector[2*i],
len);
523 = std::string (*(listptr+i-pos_offset),
len);
530 std::size_t
len = ovector[2*i+1] - ovector[2*i];
531 #if defined (HAVE_PCRE2)
532 tokens(pos_match++) = std::string (buffer.c_str() + ovector[2*i],
len);
534 tokens(pos_match++) = std::string (*(listptr+i),
len);
537 #if ! defined (HAVE_PCRE2)
544 #if ! defined (HAVE_PCRE2)
545 pcre_free_substring_list (listptr);
554 double dstart =
static_cast<double> (start);
555 double dend =
static_cast<double> (end);
561 lst.push_back (new_elem);
563 if (ovector[1] <= ovector[0])
566 idx = ovector[0] + 1;
567 if (idx <= buffer.length ())
573 if (m_options.
once () || idx >= buffer.length ())
588 return rx_lst.
size () > 0;
613 const std::string& replacement)
const
619 std::size_t num_matches = rx_lst.
size ();
621 if (num_matches == 0)
635 std::string repstr = replacement;
636 std::vector<rep_token_t> tokens;
639 for (std::size_t i=0; i < repstr.size (); i++)
641 if (repstr[i] ==
'\\')
643 if (i < repstr.size () - 1 && repstr[i+1] ==
'$')
649 if (i < repstr.size () - 1 && repstr[i+1] ==
'\\')
655 else if (repstr[i] ==
'$')
657 if (i < repstr.size () - 1 && isdigit (repstr[i+1]))
659 rep_token_t tmp_token;
662 tmp_token.num = repstr[i+1]-
'0';
663 tokens.push_back (tmp_token);
669 int num_tokens = tokens.size ();
674 const std::size_t replen = repstr.size () - 2*num_tokens;
676 auto p = rx_lst.
begin ();
677 for (std::size_t i = 0; i < num_matches; i++)
681 double start = p->start ();
682 double end = p->end ();
684 const Matrix pairs (p->token_extents ());
685 std::size_t pairlen = 0;
686 for (
int j = 0; j < num_tokens; j++)
688 if (tokens[j].num == 0)
689 pairlen +=
static_cast<std::size_t
> (end - start + 1);
690 else if (tokens[j].num <= pairs.
rows ())
691 pairlen +=
static_cast<std::size_t
> (pairs(tokens[j].num-1, 1)
692 - pairs(tokens[j].num-1, 0)
695 delta += (
static_cast<int> (replen + pairlen)
696 -
static_cast<int> (end - start + 1));
701 rep.reserve (buffer.size () + delta);
702 std::size_t from = 0;
704 for (std::size_t i = 0; i < num_matches; i++)
708 double start = p->start ();
709 double end = p->end ();
711 const Matrix pairs (p->token_extents ());
712 rep.append (&buffer[from],
static_cast<std::size_t
> (start - 1 - from));
713 from =
static_cast<std::size_t
> (end);
715 std::size_t cur_pos = 0;
717 for (
int j = 0; j < num_tokens; j++)
719 rep.append (&repstr[cur_pos], (tokens[j].pos) - cur_pos);
720 cur_pos = tokens[j].pos+2;
722 int k = tokens[j].num;
726 rep.append (&buffer[
static_cast<std::size_t
> (end - 1)],
727 static_cast<std::size_t
> (end - start + 1));
729 else if (k <= pairs.
rows ())
732 rep.append (&buffer[
static_cast<std::size_t
> (pairs(k-1, 0)-1)],
733 static_cast<std::size_t
> (pairs(k-1, 1)
734 - pairs(k-1, 0) + 1));
741 if (cur_pos < repstr.size ())
742 rep.append (&repstr[cur_pos], repstr.size () - cur_pos);
746 rep.append (&buffer[from], buffer.size () - from);
751 const std::size_t replen = repstr.size ();
753 auto p = rx_lst.
begin ();
754 for (std::size_t i = 0; i < num_matches; i++)
758 delta +=
static_cast<int> (replen)
759 -
static_cast<int> (p->end () - p->start () + 1);
764 rep.reserve (buffer.size () + delta);
765 std::size_t from = 0;
767 for (std::size_t i = 0; i < num_matches; i++)
771 rep.append (&buffer[from],
772 static_cast<std::size_t
> (p->start () - 1 - from));
773 from =
static_cast<std::size_t
> (p->end ());
777 rep.append (&buffer[from], buffer.size () - from);
784 OCTAVE_END_NAMESPACE(
octave)
octave_idx_type rows() const
void resize(const dim_vector &dv, const T &rfv)
Size of the specified dimension.
void resize(octave_idx_type nr, octave_idx_type nc, double rfv=0)
Vector representing the dimensions (size) of an Array.
void dotexceptnewline(bool val)
void lineanchors(bool val)
void case_insensitive(bool val)
void freespacing(bool val)
void emptymatch(bool val)
std::string replace(const std::string &buffer, const std::string &replacement) const
bool is_match(const std::string &buffer) const
match_data match(const std::string &buffer) const
string_vector & append(const std::string &s)
octave_idx_type numel() const
OCTAVE_BEGIN_NAMESPACE(octave) static octave_value daspk_fcn
OCTAVE_NORETURN liboctave_error_handler current_liboctave_error_handler
#define PCRE_MATCHLIMIT_MAX
#define OCTAVE_LOCAL_BUFFER(T, buf, size)
const uint8_t * octave_u8_check_wrapper(const uint8_t *src, size_t n)