330 const uint8_t *buf_str =
reinterpret_cast<const uint8_t *
> (buffer.c_str ());
333 (
"%s: the input string is invalid UTF-8", m_who.c_str ());
337 std::list<regexp::match_element> lst;
345 octave_pcre_code *re =
static_cast<octave_pcre_code *
> (m_code);
347 octave_pcre_pattern_info (re, OCTAVE_PCRE_INFO_CAPTURECOUNT, &subpatterns);
348 octave_pcre_pattern_info (re, OCTAVE_PCRE_INFO_NAMECOUNT, &namecount);
349 octave_pcre_pattern_info (re, OCTAVE_PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
350 octave_pcre_pattern_info (re, OCTAVE_PCRE_INFO_NAMETABLE, &nametable);
352#if defined (HAVE_PCRE)
358 for (
int i = 0; i < namecount; i++)
362 nidx[i] = (
static_cast<int> (nametable[i*nameentrysize])) << 8
363 |
static_cast<int> (nametable[i*nameentrysize+1]);
370#if defined (HAVE_PCRE2)
371 pcre2_match_data *tmp_match_data
372 = pcre2_match_data_create_from_pattern (re,
nullptr);
374 unwind_action cleanup_match_data ([tmp_match_data] () { pcre2_match_data_free (tmp_match_data); });
376 int matches = pcre2_match (re,
reinterpret_cast<PCRE2_SPTR
> (buffer.c_str ()),
377 buffer.length (), idx,
378 PCRE2_NO_UTF_CHECK | (idx ? PCRE2_NOTBOL : 0),
379 tmp_match_data,
nullptr);
381 if (matches < 0 && matches != PCRE2_ERROR_NOMATCH)
382 (*current_liboctave_error_handler)
383 (
"%s: internal error calling pcre2_match; "
384 "error code from pcre2_match is %i", m_who.c_str (), matches);
386 if (matches == PCRE2_ERROR_NOMATCH)
389 OCTAVE_PCRE_SIZE *ovector = pcre2_get_ovector_pointer (tmp_match_data);
391 int matches = pcre_exec (re,
nullptr, buffer.c_str (),
392 buffer.length (), idx,
393 PCRE_NO_UTF8_CHECK | (idx ? PCRE_NOTBOL : 0),
394 ovector, (subpatterns+1)*3);
396 if (matches == PCRE_ERROR_MATCHLIMIT)
400 (*current_liboctave_warning_with_id_handler)
401 (
"Octave:regexp-match-limit",
402 "your pattern caused PCRE to hit its MATCH_LIMIT; trying harder now, but this will be slow");
406 pcre_config (PCRE_CONFIG_MATCH_LIMIT,
407 static_cast<void *
> (&pe.match_limit));
409 pe.flags = PCRE_EXTRA_MATCH_LIMIT;
412 while (matches == PCRE_ERROR_MATCHLIMIT
417 pe.match_limit *= 10;
418 matches = pcre_exec (re, &pe, buffer.c_str (),
419 buffer.length (), idx,
421 | (idx ? PCRE_NOTBOL : 0),
422 ovector, (subpatterns+1)*3);
426 if (matches < 0 && matches != PCRE_ERROR_NOMATCH)
427 (*current_liboctave_error_handler)
428 (
"%s: internal error calling pcre_exec; "
429 "error code from pcre_exec is %i", m_who.c_str (), matches);
431 if (matches == PCRE_ERROR_NOMATCH)
434 if (ovector[0] >= ovector[1] && ! m_options.
emptymatch ())
437 idx = ovector[0] + 1;
438 if (idx < buffer.length ())
446 Matrix token_extents (matches-1, 2);
448 for (
int i = 1; i < matches; i++)
450#if defined (HAVE_PCRE2)
451 if (ovector[2*i] != PCRE2_SIZE_MAX
453 if (ovector[2*i] >= 0
455 && ovector[2*i+1] > 0
456 && (i == 1 || ovector[2*i] != ovector[2*i-2]
457 || ovector[2*i-1] != ovector[2*i+1]))
459 token_extents(pos_match, 0) =
double (ovector[2*i]+1);
460 token_extents(pos_match++, 1) =
double (ovector[2*i+1]);
464 token_extents.
resize (pos_match, 2);
466 OCTAVE_PCRE_SIZE start = ovector[0] + 1;
467 OCTAVE_PCRE_SIZE end = ovector[1];
469#if defined (HAVE_PCRE2)
471 std::string match_string = std::string (buffer.c_str() + start - 1,
474 const char **listptr;
475 int status = pcre_get_substring_list (buffer.c_str (), ovector,
478 if (status == PCRE_ERROR_NOMEMORY)
479 (*current_liboctave_error_handler)
480 (
"%s: cannot allocate memory in pcre_get_substring_list",
484 std::string match_string = std::string (*listptr, end - start + 1);
489#if ! defined (HAVE_PCRE2)
494 for (
int i = 1; i < matches; i++)
496#if defined (HAVE_PCRE2)
497 if (ovector[2*i] != PCRE2_SIZE_MAX
499 if (ovector[2*i] >= 0
501 && ovector[2*i+1] > 0)
503 if (i == 1 || ovector[2*i] != ovector[2*i-2]
504 || ovector[2*i-1] != ovector[2*i+1])
512 for (
int j = 0; j < namecount; j++)
516 std::size_t
len = ovector[2*i+1] - ovector[2*i];
517 named_tokens(m_named_idx(j))
518#if defined (HAVE_PCRE2)
519 = std::string (buffer.c_str () + ovector[2*i],
len);
521 = std::string (*(listptr+i-pos_offset),
len);
528 std::size_t
len = ovector[2*i+1] - ovector[2*i];
529#if defined (HAVE_PCRE2)
530 tokens(pos_match++) = std::string (buffer.c_str() + ovector[2*i],
len);
532 tokens(pos_match++) = std::string (*(listptr+i),
len);
535#if ! defined (HAVE_PCRE2)
542#if ! defined (HAVE_PCRE2)
543 pcre_free_substring_list (listptr);
552 double dstart =
static_cast<double> (start);
553 double dend =
static_cast<double> (end);
559 lst.push_back (new_elem);
561 if (ovector[1] <= ovector[0])
564 idx = ovector[0] + 1;
565 if (idx <= buffer.length ())
571 if (m_options.
once () || idx >= buffer.length ())
611 const std::string& replacement)
const
617 std::size_t num_matches = rx_lst.size ();
619 if (num_matches == 0)
633 std::string repstr = replacement;
634 std::vector<rep_token_t> tokens;
637 for (std::size_t i=0; i < repstr.size (); i++)
639 if (repstr[i] ==
'\\')
641 if (i < repstr.size () - 1 && repstr[i+1] ==
'$')
647 if (i < repstr.size () - 1 && repstr[i+1] ==
'\\')
653 else if (repstr[i] ==
'$')
655 if (i < repstr.size () - 1 && isdigit (repstr[i+1]))
657 rep_token_t tmp_token;
660 tmp_token.num = repstr[i+1]-
'0';
661 tokens.push_back (tmp_token);
667 int num_tokens = tokens.size ();
672 const std::size_t replen = repstr.size () - 2*num_tokens;
674 auto p = rx_lst.begin ();
675 for (std::size_t i = 0; i < num_matches; i++)
679 double start = p->start ();
680 double end = p->end ();
682 const Matrix pairs (p->token_extents ());
683 std::size_t pairlen = 0;
684 for (
int j = 0; j < num_tokens; j++)
686 if (tokens[j].num == 0)
687 pairlen +=
static_cast<std::size_t
> (end - start + 1);
688 else if (tokens[j].num <= pairs.
rows ())
689 pairlen +=
static_cast<std::size_t
> (pairs(tokens[j].num-1, 1)
690 - pairs(tokens[j].num-1, 0)
693 delta += (
static_cast<int> (replen + pairlen)
694 -
static_cast<int> (end - start + 1));
699 rep.reserve (buffer.size () + delta);
700 std::size_t from = 0;
702 for (std::size_t i = 0; i < num_matches; i++)
706 double start = p->start ();
707 double end = p->end ();
709 const Matrix pairs (p->token_extents ());
710 rep.append (&buffer[from],
static_cast<std::size_t
> (start - 1 - from));
711 from =
static_cast<std::size_t
> (end);
713 std::size_t cur_pos = 0;
715 for (
int j = 0; j < num_tokens; j++)
717 rep.append (&repstr[cur_pos], (tokens[j].pos) - cur_pos);
718 cur_pos = tokens[j].pos+2;
720 int k = tokens[j].num;
724 rep.append (&buffer[
static_cast<std::size_t
> (end - 1)],
725 static_cast<std::size_t
> (end - start + 1));
727 else if (k <= pairs.
rows ())
730 rep.append (&buffer[
static_cast<std::size_t
> (pairs(k-1, 0)-1)],
731 static_cast<std::size_t
> (pairs(k-1, 1)
732 - pairs(k-1, 0) + 1));
739 if (cur_pos < repstr.size ())
740 rep.append (&repstr[cur_pos], repstr.size () - cur_pos);
744 rep.append (&buffer[from], buffer.size () - from);
749 const std::size_t replen = repstr.size ();
751 auto p = rx_lst.begin ();
752 for (std::size_t i = 0; i < num_matches; i++)
756 delta +=
static_cast<int> (replen)
757 -
static_cast<int> (p->end () - p->start () + 1);
762 rep.reserve (buffer.size () + delta);
763 std::size_t from = 0;
765 for (std::size_t i = 0; i < num_matches; i++)
769 rep.append (&buffer[from],
770 static_cast<std::size_t
> (p->start () - 1 - from));
771 from =
static_cast<std::size_t
> (p->end ());
775 rep.append (&buffer[from], buffer.size () - from);