GNU Octave 10.1.0
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab
 
Loading...
Searching...
No Matches
lo-regexp.cc
Go to the documentation of this file.
1////////////////////////////////////////////////////////////////////////
2//
3// Copyright (C) 2002-2025 The Octave Project Developers
4//
5// See the file COPYRIGHT.md in the top-level directory of this
6// distribution or <https://octave.org/copyright/>.
7//
8// This file is part of Octave.
9//
10// Octave is free software: you can redistribute it and/or modify it
11// under the terms of the GNU General Public License as published by
12// the Free Software Foundation, either version 3 of the License, or
13// (at your option) any later version.
14//
15// Octave is distributed in the hope that it will be useful, but
16// WITHOUT ANY WARRANTY; without even the implied warranty of
17// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18// GNU General Public License for more details.
19//
20// You should have received a copy of the GNU General Public License
21// along with Octave; see the file COPYING. If not, see
22// <https://www.gnu.org/licenses/>.
23//
24////////////////////////////////////////////////////////////////////////
25
26#if defined (HAVE_CONFIG_H)
27# include "config.h"
28#endif
29
30#include <list>
31#include <sstream>
32#include <string>
33#include <vector>
34
35#if defined (HAVE_PCRE2)
36# define PCRE2_CODE_UNIT_WIDTH 8
37# if defined (HAVE_PCRE2_H)
38# include <pcre2.h>
39# elif defined (HAVE_PCRE2_PCRE2_H)
40# include <pcre2/pcre2.h>
41# endif
42#elif defined (HAVE_PCRE)
43# if defined (HAVE_PCRE_H)
44# include <pcre.h>
45# elif defined (HAVE_PCRE_PCRE_H)
46# include <pcre/pcre.h>
47# endif
48#endif
49
50#include "Matrix.h"
51#include "lo-error.h"
52#include "oct-locbuf.h"
53#include "quit.h"
54#include "lo-regexp.h"
55#include "str-vec.h"
56#include "unistr-wrappers.h"
57#include "unwind-prot.h"
58
59#if defined (HAVE_PCRE2)
60typedef pcre2_code octave_pcre_code;
61typedef PCRE2_SIZE OCTAVE_PCRE_SIZE;
62void (*octave_pcre_code_free) (octave_pcre_code *) = pcre2_code_free;
63# define OCTAVE_PCRE_CASELESS PCRE2_CASELESS
64# define OCTAVE_PCRE_DOTALL PCRE2_DOTALL
65# define OCTAVE_PCRE_MULTILINE PCRE2_MULTILINE
66# define OCTAVE_PCRE_EXTENDED PCRE2_EXTENDED
67# define OCTAVE_PCRE_UTF PCRE2_UTF
68# define OCTAVE_PCRE_INFO_CAPTURECOUNT PCRE2_INFO_CAPTURECOUNT
69# define OCTAVE_PCRE_INFO_NAMECOUNT PCRE2_INFO_NAMECOUNT
70# define OCTAVE_PCRE_INFO_NAMEENTRYSIZE PCRE2_INFO_NAMEENTRYSIZE
71# define OCTAVE_PCRE_INFO_NAMETABLE PCRE2_INFO_NAMETABLE
72#elif defined (HAVE_PCRE)
73typedef pcre octave_pcre_code;
74typedef int OCTAVE_PCRE_SIZE;
75void (*octave_pcre_code_free) (void *) = pcre_free;
76# define OCTAVE_PCRE_CASELESS PCRE_CASELESS
77# define OCTAVE_PCRE_DOTALL PCRE_DOTALL
78# define OCTAVE_PCRE_MULTILINE PCRE_MULTILINE
79# define OCTAVE_PCRE_EXTENDED PCRE_EXTENDED
80# define OCTAVE_PCRE_UTF PCRE_UTF8
81# define OCTAVE_PCRE_INFO_CAPTURECOUNT PCRE_INFO_CAPTURECOUNT
82# define OCTAVE_PCRE_INFO_NAMECOUNT PCRE_INFO_NAMECOUNT
83# define OCTAVE_PCRE_INFO_NAMEENTRYSIZE PCRE_INFO_NAMEENTRYSIZE
84# define OCTAVE_PCRE_INFO_NAMETABLE PCRE_INFO_NAMETABLE
85#else
86# error "PCRE2 or PCRE library is required to build Octave"
87#endif
88
89static inline int
90octave_pcre_pattern_info (const octave_pcre_code *code, int what, void *where)
91{
92#if defined (HAVE_PCRE2)
93 return pcre2_pattern_info (code, what, where);
94#else
95 return pcre_fullinfo (code, nullptr, what, where);
96#endif
97}
98
100
101// Define the maximum number of retries for a pattern
102// that possibly results in an infinite recursion.
103#define PCRE_MATCHLIMIT_MAX 10
104
105// FIXME: should this be configurable?
106#define MAXLOOKBEHIND 10
107
108static bool lookbehind_warned = false;
109
110// FIXME: don't bother collecting and composing return values
111// the user doesn't want.
112
113void
114regexp::free ()
115{
116 octave_pcre_code_free (static_cast<octave_pcre_code *> (m_code));
117}
118
119void
120regexp::compile_internal ()
121{
122 // If we had a previously compiled pattern, release it.
123 free ();
124
125 std::size_t max_length = MAXLOOKBEHIND;
126
127 std::size_t pos = 0;
128 std::size_t new_pos;
129 int inames = 0;
130 std::ostringstream buf;
131
132 while ((new_pos = m_pattern.find ("(?", pos)) != std::string::npos)
133 {
134 std::size_t tmp_pos;
135 if (m_pattern.size () > new_pos + 2
136 && m_pattern.at (new_pos + 2) == '<'
137 && ! (m_pattern.size () > new_pos + 3
138 && (m_pattern.at (new_pos + 3) == '='
139 || m_pattern.at (new_pos + 3) == '!'))
140 && (tmp_pos = m_pattern.find_first_of ('>', new_pos))
141 != std::string::npos
142 && m_pattern.find_first_of (')', tmp_pos) != std::string::npos)
143 {
144 // The syntax of named tokens in pcre is "(?P<name>...)" while
145 // we need a syntax "(?<name>...)", so fix that here. Also an
146 // expression like
147 // "(?<first>\w+)\s+(?<last>\w+)|(?<last>\w+),\s+(?<first>\w+)"
148 // should be perfectly legal, while pcre does not allow the same
149 // named token name on both sides of the alternative. Also fix
150 // that here by replacing name tokens by dummy names, and dealing
151 // with the dummy names later.
152
153 std::string tmp_name
154 = m_pattern.substr (new_pos+3, tmp_pos-new_pos-3);
155
156 bool found = false;
157
158 for (int i = 0; i < m_names; i++)
159 {
160 if (m_named_pats(i) == tmp_name)
161 {
162 m_named_idx.resize (dim_vector (inames+1, 1));
163 m_named_idx(inames) = i;
164 found = true;
165 break;
166 }
167 }
168
169 if (! found)
170 {
171 m_named_idx.resize (dim_vector (inames+1, 1));
172 m_named_idx(inames) = m_names;
173 m_named_pats.append (tmp_name);
174 m_names++;
175 }
176
177 if (new_pos > pos)
178 buf << m_pattern.substr (pos, new_pos-pos);
179 if (inames < 10)
180 buf << "(?P<n00" << inames++;
181 else if (inames < 100)
182 buf << "(?P<n0" << inames++;
183 else
184 buf << "(?P<n" << inames++;
185
186 pos = tmp_pos;
187 }
188 else if (m_pattern.size () > new_pos + 2
189 && m_pattern.at (new_pos + 2) == '<')
190 {
191 // Find lookbehind operators of arbitrary length (ie like
192 // "(?<=[a-z]*)") and replace with a maximum length operator
193 // as PCRE can not yet handle arbitrary length lookahead
194 // operators. Use the string length as the maximum length to
195 // avoid issues.
196
197 int brackets = 1;
198 std::size_t tmp_pos1 = new_pos + 2;
199 std::size_t tmp_pos2 = tmp_pos1;
200
201 while (tmp_pos1 < m_pattern.length () && brackets > 0)
202 {
203 char ch = m_pattern.at (tmp_pos1);
204
205 if (ch == '(')
206 brackets++;
207 else if (ch == ')')
208 {
209 if (brackets > 1)
210 tmp_pos2 = tmp_pos1;
211
212 brackets--;
213 }
214
215 tmp_pos1++;
216 }
217
218 if (brackets != 0)
219 {
220 buf << m_pattern.substr (pos, new_pos - pos) << "(?";
221 pos = new_pos + 2;
222 }
223 else
224 {
225 std::size_t tmp_pos3 = m_pattern.find_first_of ("*+", tmp_pos2);
226
227 if (tmp_pos3 != std::string::npos && tmp_pos3 < tmp_pos1)
228 {
229 if (! lookbehind_warned)
230 {
231 lookbehind_warned = true;
232 (*current_liboctave_warning_with_id_handler)
233 ("Octave:regexp-lookbehind-limit",
234 "%s: arbitrary length lookbehind patterns are only supported up to length %d",
235 m_who.c_str (), MAXLOOKBEHIND);
236 }
237
238 buf << m_pattern.substr (pos, new_pos - pos) << '(';
239
240 std::size_t i;
241
242 if (m_pattern.at (tmp_pos3) == '*')
243 i = 0;
244 else
245 i = 1;
246
247 for (; i < max_length + 1; i++)
248 {
249 buf << m_pattern.substr (new_pos, tmp_pos3 - new_pos)
250 << '{' << i << '}';
251 buf << m_pattern.substr (tmp_pos3 + 1,
252 tmp_pos1 - tmp_pos3 - 1);
253 if (i != max_length)
254 buf << '|';
255 }
256 buf << ')';
257 }
258 else
259 buf << m_pattern.substr (pos, tmp_pos1 - pos);
260
261 pos = tmp_pos1;
262 }
263 }
264 else
265 {
266 buf << m_pattern.substr (pos, new_pos - pos) << "(?";
267 pos = new_pos + 2;
268 }
269
270 }
271
272 buf << m_pattern.substr (pos);
273
274 // Replace NULLs with escape sequence because conversion function c_str()
275 // will terminate string early at embedded NULLs.
276 std::string buf_str = buf.str ();
277 while ((pos = buf_str.find ('\0')) != std::string::npos)
278 buf_str.replace (pos, 1, "\\000");
279
280 int pcre_options
281 = ( (m_options.case_insensitive () ? OCTAVE_PCRE_CASELESS : 0)
282 | (m_options.dotexceptnewline () ? 0 : OCTAVE_PCRE_DOTALL)
283 | (m_options.lineanchors () ? OCTAVE_PCRE_MULTILINE : 0)
284 | (m_options.freespacing () ? OCTAVE_PCRE_EXTENDED : 0)
285 | OCTAVE_PCRE_UTF);
286
287#if defined (HAVE_PCRE2)
288 PCRE2_SIZE erroffset;
289 int errnumber;
290
291 m_code = pcre2_compile (reinterpret_cast<PCRE2_SPTR> (buf_str.c_str ()),
292 PCRE2_ZERO_TERMINATED, pcre_options,
293 &errnumber, &erroffset, nullptr);
294
295 if (! m_code)
296 {
297 // PCRE docs say:
298 //
299 // If the buffer is too small, the message is truncated (but
300 // still with a trailing zero), and the negative error code
301 // PCRE2_ERROR_NOMEMORY is returned. None of the messages are
302 // very long; a buffer size of 120 code units is ample.
303 //
304 // so we assume that 256 will be large enough to avoid truncated
305 // messages.
306
307 PCRE2_UCHAR err [256];
308 pcre2_get_error_message (errnumber, err, sizeof (err));
309 (*current_liboctave_error_handler)
310 ("%s: %s at position %zu of expression", m_who.c_str (), err,
311 erroffset);
312 }
313#else
314 const char *err;
315 int erroffset;
316
317 m_code = pcre_compile (buf_str.c_str (), pcre_options,
318 &err, &erroffset, nullptr);
319
320 if (! m_code)
321 (*current_liboctave_error_handler)
322 ("%s: %s at position %d of expression", m_who.c_str (), err, erroffset);
323#endif
324}
325
327regexp::match (const std::string& buffer) const
328{
329 // check if input is valid utf-8
330 const uint8_t *buf_str = reinterpret_cast<const uint8_t *> (buffer.c_str ());
331 if (octave_u8_check_wrapper (buf_str, buffer.length ()))
333 ("%s: the input string is invalid UTF-8", m_who.c_str ());
334
335 regexp::match_data retval;
336
337 std::list<regexp::match_element> lst;
338
339 int subpatterns;
340 int namecount;
341 int nameentrysize;
342 char *nametable;
343 std::size_t idx = 0;
344
345 octave_pcre_code *re = static_cast<octave_pcre_code *> (m_code);
346
347 octave_pcre_pattern_info (re, OCTAVE_PCRE_INFO_CAPTURECOUNT, &subpatterns);
348 octave_pcre_pattern_info (re, OCTAVE_PCRE_INFO_NAMECOUNT, &namecount);
349 octave_pcre_pattern_info (re, OCTAVE_PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
350 octave_pcre_pattern_info (re, OCTAVE_PCRE_INFO_NAMETABLE, &nametable);
351
352#if defined (HAVE_PCRE)
353 OCTAVE_LOCAL_BUFFER (OCTAVE_PCRE_SIZE, ovector, (subpatterns+1)*3);
354#endif
355
356 OCTAVE_LOCAL_BUFFER (int, nidx, namecount);
357
358 for (int i = 0; i < namecount; i++)
359 {
360 // Index of subpattern in first two bytes of name (MSB first).
361 // Extract index.
362 nidx[i] = (static_cast<int> (nametable[i*nameentrysize])) << 8
363 | static_cast<int> (nametable[i*nameentrysize+1]);
364 }
365
366 while (true)
367 {
368 octave_quit ();
369
370#if defined (HAVE_PCRE2)
371 pcre2_match_data *tmp_match_data
372 = pcre2_match_data_create_from_pattern (re, nullptr);
373
374 unwind_action cleanup_match_data ([tmp_match_data] () { pcre2_match_data_free (tmp_match_data); });
375
376 int matches = pcre2_match (re, reinterpret_cast<PCRE2_SPTR> (buffer.c_str ()),
377 buffer.length (), idx,
378 PCRE2_NO_UTF_CHECK | (idx ? PCRE2_NOTBOL : 0),
379 tmp_match_data, nullptr);
380
381 if (matches < 0 && matches != PCRE2_ERROR_NOMATCH)
382 (*current_liboctave_error_handler)
383 ("%s: internal error calling pcre2_match; "
384 "error code from pcre2_match is %i", m_who.c_str (), matches);
385
386 if (matches == PCRE2_ERROR_NOMATCH)
387 break;
388
389 OCTAVE_PCRE_SIZE *ovector = pcre2_get_ovector_pointer (tmp_match_data);
390#else
391 int matches = pcre_exec (re, nullptr, buffer.c_str (),
392 buffer.length (), idx,
393 PCRE_NO_UTF8_CHECK | (idx ? PCRE_NOTBOL : 0),
394 ovector, (subpatterns+1)*3);
395
396 if (matches == PCRE_ERROR_MATCHLIMIT)
397 {
398 // Try harder; start with default value for MATCH_LIMIT
399 // and increase it.
400 (*current_liboctave_warning_with_id_handler)
401 ("Octave:regexp-match-limit",
402 "your pattern caused PCRE to hit its MATCH_LIMIT; trying harder now, but this will be slow");
403
404 pcre_extra pe;
405
406 pcre_config (PCRE_CONFIG_MATCH_LIMIT,
407 static_cast<void *> (&pe.match_limit));
408
409 pe.flags = PCRE_EXTRA_MATCH_LIMIT;
410
411 int i = 0;
412 while (matches == PCRE_ERROR_MATCHLIMIT
413 && i++ < PCRE_MATCHLIMIT_MAX)
414 {
415 octave_quit ();
416
417 pe.match_limit *= 10;
418 matches = pcre_exec (re, &pe, buffer.c_str (),
419 buffer.length (), idx,
420 PCRE_NO_UTF8_CHECK
421 | (idx ? PCRE_NOTBOL : 0),
422 ovector, (subpatterns+1)*3);
423 }
424 }
425
426 if (matches < 0 && matches != PCRE_ERROR_NOMATCH)
427 (*current_liboctave_error_handler)
428 ("%s: internal error calling pcre_exec; "
429 "error code from pcre_exec is %i", m_who.c_str (), matches);
430
431 if (matches == PCRE_ERROR_NOMATCH)
432 break;
433#endif
434 if (ovector[0] >= ovector[1] && ! m_options.emptymatch ())
435 {
436 // Zero length match. Skip to next char.
437 idx = ovector[0] + 1;
438 if (idx < buffer.length ())
439 continue;
440 else
441 break;
442 }
443 else
444 {
445 int pos_match = 0;
446 Matrix token_extents (matches-1, 2);
447
448 for (int i = 1; i < matches; i++)
449 {
450#if defined (HAVE_PCRE2)
451 if (ovector[2*i] != PCRE2_SIZE_MAX
452#else
453 if (ovector[2*i] >= 0
454#endif
455 && ovector[2*i+1] > 0
456 && (i == 1 || ovector[2*i] != ovector[2*i-2]
457 || ovector[2*i-1] != ovector[2*i+1]))
458 {
459 token_extents(pos_match, 0) = double (ovector[2*i]+1);
460 token_extents(pos_match++, 1) = double (ovector[2*i+1]);
461 }
462 }
463
464 token_extents.resize (pos_match, 2);
465
466 OCTAVE_PCRE_SIZE start = ovector[0] + 1;
467 OCTAVE_PCRE_SIZE end = ovector[1];
468
469#if defined (HAVE_PCRE2)
470 // Must use explicit length constructor as match can contain '\0'.
471 std::string match_string = std::string (buffer.c_str() + start - 1,
472 end - start + 1);
473#else
474 const char **listptr;
475 int status = pcre_get_substring_list (buffer.c_str (), ovector,
476 matches, &listptr);
477
478 if (status == PCRE_ERROR_NOMEMORY)
479 (*current_liboctave_error_handler)
480 ("%s: cannot allocate memory in pcre_get_substring_list",
481 m_who.c_str ());
482
483 // Must use explicit length constructor as match can contain '\0'.
484 std::string match_string = std::string (*listptr, end - start + 1);
485#endif
486
487 string_vector tokens (pos_match);
488 string_vector named_tokens (m_names);
489#if ! defined (HAVE_PCRE2)
490 int pos_offset = 0;
491#endif
492 pos_match = 0;
493
494 for (int i = 1; i < matches; i++)
495 {
496#if defined (HAVE_PCRE2)
497 if (ovector[2*i] != PCRE2_SIZE_MAX
498#else
499 if (ovector[2*i] >= 0
500#endif
501 && ovector[2*i+1] > 0)
502 {
503 if (i == 1 || ovector[2*i] != ovector[2*i-2]
504 || ovector[2*i-1] != ovector[2*i+1])
505 {
506 if (namecount > 0)
507 {
508 // FIXME: Should probably do this with a map()
509 // rather than a linear search. However,
510 // the number of captured, named expressions
511 // is usually pretty small (< 4)
512 for (int j = 0; j < namecount; j++)
513 {
514 if (nidx[j] == i)
515 {
516 std::size_t len = ovector[2*i+1] - ovector[2*i];
517 named_tokens(m_named_idx(j))
518#if defined (HAVE_PCRE2)
519 = std::string (buffer.c_str () + ovector[2*i], len);
520#else
521 = std::string (*(listptr+i-pos_offset), len);
522#endif
523 break;
524 }
525 }
526 }
527
528 std::size_t len = ovector[2*i+1] - ovector[2*i];
529#if defined (HAVE_PCRE2)
530 tokens(pos_match++) = std::string (buffer.c_str() + ovector[2*i], len);
531#else
532 tokens(pos_match++) = std::string (*(listptr+i), len);
533#endif
534 }
535#if ! defined (HAVE_PCRE2)
536 else
537 pos_offset++;
538#endif
539 }
540 }
541
542#if ! defined (HAVE_PCRE2)
543 pcre_free_substring_list (listptr);
544#endif
545
546 // FIXME: MATCH_ELEMENT uses double values for these,
547 // presumably because that is what the Octave interpreter
548 // uses. Should we check that the values don't exceed
549 // flintmax here? It seems unlikely that it would happen,
550 // but...
551
552 double dstart = static_cast<double> (start);
553 double dend = static_cast<double> (end);
554
555 regexp::match_element new_elem (named_tokens, tokens, match_string,
556 token_extents,
557 dstart, dend);
558
559 lst.push_back (new_elem);
560
561 if (ovector[1] <= ovector[0])
562 {
563 // Zero length match. Skip to next char.
564 idx = ovector[0] + 1;
565 if (idx <= buffer.length ())
566 continue;
567 }
568 else
569 idx = ovector[1];
570
571 if (m_options.once () || idx >= buffer.length ())
572 break;
573 }
574 }
575
576 retval = regexp::match_data (lst, m_named_pats);
577
578 return retval;
579}
580
581bool
582regexp::is_match (const std::string& buffer) const
583{
584 regexp::match_data rx_lst = match (buffer);
585
586 return rx_lst.size () > 0;
587}
588
590regexp::is_match (const string_vector& buffer) const
591{
592 octave_idx_type len = buffer.numel ();
593
594 Array<bool> retval (dim_vector (len, 1));
595
596 for (octave_idx_type i = 0; i < buffer.numel (); i++)
597 retval(i) = is_match (buffer(i));
598
599 return retval;
600}
601
// Declare rep_token_t used in processing replacement string.
// Describes one "$N" reference found in the replacement text.  Members
// are zero-initialized so that a default-constructed token never holds
// indeterminate values.
struct rep_token_t
{
  // Offset of the '$' within the replacement string.
  std::size_t pos = 0;

  // Value of the digit following the '$' (0 through 9).
  int num = 0;
};
608
609std::string
610regexp::replace (const std::string& buffer,
611 const std::string& replacement) const
612{
613 std::string retval;
614
615 const regexp::match_data rx_lst = match (buffer);
616
617 std::size_t num_matches = rx_lst.size ();
618
619 if (num_matches == 0)
620 {
621 retval = buffer;
622 return retval;
623 }
624
625 // Identify replacement tokens; build a vector of group numbers in
626 // the replacement string so that we can quickly calculate the size
627 // of the replacement.
628
629 // FIXME: All code assumes that only 10 tokens ($0-$9) exist.
630 // $11 represents $1 followed by the character '1' rather than
631 // the eleventh capture buffer.
632
633 std::string repstr = replacement;
634 std::vector<rep_token_t> tokens;
635 tokens.reserve (5); // Reserve memory for 5 pattern replacements
636
637 for (std::size_t i=0; i < repstr.size (); i++)
638 {
639 if (repstr[i] == '\\')
640 {
641 if (i < repstr.size () - 1 && repstr[i+1] == '$')
642 {
643 repstr.erase (i, 1); // erase backslash
644 i++; // skip over '$'
645 continue;
646 }
647 if (i < repstr.size () - 1 && repstr[i+1] == '\\')
648 {
649 repstr.erase (i, 1); // erase 1st backslash
650 continue;
651 }
652 }
653 else if (repstr[i] == '$')
654 {
655 if (i < repstr.size () - 1 && isdigit (repstr[i+1]))
656 {
657 rep_token_t tmp_token;
658
659 tmp_token.pos = i;
660 tmp_token.num = repstr[i+1]-'0';
661 tokens.push_back (tmp_token);
662 }
663 }
664 }
665
666 std::string rep;
667 int num_tokens = tokens.size ();
668
669 if (num_tokens > 0)
670 {
671 // Determine replacement length
672 const std::size_t replen = repstr.size () - 2*num_tokens;
673 int delta = 0;
674 auto p = rx_lst.begin ();
675 for (std::size_t i = 0; i < num_matches; i++)
676 {
677 octave_quit ();
678
679 double start = p->start ();
680 double end = p->end ();
681
682 const Matrix pairs (p->token_extents ());
683 std::size_t pairlen = 0;
684 for (int j = 0; j < num_tokens; j++)
685 {
686 if (tokens[j].num == 0)
687 pairlen += static_cast<std::size_t> (end - start + 1);
688 else if (tokens[j].num <= pairs.rows ())
689 pairlen += static_cast<std::size_t> (pairs(tokens[j].num-1, 1)
690 - pairs(tokens[j].num-1, 0)
691 + 1);
692 }
693 delta += (static_cast<int> (replen + pairlen)
694 - static_cast<int> (end - start + 1));
695 p++;
696 }
697
698 // Build replacement string
699 rep.reserve (buffer.size () + delta);
700 std::size_t from = 0;
701 p = rx_lst.begin ();
702 for (std::size_t i = 0; i < num_matches; i++)
703 {
704 octave_quit ();
705
706 double start = p->start ();
707 double end = p->end ();
708
709 const Matrix pairs (p->token_extents ());
710 rep.append (&buffer[from], static_cast<std::size_t> (start - 1 - from));
711 from = static_cast<std::size_t> (end);
712
713 std::size_t cur_pos = 0;
714
715 for (int j = 0; j < num_tokens; j++)
716 {
717 rep.append (&repstr[cur_pos], (tokens[j].pos) - cur_pos);
718 cur_pos = tokens[j].pos+2;
719
720 int k = tokens[j].num;
721 if (k == 0)
722 {
723 // replace with entire match
724 rep.append (&buffer[static_cast<std::size_t> (end - 1)],
725 static_cast<std::size_t> (end - start + 1));
726 }
727 else if (k <= pairs.rows ())
728 {
729 // replace with group capture
730 rep.append (&buffer[static_cast<std::size_t> (pairs(k-1, 0)-1)],
731 static_cast<std::size_t> (pairs(k-1, 1)
732 - pairs(k-1, 0) + 1));
733 }
734 else
735 {
736 // replace with nothing
737 }
738 }
739 if (cur_pos < repstr.size ())
740 rep.append (&repstr[cur_pos], repstr.size () - cur_pos);
741
742 p++;
743 }
744 rep.append (&buffer[from], buffer.size () - from);
745 }
746 else
747 {
748 // Determine repstr length
749 const std::size_t replen = repstr.size ();
750 int delta = 0;
751 auto p = rx_lst.begin ();
752 for (std::size_t i = 0; i < num_matches; i++)
753 {
754 octave_quit ();
755
756 delta += static_cast<int> (replen)
757 - static_cast<int> (p->end () - p->start () + 1);
758 p++;
759 }
760
761 // Build replacement string
762 rep.reserve (buffer.size () + delta);
763 std::size_t from = 0;
764 p = rx_lst.begin ();
765 for (std::size_t i = 0; i < num_matches; i++)
766 {
767 octave_quit ();
768
769 rep.append (&buffer[from],
770 static_cast<std::size_t> (p->start () - 1 - from));
771 from = static_cast<std::size_t> (p->end ());
772 rep.append (repstr);
773 p++;
774 }
775 rep.append (&buffer[from], buffer.size () - from);
776 }
777
778 retval = rep;
779 return retval;
780}
781
782OCTAVE_END_NAMESPACE(octave)
N Dimensional Array with copy-on-write semantics.
Definition Array.h:130
octave_idx_type rows() const
Definition Array.h:463
void resize(const dim_vector &dv, const T &rfv)
Size of the specified dimension.
void resize(octave_idx_type nr, octave_idx_type nc, double rfv=0)
Definition dMatrix.h:156
Vector representing the dimensions (size) of an Array.
Definition dim-vector.h:90
void dotexceptnewline(bool val)
Definition lo-regexp.h:137
void lineanchors(bool val)
Definition lo-regexp.h:140
void case_insensitive(bool val)
Definition lo-regexp.h:136
void freespacing(bool val)
Definition lo-regexp.h:139
void emptymatch(bool val)
Definition lo-regexp.h:138
void once(bool val)
Definition lo-regexp.h:141
std::string replace(const std::string &buffer, const std::string &replacement) const
Definition lo-regexp.cc:610
bool is_match(const std::string &buffer) const
Definition lo-regexp.cc:582
match_data match(const std::string &buffer) const
Definition lo-regexp.cc:327
string_vector & append(const std::string &s)
Definition str-vec.cc:110
octave_idx_type numel() const
Definition str-vec.h:98
OCTAVE_BEGIN_NAMESPACE(octave) static octave_value daspk_fcn
OCTAVE_NORETURN liboctave_error_handler current_liboctave_error_handler
Definition lo-error.c:41
#define MAXLOOKBEHIND
Definition lo-regexp.cc:106
#define PCRE_MATCHLIMIT_MAX
Definition lo-regexp.cc:103
#define OCTAVE_LOCAL_BUFFER(T, buf, size)
Definition oct-locbuf.h:44
const uint8_t * octave_u8_check_wrapper(const uint8_t *src, size_t n)
F77_RET_T len
Definition xerbla.cc:61