GNU Octave  9.1.0
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab
lo-regexp.cc
Go to the documentation of this file.
1 ////////////////////////////////////////////////////////////////////////
2 //
3 // Copyright (C) 2002-2024 The Octave Project Developers
4 //
5 // See the file COPYRIGHT.md in the top-level directory of this
6 // distribution or <https://octave.org/copyright/>.
7 //
8 // This file is part of Octave.
9 //
10 // Octave is free software: you can redistribute it and/or modify it
11 // under the terms of the GNU General Public License as published by
12 // the Free Software Foundation, either version 3 of the License, or
13 // (at your option) any later version.
14 //
15 // Octave is distributed in the hope that it will be useful, but
16 // WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 // GNU General Public License for more details.
19 //
20 // You should have received a copy of the GNU General Public License
21 // along with Octave; see the file COPYING. If not, see
22 // <https://www.gnu.org/licenses/>.
23 //
24 ////////////////////////////////////////////////////////////////////////
25 
26 #if defined (HAVE_CONFIG_H)
27 # include "config.h"
28 #endif
29 
30 #include <list>
31 #include <sstream>
32 #include <string>
33 #include <vector>
34 
35 #if defined (HAVE_PCRE2)
36 # define PCRE2_CODE_UNIT_WIDTH 8
37 # if defined (HAVE_PCRE2_H)
38 # include <pcre2.h>
39 # elif defined (HAVE_PCRE2_PCRE2_H)
40 # include <pcre2/pcre2.h>
41 # endif
42 #elif defined (HAVE_PCRE)
43 # if defined (HAVE_PCRE_H)
44 # include <pcre.h>
45 # elif defined (HAVE_PCRE_PCRE_H)
46 # include <pcre/pcre.h>
47 # endif
48 #endif
49 
50 #include "Matrix.h"
51 #include "base-list.h"
52 #include "lo-error.h"
53 #include "oct-locbuf.h"
54 #include "quit.h"
55 #include "lo-regexp.h"
56 #include "str-vec.h"
57 #include "unistr-wrappers.h"
58 #include "unwind-prot.h"
59 
// Compatibility layer: give the PCRE2 and the older PCRE APIs a common set
// of type names, option flags and info-request codes so the code below can
// be written once.  PCRE2 is used when HAVE_PCRE2 is defined, otherwise
// PCRE; building with neither is a hard error.
60 #if defined (HAVE_PCRE2)
// Compiled-pattern type and the integer type used for match offsets.
61 typedef pcre2_code octave_pcre_code;
62 typedef PCRE2_SIZE OCTAVE_PCRE_SIZE;
// Function used to release a compiled pattern.
63 void (*octave_pcre_code_free) (octave_pcre_code *) = pcre2_code_free;
// Compile options.
64 # define OCTAVE_PCRE_CASELESS PCRE2_CASELESS
65 # define OCTAVE_PCRE_DOTALL PCRE2_DOTALL
66 # define OCTAVE_PCRE_MULTILINE PCRE2_MULTILINE
67 # define OCTAVE_PCRE_EXTENDED PCRE2_EXTENDED
68 # define OCTAVE_PCRE_UTF PCRE2_UTF
// Request codes passed to octave_pcre_pattern_info.
69 # define OCTAVE_PCRE_INFO_CAPTURECOUNT PCRE2_INFO_CAPTURECOUNT
70 # define OCTAVE_PCRE_INFO_NAMECOUNT PCRE2_INFO_NAMECOUNT
71 # define OCTAVE_PCRE_INFO_NAMEENTRYSIZE PCRE2_INFO_NAMEENTRYSIZE
72 # define OCTAVE_PCRE_INFO_NAMETABLE PCRE2_INFO_NAMETABLE
73 #elif defined (HAVE_PCRE)
// Same names mapped onto the PCRE (non-PCRE2) API; offsets are plain int.
74 typedef pcre octave_pcre_code;
75 typedef int OCTAVE_PCRE_SIZE;
76 void (*octave_pcre_code_free) (void *) = pcre_free;
77 # define OCTAVE_PCRE_CASELESS PCRE_CASELESS
78 # define OCTAVE_PCRE_DOTALL PCRE_DOTALL
79 # define OCTAVE_PCRE_MULTILINE PCRE_MULTILINE
80 # define OCTAVE_PCRE_EXTENDED PCRE_EXTENDED
81 # define OCTAVE_PCRE_UTF PCRE_UTF8
82 # define OCTAVE_PCRE_INFO_CAPTURECOUNT PCRE_INFO_CAPTURECOUNT
83 # define OCTAVE_PCRE_INFO_NAMECOUNT PCRE_INFO_NAMECOUNT
84 # define OCTAVE_PCRE_INFO_NAMEENTRYSIZE PCRE_INFO_NAMEENTRYSIZE
85 # define OCTAVE_PCRE_INFO_NAMETABLE PCRE_INFO_NAMETABLE
86 #else
87 # error "PCRE2 or PCRE library is required to build Octave"
88 #endif
89 
90 static inline int
91 octave_pcre_pattern_info (const octave_pcre_code *code, int what, void *where)
92 {
93 #if defined (HAVE_PCRE2)
94  return pcre2_pattern_info (code, what, where);
95 #else
96  return pcre_fullinfo (code, nullptr, what, where);
97 #endif
98 }
99 
101 
102 // Maximum number of times a match is retried, each time with a tenfold
103 // larger match limit, after PCRE reports that MATCH_LIMIT was reached.
104 #define PCRE_MATCHLIMIT_MAX 10
105 
// Largest repetition count generated when a variable-length lookbehind
// is expanded into fixed-length alternatives.
106 // FIXME: should this be configurable?
107 #define MAXLOOKBEHIND 10
108 
// Set when the lookbehind-limit warning is issued so it is not repeated.
109 static bool lookbehind_warned = false;
110 
111 // FIXME: don't bother collecting and composing return values
112 // the user doesn't want.
113 
114 void
115 regexp::free ()
116 {
117  octave_pcre_code_free (static_cast<octave_pcre_code *> (m_code));
118 }
119 
// Translate m_pattern into a form the PCRE library accepts and compile it,
// storing the result in m_code.
//
// Two rewrites are applied to every "(?" construct found in the pattern:
//  - named tokens "(?<name>...)" become "(?P<nNNN>...)" with a generated,
//    zero-padded dummy name; the real names are collected in m_named_pats
//    and m_named_idx maps each dummy group to its entry there;
//  - lookbehind groups containing '*' or '+' are expanded into an
//    alternation of fixed repetition counts up to MAXLOOKBEHIND.
// Embedded NUL characters are replaced by the escape "\000" before the
// pattern is handed to the library.  Compilation failures are reported
// through current_liboctave_error_handler.
120 void
121 regexp::compile_internal ()
122 {
123  // If we had a previously compiled pattern, release it.
124  free ();
125 
126  std::size_t max_length = MAXLOOKBEHIND;
127 
    // POS is the start of the part of m_pattern not yet copied to BUF;
    // INAMES counts the named tokens rewritten so far.
128  std::size_t pos = 0;
129  std::size_t new_pos;
130  int inames = 0;
131  std::ostringstream buf;
132 
133  while ((new_pos = m_pattern.find ("(?", pos)) != std::string::npos)
134  {
135  std::size_t tmp_pos;
    // Case 1: "(?<" not followed by '=' or '!', with a later '>' and ')':
    // treat it as a named token.
136  if (m_pattern.size () > new_pos + 2
137  && m_pattern.at (new_pos + 2) == '<'
138  && ! (m_pattern.size () > new_pos + 3
139  && (m_pattern.at (new_pos + 3) == '='
140  || m_pattern.at (new_pos + 3) == '!'))
141  && (tmp_pos = m_pattern.find_first_of ('>', new_pos))
142  != std::string::npos
143  && m_pattern.find_first_of (')', tmp_pos) != std::string::npos)
144  {
145  // The syntax of named tokens in pcre is "(?P<name>...)" while
146  // we need a syntax "(?<name>...)", so fix that here. Also an
147  // expression like
148  // "(?<first>\w+)\s+(?<last>\w+)|(?<last>\w+),\s+(?<first>\w+)"
149  // should be perfectly legal, while pcre does not allow the same
150  // named token name on both sides of the alternative. Also fix
151  // that here by replacing name tokens by dummy names, and dealing
152  // with the dummy names later.
153 
    // Text between '<' and '>'.
154  std::string tmp_name
155  = m_pattern.substr (new_pos+3, tmp_pos-new_pos-3);
156 
157  bool found = false;
158 
    // Reuse the existing entry if this name has been seen before.
159  for (int i = 0; i < m_names; i++)
160  {
161  if (m_named_pats(i) == tmp_name)
162  {
163  m_named_idx.resize (dim_vector (inames+1, 1));
164  m_named_idx(inames) = i;
165  found = true;
166  break;
167  }
168  }
169 
    // Otherwise record it as a new name.
170  if (! found)
171  {
172  m_named_idx.resize (dim_vector (inames+1, 1));
173  m_named_idx(inames) = m_names;
174  m_named_pats.append (tmp_name);
175  m_names++;
176  }
177 
    // Copy the text preceding the token, then emit the dummy name
    // padded to three digits.  POS is left on the '>' so the rest of
    // the group is copied later.
178  if (new_pos - pos > 0)
179  buf << m_pattern.substr (pos, new_pos-pos);
180  if (inames < 10)
181  buf << "(?P<n00" << inames++;
182  else if (inames < 100)
183  buf << "(?P<n0" << inames++;
184  else
185  buf << "(?P<n" << inames++;
186 
187  pos = tmp_pos;
188  }
189  else if (m_pattern.size () > new_pos + 2
190  && m_pattern.at (new_pos + 2) == '<')
191  {
192  // Find lookbehind operators of arbitrary length (ie like
193  // "(?<=[a-z]*)") and replace with a maximum length operator
194  // as PCRE can not yet handle arbitrary length lookbehind
195  // operators. The expansion is limited to MAXLOOKBEHIND
196  // repetitions.
197 
    // Scan forward to the ')' that closes this group.  TMP_POS1 ends
    // one past it; TMP_POS2 tracks the last ')' of a nested group.
198  int brackets = 1;
199  std::size_t tmp_pos1 = new_pos + 2;
200  std::size_t tmp_pos2 = tmp_pos1;
201 
202  while (tmp_pos1 < m_pattern.length () && brackets > 0)
203  {
204  char ch = m_pattern.at (tmp_pos1);
205 
206  if (ch == '(')
207  brackets++;
208  else if (ch == ')')
209  {
210  if (brackets > 1)
211  tmp_pos2 = tmp_pos1;
212 
213  brackets--;
214  }
215 
216  tmp_pos1++;
217  }
218 
    // Unbalanced group: copy "(?" unchanged and move on.
219  if (brackets != 0)
220  {
221  buf << m_pattern.substr (pos, new_pos - pos) << "(?";
222  pos = new_pos + 2;
223  }
224  else
225  {
    // Look for a '*' or '+' quantifier inside the group.
226  std::size_t tmp_pos3 = m_pattern.find_first_of ("*+", tmp_pos2);
227 
228  if (tmp_pos3 != std::string::npos && tmp_pos3 < tmp_pos1)
229  {
230  if (! lookbehind_warned)
231  {
232  lookbehind_warned = true;
233  (*current_liboctave_warning_with_id_handler)
234  ("Octave:regexp-lookbehind-limit",
235  "%s: arbitrary length lookbehind patterns are only supported up to length %d",
236  m_who.c_str (), MAXLOOKBEHIND);
237  }
238 
239  buf << m_pattern.substr (pos, new_pos - pos) << '(';
240 
241  std::size_t i;
242 
    // '*' allows zero repetitions, '+' needs at least one.
243  if (m_pattern.at (tmp_pos3) == '*')
244  i = 0;
245  else
246  i = 1;
247 
    // Emit one alternative per repetition count, replacing the
    // quantifier with "{i}".
248  for (; i < max_length + 1; i++)
249  {
250  buf << m_pattern.substr (new_pos, tmp_pos3 - new_pos)
251  << '{' << i << '}';
252  buf << m_pattern.substr (tmp_pos3 + 1,
253  tmp_pos1 - tmp_pos3 - 1);
254  if (i != max_length)
255  buf << '|';
256  }
257  buf << ')';
258  }
259  else
260  buf << m_pattern.substr (pos, tmp_pos1 - pos);
261 
262  pos = tmp_pos1;
263  }
264  }
265  else
266  {
    // Any other "(?" construct is copied through unchanged.
267  buf << m_pattern.substr (pos, new_pos - pos) << "(?";
268  pos = new_pos + 2;
269  }
270 
271  }
272 
    // Copy whatever follows the last "(?" construct.
273  buf << m_pattern.substr (pos);
274 
275  // Replace NULLs with escape sequence because conversion function c_str()
276  // will terminate string early at embedded NULLs.
277  std::string buf_str = buf.str ();
278  while ((pos = buf_str.find ('\0')) != std::string::npos)
279  buf_str.replace (pos, 1, "\\000");
280 
    // Translate m_options into library flags.  DOTALL is set unless
    // dotexceptnewline is requested; UTF mode is always enabled.
281  int pcre_options
282  = ( (m_options.case_insensitive () ? OCTAVE_PCRE_CASELESS : 0)
283  | (m_options.dotexceptnewline () ? 0 : OCTAVE_PCRE_DOTALL)
284  | (m_options.lineanchors () ? OCTAVE_PCRE_MULTILINE : 0)
285  | (m_options.freespacing () ? OCTAVE_PCRE_EXTENDED : 0)
286  | OCTAVE_PCRE_UTF);
287 
288 #if defined (HAVE_PCRE2)
289  PCRE2_SIZE erroffset;
290  int errnumber;
291 
292  m_code = pcre2_compile (reinterpret_cast<PCRE2_SPTR> (buf_str.c_str ()),
293  PCRE2_ZERO_TERMINATED, pcre_options,
294  &errnumber, &erroffset, nullptr);
295 
296  if (! m_code)
297  {
298  // PCRE docs say:
299  //
300  // If the buffer is too small, the message is truncated (but
301  // still with a trailing zero), and the negative error code
302  // PCRE2_ERROR_NOMEMORY is returned. None of the messages are
303  // very long; a buffer size of 120 code units is ample.
304  //
305  // so we assume that 256 will be large enough to avoid truncated
306  // messages.
307 
308  PCRE2_UCHAR err [256];
309  pcre2_get_error_message (errnumber, err, sizeof (err));
310  (*current_liboctave_error_handler)
311  ("%s: %s at position %zu of expression", m_who.c_str (), err,
312  erroffset);
313  }
314 #else
315  const char *err;
316  int erroffset;
317 
318  m_code = pcre_compile (buf_str.c_str (), pcre_options,
319  &err, &erroffset, nullptr);
320 
321  if (! m_code)
322  (*current_liboctave_error_handler)
323  ("%s: %s at position %d of expression", m_who.c_str (), err, erroffset);
324 #endif
325 }
326 
// Search BUFFER for matches of the compiled pattern and return them as a
// regexp::match_data built from the list of match elements and
// m_named_pats.
//
// BUFFER is first checked with octave_u8_check_wrapper and rejected if it
// is not valid UTF-8.  Each recorded match carries its text, its 1-based
// start and inclusive end position, the extents and text of its tokens,
// and the text of its named tokens.  Zero-length matches are skipped
// unless m_options.emptymatch () is set, and the search stops after the
// first recorded match when m_options.once () is set.
//
// NOTE(review): this listing appears to have lost two lines: the one
// carrying the return type (presumably regexp::match_data, the type of
// RETVAL) and the error-handler call that precedes the "invalid UTF-8"
// argument list below.
328 regexp::match (const std::string& buffer) const
329 {
330  // check if input is valid utf-8
331  const uint8_t *buf_str = reinterpret_cast<const uint8_t *> (buffer.c_str ());
332  if (octave_u8_check_wrapper (buf_str, buffer.length ()))
334  ("%s: the input string is invalid UTF-8", m_who.c_str ());
335 
336  regexp::match_data retval;
337 
338  std::list<regexp::match_element> lst;
339 
340  int subpatterns;
341  int namecount;
342  int nameentrysize;
343  char *nametable;
    // Offset in BUFFER at which the next search starts.
344  std::size_t idx = 0;
345 
346  octave_pcre_code *re = static_cast<octave_pcre_code *> (m_code);
347 
    // Ask the library for the group count and the name table layout.
348  octave_pcre_pattern_info (re, OCTAVE_PCRE_INFO_CAPTURECOUNT, &subpatterns);
349  octave_pcre_pattern_info (re, OCTAVE_PCRE_INFO_NAMECOUNT, &namecount);
350  octave_pcre_pattern_info (re, OCTAVE_PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
351  octave_pcre_pattern_info (re, OCTAVE_PCRE_INFO_NAMETABLE, &nametable);
352 
353 #if defined (HAVE_PCRE)
    // With PCRE the caller supplies the offset vector.
354  OCTAVE_LOCAL_BUFFER (OCTAVE_PCRE_SIZE, ovector, (subpatterns+1)*3);
355 #endif
356 
    // nidx[i] is the group number of the i-th name table entry.
357  OCTAVE_LOCAL_BUFFER (int, nidx, namecount);
358 
359  for (int i = 0; i < namecount; i++)
360  {
361  // Index of subpattern in first two bytes of name (MSB first).
362  // Extract index.
363  nidx[i] = (static_cast<int> (nametable[i*nameentrysize])) << 8
364  | static_cast<int> (nametable[i*nameentrysize+1]);
365  }
366 
    // One iteration per search attempt, each starting at IDX.
367  while (true)
368  {
369  octave_quit ();
370 
371 #if defined (HAVE_PCRE2)
    // A fresh match-data block per attempt, released when the
    // unwind_action goes out of scope.
372  pcre2_match_data *m_data
373  = pcre2_match_data_create_from_pattern (re, nullptr);
374 
375  unwind_action cleanup_match_data
376  ([=] () { pcre2_match_data_free (m_data); });
377 
    // UTF validity was checked above, so the library check is
    // disabled; NOTBOL is passed for every search not starting at 0.
378  int matches = pcre2_match (re, reinterpret_cast<PCRE2_SPTR> (buffer.c_str ()),
379  buffer.length (), idx,
380  PCRE2_NO_UTF_CHECK | (idx ? PCRE2_NOTBOL : 0),
381  m_data, nullptr);
382 
383  if (matches < 0 && matches != PCRE2_ERROR_NOMATCH)
384  (*current_liboctave_error_handler)
385  ("%s: internal error calling pcre2_match; "
386  "error code from pcre2_match is %i", m_who.c_str (), matches);
387 
388  if (matches == PCRE2_ERROR_NOMATCH)
389  break;
390 
391  OCTAVE_PCRE_SIZE *ovector = pcre2_get_ovector_pointer (m_data);
392 #else
393  int matches = pcre_exec (re, nullptr, buffer.c_str (),
394  buffer.length (), idx,
395  PCRE_NO_UTF8_CHECK | (idx ? PCRE_NOTBOL : 0),
396  ovector, (subpatterns+1)*3);
397 
398  if (matches == PCRE_ERROR_MATCHLIMIT)
399  {
400  // Try harder; start with default value for MATCH_LIMIT
401  // and increase it.
402  (*current_liboctave_warning_with_id_handler)
403  ("Octave:regexp-match-limit",
404  "your pattern caused PCRE to hit its MATCH_LIMIT; trying harder now, but this will be slow");
405 
406  pcre_extra pe;
407 
408  pcre_config (PCRE_CONFIG_MATCH_LIMIT,
409  static_cast<void *> (&pe.match_limit));
410 
411  pe.flags = PCRE_EXTRA_MATCH_LIMIT;
412 
    // Retry with the limit multiplied by ten each time, at most
    // PCRE_MATCHLIMIT_MAX times.
413  int i = 0;
414  while (matches == PCRE_ERROR_MATCHLIMIT
415  && i++ < PCRE_MATCHLIMIT_MAX)
416  {
417  octave_quit ();
418 
419  pe.match_limit *= 10;
420  matches = pcre_exec (re, &pe, buffer.c_str (),
421  buffer.length (), idx,
422  PCRE_NO_UTF8_CHECK
423  | (idx ? PCRE_NOTBOL : 0),
424  ovector, (subpatterns+1)*3);
425  }
426  }
427 
428  if (matches < 0 && matches != PCRE_ERROR_NOMATCH)
429  (*current_liboctave_error_handler)
430  ("%s: internal error calling pcre_exec; "
431  "error code from pcre_exec is %i", m_who.c_str (), matches);
432 
433  if (matches == PCRE_ERROR_NOMATCH)
434  break;
435 #endif
    // ovector[0] and ovector[1] are the start and end offsets of the
    // whole match; pair i holds the offsets of group i.
436  if (ovector[0] >= ovector[1] && ! m_options.emptymatch ())
437  {
438  // Zero length match. Skip to next char.
439  idx = ovector[0] + 1;
440  if (idx < buffer.length ())
441  continue;
442  else
443  break;
444  }
445  else
446  {
    // First pass over the groups: collect token extents as 1-based
    // start and inclusive end.  A group is skipped when it is unset,
    // ends at offset 0, or has the same extents as the preceding
    // group.
447  int pos_match = 0;
448  Matrix token_extents (matches-1, 2);
449 
450  for (int i = 1; i < matches; i++)
451  {
452 #if defined (HAVE_PCRE2)
453  if (ovector[2*i] != PCRE2_SIZE_MAX
454 #else
455  if (ovector[2*i] >= 0
456 #endif
457  && ovector[2*i+1] > 0
458  && (i == 1 || ovector[2*i] != ovector[2*i-2]
459  || ovector[2*i-1] != ovector[2*i+1]))
460  {
461  token_extents(pos_match, 0) = double (ovector[2*i]+1);
462  token_extents(pos_match++, 1) = double (ovector[2*i+1]);
463  }
464  }
465 
    // Shrink to the number of tokens actually recorded.
466  token_extents.resize (pos_match, 2);
467 
    // 1-based start and inclusive end of the whole match.
468  OCTAVE_PCRE_SIZE start = ovector[0] + 1;
469  OCTAVE_PCRE_SIZE end = ovector[1];
470 
471 #if defined (HAVE_PCRE2)
472  // Must use explicit length constructor as match can contain '\0'.
473  std::string match_string = std::string (buffer.c_str() + start - 1,
474  end - start + 1);
475 #else
476  const char **listptr;
477  int status = pcre_get_substring_list (buffer.c_str (), ovector,
478  matches, &listptr);
479 
480  if (status == PCRE_ERROR_NOMEMORY)
481  (*current_liboctave_error_handler)
482  ("%s: cannot allocate memory in pcre_get_substring_list",
483  m_who.c_str ());
484 
485  // Must use explicit length constructor as match can contain '\0'.
486  std::string match_string = std::string (*listptr, end - start + 1);
487 #endif
488 
    // Second pass over the groups: extract the token text and fill
    // in the named tokens.
489  string_vector tokens (pos_match);
490  string_vector named_tokens (m_names);
491 #if ! defined (HAVE_PCRE2)
492  int pos_offset = 0;
493 #endif
494  pos_match = 0;
495 
496  for (int i = 1; i < matches; i++)
497  {
498 #if defined (HAVE_PCRE2)
499  if (ovector[2*i] != PCRE2_SIZE_MAX
500 #else
501  if (ovector[2*i] >= 0
502 #endif
503  && ovector[2*i+1] > 0)
504  {
505  if (i == 1 || ovector[2*i] != ovector[2*i-2]
506  || ovector[2*i-1] != ovector[2*i+1])
507  {
508  if (namecount > 0)
509  {
510  // FIXME: Should probably do this with a map()
511  // rather than a linear search. However,
512  // the number of captured, named expressions
513  // is usually pretty small (< 4)
514  for (int j = 0; j < namecount; j++)
515  {
516  if (nidx[j] == i)
517  {
518  std::size_t len = ovector[2*i+1] - ovector[2*i];
519  named_tokens(m_named_idx(j))
520 #if defined (HAVE_PCRE2)
521  = std::string (buffer.c_str () + ovector[2*i], len);
522 #else
523  = std::string (*(listptr+i-pos_offset), len);
524 #endif
525  break;
526  }
527  }
528  }
529 
530  std::size_t len = ovector[2*i+1] - ovector[2*i];
531 #if defined (HAVE_PCRE2)
532  tokens(pos_match++) = std::string (buffer.c_str() + ovector[2*i], len);
533 #else
534  tokens(pos_match++) = std::string (*(listptr+i), len);
535 #endif
536  }
537 #if ! defined (HAVE_PCRE2)
538  else
539  pos_offset++;
540 #endif
541  }
542  }
543 
544 #if ! defined (HAVE_PCRE2)
545  pcre_free_substring_list (listptr);
546 #endif
547 
548  // FIXME: MATCH_ELEMENT uses double values for these,
549  // presumably because that is what the Octave interpreter
550  // uses. Should we check that the values don't exceed
551  // flintmax here? It seems unlikely that it would happen,
552  // but...
553 
554  double dstart = static_cast<double> (start);
555  double dend = static_cast<double> (end);
556 
557  regexp::match_element new_elem (named_tokens, tokens, match_string,
558  token_extents,
559  dstart, dend);
560 
561  lst.push_back (new_elem);
562 
    // Choose where the next search starts: one past the start of an
    // empty match, otherwise the end of this match.
563  if (ovector[1] <= ovector[0])
564  {
565  // Zero length match. Skip to next char.
566  idx = ovector[0] + 1;
567  if (idx <= buffer.length ())
568  continue;
569  }
570  else
571  idx = ovector[1];
572 
573  if (m_options.once () || idx >= buffer.length ())
574  break;
575  }
576  }
577 
578  retval = regexp::match_data (lst, m_named_pats);
579 
580  return retval;
581 }
582 
583 bool
584 regexp::is_match (const std::string& buffer) const
585 {
586  regexp::match_data rx_lst = match (buffer);
587 
588  return rx_lst.size () > 0;
589 }
590 
// Apply the single-string is_match to every element of BUFFER and return
// the results as a column array with one bool per element.
//
// NOTE(review): the line carrying the return type (presumably Array<bool>,
// the type of RETVAL) appears to be missing from this listing.
592 regexp::is_match (const string_vector& buffer) const
593 {
594  octave_idx_type len = buffer.numel ();
595 
    // LEN x 1 result, filled element by element below.
596  Array<bool> retval (dim_vector (len, 1));
597 
598  for (octave_idx_type i = 0; i < buffer.numel (); i++)
599  retval(i) = is_match (buffer(i));
600 
601  return retval;
602 }
603 
604 // A "$N" token found in the replacement string by regexp::replace.
605 struct rep_token_t
606 {
    // Index of the '$' character within the replacement string.
607  std::size_t pos;
    // Value of the digit that follows the '$'.
608  int num;
609 };
610 
611 std::string
612 regexp::replace (const std::string& buffer,
613  const std::string& replacement) const
614 {
615  std::string retval;
616 
617  const regexp::match_data rx_lst = match (buffer);
618 
619  std::size_t num_matches = rx_lst.size ();
620 
621  if (num_matches == 0)
622  {
623  retval = buffer;
624  return retval;
625  }
626 
627  // Identify replacement tokens; build a vector of group numbers in
628  // the replacement string so that we can quickly calculate the size
629  // of the replacement.
630 
631  // FIXME: All code assumes that only 10 tokens ($0-$9) exist.
632  // $11 represents $1 followed by the character '1' rather than
633  // the eleventh capture buffer.
634 
635  std::string repstr = replacement;
636  std::vector<rep_token_t> tokens;
637  tokens.reserve (5); // Reserve memory for 5 pattern replacements
638 
639  for (std::size_t i=0; i < repstr.size (); i++)
640  {
641  if (repstr[i] == '\\')
642  {
643  if (i < repstr.size () - 1 && repstr[i+1] == '$')
644  {
645  repstr.erase (i, 1); // erase backslash
646  i++; // skip over '$'
647  continue;
648  }
649  if (i < repstr.size () - 1 && repstr[i+1] == '\\')
650  {
651  repstr.erase (i, 1); // erase 1st backslash
652  continue;
653  }
654  }
655  else if (repstr[i] == '$')
656  {
657  if (i < repstr.size () - 1 && isdigit (repstr[i+1]))
658  {
659  rep_token_t tmp_token;
660 
661  tmp_token.pos = i;
662  tmp_token.num = repstr[i+1]-'0';
663  tokens.push_back (tmp_token);
664  }
665  }
666  }
667 
668  std::string rep;
669  int num_tokens = tokens.size ();
670 
671  if (num_tokens > 0)
672  {
673  // Determine replacement length
674  const std::size_t replen = repstr.size () - 2*num_tokens;
675  int delta = 0;
676  auto p = rx_lst.begin ();
677  for (std::size_t i = 0; i < num_matches; i++)
678  {
679  octave_quit ();
680 
681  double start = p->start ();
682  double end = p->end ();
683 
684  const Matrix pairs (p->token_extents ());
685  std::size_t pairlen = 0;
686  for (int j = 0; j < num_tokens; j++)
687  {
688  if (tokens[j].num == 0)
689  pairlen += static_cast<std::size_t> (end - start + 1);
690  else if (tokens[j].num <= pairs.rows ())
691  pairlen += static_cast<std::size_t> (pairs(tokens[j].num-1, 1)
692  - pairs(tokens[j].num-1, 0)
693  + 1);
694  }
695  delta += (static_cast<int> (replen + pairlen)
696  - static_cast<int> (end - start + 1));
697  p++;
698  }
699 
700  // Build replacement string
701  rep.reserve (buffer.size () + delta);
702  std::size_t from = 0;
703  p = rx_lst.begin ();
704  for (std::size_t i = 0; i < num_matches; i++)
705  {
706  octave_quit ();
707 
708  double start = p->start ();
709  double end = p->end ();
710 
711  const Matrix pairs (p->token_extents ());
712  rep.append (&buffer[from], static_cast<std::size_t> (start - 1 - from));
713  from = static_cast<std::size_t> (end);
714 
715  std::size_t cur_pos = 0;
716 
717  for (int j = 0; j < num_tokens; j++)
718  {
719  rep.append (&repstr[cur_pos], (tokens[j].pos) - cur_pos);
720  cur_pos = tokens[j].pos+2;
721 
722  int k = tokens[j].num;
723  if (k == 0)
724  {
725  // replace with entire match
726  rep.append (&buffer[static_cast<std::size_t> (end - 1)],
727  static_cast<std::size_t> (end - start + 1));
728  }
729  else if (k <= pairs.rows ())
730  {
731  // replace with group capture
732  rep.append (&buffer[static_cast<std::size_t> (pairs(k-1, 0)-1)],
733  static_cast<std::size_t> (pairs(k-1, 1)
734  - pairs(k-1, 0) + 1));
735  }
736  else
737  {
738  // replace with nothing
739  }
740  }
741  if (cur_pos < repstr.size ())
742  rep.append (&repstr[cur_pos], repstr.size () - cur_pos);
743 
744  p++;
745  }
746  rep.append (&buffer[from], buffer.size () - from);
747  }
748  else
749  {
750  // Determine repstr length
751  const std::size_t replen = repstr.size ();
752  int delta = 0;
753  auto p = rx_lst.begin ();
754  for (std::size_t i = 0; i < num_matches; i++)
755  {
756  octave_quit ();
757 
758  delta += static_cast<int> (replen)
759  - static_cast<int> (p->end () - p->start () + 1);
760  p++;
761  }
762 
763  // Build replacement string
764  rep.reserve (buffer.size () + delta);
765  std::size_t from = 0;
766  p = rx_lst.begin ();
767  for (std::size_t i = 0; i < num_matches; i++)
768  {
769  octave_quit ();
770 
771  rep.append (&buffer[from],
772  static_cast<std::size_t> (p->start () - 1 - from));
773  from = static_cast<std::size_t> (p->end ());
774  rep.append (repstr);
775  p++;
776  }
777  rep.append (&buffer[from], buffer.size () - from);
778  }
779 
780  retval = rep;
781  return retval;
782 }
783 
784 OCTAVE_END_NAMESPACE(octave)
octave_idx_type rows() const
Definition: Array.h:459
void resize(const dim_vector &dv, const T &rfv)
Size of the specified dimension.
Definition: Array-base.cc:1023
Definition: dMatrix.h:42
void resize(octave_idx_type nr, octave_idx_type nc, double rfv=0)
Definition: dMatrix.h:158
std::size_t size() const
Definition: base-list.h:52
iterator begin()
Definition: base-list.h:65
Vector representing the dimensions (size) of an Array.
Definition: dim-vector.h:94
void dotexceptnewline(bool val)
Definition: lo-regexp.h:140
void lineanchors(bool val)
Definition: lo-regexp.h:143
void case_insensitive(bool val)
Definition: lo-regexp.h:139
void freespacing(bool val)
Definition: lo-regexp.h:142
void emptymatch(bool val)
Definition: lo-regexp.h:141
void once(bool val)
Definition: lo-regexp.h:144
std::string replace(const std::string &buffer, const std::string &replacement) const
Definition: lo-regexp.cc:612
bool is_match(const std::string &buffer) const
Definition: lo-regexp.cc:584
match_data match(const std::string &buffer) const
Definition: lo-regexp.cc:328
string_vector & append(const std::string &s)
Definition: str-vec.cc:110
octave_idx_type numel() const
Definition: str-vec.h:100
OCTAVE_BEGIN_NAMESPACE(octave) static octave_value daspk_fcn
OCTAVE_NORETURN liboctave_error_handler current_liboctave_error_handler
Definition: lo-error.c:41
#define MAXLOOKBEHIND
Definition: lo-regexp.cc:107
#define PCRE_MATCHLIMIT_MAX
Definition: lo-regexp.cc:104
#define OCTAVE_LOCAL_BUFFER(T, buf, size)
Definition: oct-locbuf.h:44
const uint8_t * octave_u8_check_wrapper(const uint8_t *src, size_t n)
F77_RET_T len
Definition: xerbla.cc:61