GNU Octave  8.1.0
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab
lo-regexp.cc
Go to the documentation of this file.
1 ////////////////////////////////////////////////////////////////////////
2 //
3 // Copyright (C) 2002-2023 The Octave Project Developers
4 //
5 // See the file COPYRIGHT.md in the top-level directory of this
6 // distribution or <https://octave.org/copyright/>.
7 //
8 // This file is part of Octave.
9 //
10 // Octave is free software: you can redistribute it and/or modify it
11 // under the terms of the GNU General Public License as published by
12 // the Free Software Foundation, either version 3 of the License, or
13 // (at your option) any later version.
14 //
15 // Octave is distributed in the hope that it will be useful, but
16 // WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 // GNU General Public License for more details.
19 //
20 // You should have received a copy of the GNU General Public License
21 // along with Octave; see the file COPYING. If not, see
22 // <https://www.gnu.org/licenses/>.
23 //
24 ////////////////////////////////////////////////////////////////////////
25 
26 #if defined (HAVE_CONFIG_H)
27 # include "config.h"
28 #endif
29 
30 #include <list>
31 #include <sstream>
32 #include <string>
33 #include <vector>
34 
35 #if defined (HAVE_PCRE2_H) || defined (HAVE_PCRE2_PCRE2_H)
36 # define PCRE2_CODE_UNIT_WIDTH 8
37 # if defined (HAVE_PCRE2_H)
38 # include <pcre2.h>
39 # elif defined (HAVE_PCRE2_PCRE2_H)
40 # include <pcre2/pcre2.h>
41 # endif
42 #elif defined (HAVE_PCRE_H) || defined (HAVE_PCRE_PCRE_H)
43 # if defined (HAVE_PCRE_H)
44 # include <pcre.h>
45 # elif defined (HAVE_PCRE_PCRE_H)
46 # include <pcre/pcre.h>
47 # endif
48 #endif
49 
50 #include "Matrix.h"
51 #include "base-list.h"
52 #include "lo-error.h"
53 #include "oct-locbuf.h"
54 #include "quit.h"
55 #include "lo-regexp.h"
56 #include "str-vec.h"
57 #include "unistr-wrappers.h"
58 #include "unwind-prot.h"
59 
60 #if defined (HAVE_PCRE2)
61 typedef pcre2_code octave_pcre_code;
62 typedef PCRE2_SIZE OCTAVE_PCRE_SIZE;
63 void (*octave_pcre_code_free) (octave_pcre_code *) = pcre2_code_free;
64 # define OCTAVE_PCRE_CASELESS PCRE2_CASELESS
65 # define OCTAVE_PCRE_DOTALL PCRE2_DOTALL
66 # define OCTAVE_PCRE_MULTILINE PCRE2_MULTILINE
67 # define OCTAVE_PCRE_EXTENDED PCRE2_EXTENDED
68 # define OCTAVE_PCRE_UTF PCRE2_UTF
69 # define OCTAVE_PCRE_INFO_CAPTURECOUNT PCRE2_INFO_CAPTURECOUNT
70 # define OCTAVE_PCRE_INFO_NAMECOUNT PCRE2_INFO_NAMECOUNT
71 # define OCTAVE_PCRE_INFO_NAMEENTRYSIZE PCRE2_INFO_NAMEENTRYSIZE
72 # define OCTAVE_PCRE_INFO_NAMETABLE PCRE2_INFO_NAMETABLE
73 #elif defined (HAVE_PCRE)
74 typedef pcre octave_pcre_code;
75 typedef int OCTAVE_PCRE_SIZE;
76 void (*octave_pcre_code_free) (void *) = pcre_free;
77 # define OCTAVE_PCRE_CASELESS PCRE_CASELESS
78 # define OCTAVE_PCRE_DOTALL PCRE_DOTALL
79 # define OCTAVE_PCRE_MULTILINE PCRE_MULTILINE
80 # define OCTAVE_PCRE_EXTENDED PCRE_EXTENDED
81 # define OCTAVE_PCRE_UTF PCRE_UTF8
82 # define OCTAVE_PCRE_INFO_CAPTURECOUNT PCRE_INFO_CAPTURECOUNT
83 # define OCTAVE_PCRE_INFO_NAMECOUNT PCRE_INFO_NAMECOUNT
84 # define OCTAVE_PCRE_INFO_NAMEENTRYSIZE PCRE_INFO_NAMEENTRYSIZE
85 # define OCTAVE_PCRE_INFO_NAMETABLE PCRE_INFO_NAMETABLE
86 #else
87 # error "PCRE2 or PCRE library is required to build Octave"
88 #endif
89 
90 static inline int
91 octave_pcre_pattern_info (const octave_pcre_code *code, int what, void *where)
92 {
93 #if defined (HAVE_PCRE2)
94  return pcre2_pattern_info (code, what, where);
95 #else
96  return pcre_fullinfo (code, nullptr, what, where);
97 #endif
98 }
99 
101 
102 // Define the maximum number of retries for a pattern
103 // that possibly results in an infinite recursion.
// Each retry in the legacy PCRE matching path multiplies the match
// limit by ten before calling pcre_exec again.
104 #define PCRE_MATCHLIMIT_MAX 10
105 
106 // FIXME: should this be configurable?
// Largest repetition count generated when an arbitrary-length
// lookbehind ("*" or "+") is expanded into fixed-length alternatives;
// also the length reported in the lookbehind-limit warning.
107 #define MAXLOOKBEHIND 10
108 
// Set to true the first time the lookbehind-limit warning is issued so
// that the warning is not repeated.
109 static bool lookbehind_warned = false;
110 
111 // FIXME: don't bother collecting and composing return values
112 // the user doesn't want.
113 
114 void
116 {
117  octave_pcre_code_free (static_cast<octave_pcre_code *> (m_code));
118 }
119 
120 void
122 {
123  // If we had a previously compiled pattern, release it.
124  free ();
125 
126  std::size_t max_length = MAXLOOKBEHIND;
127 
128  std::size_t pos = 0;
129  std::size_t new_pos;
130  int inames = 0;
131  std::ostringstream buf;
132 
133  while ((new_pos = m_pattern.find ("(?", pos)) != std::string::npos)
134  {
135  std::size_t tmp_pos;
136  if (m_pattern.size () > new_pos + 2
137  && m_pattern.at (new_pos + 2) == '<'
138  && ! (m_pattern.size () > new_pos + 3
139  && (m_pattern.at (new_pos + 3) == '='
140  || m_pattern.at (new_pos + 3) == '!'))
141  && (tmp_pos = m_pattern.find_first_of ('>', new_pos))
142  != std::string::npos
143  && m_pattern.find_first_of (')', tmp_pos) != std::string::npos)
144  {
145  // The syntax of named tokens in pcre is "(?P<name>...)" while
146  // we need a syntax "(?<name>...)", so fix that here. Also an
147  // expression like
148  // "(?<first>\w+)\s+(?<last>\w+)|(?<last>\w+),\s+(?<first>\w+)"
149  // should be perfectly legal, while pcre does not allow the same
150  // named token name on both sides of the alternative. Also fix
151  // that here by replacing name tokens by dummy names, and dealing
152  // with the dummy names later.
153 
154  std::string tmp_name
155  = m_pattern.substr (new_pos+3, tmp_pos-new_pos-3);
156 
157  bool found = false;
158 
159  for (int i = 0; i < m_names; i++)
160  {
161  if (m_named_pats(i) == tmp_name)
162  {
163  m_named_idx.resize (dim_vector (inames+1, 1));
164  m_named_idx(inames) = i;
165  found = true;
166  break;
167  }
168  }
169 
170  if (! found)
171  {
172  m_named_idx.resize (dim_vector (inames+1, 1));
173  m_named_idx(inames) = m_names;
174  m_named_pats.append (tmp_name);
175  m_names++;
176  }
177 
178  if (new_pos - pos > 0)
179  buf << m_pattern.substr (pos, new_pos-pos);
180  if (inames < 10)
181  buf << "(?P<n00" << inames++;
182  else if (inames < 100)
183  buf << "(?P<n0" << inames++;
184  else
185  buf << "(?P<n" << inames++;
186 
187  pos = tmp_pos;
188  }
189  else if (m_pattern.size () > new_pos + 2
190  && m_pattern.at (new_pos + 2) == '<')
191  {
192  // Find lookbehind operators of arbitrary length (ie like
193  // "(?<=[a-z]*)") and replace with a maximum length operator
194  // as PCRE can not yet handle arbitrary length lookahead
195  // operators. Use the string length as the maximum length to
196  // avoid issues.
197 
198  int brackets = 1;
199  std::size_t tmp_pos1 = new_pos + 2;
200  std::size_t tmp_pos2 = tmp_pos1;
201 
202  while (tmp_pos1 < m_pattern.length () && brackets > 0)
203  {
204  char ch = m_pattern.at (tmp_pos1);
205 
206  if (ch == '(')
207  brackets++;
208  else if (ch == ')')
209  {
210  if (brackets > 1)
211  tmp_pos2 = tmp_pos1;
212 
213  brackets--;
214  }
215 
216  tmp_pos1++;
217  }
218 
219  if (brackets != 0)
220  {
221  buf << m_pattern.substr (pos, new_pos - pos) << "(?";
222  pos = new_pos + 2;
223  }
224  else
225  {
226  std::size_t tmp_pos3 = m_pattern.find_first_of ("*+", tmp_pos2);
227 
228  if (tmp_pos3 != std::string::npos && tmp_pos3 < tmp_pos1)
229  {
230  if (! lookbehind_warned)
231  {
232  lookbehind_warned = true;
233  (*current_liboctave_warning_with_id_handler)
234  ("Octave:regexp-lookbehind-limit",
235  "%s: arbitrary length lookbehind patterns are only supported up to length %d",
236  m_who.c_str (), MAXLOOKBEHIND);
237  }
238 
239  buf << m_pattern.substr (pos, new_pos - pos) << '(';
240 
241  std::size_t i;
242 
243  if (m_pattern.at (tmp_pos3) == '*')
244  i = 0;
245  else
246  i = 1;
247 
248  for (; i < max_length + 1; i++)
249  {
250  buf << m_pattern.substr (new_pos, tmp_pos3 - new_pos)
251  << '{' << i << '}';
252  buf << m_pattern.substr (tmp_pos3 + 1,
253  tmp_pos1 - tmp_pos3 - 1);
254  if (i != max_length)
255  buf << '|';
256  }
257  buf << ')';
258  }
259  else
260  buf << m_pattern.substr (pos, tmp_pos1 - pos);
261 
262  pos = tmp_pos1;
263  }
264  }
265  else
266  {
267  buf << m_pattern.substr (pos, new_pos - pos) << "(?";
268  pos = new_pos + 2;
269  }
270 
271  }
272 
273  buf << m_pattern.substr (pos);
274 
275  // Replace NULLs with escape sequence because conversion function c_str()
276  // will terminate string early at embedded NULLs.
277  std::string buf_str = buf.str ();
278  while ((pos = buf_str.find ('\0')) != std::string::npos)
279  buf_str.replace (pos, 1, "\\000");
280 
281  int pcre_options
282  = ( (m_options.case_insensitive () ? OCTAVE_PCRE_CASELESS : 0)
283  | (m_options.dotexceptnewline () ? 0 : OCTAVE_PCRE_DOTALL)
284  | (m_options.lineanchors () ? OCTAVE_PCRE_MULTILINE : 0)
285  | (m_options.freespacing () ? OCTAVE_PCRE_EXTENDED : 0)
286  | OCTAVE_PCRE_UTF);
287 
288 #if defined (HAVE_PCRE2)
289  PCRE2_SIZE erroffset;
290  int errnumber;
291 
292  m_code = pcre2_compile (reinterpret_cast<PCRE2_SPTR> (buf_str.c_str ()),
293  PCRE2_ZERO_TERMINATED, pcre_options,
294  &errnumber, &erroffset, nullptr);
295 
296  if (! m_code)
297  {
298  // PCRE docs say:
299  //
300  // If the buffer is too small, the message is truncated (but
301  // still with a trailing zero), and the negative error code
302  // PCRE2_ERROR_NOMEMORY is returned. None of the messages are
303  // very long; a buffer size of 120 code units is ample.
304  //
305  // so we assume that 256 will be large enough to avoid truncated
306  // messages.
307 
308  PCRE2_UCHAR err [256];
309  pcre2_get_error_message (errnumber, err, sizeof (err));
310  (*current_liboctave_error_handler)
311  ("%s: %s at position %zu of expression", m_who.c_str (), err,
312  erroffset);
313  }
314 #else
315  const char *err;
316  int erroffset;
317 
318  m_code = pcre_compile (buf_str.c_str (), pcre_options,
319  &err, &erroffset, nullptr);
320 
321  if (! m_code)
322  (*current_liboctave_error_handler)
323  ("%s: %s at position %d of expression", m_who.c_str (), err, erroffset);
324 #endif
325 }
326 
328 regexp::match (const std::string& buffer) const
329 {
330  // check if input is valid utf-8
331  const uint8_t *buf_str = reinterpret_cast<const uint8_t *> (buffer.c_str ());
332  if (octave_u8_check_wrapper (buf_str, buffer.length ()))
334  ("%s: the input string is invalid UTF-8", m_who.c_str ());
335 
336  regexp::match_data retval;
337 
338  std::list<regexp::match_element> lst;
339 
340  int subpatterns;
341  int namecount;
342  int nameentrysize;
343  char *nametable;
344  std::size_t idx = 0;
345 
346  octave_pcre_code *re = static_cast<octave_pcre_code *> (m_code);
347 
348  octave_pcre_pattern_info (re, OCTAVE_PCRE_INFO_CAPTURECOUNT, &subpatterns);
349  octave_pcre_pattern_info (re, OCTAVE_PCRE_INFO_NAMECOUNT, &namecount);
350  octave_pcre_pattern_info (re, OCTAVE_PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
351  octave_pcre_pattern_info (re, OCTAVE_PCRE_INFO_NAMETABLE, &nametable);
352 
353 #if defined (HAVE_PCRE)
354  OCTAVE_LOCAL_BUFFER (OCTAVE_PCRE_SIZE, ovector, (subpatterns+1)*3);
355 #endif
356 
357  OCTAVE_LOCAL_BUFFER (int, nidx, namecount);
358 
359  for (int i = 0; i < namecount; i++)
360  {
361  // Index of subpattern in first two bytes of name (MSB first).
362  // Extract index.
363  nidx[i] = (static_cast<int> (nametable[i*nameentrysize])) << 8
364  | static_cast<int> (nametable[i*nameentrysize+1]);
365  }
366 
367  while (true)
368  {
369  octave_quit ();
370 
371 #if defined (HAVE_PCRE2)
372  pcre2_match_data *m_data
373  = pcre2_match_data_create_from_pattern (re, nullptr);
374 
375  unwind_action cleanup_match_data
376  ([=] () { pcre2_match_data_free (m_data); });
377 
378  int matches = pcre2_match (re, reinterpret_cast<PCRE2_SPTR> (buffer.c_str ()),
379  buffer.length (), idx,
380  PCRE2_NO_UTF_CHECK | (idx ? PCRE2_NOTBOL : 0),
381  m_data, nullptr);
382 
383  if (matches < 0 && matches != PCRE2_ERROR_NOMATCH)
384  (*current_liboctave_error_handler)
385  ("%s: internal error calling pcre2_match; "
386  "error code from pcre2_match is %i", m_who.c_str (), matches);
387 
388  if (matches == PCRE2_ERROR_NOMATCH)
389  break;
390 
391  OCTAVE_PCRE_SIZE *ovector = pcre2_get_ovector_pointer (m_data);
392 #else
393  int matches = pcre_exec (re, nullptr, buffer.c_str (),
394  buffer.length (), idx,
395  PCRE_NO_UTF8_CHECK | (idx ? PCRE_NOTBOL : 0),
396  ovector, (subpatterns+1)*3);
397 
398  if (matches == PCRE_ERROR_MATCHLIMIT)
399  {
400  // Try harder; start with default value for MATCH_LIMIT
401  // and increase it.
402  (*current_liboctave_warning_with_id_handler)
403  ("Octave:regexp-match-limit",
404  "your pattern caused PCRE to hit its MATCH_LIMIT; trying harder now, but this will be slow");
405 
406  pcre_extra pe;
407 
408  pcre_config (PCRE_CONFIG_MATCH_LIMIT,
409  static_cast<void *> (&pe.match_limit));
410 
411  pe.flags = PCRE_EXTRA_MATCH_LIMIT;
412 
413  int i = 0;
414  while (matches == PCRE_ERROR_MATCHLIMIT
415  && i++ < PCRE_MATCHLIMIT_MAX)
416  {
417  octave_quit ();
418 
419  pe.match_limit *= 10;
420  matches = pcre_exec (re, &pe, buffer.c_str (),
421  buffer.length (), idx,
422  PCRE_NO_UTF8_CHECK
423  | (idx ? PCRE_NOTBOL : 0),
424  ovector, (subpatterns+1)*3);
425  }
426  }
427 
428  if (matches < 0 && matches != PCRE_ERROR_NOMATCH)
429  (*current_liboctave_error_handler)
430  ("%s: internal error calling pcre_exec; "
431  "error code from pcre_exec is %i", m_who.c_str (), matches);
432 
433  if (matches == PCRE_ERROR_NOMATCH)
434  break;
435 #endif
436  if (ovector[0] >= ovector[1] && ! m_options.emptymatch ())
437  {
438  // Zero length match. Skip to next char.
439  idx = ovector[0] + 1;
440  if (idx < buffer.length ())
441  continue;
442  else
443  break;
444  }
445  else
446  {
447  int pos_match = 0;
448  Matrix token_extents (matches-1, 2);
449 
450  for (int i = 1; i < matches; i++)
451  {
452 #if defined (HAVE_PCRE2)
453  if (ovector[2*i] != PCRE2_SIZE_MAX
454 #else
455  if (ovector[2*i] >= 0
456 #endif
457  && ovector[2*i+1] > 0
458  && (i == 1 || ovector[2*i] != ovector[2*i-2]
459  || ovector[2*i-1] != ovector[2*i+1]))
460  {
461  token_extents(pos_match, 0) = double (ovector[2*i]+1);
462  token_extents(pos_match++, 1) = double (ovector[2*i+1]);
463  }
464  }
465 
466  token_extents.resize (pos_match, 2);
467 
468  OCTAVE_PCRE_SIZE start = ovector[0] + 1;
469  OCTAVE_PCRE_SIZE end = ovector[1];
470 
471 #if defined (HAVE_PCRE2)
472  // Must use explicit length constructor as match can contain '\0'.
473  std::string match_string = std::string (buffer.c_str() + start - 1,
474  end - start + 1);
475 #else
476  const char **listptr;
477  int status = pcre_get_substring_list (buffer.c_str (), ovector,
478  matches, &listptr);
479 
480  if (status == PCRE_ERROR_NOMEMORY)
481  (*current_liboctave_error_handler)
482  ("%s: cannot allocate memory in pcre_get_substring_list",
483  m_who.c_str ());
484 
485  // Must use explicit length constructor as match can contain '\0'.
486  std::string match_string = std::string (*listptr, end - start + 1);
487 #endif
488 
489  string_vector tokens (pos_match);
490  string_vector named_tokens (m_names);
491  int pos_offset = 0;
492  pos_match = 0;
493 
494  for (int i = 1; i < matches; i++)
495  {
496 #if defined (HAVE_PCRE2)
497  if (ovector[2*i] != PCRE2_SIZE_MAX
498 #else
499  if (ovector[2*i] >= 0
500 #endif
501  && ovector[2*i+1] > 0)
502  {
503  if (i == 1 || ovector[2*i] != ovector[2*i-2]
504  || ovector[2*i-1] != ovector[2*i+1])
505  {
506  if (namecount > 0)
507  {
508  // FIXME: Should probably do this with a map()
509  // rather than a linear search. However,
510  // the number of captured, named expressions
511  // is usually pretty small (< 4)
512  for (int j = 0; j < namecount; j++)
513  {
514  if (nidx[j] == i)
515  {
516  std::size_t len = ovector[2*i+1] - ovector[2*i];
517  named_tokens(m_named_idx(j))
518 #if defined (HAVE_PCRE2)
519  = std::string (buffer.c_str () + ovector[2*i], len);
520 #else
521  = std::string (*(listptr+i-pos_offset), len);
522 #endif
523  break;
524  }
525  }
526  }
527 
528  std::size_t len = ovector[2*i+1] - ovector[2*i];
529 #if defined (HAVE_PCRE2)
530  tokens(pos_match++) = std::string (buffer.c_str() + ovector[2*i], len);
531 #else
532  tokens(pos_match++) = std::string (*(listptr+i), len);
533 #endif
534  }
535  else
536  pos_offset++;
537  }
538  }
539 
540 #if ! defined (HAVE_PCRE2)
541  pcre_free_substring_list (listptr);
542 #endif
543 
544  // FIXME: MATCH_ELEMENT uses double values for these,
545  // presumably because that is what the Octave interpreter
546  // uses. Should we check that the values don't exceed
547  // flintmax here? It seems unlikely that it would happen,
548  // but...
549 
550  double dstart = static_cast<double> (start);
551  double dend = static_cast<double> (end);
552 
553  regexp::match_element new_elem (named_tokens, tokens, match_string,
554  token_extents,
555  dstart, dend);
556 
557  lst.push_back (new_elem);
558 
559  if (ovector[1] <= ovector[0])
560  {
561  // Zero length match. Skip to next char.
562  idx = ovector[0] + 1;
563  if (idx <= buffer.length ())
564  continue;
565  }
566  else
567  idx = ovector[1];
568 
569  if (m_options.once () || idx >= buffer.length ())
570  break;
571  }
572  }
573 
574  retval = regexp::match_data (lst, m_named_pats);
575 
576  return retval;
577 }
578 
579 bool
580 regexp::is_match (const std::string& buffer) const
581 {
582  regexp::match_data rx_lst = match (buffer);
583 
584  return rx_lst.size () > 0;
585 }
586 
588 regexp::is_match (const string_vector& buffer) const
589 {
590  octave_idx_type len = buffer.numel ();
591 
592  Array<bool> retval (dim_vector (len, 1));
593 
594  for (octave_idx_type i = 0; i < buffer.numel (); i++)
595  retval(i) = is_match (buffer(i));
596 
597  return retval;
598 }
599 
// Declare rep_token_t used in processing replacement string.
// Each instance describes one "$N" reference found in the replacement:
// POS is the offset of the '$' and NUM is the digit that follows it.
struct rep_token_t
{
  std::size_t pos;
  int num;
};
606 
607 std::string
608 regexp::replace (const std::string& buffer,
609  const std::string& replacement) const
610 {
611  std::string retval;
612 
613  const regexp::match_data rx_lst = match (buffer);
614 
615  std::size_t num_matches = rx_lst.size ();
616 
617  if (num_matches == 0)
618  {
619  retval = buffer;
620  return retval;
621  }
622 
623  // Identify replacement tokens; build a vector of group numbers in
624  // the replacement string so that we can quickly calculate the size
625  // of the replacement.
626 
627  // FIXME: All code assumes that only 10 tokens ($0-$9) exist.
628  // $11 represents $1 followed by the character '1' rather than
629  // the eleventh capture buffer.
630 
631  std::string repstr = replacement;
632  std::vector<rep_token_t> tokens;
633  tokens.reserve (5); // Reserve memory for 5 pattern replacements
634 
635  for (std::size_t i=0; i < repstr.size (); i++)
636  {
637  if (repstr[i] == '\\')
638  {
639  if (i < repstr.size () - 1 && repstr[i+1] == '$')
640  {
641  repstr.erase (i, 1); // erase backslash
642  i++; // skip over '$'
643  continue;
644  }
645  if (i < repstr.size () - 1 && repstr[i+1] == '\\')
646  {
647  repstr.erase (i, 1); // erase 1st backslash
648  continue;
649  }
650  }
651  else if (repstr[i] == '$')
652  {
653  if (i < repstr.size () - 1 && isdigit (repstr[i+1]))
654  {
655  rep_token_t tmp_token;
656 
657  tmp_token.pos = i;
658  tmp_token.num = repstr[i+1]-'0';
659  tokens.push_back (tmp_token);
660  }
661  }
662  }
663 
664  std::string rep;
665  int num_tokens = tokens.size ();
666 
667  if (num_tokens > 0)
668  {
669  // Determine replacement length
670  const std::size_t replen = repstr.size () - 2*num_tokens;
671  int delta = 0;
672  auto p = rx_lst.begin ();
673  for (std::size_t i = 0; i < num_matches; i++)
674  {
675  octave_quit ();
676 
677  double start = p->start ();
678  double end = p->end ();
679 
680  const Matrix pairs (p->token_extents ());
681  std::size_t pairlen = 0;
682  for (int j = 0; j < num_tokens; j++)
683  {
684  if (tokens[j].num == 0)
685  pairlen += static_cast<std::size_t> (end - start + 1);
686  else if (tokens[j].num <= pairs.rows ())
687  pairlen += static_cast<std::size_t> (pairs(tokens[j].num-1, 1)
688  - pairs(tokens[j].num-1, 0)
689  + 1);
690  }
691  delta += (static_cast<int> (replen + pairlen)
692  - static_cast<int> (end - start + 1));
693  p++;
694  }
695 
696  // Build replacement string
697  rep.reserve (buffer.size () + delta);
698  std::size_t from = 0;
699  p = rx_lst.begin ();
700  for (std::size_t i = 0; i < num_matches; i++)
701  {
702  octave_quit ();
703 
704  double start = p->start ();
705  double end = p->end ();
706 
707  const Matrix pairs (p->token_extents ());
708  rep.append (&buffer[from], static_cast<std::size_t> (start - 1 - from));
709  from = static_cast<std::size_t> (end);
710 
711  std::size_t cur_pos = 0;
712 
713  for (int j = 0; j < num_tokens; j++)
714  {
715  rep.append (&repstr[cur_pos], (tokens[j].pos) - cur_pos);
716  cur_pos = tokens[j].pos+2;
717 
718  int k = tokens[j].num;
719  if (k == 0)
720  {
721  // replace with entire match
722  rep.append (&buffer[static_cast<std::size_t> (end - 1)],
723  static_cast<std::size_t> (end - start + 1));
724  }
725  else if (k <= pairs.rows ())
726  {
727  // replace with group capture
728  rep.append (&buffer[static_cast<std::size_t> (pairs(k-1, 0)-1)],
729  static_cast<std::size_t> (pairs(k-1, 1)
730  - pairs(k-1, 0) + 1));
731  }
732  else
733  {
734  // replace with nothing
735  }
736  }
737  if (cur_pos < repstr.size ())
738  rep.append (&repstr[cur_pos], repstr.size () - cur_pos);
739 
740  p++;
741  }
742  rep.append (&buffer[from], buffer.size () - from);
743  }
744  else
745  {
746  // Determine repstr length
747  const std::size_t replen = repstr.size ();
748  int delta = 0;
749  auto p = rx_lst.begin ();
750  for (std::size_t i = 0; i < num_matches; i++)
751  {
752  octave_quit ();
753 
754  delta += static_cast<int> (replen)
755  - static_cast<int> (p->end () - p->start () + 1);
756  p++;
757  }
758 
759  // Build replacement string
760  rep.reserve (buffer.size () + delta);
761  std::size_t from = 0;
762  p = rx_lst.begin ();
763  for (std::size_t i = 0; i < num_matches; i++)
764  {
765  octave_quit ();
766 
767  rep.append (&buffer[from],
768  static_cast<std::size_t> (p->start () - 1 - from));
769  from = static_cast<std::size_t> (p->end ());
770  rep.append (repstr);
771  p++;
772  }
773  rep.append (&buffer[from], buffer.size () - from);
774  }
775 
776  retval = rep;
777  return retval;
778 }
779 
OCTAVE_END_NAMESPACE(octave)
OCTARRAY_API void resize(const dim_vector &dv, const T &rfv)
Size of the specified dimension.
Definition: Array-base.cc:1032
OCTARRAY_OVERRIDABLE_FUNC_API octave_idx_type rows(void) const
Definition: Array.h:459
Definition: dMatrix.h:42
void resize(octave_idx_type nr, octave_idx_type nc, double rfv=0)
Definition: dMatrix.h:158
std::size_t size(void) const
Definition: base-list.h:52
iterator begin(void)
Definition: base-list.h:65
Vector representing the dimensions (size) of an Array.
Definition: dim-vector.h:94
void dotexceptnewline(bool val)
Definition: lo-regexp.h:140
void lineanchors(bool val)
Definition: lo-regexp.h:143
void case_insensitive(bool val)
Definition: lo-regexp.h:139
void freespacing(bool val)
Definition: lo-regexp.h:142
void emptymatch(bool val)
Definition: lo-regexp.h:141
void once(bool val)
Definition: lo-regexp.h:144
int m_names
Definition: lo-regexp.h:235
Array< int > m_named_idx
Definition: lo-regexp.h:236
std::string m_who
Definition: lo-regexp.h:237
std::string replace(const std::string &buffer, const std::string &replacement) const
Definition: lo-regexp.cc:608
opts m_options
Definition: lo-regexp.h:229
std::string m_pattern
Definition: lo-regexp.h:227
void free(void)
Definition: lo-regexp.cc:115
string_vector m_named_pats
Definition: lo-regexp.h:234
bool is_match(const std::string &buffer) const
Definition: lo-regexp.cc:580
match_data match(const std::string &buffer) const
Definition: lo-regexp.cc:328
void * m_code
Definition: lo-regexp.h:232
void compile_internal(void)
Definition: lo-regexp.cc:121
string_vector & append(const std::string &s)
Definition: str-vec.cc:110
octave_idx_type numel(void) const
Definition: str-vec.h:100
OCTAVE_BEGIN_NAMESPACE(octave) static octave_value daspk_fcn
OCTAVE_NORETURN liboctave_error_handler current_liboctave_error_handler
Definition: lo-error.c:41
#define MAXLOOKBEHIND
Definition: lo-regexp.cc:107
static int octave_pcre_pattern_info(const octave_pcre_code *code, int what, void *where)
Definition: lo-regexp.cc:91
#define PCRE_MATCHLIMIT_MAX
Definition: lo-regexp.cc:104
static bool lookbehind_warned
Definition: lo-regexp.cc:109
#define OCTAVE_LOCAL_BUFFER(T, buf, size)
Definition: oct-locbuf.h:44
std::size_t pos
Definition: lo-regexp.cc:603
const uint8_t * octave_u8_check_wrapper(const uint8_t *src, size_t n)
F77_RET_T len
Definition: xerbla.cc:61