GNU Octave  9.1.0
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab
lo-regexp.h
Go to the documentation of this file.
1 ////////////////////////////////////////////////////////////////////////
2 //
3 // Copyright (C) 2005-2024 The Octave Project Developers
4 //
5 // See the file COPYRIGHT.md in the top-level directory of this
6 // distribution or <https://octave.org/copyright/>.
7 //
8 // This file is part of Octave.
9 //
10 // Octave is free software: you can redistribute it and/or modify it
11 // under the terms of the GNU General Public License as published by
12 // the Free Software Foundation, either version 3 of the License, or
13 // (at your option) any later version.
14 //
15 // Octave is distributed in the hope that it will be useful, but
16 // WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 // GNU General Public License for more details.
19 //
20 // You should have received a copy of the GNU General Public License
21 // along with Octave; see the file COPYING. If not, see
22 // <https://www.gnu.org/licenses/>.
23 //
24 ////////////////////////////////////////////////////////////////////////
25 
26 #if ! defined (octave_lo_regexp_h)
27 #define octave_lo_regexp_h 1
28 
29 #include "octave-config.h"
30 
31 #include <list>
32 #include <sstream>
33 #include <string>
34 
35 #include "Array.h"
36 #include "Matrix.h"
37 #include "base-list.h"
38 #include "str-vec.h"
39 
41 
42 class
44 regexp
45 {
46 public:
47 
48  class opts;
49  class match_data;
50 
51  regexp (const std::string& pat = "",
52  const regexp::opts& opt = regexp::opts (),
53  const std::string& w = "regexp")
54  : m_pattern (pat), m_options (opt), m_code (nullptr), m_named_pats (),
55  m_names (0), m_named_idx (), m_who (w)
56  {
57  compile_internal ();
58  }
59 
60  regexp (const regexp&) = default;
61 
62  regexp& operator = (const regexp& rx) = default;
63 
64  ~regexp () { free (); }
65 
66  void compile (const std::string& pat,
67  const regexp::opts& opt = regexp::opts ())
68  {
69  m_pattern = pat;
70  m_options = opt;
71  compile_internal ();
72  }
73 
74  match_data match (const std::string& buffer) const;
75 
76  bool is_match (const std::string& buffer) const;
77 
78  Array<bool> is_match (const string_vector& buffer) const;
79 
80  std::string replace (const std::string& buffer,
81  const std::string& replacement) const;
82 
83  static match_data
84  match (const std::string& pat, const std::string& buffer,
85  const regexp::opts& opt = regexp::opts (),
86  const std::string& who = "regexp")
87  {
88  regexp rx (pat, opt, who);
89 
90  return rx.match (buffer);
91  }
92 
93  static bool
94  is_match (const std::string& pat, const std::string& buffer,
95  const regexp::opts& opt = regexp::opts (),
96  const std::string& who = "regexp")
97  {
98  regexp rx (pat, opt, who);
99 
100  return rx.is_match (buffer);
101  }
102 
103  static Array<bool>
104  is_match (const std::string& pat, const string_vector& buffer,
105  const regexp::opts& opt = regexp::opts (),
106  const std::string& who = "regexp")
107  {
108  regexp rx (pat, opt, who);
109 
110  return rx.is_match (buffer);
111  }
112 
113  static std::string
114  replace (const std::string& pat, const std::string& buffer,
115  const std::string& replacement,
116  const regexp::opts& opt = regexp::opts (),
117  const std::string& who = "regexp")
118  {
119  regexp rx (pat, opt, who);
120 
121  return rx.replace (buffer, replacement);
122  }
123 
124  class opts
125  {
126  public:
127 
128  opts ()
129  : m_case_insensitive (false), m_dotexceptnewline (false),
130  m_emptymatch (false), m_freespacing (false), m_lineanchors (false),
131  m_once (false) { }
132 
133  opts (const opts&) = default;
134 
135  opts& operator = (const opts&) = default;
136 
137  ~opts () = default;
138 
139  void case_insensitive (bool val) { m_case_insensitive = val; }
140  void dotexceptnewline (bool val) { m_dotexceptnewline = val; }
141  void emptymatch (bool val) { m_emptymatch = val; }
142  void freespacing (bool val) { m_freespacing = val; }
143  void lineanchors (bool val) { m_lineanchors = val; }
144  void once (bool val) { m_once = val; }
145 
146  bool case_insensitive () const { return m_case_insensitive; }
147  bool dotexceptnewline () const { return m_dotexceptnewline; }
148  bool emptymatch () const { return m_emptymatch; }
149  bool freespacing () const { return m_freespacing; }
150  bool lineanchors () const { return m_lineanchors; }
151  bool once () const { return m_once; }
152 
153  private:
154 
155  bool m_case_insensitive;
156  bool m_dotexceptnewline;
157  bool m_emptymatch;
158  bool m_freespacing;
159  bool m_lineanchors;
160  bool m_once;
161  };
162 
164  {
165  public:
166 
167  match_element () = delete;
168 
170  const std::string& ms, const Matrix& te,
171  double s, double e)
172  : m_match_string (ms), m_named_tokens (nt), m_tokens (t),
173  m_token_extents (te), m_start (s), m_end (e)
174  { }
175 
176  OCTAVE_DEFAULT_COPY_MOVE_DELETE (match_element)
177 
178  std::string match_string () const { return m_match_string; }
179  string_vector named_tokens () const { return m_named_tokens; }
180  string_vector tokens () const { return m_tokens; }
181  Matrix token_extents () const { return m_token_extents; }
182  double start () const { return m_start; }
183  double end () const { return m_end; }
184 
185  private:
186 
187  std::string m_match_string;
188  string_vector m_named_tokens;
189  string_vector m_tokens;
190  Matrix m_token_extents;
191 
192  // FIXME: Are these values declared as double because that's what
193  // Octave interpreter functions will store? Should they be int or
194  // size_t instead?
195  double m_start;
196  double m_end;
197  };
198 
199  class match_data : public base_list<match_element>
200  {
201  public:
202 
204  : base_list<match_element> (), m_named_pats ()
205  { }
206 
207  match_data (const std::list<match_element>& l, const string_vector& np)
208  : base_list<match_element> (l), m_named_pats (np)
209  { }
210 
211  OCTAVE_DEFAULT_COPY_MOVE_DELETE (match_data)
212 
213  string_vector named_patterns () const { return m_named_pats; }
214 
215  private:
216 
217  string_vector m_named_pats;
218  };
219 
220 private:
221 
222  // The pattern we've been asked to match.
223  std::string m_pattern;
224 
225  opts m_options;
226 
227  // Internal data describing the regular expression.
228  void *m_code;
229 
230  string_vector m_named_pats;
231  int m_names;
232  Array<int> m_named_idx;
233  std::string m_who;
234 
235  void free ();
236 
237  void compile_internal ();
238 };
239 
240 OCTAVE_END_NAMESPACE(octave)
241 
242 #endif
Matrix
Definition: dMatrix.h:42
match_data(const std::list< match_element > &l, const string_vector &np)
Definition: lo-regexp.h:207
Matrix token_extents() const
Definition: lo-regexp.h:181
double end() const
Definition: lo-regexp.h:183
match_element(const string_vector &nt, const string_vector &t, const std::string &ms, const Matrix &te, double s, double e)
Definition: lo-regexp.h:169
double start() const
Definition: lo-regexp.h:182
string_vector tokens() const
Definition: lo-regexp.h:180
string_vector named_tokens() const
Definition: lo-regexp.h:179
void dotexceptnewline(bool val)
Definition: lo-regexp.h:140
void lineanchors(bool val)
Definition: lo-regexp.h:143
bool case_insensitive() const
Definition: lo-regexp.h:146
bool freespacing() const
Definition: lo-regexp.h:149
void case_insensitive(bool val)
Definition: lo-regexp.h:139
~opts()=default
void freespacing(bool val)
Definition: lo-regexp.h:142
opts(const opts &)=default
bool dotexceptnewline() const
Definition: lo-regexp.h:147
bool emptymatch() const
Definition: lo-regexp.h:148
void emptymatch(bool val)
Definition: lo-regexp.h:141
bool lineanchors() const
Definition: lo-regexp.h:150
void once(bool val)
Definition: lo-regexp.h:144
bool once() const
Definition: lo-regexp.h:151
void compile(const std::string &pat, const regexp::opts &opt=regexp::opts())
Definition: lo-regexp.h:66
static std::string replace(const std::string &pat, const std::string &buffer, const std::string &replacement, const regexp::opts &opt=regexp::opts(), const std::string &who="regexp")
Definition: lo-regexp.h:114
std::string replace(const std::string &buffer, const std::string &replacement) const
Definition: lo-regexp.cc:612
regexp(const std::string &pat="", const regexp::opts &opt=regexp::opts(), const std::string &w="regexp")
Definition: lo-regexp.h:51
static bool is_match(const std::string &pat, const std::string &buffer, const regexp::opts &opt=regexp::opts(), const std::string &who="regexp")
Definition: lo-regexp.h:94
regexp(const regexp &)=default
bool is_match(const std::string &buffer) const
Definition: lo-regexp.cc:584
match_data match(const std::string &buffer) const
Definition: lo-regexp.cc:328
~regexp()
Definition: lo-regexp.h:64
static Array< bool > is_match(const std::string &pat, const string_vector &buffer, const regexp::opts &opt=regexp::opts(), const std::string &who="regexp")
Definition: lo-regexp.h:104
static match_data match(const std::string &pat, const std::string &buffer, const regexp::opts &opt=regexp::opts(), const std::string &who="regexp")
Definition: lo-regexp.h:84
OCTAVE_BEGIN_NAMESPACE(octave) static octave_value daspk_fcn
#define OCTAVE_API
Definition: main.cc:55
std::complex< double > w(std::complex< double > z, double relerr=0)
void free(void *)