00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #if !defined (octave_regexp_match_h)
00025 #define octave_regexp_match_h 1
00026
00027 #include <list>
00028 #include <sstream>
00029 #include <string>
00030
00031 #include "Array.h"
00032 #include "Matrix.h"
00033 #include "base-list.h"
00034 #include "str-vec.h"
00035
00036 class regexp
00037 {
00038 public:
00039
00040 class opts;
00041 class match_data;
00042
00043 regexp (const std::string& pat = "",
00044 const regexp::opts& opt = regexp::opts (),
00045 const std::string& w = "regexp")
00046 : pattern (pat), options (opt), data (0), named_pats (),
00047 nnames (0), named_idx (), who (w)
00048 {
00049 compile_internal ();
00050 }
00051
00052 regexp (const regexp& rx)
00053 : pattern (rx.pattern), data (rx.data), named_pats (rx.named_pats),
00054 nnames (rx.nnames), named_idx (rx.named_idx)
00055 { }
00056
00057 regexp& operator = (const regexp& rx)
00058 {
00059 if (this != &rx)
00060 {
00061 pattern = rx.pattern;
00062 data = rx.data;
00063 named_pats = rx.named_pats;
00064 nnames = rx.nnames;
00065 named_idx = rx.named_idx;
00066 }
00067
00068 return *this;
00069 }
00070
00071 ~regexp (void) { free (); }
00072
00073 void compile (const std::string& pat,
00074 const regexp::opts& opt = regexp::opts ())
00075 {
00076 pattern = pat;
00077 options = opt;
00078 compile_internal ();
00079 }
00080
00081 match_data match (const std::string& buffer);
00082
00083 bool is_match (const std::string& buffer);
00084
00085 Array<bool> is_match (const string_vector& buffer);
00086
00087 std::string replace (const std::string& buffer,
00088 const std::string& replacement);
00089
00090 struct opts
00091 {
00092 public:
00093
00094 opts (void)
00095 : x_case_insensitive (false), x_dotexceptnewline (false),
00096 x_freespacing (false), x_lineanchors (false), x_once (false) { }
00097
00098 opts (const opts& o)
00099 : x_case_insensitive (o.x_case_insensitive),
00100 x_dotexceptnewline (o.x_dotexceptnewline),
00101 x_freespacing (o.x_freespacing),
00102 x_lineanchors (o.x_lineanchors),
00103 x_once (o.x_once)
00104 { }
00105
00106 opts& operator = (const opts& o)
00107 {
00108 if (this != &o)
00109 {
00110 x_case_insensitive = o.x_case_insensitive;
00111 x_dotexceptnewline = o.x_dotexceptnewline;
00112 x_freespacing = o.x_freespacing;
00113 x_lineanchors = o.x_lineanchors;
00114 x_once = o.x_once;
00115 }
00116
00117 return *this;
00118 }
00119
00120 ~opts (void) { }
00121
00122 void case_insensitive (bool val) { x_case_insensitive = val; }
00123 void dotexceptnewline (bool val) { x_dotexceptnewline = val; }
00124 void freespacing (bool val) { x_freespacing = val; }
00125 void lineanchors (bool val) { x_lineanchors = val; }
00126 void once (bool val) { x_once = val; }
00127
00128 bool case_insensitive (void) const { return x_case_insensitive; }
00129 bool dotexceptnewline (void) const { return x_dotexceptnewline; }
00130 bool freespacing (void) const { return x_freespacing; }
00131 bool lineanchors (void) const { return x_lineanchors; }
00132 bool once (void) const { return x_once; }
00133
00134 private:
00135
00136 bool x_case_insensitive;
00137 bool x_dotexceptnewline;
00138 bool x_freespacing;
00139 bool x_lineanchors;
00140 bool x_once;
00141 };
00142
00143 class match_element
00144 {
00145 public:
00146
00147 match_element (const string_vector& nt, const string_vector& t,
00148 const std::string& ms, const Matrix& te,
00149 double s, double e)
00150 : x_match_string (ms), x_named_tokens (nt), x_tokens (t),
00151 x_token_extents (te), x_start (s), x_end (e)
00152 { }
00153
00154 match_element (const match_element &a)
00155 : x_match_string (a.x_match_string),
00156 x_named_tokens (a.x_named_tokens), x_tokens (a.x_tokens),
00157 x_token_extents (a.x_token_extents),
00158 x_start (a.x_start), x_end (a.x_end)
00159 { }
00160
00161 std::string match_string (void) const { return x_match_string; }
00162 string_vector named_tokens (void) const { return x_named_tokens; }
00163 string_vector tokens (void) const { return x_tokens; }
00164 Matrix token_extents (void) const { return x_token_extents; }
00165 double start (void) const { return x_start; }
00166 double end (void) const { return x_end; }
00167
00168 private:
00169
00170 std::string x_match_string;
00171 string_vector x_named_tokens;
00172 string_vector x_tokens;
00173 Matrix x_token_extents;
00174 double x_start;
00175 double x_end;
00176 };
00177
00178 class match_data : public octave_base_list<match_element>
00179 {
00180 public:
00181
00182 match_data (void)
00183 : octave_base_list<match_element> (), named_pats ()
00184 { }
00185
00186 match_data (const std::list<match_element>& l, const string_vector& np)
00187 : octave_base_list<match_element> (l), named_pats (np)
00188 { }
00189
00190 match_data (const match_data& rx_lst)
00191 : octave_base_list<match_element> (rx_lst),
00192 named_pats (rx_lst.named_pats)
00193 { }
00194
00195 match_data& operator = (const match_data& rx_lst)
00196 {
00197 if (this != &rx_lst)
00198 {
00199 octave_base_list<match_element>::operator = (rx_lst);
00200 named_pats = rx_lst.named_pats;
00201 }
00202
00203 return *this;
00204 }
00205
00206 ~match_data (void) { }
00207
00208 string_vector named_patterns (void) { return named_pats; }
00209
00210 private:
00211
00212 string_vector named_pats;
00213 };
00214
00215 private:
00216
00217
00218 std::string pattern;
00219
00220 opts options;
00221
00222
00223 void *data;
00224
00225 std::string m;
00226 string_vector named_pats;
00227 int nnames;
00228 Array<int> named_idx;
00229 std::string who;
00230
00231 void free (void);
00232
00233 void compile_internal (void);
00234 };
00235
00236 inline regexp::match_data
00237 regexp_match (const std::string& pat,
00238 const std::string& buffer,
00239 const regexp::opts& opt = regexp::opts (),
00240 const std::string& who = "regexp")
00241 {
00242 regexp rx (pat, opt, who);
00243
00244 return rx.match (buffer);
00245 }
00246
00247 inline bool
00248 is_regexp_match (const std::string& pat,
00249 const std::string& buffer,
00250 const regexp::opts& opt = regexp::opts (),
00251 const std::string& who = "regexp")
00252 {
00253 regexp rx (pat, opt, who);
00254
00255 return rx.is_match (buffer);
00256 }
00257
00258 inline Array<bool>
00259 is_regexp_match (const std::string& pat,
00260 const string_vector& buffer,
00261 const regexp::opts& opt = regexp::opts (),
00262 const std::string& who = "regexp")
00263 {
00264 regexp rx (pat, opt, who);
00265
00266 return rx.is_match (buffer);
00267 }
00268
00269 inline std::string
00270 regexp_replace (const std::string& pat,
00271 const std::string& buffer,
00272 const std::string& replacement,
00273 const regexp::opts& opt = regexp::opts (),
00274 const std::string& who = "regexp")
00275 {
00276 regexp rx (pat, opt, who);
00277
00278 return rx.replace (buffer, replacement);
00279 }
00280
00281 #endif