GNU Octave 7.1.0
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab
oct-string.cc
Go to the documentation of this file.
1////////////////////////////////////////////////////////////////////////
2//
3// Copyright (C) 2016-2022 The Octave Project Developers
4//
5// See the file COPYRIGHT.md in the top-level directory of this
6// distribution or <https://octave.org/copyright/>.
7//
8// This file is part of Octave.
9//
10// Octave is free software: you can redistribute it and/or modify it
11// under the terms of the GNU General Public License as published by
12// the Free Software Foundation, either version 3 of the License, or
13// (at your option) any later version.
14//
15// Octave is distributed in the hope that it will be useful, but
16// WITHOUT ANY WARRANTY; without even the implied warranty of
17// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18// GNU General Public License for more details.
19//
20// You should have received a copy of the GNU General Public License
21// along with Octave; see the file COPYING. If not, see
22// <https://www.gnu.org/licenses/>.
23//
24////////////////////////////////////////////////////////////////////////
25
26#if defined (HAVE_CONFIG_H)
27# include "config.h"
28#endif
29
30#include "oct-string.h"
31
32#include <algorithm>
33#include <cctype>
34#include <cstring>
35#include <iomanip>
36#include <string>
37
38#include "Array.h"
39#include "lo-ieee.h"
40#include "lo-mappers.h"
41#include "uniconv-wrappers.h"
42#include "unistr-wrappers.h"
43#include "unwind-prot.h"
44
// Return true if the first N elements of the buffers A and B are
// identical.  An empty range (N == 0) always compares equal.
template <typename T>
static bool
str_data_cmp (const typename T::value_type *a, const typename T::value_type *b,
              const typename T::size_type n)
{
  return std::equal (a, a + n, b);
}
55
// Return true if the first N elements of the buffers A and B are
// identical when compared case-insensitively with std::tolower.
template <typename T>
static bool
str_data_cmpi (const typename T::value_type *a, const typename T::value_type *b,
               const typename T::size_type n)
{
  for (typename T::size_type i = 0; i < n; ++i)
    {
      // std::tolower is only defined for values representable as
      // unsigned char (or EOF), so a possibly negative plain char must
      // be converted before the call.
      const int lower_a = std::tolower (static_cast<unsigned char> (a[i]));
      const int lower_b = std::tolower (static_cast<unsigned char> (b[i]));

      if (lower_a != lower_b)
        return false;
    }

  return true;
}
66
67
68// Templates to handle std::basic_string, std::vector, Array, and char*.
// Number of elements in STR for any container that provides size ().
template <typename T>
typename T::size_type
numel (const T& str)
{
  return str.size ();
}
75
76template <>
78numel (const Array<char>& str)
79{
80 return str.numel ();
81}
82
// Length of the NUL-terminated string STR, returned as T::size_type so
// that it can be compared directly with numel () of a container of
// type T.
template <typename T>
typename T::size_type
strlen (const typename T::value_type *str)
{
  return std::strlen (str);
}
89
// True if the two containers hold the same number of elements.
template <typename T>
bool
sizes_cmp (const T& str_a, const T& str_b)
{
  return str_a.size () == str_b.size ();
}
96
97template <>
98bool
99sizes_cmp (const Array<char>& str_a, const Array<char>& str_b)
100{
101 return str_a.dims () == str_b.dims ();
102}
103
104template <typename T>
105bool
106sizes_cmp (const T& str_a, const typename T::value_type *str_b)
107{
108 return str_a.size () == strlen<T> (str_b);
109}
110
111template <>
112bool
113sizes_cmp (const Array<char>& str_a, const char *str_b)
114{
115 return (str_a.isvector () && str_a.rows () == 1
116 && str_a.numel () == strlen<Array<char>> (str_b));
117}
118
119
120template<typename T>
121bool
122octave::string::strcmp (const T& str_a, const T& str_b)
123{
124 return (sizes_cmp (str_a, str_b)
125 && str_data_cmp<T> (str_a.data (), str_b.data (), numel (str_a)));
126}
127
128template<typename T>
129bool
130octave::string::strcmp (const T& str_a, const typename T::value_type *str_b)
131{
132 return (sizes_cmp (str_a, str_b)
133 && str_data_cmp<T> (str_a.data (), str_b, numel (str_a)));
134}
135
136
137template<typename T>
138bool
139octave::string::strcmpi (const T& str_a, const T& str_b)
140{
141 return (sizes_cmp (str_a, str_b)
142 && str_data_cmpi<T> (str_a.data (), str_b.data (), numel (str_a)));
143}
144
145template<typename T>
146bool
147octave::string::strcmpi (const T& str_a, const typename T::value_type *str_b)
148{
149 return (sizes_cmp (str_a, str_b)
150 && str_data_cmpi<T> (str_a.data (), str_b, numel (str_a)));
151}
152
153
154template<typename T>
155bool
156octave::string::strncmp (const T& str_a, const T& str_b,
157 const typename T::size_type n)
158{
159 typename T::size_type neff;
160 auto len_a = numel (str_a);
161 auto len_b = numel (str_b);
162 neff = std::min (std::max (len_a, len_b), n);
163
164 return (len_a >= neff && len_b >= neff
165 && str_data_cmp<T> (str_a.data (), str_b.data (), neff));
166}
167
168template<typename T>
169bool
170octave::string::strncmp (const T& str_a, const typename T::value_type *str_b,
171 const typename T::size_type n)
172{
173 typename T::size_type neff;
174 auto len_a = numel (str_a);
175 auto len_b = strlen<T> (str_b);
176 neff = std::min (std::max (len_a, len_b), n);
177
178 return (len_a >= neff && len_b >= neff
179 && str_data_cmp<T> (str_a.data (), str_b, neff));
180}
181
182
183template<typename T>
184bool
185octave::string::strncmpi (const T& str_a, const T& str_b,
186 const typename T::size_type n)
187{
188 typename T::size_type neff;
189 auto len_a = numel (str_a);
190 auto len_b = numel (str_b);
191 neff = std::min (std::max (len_a, len_b), n);
192
193 return (len_a >= neff && len_b >= neff
194 && str_data_cmpi<T> (str_a.data (), str_b.data (), neff));
195}
196
197template<typename T>
198bool
199octave::string::strncmpi (const T& str_a, const typename T::value_type *str_b,
200 const typename T::size_type n)
201{
202 typename T::size_type neff;
203 auto len_a = numel (str_a);
204 auto len_b = strlen<T> (str_b);
205 neff = std::min (std::max (len_a, len_b), n);
206
207 return (len_a >= neff && len_b >= neff
208 && str_data_cmpi<T> (str_a.data (), str_b, neff));
209}
210
211
212// Instantiations we need
213#define INSTANTIATE_OCTAVE_STRING(T, API) \
214 template API bool octave::string::strcmp<T> (const T&, const T&); \
215 template API bool \
216 octave::string::strcmp<T> (const T&, const typename T::value_type*); \
217 template API bool octave::string::strcmpi<T> (const T&, const T&); \
218 template API bool \
219 octave::string::strcmpi<T> (const T&, const typename T::value_type*); \
220 template API bool \
221 octave::string::strncmp<T> (const T&, const T&, \
222 const typename T::size_type); \
223 template API bool \
224 octave::string::strncmp<T> (const T&, const typename T::value_type*, \
225 const typename T::size_type); \
226 template API bool \
227 octave::string::strncmpi<T> (const T&, const T&, \
228 const typename T::size_type n); \
229 template API bool \
230 octave::string::strncmpi<T> (const T&, const typename T::value_type*, \
231 const typename T::size_type);
232
233// We could also instantiate std::vector<char> but would it be
234// useful for anyone?
237
238#undef INSTANTIATE_OCTAVE_STRING
239
// True if C is one of the lowercase imaginary-unit letters 'i' or 'j'.
static inline bool
is_imag_unit (int c)
{ return c == 'i' || c == 'j'; }
243
244static double
245single_num (std::istringstream& is)
246{
247 double num = 0.0;
248
249 char c = is.peek ();
250
251 // Skip spaces.
252 while (isspace (c))
253 {
254 is.get ();
255 c = is.peek ();
256 }
257
258 if (std::toupper (c) == 'I')
259 {
260 // It's infinity.
261 is.get ();
262 char c1 = is.get ();
263 char c2 = is.get ();
264 if (std::tolower (c1) == 'n' && std::tolower (c2) == 'f')
265 {
267 is.peek (); // May set EOF bit.
268 }
269 else
270 is.setstate (std::ios::failbit); // indicate that read has failed.
271 }
272 else if (c == 'N')
273 {
274 // It's NA or NaN
275 is.get ();
276 char c1 = is.get ();
277 if (c1 == 'A')
278 {
279 num = octave_NA;
280 is.peek (); // May set EOF bit.
281 }
282 else
283 {
284 char c2 = is.get ();
285 if (c1 == 'a' && c2 == 'N')
286 {
288 is.peek (); // May set EOF bit.
289 }
290 else
291 is.setstate (std::ios::failbit); // indicate that read has failed.
292 }
293 }
294 else
295 is >> num;
296
297 return num;
298}
299
300static std::istringstream&
301extract_num (std::istringstream& is, double& num, bool& imag, bool& have_sign)
302{
303 have_sign = imag = false;
304
305 char c = is.peek ();
306
307 // Skip leading spaces.
308 while (isspace (c))
309 {
310 is.get ();
311 c = is.peek ();
312 }
313
314 bool negative = false;
315
316 // Accept leading sign.
317 if (c == '+' || c == '-')
318 {
319 have_sign = true;
320 negative = c == '-';
321 is.get ();
322 c = is.peek ();
323 }
324
325 // Skip spaces after sign.
326 while (isspace (c))
327 {
328 is.get ();
329 c = is.peek ();
330 }
331
332 // Imaginary number (i*num or just i), or maybe 'inf'.
333 if (c == 'i')
334 {
335 // possible infinity.
336 is.get ();
337 c = is.peek ();
338
339 if (is.eof ())
340 {
341 // just 'i' and string is finished. Return immediately.
342 imag = true;
343 num = (negative ? -1.0 : 1.0);
344 return is;
345 }
346 else
347 {
348 if (std::tolower (c) != 'n')
349 imag = true;
350 is.unget ();
351 }
352 }
353 else if (c == 'j')
354 imag = true;
355
356 // It's i*num or just i
357 if (imag)
358 {
359 is.get ();
360 c = is.peek ();
361 // Skip spaces after imaginary unit.
362 while (isspace (c))
363 {
364 is.get ();
365 c = is.peek ();
366 }
367
368 if (c == '*')
369 {
370 // Multiplier follows, we extract it as a number.
371 is.get ();
372 num = single_num (is);
373 if (is.good ())
374 c = is.peek ();
375 }
376 else
377 num = 1.0;
378 }
379 else
380 {
381 // It's num, num*i, or numi.
382 num = single_num (is);
383 if (is.good ())
384 {
385 c = is.peek ();
386
387 // Skip spaces after number.
388 while (isspace (c))
389 {
390 is.get ();
391 c = is.peek ();
392 }
393
394 if (c == '*')
395 {
396 is.get ();
397 c = is.peek ();
398
399 // Skip spaces after operator.
400 while (isspace (c))
401 {
402 is.get ();
403 c = is.peek ();
404 }
405
406 if (is_imag_unit (c))
407 {
408 imag = true;
409 is.get ();
410 c = is.peek ();
411 }
412 else
413 is.setstate (std::ios::failbit); // indicate read has failed.
414 }
415 else if (is_imag_unit (c))
416 {
417 imag = true;
418 is.get ();
419 c = is.peek ();
420 }
421 }
422 }
423
424 if (is.good ())
425 {
426 // Skip trailing spaces.
427 while (isspace (c))
428 {
429 is.get ();
430 c = is.peek ();
431 }
432 }
433
434 if (negative)
435 num = -num;
436
437 return is;
438}
439
440static inline void
441set_component (Complex& c, double num, bool imag)
442{
443#if defined (HAVE_CXX_COMPLEX_SETTERS)
444 if (imag)
445 c.imag (num);
446 else
447 c.real (num);
448#elif defined (HAVE_CXX_COMPLEX_REFERENCE_ACCESSORS)
449 if (imag)
450 c.imag () = num;
451 else
452 c.real () = num;
453#else
454 if (imag)
455 c = Complex (c.real (), num);
456 else
457 c = Complex (num, c.imag ());
458#endif
459}
460
462octave::string::str2double (const std::string& str_arg)
463{
464 Complex val (0.0, 0.0);
465
466 std::string str = str_arg;
467
468 // FIXME: removing all commas doesn't allow actual parsing.
469 // Example: "1,23.45" is wrong, but passes Octave.
470 str.erase (std::remove (str.begin (), str.end(), ','), str.end ());
471 std::istringstream is (str);
472
473 double num;
474 bool i1, i2, s1, s2;
475
476 if (is.eof ())
478 else if (! extract_num (is, num, i1, s1))
480 else
481 {
482 set_component (val, num, i1);
483
484 if (! is.eof ())
485 {
486 if (! extract_num (is, num, i2, s2) || i1 == i2 || ! s2)
488 else
489 set_component (val, num, i2);
490 }
491 }
492
493 return val;
494}
495
496std::string
497octave::string::u8_to_encoding (const std::string& who,
498 const std::string& u8_string,
499 const std::string& encoding)
500{
501 const uint8_t *src = reinterpret_cast<const uint8_t *>
502 (u8_string.c_str ());
503 std::size_t srclen = u8_string.length ();
504
505 std::size_t length;
506 char *native_str = octave_u8_conv_to_encoding (encoding.c_str (), src,
507 srclen, &length);
508
509 if (! native_str)
510 {
511 if (errno == ENOSYS)
512 (*current_liboctave_error_handler)
513 ("%s: iconv() is not supported. Installing GNU libiconv and then "
514 "re-compiling Octave could fix this.", who.c_str ());
515 else
517 ("%s: converting from UTF-8 to codepage '%s' failed: %s",
518 who.c_str (), encoding.c_str (), std::strerror (errno));
519 }
520
521 octave::unwind_action free_native_str ([=] () { ::free (native_str); });
522
523 std::string retval = std::string (native_str, length);
524
525 return retval;
526}
527
528std::string
529octave::string::u8_from_encoding (const std::string& who,
530 const std::string& native_string,
531 const std::string& encoding)
532{
533 const char *src = native_string.c_str ();
534 std::size_t srclen = native_string.length ();
535
536 std::size_t length;
537 uint8_t *utf8_str = octave_u8_conv_from_encoding (encoding.c_str (), src,
538 srclen, &length);
539 if (! utf8_str)
540 {
541 if (errno == ENOSYS)
542 (*current_liboctave_error_handler)
543 ("%s: iconv() is not supported. Installing GNU libiconv and then "
544 "re-compiling Octave could fix this.", who.c_str ());
545 else
547 ("%s: converting from codepage '%s' to UTF-8 failed: %s",
548 who.c_str (), encoding.c_str (), std::strerror (errno));
549 }
550
551 octave::unwind_action free_utf8_str ([=] () { ::free (utf8_str); });
552
553 std::string retval = std::string (reinterpret_cast<char *> (utf8_str), length);
554
555 return retval;
556}
557
558unsigned int
559octave::string::u8_validate (const std::string& who,
560 std::string& in_str,
562{
563 std::string out_str;
564
565 unsigned int num_replacements = 0;
566 const char *in_chr = in_str.c_str ();
567 const char *inv_utf8 = in_chr;
568 const char *const in_end = in_chr + in_str.length ();
569 while (inv_utf8 && in_chr < in_end)
570 {
571 inv_utf8 = reinterpret_cast<const char *>
572 (octave_u8_check_wrapper (reinterpret_cast<const uint8_t *> (in_chr),
573 in_end - in_chr));
574
575 if (inv_utf8 == nullptr)
576 out_str.append (in_chr, in_end - in_chr);
577 else
578 {
579 num_replacements++;
580 out_str.append (in_chr, inv_utf8 - in_chr);
581 in_chr = inv_utf8 + 1;
582
583 if (type == U8_REPLACEMENT_CHAR)
584 out_str.append ("\xef\xbf\xbd");
585 else if (type == U8_ISO_8859_1)
586 {
587 std::string fallback = "iso-8859-1";
588 std::size_t lengthp;
589 uint8_t *val_utf8 = octave_u8_conv_from_encoding
590 (fallback.c_str (), inv_utf8, 1, &lengthp);
591
592 if (! val_utf8)
593 (*current_liboctave_error_handler)
594 ("%s: converting from codepage '%s' to UTF-8 failed: %s",
595 who.c_str (), fallback.c_str (), std::strerror (errno));
596
597 octave::unwind_action free_val_utf8
598 ([=] () { ::free (val_utf8); });
599
600 out_str.append (reinterpret_cast<const char *> (val_utf8),
601 lengthp);
602 }
603 }
604 }
605
606 in_str = out_str;
607 return num_replacements;
608}
609
610template <typename T>
611std::string
613{
614 std::string s;
615
616 if (len <= 0)
617 len = 10;
618
619 static const T out_of_range_top
620 = static_cast<T>(std::numeric_limits<int>::max ()) + 1.;
621 static const T out_of_range_bottom
622 = static_cast<T>(std::numeric_limits<int>::min ()) - 1.;
623 if (octave::math::isinf (val))
624 {
625 if (val > 0)
626 s = "1/0";
627 else
628 s = "-1/0";
629 }
630 else if (octave::math::isnan (val))
631 s = "0/0";
632 else if (val <= out_of_range_bottom || val >= out_of_range_top
633 || octave::math::x_nint (val) == val)
634 {
635 std::ostringstream buf;
636 buf.flags (std::ios::fixed);
637 buf << std::setprecision (0) << octave::math::round (val);
638 s = buf.str ();
639 }
640 else
641 {
642 T lastn = 1;
643 T lastd = 0;
644 T n = octave::math::round (val);
645 T d = 1;
646 T frac = val - n;
647 int m = 0;
648
649 std::ostringstream init_buf;
650 init_buf.flags (std::ios::fixed);
651 init_buf << std::setprecision (0) << static_cast<int> (n);
652 s = init_buf.str ();
653
654 while (true)
655 {
656 T flip = 1 / frac;
657 T step = octave::math::round (flip);
658 T nextn = n;
659 T nextd = d;
660
661 // Have we converged to 1/intmax ?
662 if (std::abs (flip) > out_of_range_top)
663 {
664 lastn = n;
665 lastd = d;
666 break;
667 }
668
669 frac = flip - step;
670 n = step * n + lastn;
671 d = step * d + lastd;
672 lastn = nextn;
673 lastd = nextd;
674
675 std::ostringstream buf;
676 buf.flags (std::ios::fixed);
677 buf << std::setprecision (0) << static_cast<int> (n)
678 << '/' << static_cast<int> (d);
679 m++;
680
681 if (n < 0 && d < 0)
682 {
683 // Double negative, string can be two characters longer.
684 if (buf.str ().length () > static_cast<unsigned int> (len + 2))
685 break;
686 }
687 else
688 {
689 if (buf.str ().length () > static_cast<unsigned int> (len))
690 break;
691 }
692
693 if (std::abs (n) >= out_of_range_top
694 || std::abs (d) >= out_of_range_top)
695 break;
696
697 s = buf.str ();
698 }
699
700 if (lastd < 0)
701 {
702 // Move negative sign from denominator to numerator
703 lastd = - lastd;
704 lastn = - lastn;
705 std::ostringstream buf;
706 buf.flags (std::ios::fixed);
707 buf << std::setprecision (0) << static_cast<int> (lastn)
708 << '/' << static_cast<int> (lastd);
709 s = buf.str ();
710 }
711 }
712
713 return s;
714}
715
716// instantiate the template for float and double
717template OCTAVE_API std::string rational_approx <float> (float val, int len);
718template OCTAVE_API std::string rational_approx <double> (double val, int len);
#define Inf
Definition: Faddeeva.cc:260
#define NaN
Definition: Faddeeva.cc:261
charNDArray max(char d, const charNDArray &m)
Definition: chNDArray.cc:230
charNDArray min(char d, const charNDArray &m)
Definition: chNDArray.cc:207
octave_idx_type numel(void) const
Number of elements in the array.
Definition: Array.h:411
const dim_vector & dims(void) const
Return a const-reference so that dims ()(i) works efficiently.
Definition: Array.h:487
octave_idx_type rows(void) const
Definition: Array.h:449
bool isvector(void) const
Size of the specified dimension.
Definition: Array.h:609
ColumnVector imag(const ComplexColumnVector &a)
Definition: dColVector.cc:143
OCTAVE_NORETURN liboctave_error_handler current_liboctave_error_handler
Definition: lo-error.c:41
#define octave_NA
Definition: lo-ieee.h:41
F77_RET_T const F77_DBLE const F77_DBLE F77_DBLE * d
#define OCTAVE_API
Definition: main.in.cc:55
T x_nint(T x)
Definition: lo-mappers.h:269
bool isnan(bool)
Definition: lo-mappers.h:178
bool isinf(double x)
Definition: lo-mappers.h:203
double round(double x)
Definition: lo-mappers.h:136
OCTAVE_API std::string u8_to_encoding(const std::string &who, const std::string &u8_string, const std::string &encoding)
Definition: oct-string.cc:497
OCTAVE_API std::string u8_from_encoding(const std::string &who, const std::string &native_string, const std::string &encoding)
Definition: oct-string.cc:529
OCTAVE_API Complex str2double(const std::string &str_arg)
Definition: oct-string.cc:462
OCTAVE_API bool strncmp(const T &str_a, const T &str_b, const typename T::size_type n)
True if the first N characters are the same.
OCTAVE_API unsigned int u8_validate(const std::string &who, std::string &in_string, const u8_fallback_type type=U8_REPLACEMENT_CHAR)
Definition: oct-string.cc:559
OCTAVE_API bool strncmpi(const T &str_a, const T &str_b, const typename T::size_type n)
True if the first N characters are the same, ignoring case.
OCTAVE_API bool strcmp(const T &str_a, const T &str_b)
True if strings are the same.
OCTAVE_API bool strcmpi(const T &str_a, const T &str_b)
True if strings are the same, ignoring case.
std::complex< double > Complex
Definition: oct-cmplx.h:33
static void set_component(Complex &c, double num, bool imag)
Definition: oct-string.cc:441
static double single_num(std::istringstream &is)
Definition: oct-string.cc:245
static bool str_data_cmp(const typename T::value_type *a, const typename T::value_type *b, const typename T::size_type n)
Definition: oct-string.cc:47
static bool is_imag_unit(int c)
Definition: oct-string.cc:241
#define INSTANTIATE_OCTAVE_STRING(T, API)
Definition: oct-string.cc:213
static bool str_data_cmpi(const typename T::value_type *a, const typename T::value_type *b, const typename T::size_type n)
Definition: oct-string.cc:58
template OCTAVE_API std::string rational_approx< float >(float val, int len)
T::size_type numel(const T &str)
Definition: oct-string.cc:71
static std::istringstream & extract_num(std::istringstream &is, double &num, bool &imag, bool &have_sign)
Definition: oct-string.cc:301
bool sizes_cmp(const T &str_a, const T &str_b)
Definition: oct-string.cc:92
T::size_type strlen(const typename T::value_type *str)
Definition: oct-string.cc:85
template OCTAVE_API std::string rational_approx< double >(double val, int len)
std::string rational_approx(T val, int len)
Definition: oct-string.cc:612
void free(void *)
static T abs(T x)
Definition: pr-output.cc:1678
char * octave_u8_conv_to_encoding(const char *tocode, const uint8_t *src, size_t srclen, size_t *lengthp)
uint8_t * octave_u8_conv_from_encoding(const char *fromcode, const char *src, size_t srclen, size_t *lengthp)
const uint8_t * octave_u8_check_wrapper(const uint8_t *src, size_t n)
F77_RET_T len
Definition: xerbla.cc:61