GNU Octave 11.1.0
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab
 
Loading...
Searching...
No Matches
oct-string.cc
Go to the documentation of this file.
1////////////////////////////////////////////////////////////////////////
2//
3// Copyright (C) 2016-2026 The Octave Project Developers
4//
5// See the file COPYRIGHT.md in the top-level directory of this
6// distribution or <https://octave.org/copyright/>.
7//
8// This file is part of Octave.
9//
10// Octave is free software: you can redistribute it and/or modify it
11// under the terms of the GNU General Public License as published by
12// the Free Software Foundation, either version 3 of the License, or
13// (at your option) any later version.
14//
15// Octave is distributed in the hope that it will be useful, but
16// WITHOUT ANY WARRANTY; without even the implied warranty of
17// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18// GNU General Public License for more details.
19//
20// You should have received a copy of the GNU General Public License
21// along with Octave; see the file COPYING. If not, see
22// <https://www.gnu.org/licenses/>.
23//
24////////////////////////////////////////////////////////////////////////
25
26#if defined (HAVE_CONFIG_H)
27# include "config.h"
28#endif
29
30#include "oct-string.h"
31
32#include <algorithm>
33#include <cctype>
34#include <cstring>
35#if defined (OCTAVE_HAVE_STD_FROM_CHARS_DOUBLE)
36# include <charconv>
37#endif
38#include <iomanip>
39#include <string>
40#include <unordered_set>
41
42#if defined (OCTAVE_HAVE_FAST_FLOAT)
43# include <fast_float/fast_float.h>
44#endif
45
46#include "Array-oct.h"
47#include "iconv-wrappers.h"
48#include "lo-ieee.h"
49#include "mappers.h"
50#include "oct-locbuf.h"
51#include "uniconv-wrappers.h"
52#include "unistr-wrappers.h"
53#include "unwind-prot.h"
54
// Return true if the first N elements of A and B compare equal.
// N == 0 always yields true.
template <typename T>
static bool
str_data_cmp (const typename T::value_type *a, const typename T::value_type *b,
              const typename T::size_type n)
{
  return std::equal (a, a + n, b);
}
65
// Return true if the first N elements of A and B compare equal when
// letter case is ignored (per std::tolower in the current C locale).
//
// Each element is converted to unsigned char before it is handed to
// std::tolower: passing a negative plain char (any byte >= 0x80 where char
// is signed) is undefined behavior.  The pointer overloads in this file go
// through std::strlen, so the element type is expected to be char.
template <typename T>
static bool
str_data_cmpi (const typename T::value_type *a, const typename T::value_type *b,
               const typename T::size_type n)
{
  for (typename T::size_type i = 0; i < n; ++i)
    {
      const int lower_a = std::tolower (static_cast<unsigned char> (a[i]));
      const int lower_b = std::tolower (static_cast<unsigned char> (b[i]));

      if (lower_a != lower_b)
        return false;
    }

  return true;
}
76
77
// Templates to handle std::basic_string, std::vector, Array, and char*.

// Element count of STR for any container that provides size ().
template <typename T>
typename T::size_type
numel (const T& str)
{
  const typename T::size_type count = str.size ();
  return count;
}
85
86template <>
88numel (const Array<char>& str)
89{
90 return str.numel ();
91}
92
// Length of the NUL-terminated string STR, expressed in the size type of
// the container type T.
template <typename T>
typename T::size_type
strlen (const typename T::value_type *str)
{
  const auto len = std::strlen (str);
  return len;
}
99
// Return true if both containers report the same size ().
template <typename T>
bool
sizes_cmp (const T& str_a, const T& str_b)
{
  const auto len_a = str_a.size ();
  const auto len_b = str_b.size ();

  return len_a == len_b;
}
106
107template <>
108bool
109sizes_cmp (const Array<char>& str_a, const Array<char>& str_b)
110{
111 return str_a.dims () == str_b.dims ();
112}
113
114template <typename T>
115bool
116sizes_cmp (const T& str_a, const typename T::value_type *str_b)
117{
118 return str_a.size () == strlen<T> (str_b);
119}
120
121template <>
122bool
123sizes_cmp (const Array<char>& str_a, const char *str_b)
124{
125 return (str_a.isvector () && str_a.rows () == 1
126 && str_a.numel () == strlen<Array<char>> (str_b));
127}
128
129
130template<typename T>
131bool
132octave::string::strcmp (const T& str_a, const T& str_b)
133{
134 return (sizes_cmp (str_a, str_b)
135 && str_data_cmp<T> (str_a.data (), str_b.data (), numel (str_a)));
136}
137
138template<typename T>
139bool
140octave::string::strcmp (const T& str_a, const typename T::value_type *str_b)
141{
142 return (sizes_cmp (str_a, str_b)
143 && str_data_cmp<T> (str_a.data (), str_b, numel (str_a)));
144}
145
146
147template<typename T>
148bool
149octave::string::strcmpi (const T& str_a, const T& str_b)
150{
151 return (sizes_cmp (str_a, str_b)
152 && str_data_cmpi<T> (str_a.data (), str_b.data (), numel (str_a)));
153}
154
155template<typename T>
156bool
157octave::string::strcmpi (const T& str_a, const typename T::value_type *str_b)
158{
159 return (sizes_cmp (str_a, str_b)
160 && str_data_cmpi<T> (str_a.data (), str_b, numel (str_a)));
161}
162
163
164template<typename T>
165bool
166octave::string::strncmp (const T& str_a, const T& str_b,
167 const typename T::size_type n)
168{
169 typename T::size_type neff;
170 auto len_a = numel (str_a);
171 auto len_b = numel (str_b);
172 neff = std::min (std::max (len_a, len_b), n);
173
174 return (len_a >= neff && len_b >= neff
175 && str_data_cmp<T> (str_a.data (), str_b.data (), neff));
176}
177
178template<typename T>
179bool
180octave::string::strncmp (const T& str_a, const typename T::value_type *str_b,
181 const typename T::size_type n)
182{
183 typename T::size_type neff;
184 auto len_a = numel (str_a);
185 auto len_b = strlen<T> (str_b);
186 neff = std::min (std::max (len_a, len_b), n);
187
188 return (len_a >= neff && len_b >= neff
189 && str_data_cmp<T> (str_a.data (), str_b, neff));
190}
191
192
193template<typename T>
194bool
195octave::string::strncmpi (const T& str_a, const T& str_b,
196 const typename T::size_type n)
197{
198 typename T::size_type neff;
199 auto len_a = numel (str_a);
200 auto len_b = numel (str_b);
201 neff = std::min (std::max (len_a, len_b), n);
202
203 return (len_a >= neff && len_b >= neff
204 && str_data_cmpi<T> (str_a.data (), str_b.data (), neff));
205}
206
207template<typename T>
208bool
209octave::string::strncmpi (const T& str_a, const typename T::value_type *str_b,
210 const typename T::size_type n)
211{
212 typename T::size_type neff;
213 auto len_a = numel (str_a);
214 auto len_b = strlen<T> (str_b);
215 neff = std::min (std::max (len_a, len_b), n);
216
217 return (len_a >= neff && len_b >= neff
218 && str_data_cmpi<T> (str_a.data (), str_b, neff));
219}
220
221
222// Instantiations we need
223#define INSTANTIATE_OCTAVE_STRING(T, API) \
224 template API bool octave::string::strcmp<T> (const T&, const T&); \
225 template API bool \
226 octave::string::strcmp<T> (const T&, const typename T::value_type*); \
227 template API bool octave::string::strcmpi<T> (const T&, const T&); \
228 template API bool \
229 octave::string::strcmpi<T> (const T&, const typename T::value_type*); \
230 template API bool \
231 octave::string::strncmp<T> (const T&, const T&, \
232 const typename T::size_type); \
233 template API bool \
234 octave::string::strncmp<T> (const T&, const typename T::value_type*, \
235 const typename T::size_type); \
236 template API bool \
237 octave::string::strncmpi<T> (const T&, const T&, \
238 const typename T::size_type n); \
239 template API bool \
240 octave::string::strncmpi<T> (const T&, const typename T::value_type*, \
241 const typename T::size_type);
242
243// We could also instantiate std::vector<char> but would it be
244// useful for anyone?
247
248#undef INSTANTIATE_OCTAVE_STRING
249
// Return true if C is one of the accepted imaginary-unit letters
// (lower-case 'i' or 'j').
static inline bool
is_imag_unit (int c)
{
  switch (c)
    {
    case 'i':
    case 'j':
      return true;

    default:
      return false;
    }
}
253
254static double
255single_num (std::istringstream& is)
256{
257 double num = 0.0;
258
259 // Get the remaining string from current position
260 std::string str;
261 std::streampos start_pos = is.tellg ();
262 std::getline (is, str);
263
264 const char *first = str.data ();
265 const char *last = first + str.size ();
266
267 // Skip leading whitespace
268 while (first != last && std::isspace (static_cast<unsigned char> (*first)))
269 ++first;
270
271 if (first == last)
272 {
273 is.seekg (start_pos);
274 is.setstate (std::ios::failbit);
275 return num;
276 }
277
278 // Special check for "NA" value (not handled by from_chars)
279 if (first + 2 <= last && first[0] == 'N' && first[1] == 'A' &&
280 (first + 2 == last || std::isspace (static_cast<unsigned char> (first[2]))))
281 {
282 num = octave_NA;
283 is.seekg (start_pos + static_cast<std::streamoff> (first - str.data () + 2));
284 return num;
285 }
286
287 // Use from_chars for all other numbers (including Inf, -Inf, NaN)
288#if defined (OCTAVE_HAVE_STD_FROM_CHARS_DOUBLE)
289 auto [ptr, ec] = std::from_chars (first, last, num);
290#elif defined (OCTAVE_HAVE_FAST_FLOAT)
291 auto [ptr, ec] = fast_float::from_chars (first, last, num);
292#else
293# error "Cannot convert string to floating-point number. This should be unreachable."
294#endif
295
296 if (ec == std::errc {})
297 {
298 // Successfully parsed a number
299 std::streamoff chars_consumed = static_cast<std::streamoff> (ptr - str.data ());
300 is.seekg (start_pos + chars_consumed);
301
302 // Check if we are at EOF after consuming the number
303 if (ptr == last)
304 is.seekg (0, std::ios::end);
305 }
306 else
307 {
308 // Parse failed
309 switch (ec)
310 {
311 case std::errc::invalid_argument:
312 // No valid number could be parsed
313 is.seekg (start_pos);
314 is.setstate (std::ios::failbit);
315 break;
316 case std::errc::result_out_of_range:
317 // Number is out of range for double
318 // Determine sign and set to appropriate infinity
319 {
320 // Check for negative sign
321 const char* p = first;
322 while (p < ptr && std::isspace (static_cast<unsigned char> (*p)))
323 ++p;
324
325 if (p < ptr && *p == '-')
326 num = -octave::numeric_limits<double>::Inf ();
327 else
328 num = octave::numeric_limits<double>::Inf ();
329
330 std::streamoff chars_consumed = static_cast<std::streamoff> (ptr - str.data ());
331 is.seekg (start_pos + chars_consumed);
332 }
333 break;
334
335 default:
336 // Unexpected error
337 is.seekg (start_pos);
338 is.setstate (std::ios::failbit);
339 break;
340 }
341 }
342 return num;
343}
344
345static std::istringstream&
346extract_num (std::istringstream& is, double& num, bool& imag, bool& have_sign)
347{
348 have_sign = imag = false;
349
350 char c = is.peek ();
351
352 // Skip leading spaces.
353 while (isspace (c))
354 {
355 is.get ();
356 c = is.peek ();
357 }
358
359 bool negative = false;
360
361 // Accept leading sign.
362 if (c == '+' || c == '-')
363 {
364 have_sign = true;
365 negative = c == '-';
366 is.get ();
367 c = is.peek ();
368 }
369
370 // Skip spaces after sign.
371 while (isspace (c))
372 {
373 is.get ();
374 c = is.peek ();
375 }
376
377 // Imaginary number (i*num or just i), or maybe 'inf'.
378 if (c == 'i')
379 {
380 // possible infinity.
381 is.get ();
382 c = is.peek ();
383
384 if (is.eof ())
385 {
386 // just 'i' and string is finished. Return immediately.
387 imag = true;
388 num = (negative ? -1.0 : 1.0);
389 return is;
390 }
391 else
392 {
393 if (std::tolower (c) != 'n')
394 imag = true;
395 is.unget ();
396 }
397 }
398 else if (c == 'j')
399 imag = true;
400
401 // It's i*num or just i
402 if (imag)
403 {
404 is.get ();
405 c = is.peek ();
406 // Skip spaces after imaginary unit.
407 while (isspace (c))
408 {
409 is.get ();
410 c = is.peek ();
411 }
412
413 if (c == '*')
414 {
415 // Multiplier follows, we extract it as a number.
416 is.get ();
417 num = single_num (is);
418 if (is.good ())
419 c = is.peek ();
420 }
421 else
422 num = 1.0;
423 }
424 else
425 {
426 // It's num, num*i, or numi.
427 num = single_num (is);
428 if (is.good ())
429 {
430 c = is.peek ();
431
432 // Skip spaces after number.
433 while (isspace (c))
434 {
435 is.get ();
436 c = is.peek ();
437 }
438
439 if (c == '*')
440 {
441 is.get ();
442 c = is.peek ();
443
444 // Skip spaces after operator.
445 while (isspace (c))
446 {
447 is.get ();
448 c = is.peek ();
449 }
450
451 if (is_imag_unit (c))
452 {
453 imag = true;
454 is.get ();
455 c = is.peek ();
456 }
457 else
458 is.setstate (std::ios::failbit); // indicate read has failed.
459 }
460 else if (is_imag_unit (c))
461 {
462 imag = true;
463 is.get ();
464 c = is.peek ();
465 }
466 }
467 }
468
469 if (is.good ())
470 {
471 // Skip trailing spaces.
472 while (isspace (c))
473 {
474 is.get ();
475 c = is.peek ();
476 }
477 }
478
479 if (negative)
480 num = -num;
481
482 return is;
483}
484
485static inline void
486set_component (Complex& c, double num, bool imag)
487{
488#if defined (HAVE_CXX_COMPLEX_SETTERS)
489 if (imag)
490 c.imag (num);
491 else
492 c.real (num);
493#elif defined (HAVE_CXX_COMPLEX_REFERENCE_ACCESSORS)
494 if (imag)
495 c.imag () = num;
496 else
497 c.real () = num;
498#else
499 if (imag)
500 c = Complex (c.real (), num);
501 else
502 c = Complex (num, c.imag ());
503#endif
504}
505
507octave::string::str2double (const std::string& str_arg)
508{
509 Complex val (0.0, 0.0);
510
511 std::string str = str_arg;
512
513 // FIXME: removing all commas doesn't allow actual parsing.
514 // Example: "1,23.45" is wrong, but passes Octave.
515 str.erase (std::remove (str.begin (), str.end(), ','), str.end ());
516 std::istringstream is (str);
517
518 double num;
519 bool i1, i2, s1, s2;
520
521 if (is.eof ())
522 val = octave::numeric_limits<double>::NaN ();
523 else if (! extract_num (is, num, i1, s1))
524 val = octave::numeric_limits<double>::NaN ();
525 else
526 {
527 set_component (val, num, i1);
528
529 if (! is.eof ())
530 {
531 if (! extract_num (is, num, i2, s2) || i1 == i2 || ! s2)
532 val = octave::numeric_limits<double>::NaN ();
533 else
534 set_component (val, num, i2);
535 }
536 }
537
538 return val;
539}
540
541bool
542octave::string::any_non_ascii_chars (const std::string &s)
543{
544 for (unsigned char c : s)
545 if (c & 0x80)
546 return true;
547
548 return false;
549}
550
551std::string
552octave::string::u8_to_encoding (const std::string& who,
553 const std::string& u8_string,
554 const std::string& encoding)
555{
556 const uint8_t *src = reinterpret_cast<const uint8_t *>
557 (u8_string.c_str ());
558 std::size_t srclen = u8_string.length ();
559
560 std::size_t length;
561 char *native_str = octave_u8_conv_to_encoding (encoding.c_str (), src,
562 srclen, &length);
563
564 if (! native_str)
565 {
566 if (errno == ENOSYS)
567 (*current_liboctave_error_handler)
568 ("%s: iconv() is not supported. Installing GNU libiconv and then "
569 "re-compiling Octave could fix this.", who.c_str ());
570 else
572 ("%s: converting from UTF-8 to codepage '%s' failed: %s",
573 who.c_str (), encoding.c_str (), std::strerror (errno));
574 }
575
576 octave::unwind_action free_native_str ([native_str] () { ::free (native_str); });
577
578 std::string retval = std::string (native_str, length);
579
580 return retval;
581}
582
583std::string
584octave::string::u8_from_encoding (const std::string& who,
585 const std::string& native_string,
586 const std::string& encoding)
587{
588 const char *src = native_string.c_str ();
589 std::size_t srclen = native_string.length ();
590
591 std::size_t length;
592 uint8_t *utf8_str = octave_u8_conv_from_encoding (encoding.c_str (), src,
593 srclen, &length);
594 if (! utf8_str)
595 {
596 if (errno == ENOSYS)
597 (*current_liboctave_error_handler)
598 ("%s: iconv() is not supported. Installing GNU libiconv and then "
599 "re-compiling Octave could fix this.", who.c_str ());
600 else
602 ("%s: converting from codepage '%s' to UTF-8 failed: %s",
603 who.c_str (), encoding.c_str (), std::strerror (errno));
604 }
605
606 octave::unwind_action free_utf8_str ([utf8_str] () { ::free (utf8_str); });
607
608 std::string retval = std::string (reinterpret_cast<char *> (utf8_str), length);
609
610 return retval;
611}
612
613unsigned int
614octave::string::u8_validate (const std::string& who,
615 std::string& in_str,
616 const octave::string::u8_fallback_type type)
617{
618 std::string out_str;
619
620 unsigned int num_replacements = 0;
621 const char *in_chr = in_str.c_str ();
622 const char *inv_utf8 = in_chr;
623 const char *const in_end = in_chr + in_str.length ();
624 while (inv_utf8 && in_chr < in_end)
625 {
626 inv_utf8 = reinterpret_cast<const char *>
627 (octave_u8_check_wrapper (reinterpret_cast<const uint8_t *> (in_chr),
628 in_end - in_chr));
629
630 if (inv_utf8 == nullptr)
631 out_str.append (in_chr, in_end - in_chr);
632 else
633 {
634 num_replacements++;
635 out_str.append (in_chr, inv_utf8 - in_chr);
636 in_chr = inv_utf8 + 1;
637
638 if (type == U8_REPLACEMENT_CHAR)
639 out_str.append ("\xef\xbf\xbd");
640 else if (type == U8_ISO_8859_1)
641 {
642 std::string fallback = "iso-8859-1";
643 std::size_t lengthp;
644 uint8_t *val_utf8 = octave_u8_conv_from_encoding
645 (fallback.c_str (), inv_utf8, 1, &lengthp);
646
647 if (! val_utf8)
648 (*current_liboctave_error_handler)
649 ("%s: converting from codepage '%s' to UTF-8 failed: %s",
650 who.c_str (), fallback.c_str (), std::strerror (errno));
651
652 octave::unwind_action free_val_utf8 ([val_utf8] () { ::free (val_utf8); });
653
654 out_str.append (reinterpret_cast<const char *> (val_utf8),
655 lengthp);
656 }
657 }
658 }
659
660 in_str = out_str;
661 return num_replacements;
662}
663
664std::string
665octave::string::u16_to_encoding (const std::string& who,
666 const std::u16string& u16_string,
667 const std::string& encoding)
668{
669 const uint16_t *src = reinterpret_cast<const uint16_t *>
670 (u16_string.c_str ());
671 std::size_t srclen = u16_string.length ();
672
673 std::size_t length;
674 char *native_str = octave_u16_conv_to_encoding (encoding.c_str (), src,
675 srclen, &length);
676
677 if (! native_str)
678 {
679 if (errno == ENOSYS)
680 (*current_liboctave_error_handler)
681 ("%s: iconv() is not supported. Installing GNU libiconv and then "
682 "re-compiling Octave could fix this.", who.c_str ());
683 else
685 ("%s: converting from UTF-16 to codepage '%s' failed: %s",
686 who.c_str (), encoding.c_str (), std::strerror (errno));
687 }
688
689 octave::unwind_action free_native_str ([native_str] () { ::free (native_str); });
690
691 std::string retval = std::string (native_str, length);
692
693 return retval;
694}
695
696std::vector<std::string>
697octave::string::get_encoding_list ()
698{
699 static std::vector<std::string> encoding_list;
700
701 if (encoding_list.empty ())
702 {
703#if defined (HAVE_ICONVLIST)
704 // get number of supported encodings
705 std::size_t count = 0;
707 [] (unsigned int num, const char * const *, void *data) -> int
708 {
709 std::size_t *count_ptr = static_cast<std::size_t *> (data);
710 *count_ptr = num;
711 return 0;
712 },
713 &count);
714
715 if (count == static_cast<size_t> (-1))
716 {
717 encoding_list.push_back ("UTF-8");
718 return encoding_list;
719 }
720
721# if defined (HAVE_ICONV_CANONICALIZE)
722 // use unordered_set to skip canonicalized aliases
723 std::unordered_set<std::string> encoding_set;
724 encoding_set.reserve (count);
725
726 // populate vector with name of encodings
728 [] (unsigned int num, const char * const *names, void *data) -> int
729 {
730 std::unordered_set<std::string> *encoding_set_ptr
731 = static_cast<std::unordered_set<std::string> *> (data);
732 for (std::size_t i = 0; i < num; i++)
733 {
734 const char *canonicalized_enc
736 encoding_set_ptr->insert (canonicalized_enc);
737 }
738 return 0;
739 },
740 &encoding_set);
741
742 encoding_list.assign (encoding_set.begin (), encoding_set.end ());
743# endif
744
745#else
746 // Use hardcoded list of encodings as a fallback for platforms without
747 // iconvlist (or another way of programmatically querrying a list of
748 // supported encodings).
749 // This list is inspired by the encodings supported by Geany.
750 encoding_list
751 = {"ISO-8859-1",
752 "ISO-8859-2",
753 "ISO-8859-3",
754 "ISO-8859-4",
755 "ISO-8859-5",
756 "ISO-8859-6",
757 "ISO-8859-7",
758 "ISO-8859-8",
759 "ISO-8859-9",
760 "ISO-8859-10",
761 "ISO-8859-13",
762 "ISO-8859-14",
763 "ISO-8859-15",
764 "ISO-8859-16",
765
766 "UTF-7",
767 "UTF-8",
768 "UTF-16LE",
769 "UTF-16BE",
770 "UTF-32LE",
771 "UTF-32BE",
772 "UCS-2LE",
773 "UCS-2BE",
774
775 "ARMSCII-8",
776 "BIG5",
777 "BIG5-HKSCS",
778 "CP866",
779
780 "EUC-JP",
781 "EUC-KR",
782 "EUC-TW",
783
784 "GB18030",
785 "GB_2312-80",
786 "GBK",
787 "HZ",
788
789 "IBM850",
790 "IBM852",
791 "IBM855",
792 "IBM857",
793 "IBM862",
794 "IBM864",
795
796 "ISO-2022-JP",
797 "ISO-2022-KR",
798 "JOHAB",
799 "KOI8-R",
800 "KOI8-U",
801
802 "SHIFT_JIS",
803 "TCVN",
804 "TIS-620",
805 "UHC",
806 "VISCII",
807
808 "CP1250",
809 "CP1251",
810 "CP1252",
811 "CP1253",
812 "CP1254",
813 "CP1255",
814 "CP1256",
815 "CP1257",
816 "CP1258",
817
818 "CP932"
819 };
820
821 // FIXME: Should we check whether those are actually valid encoding
822 // identifiers?
823#endif
824
825 // sort list of encodings
826 std::sort (encoding_list.begin (), encoding_list.end ());
827 }
828
829 return encoding_list;
830}
831
832typedef octave::string::codecvt_u8::InternT InternT;
833typedef octave::string::codecvt_u8::ExternT ExternT;
834typedef octave::string::codecvt_u8::StateT StateT;
835
836typename std::codecvt<InternT, ExternT, StateT>::result
837octave::string::codecvt_u8::do_out
838 (StateT& /* state */,
839 const InternT* from, const InternT* from_end, const InternT*& from_next,
840 ExternT* to, ExternT* to_end, ExternT*& to_next) const
841{
842 to_next = to;
843 if (from_end <= from)
844 {
845 from_next = from_end;
846 return std::codecvt<InternT, ExternT, StateT>::noconv;
847 }
848
849 // Check if buffer ends in a complete UTF-8 surrogate.
850 // FIXME: If this is the last call before a stream is closed, we should
851 // convert trailing bytes even if they look incomplete.
852 // How can we detect that?
853 std::size_t pop_end = 0;
854 if ((*(from_end-1) & 0b10000000) == 0b10000000)
855 {
856 // The last byte is part of a surrogate. Check if it is complete.
857
858 // number of bytes of the surrogate in the buffer
859 std::size_t num_bytes_in_buf = 1;
860 // Find initial byte of surrogate
861 while (((*(from_end-num_bytes_in_buf) & 0b11000000) != 0b11000000)
862 && (num_bytes_in_buf < 4)
863 && (from_end-num_bytes_in_buf > from))
864 num_bytes_in_buf++;
865
866 // If the start of the surrogate is not in the buffer, we need to
867 // continue with the invalid UTF-8 sequence to avoid an infinite loop.
868 // Check if we found an initial byte and if there are enough bytes in the
869 // buffer to complete the surrogate.
870 if ((((*(from_end-num_bytes_in_buf) & 0b11100000) == 0b11000000)
871 && (num_bytes_in_buf < 2)) // incomplete 2-byte surrogate
872 || (((*(from_end-num_bytes_in_buf) & 0b11110000) == 0b11100000)
873 && (num_bytes_in_buf < 3)) // incomplete 3-byte surrogate
874 || (((*(from_end-num_bytes_in_buf) & 0b11111000) == 0b11110000)
875 && (num_bytes_in_buf < 4))) // incomplete 4-byte surrogate
876 pop_end = num_bytes_in_buf;
877 }
878 from_next = from_end - pop_end;
879
880 std::size_t srclen = (from_end-from-pop_end) * sizeof (InternT);
881 std::size_t length = (to_end-to) * sizeof (ExternT);
882 if (srclen < 1 || length < 1)
883 return std::codecvt<InternT, ExternT, StateT>::partial;
884
885 // Convert from UTF-8 to output encoding
886 const uint8_t *u8_str = reinterpret_cast<const uint8_t *> (from);
887 char *enc_str = octave_u8_conv_to_encoding (m_enc.c_str (), u8_str, srclen,
888 &length);
889
890 if (length < 1)
891 return std::codecvt<InternT, ExternT, StateT>::partial;
892
893 size_t max = (to_end - to) * sizeof (ExternT);
894 // FIXME: If the output encoding is a multibyte or variable byte encoding,
895 // we should ensure that we don't cut off a "partial" surrogate from
896 // the output.
897 // Can this ever happen?
898 if (length < max)
899 max = length;
900
901 // copy conversion result to output
902 std::copy_n (enc_str, max, to);
903 ::free (enc_str);
904
905 from_next = from + srclen;
906 to_next = to + max;
907
908 return ((pop_end > 0 || max < length)
909 ? std::codecvt<InternT, ExternT, StateT>::partial
910 : std::codecvt<InternT, ExternT, StateT>::ok);
911}
912
913typename std::codecvt<InternT, ExternT, StateT>::result
914octave::string::codecvt_u8::do_in
915 (StateT& /* state */,
916 const ExternT* from, const ExternT* from_end, const ExternT*& from_next,
917 InternT* to, InternT* to_end, InternT*& to_next) const
918{
919 // Convert from input encoding to UTF-8
920 std::size_t srclen = (from_end-from) * sizeof (ExternT);
921 std::size_t lengthp = (to_end-to) * sizeof (InternT);
922 const char *enc_str = reinterpret_cast<const char *> (from);
923 uint8_t *u8_str = octave_u8_conv_from_encoding (m_enc.c_str (),
924 enc_str, srclen, &lengthp);
925
926 std::size_t max = to_end - to;
927 if (lengthp < max)
928 max = lengthp;
929
930 // copy conversion result to output
931 std::copy_n (u8_str, max, to);
932 ::free (u8_str);
933
934 from_next = from + srclen;
935 to_next = to + max;
936
937 return std::codecvt<InternT, ExternT, StateT>::ok;
938}
939
940int octave::string::codecvt_u8::do_length
941 (StateT& /* state */, const ExternT *src, const ExternT *end,
942 std::size_t max) const
943{
944 // return number of external characters that produce MAX internal ones
945 std::size_t srclen = end-src;
946 OCTAVE_LOCAL_BUFFER (std::size_t, offsets, srclen);
947 std::size_t lengthp = max;
948 octave_u8_conv_from_encoding_offsets (m_enc.c_str (), src, srclen, offsets,
949 &lengthp);
950 std::size_t ext_char;
951 for (ext_char = 0; ext_char < srclen; ext_char++)
952 {
953 if (offsets[ext_char] != static_cast<size_t> (-1)
954 && offsets[ext_char] >= max)
955 break;
956 }
957
958 return ext_char;
959}
960
961
962template <typename T>
963std::string
965{
966 std::string s;
967
968 if (len <= 0)
969 len = 10;
970
971 static constexpr T out_of_range_top
972 = static_cast<T> (std::numeric_limits<int>::max ()) + 1.0;
973 static constexpr T out_of_range_bottom
974 = static_cast<T> (std::numeric_limits<int>::min ()) - 1.0;
975
976 if (octave::math::isinf (val))
977 {
978 if (val > 0)
979 s = "1/0";
980 else
981 s = "-1/0";
982 }
983 else if (octave::math::isnan (val))
984 s = "0/0";
985 else if (val <= out_of_range_bottom || val >= out_of_range_top
986 || octave::math::is_integer (val))
987 {
988 std::ostringstream buf;
989 buf.flags (std::ios::fixed);
990 buf << std::setprecision (0) << octave::math::round (val);
991 s = buf.str ();
992 }
993 else
994 {
995 T lastn = 1;
996 T lastd = 0;
997 T n = octave::math::round (val);
998 T d = 1;
999 T frac = val - n;
1000
1001 std::ostringstream init_buf;
1002 init_buf.flags (std::ios::fixed);
1003 init_buf << std::setprecision (0) << static_cast<int> (n);
1004 s = init_buf.str ();
1005
1006 while (true)
1007 {
1008 T flip = 1 / frac;
1009 T step = octave::math::round (flip);
1010 T nextn = n;
1011 T nextd = d;
1012
1013 // Have we converged to 1/intmax ?
1014 if (std::abs (flip) > out_of_range_top)
1015 {
1016 lastn = n;
1017 lastd = d;
1018 break;
1019 }
1020
1021 frac = flip - step;
1022 n = step * n + lastn;
1023 d = step * d + lastd;
1024 lastn = nextn;
1025 lastd = nextd;
1026
1027 if (std::abs (n) >= out_of_range_top
1028 || std::abs (d) >= out_of_range_top)
1029 break;
1030
1031 std::ostringstream buf;
1032 buf.flags (std::ios::fixed);
1033 buf << std::setprecision (0) << static_cast<int> (n)
1034 << '/' << static_cast<int> (d);
1035
1036 if (n < 0 && d < 0)
1037 {
1038 // Double negative, string can be two characters longer.
1039 if (buf.str ().length () > static_cast<unsigned int> (len + 2))
1040 break;
1041 }
1042 else
1043 {
1044 if (buf.str ().length () > static_cast<unsigned int> (len))
1045 break;
1046 }
1047
1048 s = buf.str ();
1049 }
1050
1051 if (lastd < 0)
1052 {
1053 // Move negative sign from denominator to numerator
1054 lastd = - lastd;
1055 lastn = - lastn;
1056 std::ostringstream buf;
1057 buf.flags (std::ios::fixed);
1058 buf << std::setprecision (0) << static_cast<int> (lastn)
1059 << '/' << static_cast<int> (lastd);
1060 s = buf.str ();
1061 }
1062 }
1063
1064 return s;
1065}
1066
1067// instantiate the template for float and double
1068template OCTAVE_API std::string rational_approx<float> (float val, int len);
1069template OCTAVE_API std::string rational_approx<double> (double val, int len);
charNDArray max(char d, const charNDArray &m)
Definition chNDArray.cc:230
N Dimensional Array with copy-on-write semantics.
Definition Array-base.h:130
const dim_vector & dims() const
Return a const-reference so that dims ()(i) works efficiently.
Definition Array-base.h:529
bool isvector() const
Size of the specified dimension.
Definition Array-base.h:677
octave_idx_type rows() const
Definition Array-base.h:485
octave_idx_type numel() const
Number of elements in the array.
Definition Array-base.h:440
ColumnVector imag(const ComplexColumnVector &a)
void octave_iconvlist_wrapper(int(*do_one)(unsigned int namescount, const char *const *names, void *data), void *data)
const char * octave_iconv_canonicalize_wrapper(const char *name)
OCTAVE_NORETURN liboctave_error_handler current_liboctave_error_handler
Definition lo-error.c:41
#define octave_NA
Definition lo-ieee.h:43
#define OCTAVE_API
Definition main.in.cc:55
std::complex< double > Complex
Definition oct-cmplx.h:33
#define OCTAVE_LOCAL_BUFFER(T, buf, size)
Definition oct-locbuf.h:44
template std::string rational_approx< double >(double val, int len)
template std::string rational_approx< float >(float val, int len)
octave::string::codecvt_u8::ExternT ExternT
#define INSTANTIATE_OCTAVE_STRING(T, API)
octave::string::codecvt_u8::InternT InternT
octave::string::codecvt_u8::StateT StateT
T::size_type numel(const T &str)
Definition oct-string.cc:81
bool sizes_cmp(const T &str_a, const T &str_b)
T::size_type strlen(const typename T::value_type *str)
Definition oct-string.cc:95
std::string rational_approx(T val, int len)
@ U8_ISO_8859_1
Definition oct-string.h:159
@ U8_REPLACEMENT_CHAR
Definition oct-string.h:158
void free(void *)
F77_RET_T const F77_DBLE const F77_DBLE F77_DBLE * d
char * octave_u8_conv_to_encoding(const char *tocode, const uint8_t *src, size_t srclen, size_t *lengthp)
uint8_t * octave_u8_conv_from_encoding(const char *fromcode, const char *src, size_t srclen, size_t *lengthp)
char * octave_u16_conv_to_encoding(const char *tocode, const uint16_t *src, size_t srclen, size_t *lengthp)
uint8_t * octave_u8_conv_from_encoding_offsets(const char *fromcode, const char *src, size_t srclen, size_t *offsets, size_t *lengthp)
const uint8_t * octave_u8_check_wrapper(const uint8_t *src, size_t n)
F77_RET_T len
Definition xerbla.cc:61