GNU Octave 10.1.0
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab
 
Loading...
Searching...
No Matches
oct-string.cc
Go to the documentation of this file.
1////////////////////////////////////////////////////////////////////////
2//
3// Copyright (C) 2016-2025 The Octave Project Developers
4//
5// See the file COPYRIGHT.md in the top-level directory of this
6// distribution or <https://octave.org/copyright/>.
7//
8// This file is part of Octave.
9//
10// Octave is free software: you can redistribute it and/or modify it
11// under the terms of the GNU General Public License as published by
12// the Free Software Foundation, either version 3 of the License, or
13// (at your option) any later version.
14//
15// Octave is distributed in the hope that it will be useful, but
16// WITHOUT ANY WARRANTY; without even the implied warranty of
17// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18// GNU General Public License for more details.
19//
20// You should have received a copy of the GNU General Public License
21// along with Octave; see the file COPYING. If not, see
22// <https://www.gnu.org/licenses/>.
23//
24////////////////////////////////////////////////////////////////////////
25
26#if defined (HAVE_CONFIG_H)
27# include "config.h"
28#endif
29
30#include "oct-string.h"
31
32#include <algorithm>
33#include <cctype>
34#include <cstring>
35#include <iomanip>
36#include <string>
37#include <unordered_set>
38
39#include "Array.h"
40#include "iconv-wrappers.h"
41#include "lo-ieee.h"
42#include "lo-mappers.h"
43#include "oct-locbuf.h"
44#include "uniconv-wrappers.h"
45#include "unistr-wrappers.h"
46#include "unwind-prot.h"
47
// Return true if the first N elements of A and B are identical.
// An empty range (N == 0) compares equal.
template <typename T>
static bool
str_data_cmp (const typename T::value_type *a, const typename T::value_type *b,
              const typename T::size_type n)
{
  return std::equal (a, a + n, b);
}
58
// Return true if the first N elements of A and B are equal when compared
// case-insensitively (each element is folded with std::tolower).
// An empty range (N == 0) compares equal.
template <typename T>
static bool
str_data_cmpi (const typename T::value_type *a, const typename T::value_type *b,
               const typename T::size_type n)
{
  // std::tolower has undefined behavior for negative arguments other than
  // EOF.  Plain char may be signed, so bytes >= 0x80 must be converted to
  // unsigned char before they are handed to it.
  const auto fold = [] (typename T::value_type ch) -> int
  {
    return std::tolower (static_cast<unsigned char> (ch));
  };

  for (typename T::size_type i = 0; i < n; ++i)
    if (fold (a[i]) != fold (b[i]))
      return false;

  return true;
}
69
70
71// Templates to handle std::basic_string, std::vector, Array, and char*.
// Generic element count: forwards to the container's size () member.
template <typename T>
typename T::size_type
numel (const T& str)
{
  const typename T::size_type count = str.size ();
  return count;
}
78
79template <>
81numel (const Array<char>& str)
82{
83 return str.numel ();
84}
85
// Length of the NUL-terminated character sequence STR, expressed in the
// size type of the string class T.
template <typename T>
typename T::size_type
strlen (const typename T::value_type *str)
{
  const typename T::size_type len = std::strlen (str);
  return len;
}
92
// Return true if both strings hold the same number of elements.
template <typename T>
bool
sizes_cmp (const T& str_a, const T& str_b)
{
  const auto size_a = str_a.size ();
  const auto size_b = str_b.size ();

  return size_a == size_b;
}
99
100template <>
101bool
102sizes_cmp (const Array<char>& str_a, const Array<char>& str_b)
103{
104 return str_a.dims () == str_b.dims ();
105}
106
107template <typename T>
108bool
109sizes_cmp (const T& str_a, const typename T::value_type *str_b)
110{
111 return str_a.size () == strlen<T> (str_b);
112}
113
114template <>
115bool
116sizes_cmp (const Array<char>& str_a, const char *str_b)
117{
118 return (str_a.isvector () && str_a.rows () == 1
119 && str_a.numel () == strlen<Array<char>> (str_b));
120}
121
122
123template<typename T>
124bool
125octave::string::strcmp (const T& str_a, const T& str_b)
126{
127 return (sizes_cmp (str_a, str_b)
128 && str_data_cmp<T> (str_a.data (), str_b.data (), numel (str_a)));
129}
130
131template<typename T>
132bool
133octave::string::strcmp (const T& str_a, const typename T::value_type *str_b)
134{
135 return (sizes_cmp (str_a, str_b)
136 && str_data_cmp<T> (str_a.data (), str_b, numel (str_a)));
137}
138
139
140template<typename T>
141bool
142octave::string::strcmpi (const T& str_a, const T& str_b)
143{
144 return (sizes_cmp (str_a, str_b)
145 && str_data_cmpi<T> (str_a.data (), str_b.data (), numel (str_a)));
146}
147
148template<typename T>
149bool
150octave::string::strcmpi (const T& str_a, const typename T::value_type *str_b)
151{
152 return (sizes_cmp (str_a, str_b)
153 && str_data_cmpi<T> (str_a.data (), str_b, numel (str_a)));
154}
155
156
157template<typename T>
158bool
159octave::string::strncmp (const T& str_a, const T& str_b,
160 const typename T::size_type n)
161{
162 typename T::size_type neff;
163 auto len_a = numel (str_a);
164 auto len_b = numel (str_b);
165 neff = std::min (std::max (len_a, len_b), n);
166
167 return (len_a >= neff && len_b >= neff
168 && str_data_cmp<T> (str_a.data (), str_b.data (), neff));
169}
170
171template<typename T>
172bool
173octave::string::strncmp (const T& str_a, const typename T::value_type *str_b,
174 const typename T::size_type n)
175{
176 typename T::size_type neff;
177 auto len_a = numel (str_a);
178 auto len_b = strlen<T> (str_b);
179 neff = std::min (std::max (len_a, len_b), n);
180
181 return (len_a >= neff && len_b >= neff
182 && str_data_cmp<T> (str_a.data (), str_b, neff));
183}
184
185
186template<typename T>
187bool
188octave::string::strncmpi (const T& str_a, const T& str_b,
189 const typename T::size_type n)
190{
191 typename T::size_type neff;
192 auto len_a = numel (str_a);
193 auto len_b = numel (str_b);
194 neff = std::min (std::max (len_a, len_b), n);
195
196 return (len_a >= neff && len_b >= neff
197 && str_data_cmpi<T> (str_a.data (), str_b.data (), neff));
198}
199
200template<typename T>
201bool
202octave::string::strncmpi (const T& str_a, const typename T::value_type *str_b,
203 const typename T::size_type n)
204{
205 typename T::size_type neff;
206 auto len_a = numel (str_a);
207 auto len_b = strlen<T> (str_b);
208 neff = std::min (std::max (len_a, len_b), n);
209
210 return (len_a >= neff && len_b >= neff
211 && str_data_cmpi<T> (str_a.data (), str_b, neff));
212}
213
214
215// Instantiations we need
216#define INSTANTIATE_OCTAVE_STRING(T, API) \
217 template API bool octave::string::strcmp<T> (const T&, const T&); \
218 template API bool \
219 octave::string::strcmp<T> (const T&, const typename T::value_type*); \
220 template API bool octave::string::strcmpi<T> (const T&, const T&); \
221 template API bool \
222 octave::string::strcmpi<T> (const T&, const typename T::value_type*); \
223 template API bool \
224 octave::string::strncmp<T> (const T&, const T&, \
225 const typename T::size_type); \
226 template API bool \
227 octave::string::strncmp<T> (const T&, const typename T::value_type*, \
228 const typename T::size_type); \
229 template API bool \
230 octave::string::strncmpi<T> (const T&, const T&, \
231 const typename T::size_type n); \
232 template API bool \
233 octave::string::strncmpi<T> (const T&, const typename T::value_type*, \
234 const typename T::size_type);
235
236// We could also instantiate std::vector<char> but would it be
237// useful for anyone?
240
241#undef INSTANTIATE_OCTAVE_STRING
242
// Return true if C is one of the two accepted imaginary-unit letters.
static inline bool
is_imag_unit (int c)
{
  switch (c)
    {
    case 'i':
    case 'j':
      return true;

    default:
      return false;
    }
}
246
247static double
248single_num (std::istringstream& is)
249{
250 double num = 0.0;
251
252 char c = is.peek ();
253
254 // Skip spaces.
255 while (isspace (c))
256 {
257 is.get ();
258 c = is.peek ();
259 }
260
261 if (std::toupper (c) == 'I')
262 {
263 // It's infinity.
264 is.get ();
265 char c1 = is.get ();
266 char c2 = is.get ();
267 if (std::tolower (c1) == 'n' && std::tolower (c2) == 'f')
268 {
269 num = octave::numeric_limits<double>::Inf ();
270 is.peek (); // May set EOF bit.
271 }
272 else
273 is.setstate (std::ios::failbit); // indicate that read has failed.
274 }
275 else if (c == 'N')
276 {
277 // It's NA or NaN
278 is.get ();
279 char c1 = is.get ();
280 if (c1 == 'A')
281 {
282 num = octave_NA;
283 is.peek (); // May set EOF bit.
284 }
285 else
286 {
287 char c2 = is.get ();
288 if (c1 == 'a' && c2 == 'N')
289 {
290 num = octave::numeric_limits<double>::NaN ();
291 is.peek (); // May set EOF bit.
292 }
293 else
294 is.setstate (std::ios::failbit); // indicate that read has failed.
295 }
296 }
297 else
298 is >> num;
299
300 return num;
301}
302
303static std::istringstream&
304extract_num (std::istringstream& is, double& num, bool& imag, bool& have_sign)
305{
306 have_sign = imag = false;
307
308 char c = is.peek ();
309
310 // Skip leading spaces.
311 while (isspace (c))
312 {
313 is.get ();
314 c = is.peek ();
315 }
316
317 bool negative = false;
318
319 // Accept leading sign.
320 if (c == '+' || c == '-')
321 {
322 have_sign = true;
323 negative = c == '-';
324 is.get ();
325 c = is.peek ();
326 }
327
328 // Skip spaces after sign.
329 while (isspace (c))
330 {
331 is.get ();
332 c = is.peek ();
333 }
334
335 // Imaginary number (i*num or just i), or maybe 'inf'.
336 if (c == 'i')
337 {
338 // possible infinity.
339 is.get ();
340 c = is.peek ();
341
342 if (is.eof ())
343 {
344 // just 'i' and string is finished. Return immediately.
345 imag = true;
346 num = (negative ? -1.0 : 1.0);
347 return is;
348 }
349 else
350 {
351 if (std::tolower (c) != 'n')
352 imag = true;
353 is.unget ();
354 }
355 }
356 else if (c == 'j')
357 imag = true;
358
359 // It's i*num or just i
360 if (imag)
361 {
362 is.get ();
363 c = is.peek ();
364 // Skip spaces after imaginary unit.
365 while (isspace (c))
366 {
367 is.get ();
368 c = is.peek ();
369 }
370
371 if (c == '*')
372 {
373 // Multiplier follows, we extract it as a number.
374 is.get ();
375 num = single_num (is);
376 if (is.good ())
377 c = is.peek ();
378 }
379 else
380 num = 1.0;
381 }
382 else
383 {
384 // It's num, num*i, or numi.
385 num = single_num (is);
386 if (is.good ())
387 {
388 c = is.peek ();
389
390 // Skip spaces after number.
391 while (isspace (c))
392 {
393 is.get ();
394 c = is.peek ();
395 }
396
397 if (c == '*')
398 {
399 is.get ();
400 c = is.peek ();
401
402 // Skip spaces after operator.
403 while (isspace (c))
404 {
405 is.get ();
406 c = is.peek ();
407 }
408
409 if (is_imag_unit (c))
410 {
411 imag = true;
412 is.get ();
413 c = is.peek ();
414 }
415 else
416 is.setstate (std::ios::failbit); // indicate read has failed.
417 }
418 else if (is_imag_unit (c))
419 {
420 imag = true;
421 is.get ();
422 c = is.peek ();
423 }
424 }
425 }
426
427 if (is.good ())
428 {
429 // Skip trailing spaces.
430 while (isspace (c))
431 {
432 is.get ();
433 c = is.peek ();
434 }
435 }
436
437 if (negative)
438 num = -num;
439
440 return is;
441}
442
443static inline void
444set_component (Complex& c, double num, bool imag)
445{
446#if defined (HAVE_CXX_COMPLEX_SETTERS)
447 if (imag)
448 c.imag (num);
449 else
450 c.real (num);
451#elif defined (HAVE_CXX_COMPLEX_REFERENCE_ACCESSORS)
452 if (imag)
453 c.imag () = num;
454 else
455 c.real () = num;
456#else
457 if (imag)
458 c = Complex (c.real (), num);
459 else
460 c = Complex (num, c.imag ());
461#endif
462}
463
465octave::string::str2double (const std::string& str_arg)
466{
467 Complex val (0.0, 0.0);
468
469 std::string str = str_arg;
470
471 // FIXME: removing all commas doesn't allow actual parsing.
472 // Example: "1,23.45" is wrong, but passes Octave.
473 str.erase (std::remove (str.begin (), str.end(), ','), str.end ());
474 std::istringstream is (str);
475
476 double num;
477 bool i1, i2, s1, s2;
478
479 if (is.eof ())
480 val = octave::numeric_limits<double>::NaN ();
481 else if (! extract_num (is, num, i1, s1))
482 val = octave::numeric_limits<double>::NaN ();
483 else
484 {
485 set_component (val, num, i1);
486
487 if (! is.eof ())
488 {
489 if (! extract_num (is, num, i2, s2) || i1 == i2 || ! s2)
490 val = octave::numeric_limits<double>::NaN ();
491 else
492 set_component (val, num, i2);
493 }
494 }
495
496 return val;
497}
498
499std::string
500octave::string::u8_to_encoding (const std::string& who,
501 const std::string& u8_string,
502 const std::string& encoding)
503{
504 const uint8_t *src = reinterpret_cast<const uint8_t *>
505 (u8_string.c_str ());
506 std::size_t srclen = u8_string.length ();
507
508 std::size_t length;
509 char *native_str = octave_u8_conv_to_encoding (encoding.c_str (), src,
510 srclen, &length);
511
512 if (! native_str)
513 {
514 if (errno == ENOSYS)
515 (*current_liboctave_error_handler)
516 ("%s: iconv() is not supported. Installing GNU libiconv and then "
517 "re-compiling Octave could fix this.", who.c_str ());
518 else
520 ("%s: converting from UTF-8 to codepage '%s' failed: %s",
521 who.c_str (), encoding.c_str (), std::strerror (errno));
522 }
523
524 octave::unwind_action free_native_str ([native_str] () { ::free (native_str); });
525
526 std::string retval = std::string (native_str, length);
527
528 return retval;
529}
530
531std::string
532octave::string::u8_from_encoding (const std::string& who,
533 const std::string& native_string,
534 const std::string& encoding)
535{
536 const char *src = native_string.c_str ();
537 std::size_t srclen = native_string.length ();
538
539 std::size_t length;
540 uint8_t *utf8_str = octave_u8_conv_from_encoding (encoding.c_str (), src,
541 srclen, &length);
542 if (! utf8_str)
543 {
544 if (errno == ENOSYS)
545 (*current_liboctave_error_handler)
546 ("%s: iconv() is not supported. Installing GNU libiconv and then "
547 "re-compiling Octave could fix this.", who.c_str ());
548 else
550 ("%s: converting from codepage '%s' to UTF-8 failed: %s",
551 who.c_str (), encoding.c_str (), std::strerror (errno));
552 }
553
554 octave::unwind_action free_utf8_str ([utf8_str] () { ::free (utf8_str); });
555
556 std::string retval = std::string (reinterpret_cast<char *> (utf8_str), length);
557
558 return retval;
559}
560
561unsigned int
562octave::string::u8_validate (const std::string& who,
563 std::string& in_str,
564 const octave::string::u8_fallback_type type)
565{
566 std::string out_str;
567
568 unsigned int num_replacements = 0;
569 const char *in_chr = in_str.c_str ();
570 const char *inv_utf8 = in_chr;
571 const char *const in_end = in_chr + in_str.length ();
572 while (inv_utf8 && in_chr < in_end)
573 {
574 inv_utf8 = reinterpret_cast<const char *>
575 (octave_u8_check_wrapper (reinterpret_cast<const uint8_t *> (in_chr),
576 in_end - in_chr));
577
578 if (inv_utf8 == nullptr)
579 out_str.append (in_chr, in_end - in_chr);
580 else
581 {
582 num_replacements++;
583 out_str.append (in_chr, inv_utf8 - in_chr);
584 in_chr = inv_utf8 + 1;
585
586 if (type == U8_REPLACEMENT_CHAR)
587 out_str.append ("\xef\xbf\xbd");
588 else if (type == U8_ISO_8859_1)
589 {
590 std::string fallback = "iso-8859-1";
591 std::size_t lengthp;
592 uint8_t *val_utf8 = octave_u8_conv_from_encoding
593 (fallback.c_str (), inv_utf8, 1, &lengthp);
594
595 if (! val_utf8)
596 (*current_liboctave_error_handler)
597 ("%s: converting from codepage '%s' to UTF-8 failed: %s",
598 who.c_str (), fallback.c_str (), std::strerror (errno));
599
600 octave::unwind_action free_val_utf8 ([val_utf8] () { ::free (val_utf8); });
601
602 out_str.append (reinterpret_cast<const char *> (val_utf8),
603 lengthp);
604 }
605 }
606 }
607
608 in_str = out_str;
609 return num_replacements;
610}
611
612std::string
613octave::string::u16_to_encoding (const std::string& who,
614 const std::u16string& u16_string,
615 const std::string& encoding)
616{
617 const uint16_t *src = reinterpret_cast<const uint16_t *>
618 (u16_string.c_str ());
619 std::size_t srclen = u16_string.length ();
620
621 std::size_t length;
622 char *native_str = octave_u16_conv_to_encoding (encoding.c_str (), src,
623 srclen, &length);
624
625 if (! native_str)
626 {
627 if (errno == ENOSYS)
628 (*current_liboctave_error_handler)
629 ("%s: iconv() is not supported. Installing GNU libiconv and then "
630 "re-compiling Octave could fix this.", who.c_str ());
631 else
633 ("%s: converting from UTF-16 to codepage '%s' failed: %s",
634 who.c_str (), encoding.c_str (), std::strerror (errno));
635 }
636
637 octave::unwind_action free_native_str ([native_str] () { ::free (native_str); });
638
639 std::string retval = std::string (native_str, length);
640
641 return retval;
642}
643
644std::vector<std::string>
645octave::string::get_encoding_list ()
646{
647 static std::vector<std::string> encoding_list;
648
649 if (encoding_list.empty ())
650 {
651#if defined (HAVE_ICONVLIST)
652 // get number of supported encodings
653 std::size_t count = 0;
655 [] (unsigned int num, const char * const *, void *data) -> int
656 {
657 std::size_t *count_ptr = static_cast<std::size_t *> (data);
658 *count_ptr = num;
659 return 0;
660 },
661 &count);
662
663 if (count == static_cast<size_t> (-1))
664 {
665 encoding_list.push_back ("UTF-8");
666 return encoding_list;
667 }
668
669# if defined (HAVE_ICONV_CANONICALIZE)
670 // use unordered_set to skip canonicalized aliases
671 std::unordered_set<std::string> encoding_set;
672 encoding_set.reserve (count);
673
674 // populate vector with name of encodings
676 [] (unsigned int num, const char * const *names, void *data) -> int
677 {
678 std::unordered_set<std::string> *encoding_set_ptr
679 = static_cast<std::unordered_set<std::string> *> (data);
680 for (std::size_t i = 0; i < num; i++)
681 {
682 const char *canonicalized_enc
684 encoding_set_ptr->insert (canonicalized_enc);
685 }
686 return 0;
687 },
688 &encoding_set);
689
690 encoding_list.assign (encoding_set.begin (), encoding_set.end ());
691# endif
692
693#else
694 // Use hardcoded list of encodings as a fallback for platforms without
695 // iconvlist (or another way of programmatically querrying a list of
696 // supported encodings).
697 // This list is inspired by the encodings supported by Geany.
698 encoding_list
699 = {"ISO-8859-1",
700 "ISO-8859-2",
701 "ISO-8859-3",
702 "ISO-8859-4",
703 "ISO-8859-5",
704 "ISO-8859-6",
705 "ISO-8859-7",
706 "ISO-8859-8",
707 "ISO-8859-9",
708 "ISO-8859-10",
709 "ISO-8859-13",
710 "ISO-8859-14",
711 "ISO-8859-15",
712 "ISO-8859-16",
713
714 "UTF-7",
715 "UTF-8",
716 "UTF-16LE",
717 "UTF-16BE",
718 "UTF-32LE",
719 "UTF-32BE",
720 "UCS-2LE",
721 "UCS-2BE",
722
723 "ARMSCII-8",
724 "BIG5",
725 "BIG5-HKSCS",
726 "CP866",
727
728 "EUC-JP",
729 "EUC-KR",
730 "EUC-TW",
731
732 "GB18030",
733 "GB_2312-80",
734 "GBK",
735 "HZ",
736
737 "IBM850",
738 "IBM852",
739 "IBM855",
740 "IBM857",
741 "IBM862",
742 "IBM864",
743
744 "ISO-2022-JP",
745 "ISO-2022-KR",
746 "JOHAB",
747 "KOI8-R",
748 "KOI8-U",
749
750 "SHIFT_JIS",
751 "TCVN",
752 "TIS-620",
753 "UHC",
754 "VISCII",
755
756 "CP1250",
757 "CP1251",
758 "CP1252",
759 "CP1253",
760 "CP1254",
761 "CP1255",
762 "CP1256",
763 "CP1257",
764 "CP1258",
765
766 "CP932"
767 };
768
769 // FIXME: Should we check whether those are actually valid encoding
770 // identifiers?
771#endif
772
773 // sort list of encodings
774 std::sort (encoding_list.begin (), encoding_list.end ());
775 }
776
777 return encoding_list;
778}
779
780typedef octave::string::codecvt_u8::InternT InternT;
781typedef octave::string::codecvt_u8::ExternT ExternT;
782typedef octave::string::codecvt_u8::StateT StateT;
783
784typename std::codecvt<InternT, ExternT, StateT>::result
785octave::string::codecvt_u8::do_out
786 (StateT& /* state */,
787 const InternT* from, const InternT* from_end, const InternT*& from_next,
788 ExternT* to, ExternT* to_end, ExternT*& to_next) const
789{
790 to_next = to;
791 if (from_end <= from)
792 {
793 from_next = from_end;
794 return std::codecvt<InternT, ExternT, StateT>::noconv;
795 }
796
797 // Check if buffer ends in a complete UTF-8 surrogate.
798 // FIXME: If this is the last call before a stream is closed, we should
799 // convert trailing bytes even if they look incomplete.
800 // How can we detect that?
801 std::size_t pop_end = 0;
802 if ((*(from_end-1) & 0b10000000) == 0b10000000)
803 {
804 // The last byte is part of a surrogate. Check if it is complete.
805
806 // number of bytes of the surrogate in the buffer
807 std::size_t num_bytes_in_buf = 1;
808 // Find initial byte of surrogate
809 while (((*(from_end-num_bytes_in_buf) & 0b11000000) != 0b11000000)
810 && (num_bytes_in_buf < 4)
811 && (from_end-num_bytes_in_buf > from))
812 num_bytes_in_buf++;
813
814 // If the start of the surrogate is not in the buffer, we need to
815 // continue with the invalid UTF-8 sequence to avoid an infinite loop.
816 // Check if we found an initial byte and if there are enough bytes in the
817 // buffer to complete the surrogate.
818 if ((((*(from_end-num_bytes_in_buf) & 0b11100000) == 0b11000000)
819 && (num_bytes_in_buf < 2)) // incomplete 2-byte surrogate
820 || (((*(from_end-num_bytes_in_buf) & 0b11110000) == 0b11100000)
821 && (num_bytes_in_buf < 3)) // incomplete 3-byte surrogate
822 || (((*(from_end-num_bytes_in_buf) & 0b11111000) == 0b11110000)
823 && (num_bytes_in_buf < 4))) // incomplete 4-byte surrogate
824 pop_end = num_bytes_in_buf;
825 }
826 from_next = from_end - pop_end;
827
828 std::size_t srclen = (from_end-from-pop_end) * sizeof (InternT);
829 std::size_t length = (to_end-to) * sizeof (ExternT);
830 if (srclen < 1 || length < 1)
831 return std::codecvt<InternT, ExternT, StateT>::partial;
832
833 // Convert from UTF-8 to output encoding
834 const uint8_t *u8_str = reinterpret_cast<const uint8_t *> (from);
835 char *enc_str = octave_u8_conv_to_encoding (m_enc.c_str (), u8_str, srclen,
836 &length);
837
838 if (length < 1)
839 return std::codecvt<InternT, ExternT, StateT>::partial;
840
841 size_t max = (to_end - to) * sizeof (ExternT);
842 // FIXME: If the output encoding is a multibyte or variable byte encoding,
843 // we should ensure that we don't cut off a "partial" surrogate from
844 // the output.
845 // Can this ever happen?
846 if (length < max)
847 max = length;
848
849 // copy conversion result to output
850 std::copy_n (enc_str, max, to);
851 ::free (enc_str);
852
853 from_next = from + srclen;
854 to_next = to + max;
855
856 return ((pop_end > 0 || max < length)
857 ? std::codecvt<InternT, ExternT, StateT>::partial
858 : std::codecvt<InternT, ExternT, StateT>::ok);
859}
860
861typename std::codecvt<InternT, ExternT, StateT>::result
862octave::string::codecvt_u8::do_in
863 (StateT& /* state */,
864 const ExternT* from, const ExternT* from_end, const ExternT*& from_next,
865 InternT* to, InternT* to_end, InternT*& to_next) const
866{
867 // Convert from input encoding to UTF-8
868 std::size_t srclen = (from_end-from) * sizeof (ExternT);
869 std::size_t lengthp = (to_end-to) * sizeof (InternT);
870 const char *enc_str = reinterpret_cast<const char *> (from);
871 uint8_t *u8_str = octave_u8_conv_from_encoding (m_enc.c_str (),
872 enc_str, srclen, &lengthp);
873
874 std::size_t max = to_end - to;
875 if (lengthp < max)
876 max = lengthp;
877
878 // copy conversion result to output
879 std::copy_n (u8_str, max, to);
880 ::free (u8_str);
881
882 from_next = from + srclen;
883 to_next = to + max;
884
885 return std::codecvt<InternT, ExternT, StateT>::ok;
886}
887
888int octave::string::codecvt_u8::do_length
889 (StateT& /* state */, const ExternT *src, const ExternT *end,
890 std::size_t max) const
891{
892 // return number of external characters that produce MAX internal ones
893 std::size_t srclen = end-src;
894 OCTAVE_LOCAL_BUFFER (std::size_t, offsets, srclen);
895 std::size_t lengthp = max;
896 octave_u8_conv_from_encoding_offsets (m_enc.c_str (), src, srclen, offsets,
897 &lengthp);
898 std::size_t ext_char;
899 for (ext_char = 0; ext_char < srclen; ext_char++)
900 {
901 if (offsets[ext_char] != static_cast<size_t> (-1)
902 && offsets[ext_char] >= max)
903 break;
904 }
905
906 return ext_char;
907}
908
909
910template <typename T>
911std::string
913{
914 std::string s;
915
916 if (len <= 0)
917 len = 10;
918
919 static constexpr T out_of_range_top
920 = static_cast<T> (std::numeric_limits<int>::max ()) + 1.0;
921 static constexpr T out_of_range_bottom
922 = static_cast<T> (std::numeric_limits<int>::min ()) - 1.0;
923
924 if (octave::math::isinf (val))
925 {
926 if (val > 0)
927 s = "1/0";
928 else
929 s = "-1/0";
930 }
931 else if (octave::math::isnan (val))
932 s = "0/0";
933 else if (val <= out_of_range_bottom || val >= out_of_range_top
934 || octave::math::x_nint (val) == val)
935 {
936 std::ostringstream buf;
937 buf.flags (std::ios::fixed);
938 buf << std::setprecision (0) << octave::math::round (val);
939 s = buf.str ();
940 }
941 else
942 {
943 T lastn = 1;
944 T lastd = 0;
945 T n = octave::math::round (val);
946 T d = 1;
947 T frac = val - n;
948
949 std::ostringstream init_buf;
950 init_buf.flags (std::ios::fixed);
951 init_buf << std::setprecision (0) << static_cast<int> (n);
952 s = init_buf.str ();
953
954 while (true)
955 {
956 T flip = 1 / frac;
957 T step = octave::math::round (flip);
958 T nextn = n;
959 T nextd = d;
960
961 // Have we converged to 1/intmax ?
962 if (std::abs (flip) > out_of_range_top)
963 {
964 lastn = n;
965 lastd = d;
966 break;
967 }
968
969 frac = flip - step;
970 n = step * n + lastn;
971 d = step * d + lastd;
972 lastn = nextn;
973 lastd = nextd;
974
975 if (std::abs (n) >= out_of_range_top
976 || std::abs (d) >= out_of_range_top)
977 break;
978
979 std::ostringstream buf;
980 buf.flags (std::ios::fixed);
981 buf << std::setprecision (0) << static_cast<int> (n)
982 << '/' << static_cast<int> (d);
983
984 if (n < 0 && d < 0)
985 {
986 // Double negative, string can be two characters longer.
987 if (buf.str ().length () > static_cast<unsigned int> (len + 2))
988 break;
989 }
990 else
991 {
992 if (buf.str ().length () > static_cast<unsigned int> (len))
993 break;
994 }
995
996 s = buf.str ();
997 }
998
999 if (lastd < 0)
1000 {
1001 // Move negative sign from denominator to numerator
1002 lastd = - lastd;
1003 lastn = - lastn;
1004 std::ostringstream buf;
1005 buf.flags (std::ios::fixed);
1006 buf << std::setprecision (0) << static_cast<int> (lastn)
1007 << '/' << static_cast<int> (lastd);
1008 s = buf.str ();
1009 }
1010 }
1011
1012 return s;
1013}
1014
1015// instantiate the template for float and double
1016template OCTAVE_API std::string rational_approx <float> (float val, int len);
1017template OCTAVE_API std::string rational_approx <double> (double val, int len);
charNDArray max(char d, const charNDArray &m)
Definition chNDArray.cc:230
N Dimensional Array with copy-on-write semantics.
Definition Array.h:130
const dim_vector & dims() const
Return a const-reference so that dims ()(i) works efficiently.
Definition Array.h:507
bool isvector() const
Size of the specified dimension.
Definition Array.h:655
octave_idx_type rows() const
Definition Array.h:463
octave_idx_type numel() const
Number of elements in the array.
Definition Array.h:418
ColumnVector imag(const ComplexColumnVector &a)
void octave_iconvlist_wrapper(int(*do_one)(unsigned int namescount, const char *const *names, void *data), void *data)
const char * octave_iconv_canonicalize_wrapper(const char *name)
OCTAVE_NORETURN liboctave_error_handler current_liboctave_error_handler
Definition lo-error.c:41
#define octave_NA
Definition lo-ieee.h:43
F77_RET_T const F77_DBLE const F77_DBLE F77_DBLE * d
#define OCTAVE_API
Definition main.in.cc:55
std::complex< double > Complex
Definition oct-cmplx.h:33
#define OCTAVE_LOCAL_BUFFER(T, buf, size)
Definition oct-locbuf.h:44
template std::string rational_approx< double >(double val, int len)
template std::string rational_approx< float >(float val, int len)
octave::string::codecvt_u8::ExternT ExternT
#define INSTANTIATE_OCTAVE_STRING(T, API)
octave::string::codecvt_u8::InternT InternT
octave::string::codecvt_u8::StateT StateT
T::size_type numel(const T &str)
Definition oct-string.cc:74
bool sizes_cmp(const T &str_a, const T &str_b)
Definition oct-string.cc:95
T::size_type strlen(const typename T::value_type *str)
Definition oct-string.cc:88
std::string rational_approx(T val, int len)
@ U8_ISO_8859_1
Definition oct-string.h:155
@ U8_REPLACEMENT_CHAR
Definition oct-string.h:154
void free(void *)
char * octave_u8_conv_to_encoding(const char *tocode, const uint8_t *src, size_t srclen, size_t *lengthp)
uint8_t * octave_u8_conv_from_encoding(const char *fromcode, const char *src, size_t srclen, size_t *lengthp)
char * octave_u16_conv_to_encoding(const char *tocode, const uint16_t *src, size_t srclen, size_t *lengthp)
uint8_t * octave_u8_conv_from_encoding_offsets(const char *fromcode, const char *src, size_t srclen, size_t *offsets, size_t *lengthp)
const uint8_t * octave_u8_check_wrapper(const uint8_t *src, size_t n)
F77_RET_T len
Definition xerbla.cc:61