GNU Octave  9.1.0
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab
oct-string.cc
Go to the documentation of this file.
1 ////////////////////////////////////////////////////////////////////////
2 //
3 // Copyright (C) 2016-2024 The Octave Project Developers
4 //
5 // See the file COPYRIGHT.md in the top-level directory of this
6 // distribution or <https://octave.org/copyright/>.
7 //
8 // This file is part of Octave.
9 //
10 // Octave is free software: you can redistribute it and/or modify it
11 // under the terms of the GNU General Public License as published by
12 // the Free Software Foundation, either version 3 of the License, or
13 // (at your option) any later version.
14 //
15 // Octave is distributed in the hope that it will be useful, but
16 // WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 // GNU General Public License for more details.
19 //
20 // You should have received a copy of the GNU General Public License
21 // along with Octave; see the file COPYING. If not, see
22 // <https://www.gnu.org/licenses/>.
23 //
24 ////////////////////////////////////////////////////////////////////////
25 
26 #if defined (HAVE_CONFIG_H)
27 # include "config.h"
28 #endif
29 
30 #include "oct-string.h"
31 
32 #include <algorithm>
33 #include <cctype>
34 #include <cstring>
35 #include <iomanip>
36 #include <string>
37 #include <unordered_set>
38 
39 #include "Array.h"
40 #include "iconv-wrappers.h"
41 #include "lo-ieee.h"
42 #include "lo-mappers.h"
43 #include "oct-locbuf.h"
44 #include "uniconv-wrappers.h"
45 #include "unistr-wrappers.h"
46 #include "unwind-prot.h"
47 
// Return true if the first N elements of A and B are identical.
// An N of zero (or less, for signed size types) compares equal.
template <typename T>
static bool
str_data_cmp (const typename T::value_type *a, const typename T::value_type *b,
              const typename T::size_type n)
{
  typename T::size_type idx = 0;

  // Advance while the elements agree; stop at the first mismatch.
  while (idx < n && a[idx] == b[idx])
    ++idx;

  return ! (idx < n);
}
58 
// Return true if the first N elements of A and B are identical once
// both are mapped through std::tolower.
template <typename T>
static bool
str_data_cmpi (const typename T::value_type *a, const typename T::value_type *b,
               const typename T::size_type n)
{
  typename T::size_type idx = 0;

  // Advance while the lower-cased elements agree.
  while (idx < n && std::tolower (a[idx]) == std::tolower (b[idx]))
    ++idx;

  return ! (idx < n);
}
69 
70 
71 // Templates to handle std::basic_string, std::vector, Array, and char*.
// Number of elements of any container that provides size ().
template <typename T>
typename T::size_type
numel (const T& str)
{
  const typename T::size_type count = str.size ();
  return count;
}
78 
79 template <>
81 numel (const Array<char>& str)
82 {
83  return str.numel ();
84 }
85 
// Length of a NUL-terminated character sequence, expressed in the size
// type of the container type T.
template <typename T>
typename T::size_type
strlen (const typename T::value_type *str)
{
  const typename T::size_type len = std::strlen (str);
  return len;
}
92 
// True if both containers hold the same number of elements.
template <typename T>
bool
sizes_cmp (const T& str_a, const T& str_b)
{
  const auto size_a = str_a.size ();
  const auto size_b = str_b.size ();

  return size_a == size_b;
}
99 
100 template <>
101 bool
102 sizes_cmp (const Array<char>& str_a, const Array<char>& str_b)
103 {
104  return str_a.dims () == str_b.dims ();
105 }
106 
107 template <typename T>
108 bool
109 sizes_cmp (const T& str_a, const typename T::value_type *str_b)
110 {
111  return str_a.size () == strlen<T> (str_b);
112 }
113 
114 template <>
115 bool
116 sizes_cmp (const Array<char>& str_a, const char *str_b)
117 {
118  return (str_a.isvector () && str_a.rows () == 1
119  && str_a.numel () == strlen<Array<char>> (str_b));
120 }
121 
122 
123 template<typename T>
124 bool
125 octave::string::strcmp (const T& str_a, const T& str_b)
126 {
127  return (sizes_cmp (str_a, str_b)
128  && str_data_cmp<T> (str_a.data (), str_b.data (), numel (str_a)));
129 }
130 
131 template<typename T>
132 bool
133 octave::string::strcmp (const T& str_a, const typename T::value_type *str_b)
134 {
135  return (sizes_cmp (str_a, str_b)
136  && str_data_cmp<T> (str_a.data (), str_b, numel (str_a)));
137 }
138 
139 
140 template<typename T>
141 bool
142 octave::string::strcmpi (const T& str_a, const T& str_b)
143 {
144  return (sizes_cmp (str_a, str_b)
145  && str_data_cmpi<T> (str_a.data (), str_b.data (), numel (str_a)));
146 }
147 
148 template<typename T>
149 bool
150 octave::string::strcmpi (const T& str_a, const typename T::value_type *str_b)
151 {
152  return (sizes_cmp (str_a, str_b)
153  && str_data_cmpi<T> (str_a.data (), str_b, numel (str_a)));
154 }
155 
156 
157 template<typename T>
158 bool
159 octave::string::strncmp (const T& str_a, const T& str_b,
160  const typename T::size_type n)
161 {
162  typename T::size_type neff;
163  auto len_a = numel (str_a);
164  auto len_b = numel (str_b);
165  neff = std::min (std::max (len_a, len_b), n);
166 
167  return (len_a >= neff && len_b >= neff
168  && str_data_cmp<T> (str_a.data (), str_b.data (), neff));
169 }
170 
171 template<typename T>
172 bool
173 octave::string::strncmp (const T& str_a, const typename T::value_type *str_b,
174  const typename T::size_type n)
175 {
176  typename T::size_type neff;
177  auto len_a = numel (str_a);
178  auto len_b = strlen<T> (str_b);
179  neff = std::min (std::max (len_a, len_b), n);
180 
181  return (len_a >= neff && len_b >= neff
182  && str_data_cmp<T> (str_a.data (), str_b, neff));
183 }
184 
185 
186 template<typename T>
187 bool
188 octave::string::strncmpi (const T& str_a, const T& str_b,
189  const typename T::size_type n)
190 {
191  typename T::size_type neff;
192  auto len_a = numel (str_a);
193  auto len_b = numel (str_b);
194  neff = std::min (std::max (len_a, len_b), n);
195 
196  return (len_a >= neff && len_b >= neff
197  && str_data_cmpi<T> (str_a.data (), str_b.data (), neff));
198 }
199 
200 template<typename T>
201 bool
202 octave::string::strncmpi (const T& str_a, const typename T::value_type *str_b,
203  const typename T::size_type n)
204 {
205  typename T::size_type neff;
206  auto len_a = numel (str_a);
207  auto len_b = strlen<T> (str_b);
208  neff = std::min (std::max (len_a, len_b), n);
209 
210  return (len_a >= neff && len_b >= neff
211  && str_data_cmpi<T> (str_a.data (), str_b, neff));
212 }
213 
214 
215 // Instantiations we need
216 #define INSTANTIATE_OCTAVE_STRING(T, API) \
217  template API bool octave::string::strcmp<T> (const T&, const T&); \
218  template API bool \
219  octave::string::strcmp<T> (const T&, const typename T::value_type*); \
220  template API bool octave::string::strcmpi<T> (const T&, const T&); \
221  template API bool \
222  octave::string::strcmpi<T> (const T&, const typename T::value_type*); \
223  template API bool \
224  octave::string::strncmp<T> (const T&, const T&, \
225  const typename T::size_type); \
226  template API bool \
227  octave::string::strncmp<T> (const T&, const typename T::value_type*, \
228  const typename T::size_type); \
229  template API bool \
230  octave::string::strncmpi<T> (const T&, const T&, \
231  const typename T::size_type n); \
232  template API bool \
233  octave::string::strncmpi<T> (const T&, const typename T::value_type*, \
234  const typename T::size_type);
235 
236 // We could also instantiate std::vector<char> but would it be
237 // useful for anyone?
240 
241 #undef INSTANTIATE_OCTAVE_STRING
242 
// True if C is one of the letters accepted as the imaginary unit.
static inline bool
is_imag_unit (int c)
{
  switch (c)
    {
    case 'i':
    case 'j':
      return true;

    default:
      return false;
    }
}
246 
247 static double
248 single_num (std::istringstream& is)
249 {
250  double num = 0.0;
251 
252  char c = is.peek ();
253 
254  // Skip spaces.
255  while (isspace (c))
256  {
257  is.get ();
258  c = is.peek ();
259  }
260 
261  if (std::toupper (c) == 'I')
262  {
263  // It's infinity.
264  is.get ();
265  char c1 = is.get ();
266  char c2 = is.get ();
267  if (std::tolower (c1) == 'n' && std::tolower (c2) == 'f')
268  {
270  is.peek (); // May set EOF bit.
271  }
272  else
273  is.setstate (std::ios::failbit); // indicate that read has failed.
274  }
275  else if (c == 'N')
276  {
277  // It's NA or NaN
278  is.get ();
279  char c1 = is.get ();
280  if (c1 == 'A')
281  {
282  num = octave_NA;
283  is.peek (); // May set EOF bit.
284  }
285  else
286  {
287  char c2 = is.get ();
288  if (c1 == 'a' && c2 == 'N')
289  {
291  is.peek (); // May set EOF bit.
292  }
293  else
294  is.setstate (std::ios::failbit); // indicate that read has failed.
295  }
296  }
297  else
298  is >> num;
299 
300  return num;
301 }
302 
303 static std::istringstream&
304 extract_num (std::istringstream& is, double& num, bool& imag, bool& have_sign)
305 {
306  have_sign = imag = false;
307 
308  char c = is.peek ();
309 
310  // Skip leading spaces.
311  while (isspace (c))
312  {
313  is.get ();
314  c = is.peek ();
315  }
316 
317  bool negative = false;
318 
319  // Accept leading sign.
320  if (c == '+' || c == '-')
321  {
322  have_sign = true;
323  negative = c == '-';
324  is.get ();
325  c = is.peek ();
326  }
327 
328  // Skip spaces after sign.
329  while (isspace (c))
330  {
331  is.get ();
332  c = is.peek ();
333  }
334 
335  // Imaginary number (i*num or just i), or maybe 'inf'.
336  if (c == 'i')
337  {
338  // possible infinity.
339  is.get ();
340  c = is.peek ();
341 
342  if (is.eof ())
343  {
344  // just 'i' and string is finished. Return immediately.
345  imag = true;
346  num = (negative ? -1.0 : 1.0);
347  return is;
348  }
349  else
350  {
351  if (std::tolower (c) != 'n')
352  imag = true;
353  is.unget ();
354  }
355  }
356  else if (c == 'j')
357  imag = true;
358 
359  // It's i*num or just i
360  if (imag)
361  {
362  is.get ();
363  c = is.peek ();
364  // Skip spaces after imaginary unit.
365  while (isspace (c))
366  {
367  is.get ();
368  c = is.peek ();
369  }
370 
371  if (c == '*')
372  {
373  // Multiplier follows, we extract it as a number.
374  is.get ();
375  num = single_num (is);
376  if (is.good ())
377  c = is.peek ();
378  }
379  else
380  num = 1.0;
381  }
382  else
383  {
384  // It's num, num*i, or numi.
385  num = single_num (is);
386  if (is.good ())
387  {
388  c = is.peek ();
389 
390  // Skip spaces after number.
391  while (isspace (c))
392  {
393  is.get ();
394  c = is.peek ();
395  }
396 
397  if (c == '*')
398  {
399  is.get ();
400  c = is.peek ();
401 
402  // Skip spaces after operator.
403  while (isspace (c))
404  {
405  is.get ();
406  c = is.peek ();
407  }
408 
409  if (is_imag_unit (c))
410  {
411  imag = true;
412  is.get ();
413  c = is.peek ();
414  }
415  else
416  is.setstate (std::ios::failbit); // indicate read has failed.
417  }
418  else if (is_imag_unit (c))
419  {
420  imag = true;
421  is.get ();
422  c = is.peek ();
423  }
424  }
425  }
426 
427  if (is.good ())
428  {
429  // Skip trailing spaces.
430  while (isspace (c))
431  {
432  is.get ();
433  c = is.peek ();
434  }
435  }
436 
437  if (negative)
438  num = -num;
439 
440  return is;
441 }
442 
443 static inline void
444 set_component (Complex& c, double num, bool imag)
445 {
446 #if defined (HAVE_CXX_COMPLEX_SETTERS)
447  if (imag)
448  c.imag (num);
449  else
450  c.real (num);
451 #elif defined (HAVE_CXX_COMPLEX_REFERENCE_ACCESSORS)
452  if (imag)
453  c.imag () = num;
454  else
455  c.real () = num;
456 #else
457  if (imag)
458  c = Complex (c.real (), num);
459  else
460  c = Complex (num, c.imag ());
461 #endif
462 }
463 
464 Complex
465 octave::string::str2double (const std::string& str_arg)
466 {
467  Complex val (0.0, 0.0);
468 
469  std::string str = str_arg;
470 
471  // FIXME: removing all commas doesn't allow actual parsing.
472  // Example: "1,23.45" is wrong, but passes Octave.
473  str.erase (std::remove (str.begin (), str.end(), ','), str.end ());
474  std::istringstream is (str);
475 
476  double num;
477  bool i1, i2, s1, s2;
478 
479  if (is.eof ())
481  else if (! extract_num (is, num, i1, s1))
483  else
484  {
485  set_component (val, num, i1);
486 
487  if (! is.eof ())
488  {
489  if (! extract_num (is, num, i2, s2) || i1 == i2 || ! s2)
491  else
492  set_component (val, num, i2);
493  }
494  }
495 
496  return val;
497 }
498 
499 std::string
500 octave::string::u8_to_encoding (const std::string& who,
501  const std::string& u8_string,
502  const std::string& encoding)
503 {
504  const uint8_t *src = reinterpret_cast<const uint8_t *>
505  (u8_string.c_str ());
506  std::size_t srclen = u8_string.length ();
507 
508  std::size_t length;
509  char *native_str = octave_u8_conv_to_encoding (encoding.c_str (), src,
510  srclen, &length);
511 
512  if (! native_str)
513  {
514  if (errno == ENOSYS)
515  (*current_liboctave_error_handler)
516  ("%s: iconv() is not supported. Installing GNU libiconv and then "
517  "re-compiling Octave could fix this.", who.c_str ());
518  else
520  ("%s: converting from UTF-8 to codepage '%s' failed: %s",
521  who.c_str (), encoding.c_str (), std::strerror (errno));
522  }
523 
524  octave::unwind_action free_native_str ([=] () { ::free (native_str); });
525 
526  std::string retval = std::string (native_str, length);
527 
528  return retval;
529 }
530 
531 std::string
532 octave::string::u8_from_encoding (const std::string& who,
533  const std::string& native_string,
534  const std::string& encoding)
535 {
536  const char *src = native_string.c_str ();
537  std::size_t srclen = native_string.length ();
538 
539  std::size_t length;
540  uint8_t *utf8_str = octave_u8_conv_from_encoding (encoding.c_str (), src,
541  srclen, &length);
542  if (! utf8_str)
543  {
544  if (errno == ENOSYS)
545  (*current_liboctave_error_handler)
546  ("%s: iconv() is not supported. Installing GNU libiconv and then "
547  "re-compiling Octave could fix this.", who.c_str ());
548  else
550  ("%s: converting from codepage '%s' to UTF-8 failed: %s",
551  who.c_str (), encoding.c_str (), std::strerror (errno));
552  }
553 
554  octave::unwind_action free_utf8_str ([=] () { ::free (utf8_str); });
555 
556  std::string retval = std::string (reinterpret_cast<char *> (utf8_str), length);
557 
558  return retval;
559 }
560 
561 unsigned int
562 octave::string::u8_validate (const std::string& who,
563  std::string& in_str,
565 {
566  std::string out_str;
567 
568  unsigned int num_replacements = 0;
569  const char *in_chr = in_str.c_str ();
570  const char *inv_utf8 = in_chr;
571  const char *const in_end = in_chr + in_str.length ();
572  while (inv_utf8 && in_chr < in_end)
573  {
574  inv_utf8 = reinterpret_cast<const char *>
575  (octave_u8_check_wrapper (reinterpret_cast<const uint8_t *> (in_chr),
576  in_end - in_chr));
577 
578  if (inv_utf8 == nullptr)
579  out_str.append (in_chr, in_end - in_chr);
580  else
581  {
582  num_replacements++;
583  out_str.append (in_chr, inv_utf8 - in_chr);
584  in_chr = inv_utf8 + 1;
585 
586  if (type == U8_REPLACEMENT_CHAR)
587  out_str.append ("\xef\xbf\xbd");
588  else if (type == U8_ISO_8859_1)
589  {
590  std::string fallback = "iso-8859-1";
591  std::size_t lengthp;
592  uint8_t *val_utf8 = octave_u8_conv_from_encoding
593  (fallback.c_str (), inv_utf8, 1, &lengthp);
594 
595  if (! val_utf8)
596  (*current_liboctave_error_handler)
597  ("%s: converting from codepage '%s' to UTF-8 failed: %s",
598  who.c_str (), fallback.c_str (), std::strerror (errno));
599 
600  octave::unwind_action free_val_utf8
601  ([=] () { ::free (val_utf8); });
602 
603  out_str.append (reinterpret_cast<const char *> (val_utf8),
604  lengthp);
605  }
606  }
607  }
608 
609  in_str = out_str;
610  return num_replacements;
611 }
612 
613 std::string
614 octave::string::u16_to_encoding (const std::string& who,
615  const std::u16string& u16_string,
616  const std::string& encoding)
617 {
618  const uint16_t *src = reinterpret_cast<const uint16_t *>
619  (u16_string.c_str ());
620  std::size_t srclen = u16_string.length ();
621 
622  std::size_t length;
623  char *native_str = octave_u16_conv_to_encoding (encoding.c_str (), src,
624  srclen, &length);
625 
626  if (! native_str)
627  {
628  if (errno == ENOSYS)
629  (*current_liboctave_error_handler)
630  ("%s: iconv() is not supported. Installing GNU libiconv and then "
631  "re-compiling Octave could fix this.", who.c_str ());
632  else
634  ("%s: converting from UTF-16 to codepage '%s' failed: %s",
635  who.c_str (), encoding.c_str (), std::strerror (errno));
636  }
637 
638  octave::unwind_action free_native_str ([=] () { ::free (native_str); });
639 
640  std::string retval = std::string (native_str, length);
641 
642  return retval;
643 }
644 
645 std::vector<std::string>
647 {
648  static std::vector<std::string> encoding_list;
649 
650  if (encoding_list.empty ())
651  {
652 #if defined (HAVE_ICONVLIST)
653  // get number of supported encodings
654  std::size_t count = 0;
656  [] (unsigned int num, const char * const *, void *data) -> int
657  {
658  std::size_t *count_ptr = static_cast<std::size_t *> (data);
659  *count_ptr = num;
660  return 0;
661  },
662  &count);
663 
664  if (count == static_cast<size_t> (-1))
665  {
666  encoding_list.push_back ("UTF-8");
667  return encoding_list;
668  }
669 
670 # if defined (HAVE_ICONV_CANONICALIZE)
671  // use unordered_set to skip canonicalized aliases
672  std::unordered_set<std::string> encoding_set;
673  encoding_set.reserve (count);
674 
675  // populate vector with name of encodings
677  [] (unsigned int num, const char * const *names, void *data) -> int
678  {
679  std::unordered_set<std::string> *encoding_set_ptr
680  = static_cast<std::unordered_set<std::string> *> (data);
681  for (std::size_t i = 0; i < num; i++)
682  {
683  const char *canonicalized_enc
685  encoding_set_ptr->insert (canonicalized_enc);
686  }
687  return 0;
688  },
689  &encoding_set);
690 
691  encoding_list.assign (encoding_set.begin (), encoding_set.end ());
692 # endif
693 
694 #else
695  // Use hardcoded list of encodings as a fallback for platforms without
696  // iconvlist (or another way of programmatically querrying a list of
697  // supported encodings).
698  // This list is inspired by the encodings supported by Geany.
699  encoding_list
700  = {"ISO-8859-1",
701  "ISO-8859-2",
702  "ISO-8859-3",
703  "ISO-8859-4",
704  "ISO-8859-5",
705  "ISO-8859-6",
706  "ISO-8859-7",
707  "ISO-8859-8",
708  "ISO-8859-9",
709  "ISO-8859-10",
710  "ISO-8859-13",
711  "ISO-8859-14",
712  "ISO-8859-15",
713  "ISO-8859-16",
714 
715  "UTF-7",
716  "UTF-8",
717  "UTF-16LE",
718  "UTF-16BE",
719  "UTF-32LE",
720  "UTF-32BE",
721  "UCS-2LE",
722  "UCS-2BE",
723 
724  "ARMSCII-8",
725  "BIG5",
726  "BIG5-HKSCS",
727  "CP866",
728 
729  "EUC-JP",
730  "EUC-KR",
731  "EUC-TW",
732 
733  "GB18030",
734  "GB_2312-80",
735  "GBK",
736  "HZ",
737 
738  "IBM850",
739  "IBM852",
740  "IBM855",
741  "IBM857",
742  "IBM862",
743  "IBM864",
744 
745  "ISO-2022-JP",
746  "ISO-2022-KR",
747  "JOHAB",
748  "KOI8-R",
749  "KOI8-U",
750 
751  "SHIFT_JIS",
752  "TCVN",
753  "TIS-620",
754  "UHC",
755  "VISCII",
756 
757  "CP1250",
758  "CP1251",
759  "CP1252",
760  "CP1253",
761  "CP1254",
762  "CP1255",
763  "CP1256",
764  "CP1257",
765  "CP1258",
766 
767  "CP932"
768  };
769 
770  // FIXME: Should we check whether those are actually valid encoding
771  // identifiers?
772 #endif
773 
774  // sort list of encodings
775  std::sort (encoding_list.begin (), encoding_list.end ());
776  }
777 
778  return encoding_list;
779 }
780 
784 
785 typename std::codecvt<InternT, ExternT, StateT>::result
786 octave::string::codecvt_u8::do_out
787  (StateT& /* state */,
788  const InternT* from, const InternT* from_end, const InternT*& from_next,
789  ExternT* to, ExternT* to_end, ExternT*& to_next) const
790 {
791  to_next = to;
792  if (from_end <= from)
793  {
794  from_next = from_end;
795  return std::codecvt<InternT, ExternT, StateT>::noconv;
796  }
797 
798  // Check if buffer ends in a complete UTF-8 surrogate.
799  // FIXME: If this is the last call before a stream is closed, we should
800  // convert trailing bytes even if they look incomplete.
801  // How can we detect that?
802  std::size_t pop_end = 0;
803  if ((*(from_end-1) & 0b10000000) == 0b10000000)
804  {
805  // The last byte is part of a surrogate. Check if it is complete.
806 
807  // number of bytes of the surrogate in the buffer
808  std::size_t num_bytes_in_buf = 1;
809  // Find initial byte of surrogate
810  while (((*(from_end-num_bytes_in_buf) & 0b11000000) != 0b11000000)
811  && (num_bytes_in_buf < 4)
812  && (from_end-num_bytes_in_buf > from))
813  num_bytes_in_buf++;
814 
815  // If the start of the surrogate is not in the buffer, we need to
816  // continue with the invalid UTF-8 sequence to avoid an infinite loop.
817  // Check if we found an initial byte and if there are enough bytes in the
818  // buffer to complete the surrogate.
819  if ((((*(from_end-num_bytes_in_buf) & 0b11100000) == 0b11000000)
820  && (num_bytes_in_buf < 2)) // incomplete 2-byte surrogate
821  || (((*(from_end-num_bytes_in_buf) & 0b11110000) == 0b11100000)
822  && (num_bytes_in_buf < 3)) // incomplete 3-byte surrogate
823  || (((*(from_end-num_bytes_in_buf) & 0b11111000) == 0b11110000)
824  && (num_bytes_in_buf < 4))) // incomplete 4-byte surrogate
825  pop_end = num_bytes_in_buf;
826  }
827  from_next = from_end - pop_end;
828 
829  std::size_t srclen = (from_end-from-pop_end) * sizeof (InternT);
830  std::size_t length = (to_end-to) * sizeof (ExternT);
831  if (srclen < 1 || length < 1)
832  return std::codecvt<InternT, ExternT, StateT>::partial;
833 
834  // Convert from UTF-8 to output encoding
835  const uint8_t *u8_str = reinterpret_cast<const uint8_t *> (from);
836  char *enc_str = octave_u8_conv_to_encoding (m_enc.c_str (), u8_str, srclen,
837  &length);
838 
839  if (length < 1)
840  return std::codecvt<InternT, ExternT, StateT>::partial;
841 
842  size_t max = (to_end - to) * sizeof (ExternT);
843  // FIXME: If the output encoding is a multibyte or variable byte encoding,
844  // we should ensure that we don't cut off a "partial" surrogate from
845  // the output.
846  // Can this ever happen?
847  if (length < max)
848  max = length;
849 
850  // copy conversion result to output
851  std::copy_n (enc_str, max, to);
852  ::free (enc_str);
853 
854  from_next = from + srclen;
855  to_next = to + max;
856 
857  return ((pop_end > 0 || max < length)
858  ? std::codecvt<InternT, ExternT, StateT>::partial
859  : std::codecvt<InternT, ExternT, StateT>::ok);
860 }
861 
862 typename std::codecvt<InternT, ExternT, StateT>::result
863 octave::string::codecvt_u8::do_in
864  (StateT& /* state */,
865  const ExternT* from, const ExternT* from_end, const ExternT*& from_next,
866  InternT* to, InternT* to_end, InternT*& to_next) const
867 {
868  // Convert from input encoding to UTF-8
869  std::size_t srclen = (from_end-from) * sizeof (ExternT);
870  std::size_t lengthp = (to_end-to) * sizeof (InternT);
871  const char *enc_str = reinterpret_cast<const char *> (from);
872  uint8_t *u8_str = octave_u8_conv_from_encoding (m_enc.c_str (),
873  enc_str, srclen, &lengthp);
874 
875  std::size_t max = to_end - to;
876  if (lengthp < max)
877  max = lengthp;
878 
879  // copy conversion result to output
880  std::copy_n (u8_str, max, to);
881  ::free (u8_str);
882 
883  from_next = from + srclen;
884  to_next = to + max;
885 
886  return std::codecvt<InternT, ExternT, StateT>::ok;
887 }
888 
889 int octave::string::codecvt_u8::do_length
890  (StateT& /* state */, const ExternT *src, const ExternT *end,
891  std::size_t max) const
892 {
893  // return number of external characters that produce MAX internal ones
894  std::size_t srclen = end-src;
895  OCTAVE_LOCAL_BUFFER (std::size_t, offsets, srclen);
896  std::size_t lengthp = max;
897  octave_u8_conv_from_encoding_offsets (m_enc.c_str (), src, srclen, offsets,
898  &lengthp);
899  std::size_t ext_char;
900  for (ext_char = 0; ext_char < srclen; ext_char++)
901  {
902  if (offsets[ext_char] != static_cast<size_t> (-1)
903  && offsets[ext_char] >= max)
904  break;
905  }
906 
907  return ext_char;
908 }
909 
910 
911 template <typename T>
912 std::string
913 rational_approx (T val, int len)
914 {
915  std::string s;
916 
917  if (len <= 0)
918  len = 10;
919 
920  static const T out_of_range_top
921  = static_cast<T> (std::numeric_limits<int>::max ()) + 1.;
922  static const T out_of_range_bottom
923  = static_cast<T> (std::numeric_limits<int>::min ()) - 1.;
924  if (octave::math::isinf (val))
925  {
926  if (val > 0)
927  s = "1/0";
928  else
929  s = "-1/0";
930  }
931  else if (octave::math::isnan (val))
932  s = "0/0";
933  else if (val <= out_of_range_bottom || val >= out_of_range_top
934  || octave::math::x_nint (val) == val)
935  {
936  std::ostringstream buf;
937  buf.flags (std::ios::fixed);
938  buf << std::setprecision (0) << octave::math::round (val);
939  s = buf.str ();
940  }
941  else
942  {
943  T lastn = 1;
944  T lastd = 0;
945  T n = octave::math::round (val);
946  T d = 1;
947  T frac = val - n;
948 
949  std::ostringstream init_buf;
950  init_buf.flags (std::ios::fixed);
951  init_buf << std::setprecision (0) << static_cast<int> (n);
952  s = init_buf.str ();
953 
954  while (true)
955  {
956  T flip = 1 / frac;
957  T step = octave::math::round (flip);
958  T nextn = n;
959  T nextd = d;
960 
961  // Have we converged to 1/intmax ?
962  if (std::abs (flip) > out_of_range_top)
963  {
964  lastn = n;
965  lastd = d;
966  break;
967  }
968 
969  frac = flip - step;
970  n = step * n + lastn;
971  d = step * d + lastd;
972  lastn = nextn;
973  lastd = nextd;
974 
975  std::ostringstream buf;
976  buf.flags (std::ios::fixed);
977  buf << std::setprecision (0) << static_cast<int> (n)
978  << '/' << static_cast<int> (d);
979 
980  if (n < 0 && d < 0)
981  {
982  // Double negative, string can be two characters longer.
983  if (buf.str ().length () > static_cast<unsigned int> (len + 2))
984  break;
985  }
986  else
987  {
988  if (buf.str ().length () > static_cast<unsigned int> (len))
989  break;
990  }
991 
992  if (std::abs (n) >= out_of_range_top
993  || std::abs (d) >= out_of_range_top)
994  break;
995 
996  s = buf.str ();
997  }
998 
999  if (lastd < 0)
1000  {
1001  // Move negative sign from denominator to numerator
1002  lastd = - lastd;
1003  lastn = - lastn;
1004  std::ostringstream buf;
1005  buf.flags (std::ios::fixed);
1006  buf << std::setprecision (0) << static_cast<int> (lastn)
1007  << '/' << static_cast<int> (lastd);
1008  s = buf.str ();
1009  }
1010  }
1011 
1012  return s;
1013 }
1014 
1015 // instantiate the template for float and double
1016 template OCTAVE_API std::string rational_approx <float> (float val, int len);
1017 template OCTAVE_API std::string rational_approx <double> (double val, int len);
#define Inf
Definition: Faddeeva.cc:260
#define NaN
Definition: Faddeeva.cc:261
charNDArray max(char d, const charNDArray &m)
Definition: chNDArray.cc:230
charNDArray min(char d, const charNDArray &m)
Definition: chNDArray.cc:207
bool isvector() const
Size of the specified dimension.
Definition: Array.h:654
octave_idx_type rows() const
Definition: Array.h:459
const dim_vector & dims() const
Return a const-reference so that dims ()(i) works efficiently.
Definition: Array.h:503
octave_idx_type numel() const
Number of elements in the array.
Definition: Array.h:414
ColumnVector imag(const ComplexColumnVector &a)
Definition: dColVector.cc:143
void octave_iconvlist_wrapper(int(*do_one)(unsigned int namescount, const char *const *names, void *data), void *data)
const char * octave_iconv_canonicalize_wrapper(const char *name)
OCTAVE_NORETURN liboctave_error_handler current_liboctave_error_handler
Definition: lo-error.c:41
#define octave_NA
Definition: lo-ieee.h:43
bool isinf(double x)
Definition: lo-mappers.h:203
double round(double x)
Definition: lo-mappers.h:136
bool isnan(bool)
Definition: lo-mappers.h:178
T x_nint(T x)
Definition: lo-mappers.h:269
F77_RET_T const F77_DBLE const F77_DBLE F77_DBLE * d
#define OCTAVE_API
Definition: main.cc:55
octave_idx_type n
Definition: mx-inlines.cc:761
std::complex< double > Complex
Definition: oct-cmplx.h:33
#define OCTAVE_LOCAL_BUFFER(T, buf, size)
Definition: oct-locbuf.h:44
template std::string rational_approx< double >(double val, int len)
template std::string rational_approx< float >(float val, int len)
octave::string::codecvt_u8::ExternT ExternT
Definition: oct-string.cc:782
#define INSTANTIATE_OCTAVE_STRING(T, API)
Definition: oct-string.cc:216
octave::string::codecvt_u8::InternT InternT
Definition: oct-string.cc:781
octave::string::codecvt_u8::StateT StateT
Definition: oct-string.cc:783
T::size_type numel(const T &str)
Definition: oct-string.cc:74
bool sizes_cmp(const T &str_a, const T &str_b)
Definition: oct-string.cc:95
T::size_type strlen(const typename T::value_type *str)
Definition: oct-string.cc:88
std::string rational_approx(T val, int len)
Definition: oct-string.cc:913
bool strncmp(const T &str_a, const T &str_b, const typename T::size_type n)
True if the first N characters are the same.
u8_fallback_type
Definition: oct-string.h:153
@ U8_ISO_8859_1
Definition: oct-string.h:155
@ U8_REPLACEMENT_CHAR
Definition: oct-string.h:154
std::string u8_to_encoding(const std::string &who, const std::string &u8_string, const std::string &encoding)
bool strcmp(const T &str_a, const T &str_b)
Octave string utility functions.
std::vector< std::string > get_encoding_list()
Complex str2double(const std::string &str_arg)
bool strcmpi(const T &str_a, const T &str_b)
True if strings are the same, ignoring case.
std::string u16_to_encoding(const std::string &who, const std::u16string &u16_string, const std::string &encoding)
unsigned int u8_validate(const std::string &who, std::string &in_string, const u8_fallback_type type=U8_REPLACEMENT_CHAR)
bool strncmpi(const T &str_a, const T &str_b, const typename T::size_type n)
True if the first N characters are the same, ignoring case.
std::string u8_from_encoding(const std::string &who, const std::string &native_string, const std::string &encoding)
void free(void *)
uint8_t * octave_u8_conv_from_encoding_offsets(const char *fromcode, const char *src, size_t srclen, size_t *offsets, size_t *lengthp)
uint8_t * octave_u8_conv_from_encoding(const char *fromcode, const char *src, size_t srclen, size_t *lengthp)
char * octave_u8_conv_to_encoding(const char *tocode, const uint8_t *src, size_t srclen, size_t *lengthp)
char * octave_u16_conv_to_encoding(const char *tocode, const uint16_t *src, size_t srclen, size_t *lengthp)
const uint8_t * octave_u8_check_wrapper(const uint8_t *src, size_t n)
F77_RET_T len
Definition: xerbla.cc:61