GNU Octave  9.1.0
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab
strfns.cc
Go to the documentation of this file.
1 ////////////////////////////////////////////////////////////////////////
2 //
3 // Copyright (C) 1994-2024 The Octave Project Developers
4 //
5 // See the file COPYRIGHT.md in the top-level directory of this
6 // distribution or <https://octave.org/copyright/>.
7 //
8 // This file is part of Octave.
9 //
10 // Octave is free software: you can redistribute it and/or modify it
11 // under the terms of the GNU General Public License as published by
12 // the Free Software Foundation, either version 3 of the License, or
13 // (at your option) any later version.
14 //
15 // Octave is distributed in the hope that it will be useful, but
16 // WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 // GNU General Public License for more details.
19 //
20 // You should have received a copy of the GNU General Public License
21 // along with Octave; see the file COPYING. If not, see
22 // <https://www.gnu.org/licenses/>.
23 //
24 ////////////////////////////////////////////////////////////////////////
25 
26 #if defined (HAVE_CONFIG_H)
27 # include "config.h"
28 #endif
29 
30 #include <cctype>
31 
32 #include <queue>
33 #include <sstream>
34 
35 #include "dMatrix.h"
36 #include "localcharset-wrapper.h"
37 #include "uniconv-wrappers.h"
38 #include "unistr-wrappers.h"
39 
40 #include "Cell.h"
41 #include "defun.h"
42 #include "error.h"
43 #include "errwarn.h"
44 #include "ov.h"
45 #include "ovl.h"
46 #include "unwind-prot.h"
47 #include "utils.h"
48 
49 #include "oct-string.h"
50 
52 
53 DEFUN (char, args, ,
54  doc: /* -*- texinfo -*-
55 @deftypefn {} {@var{C} =} char (@var{A})
56 @deftypefnx {} {@var{C} =} char (@var{A}, @dots{})
57 @deftypefnx {} {@var{C} =} char (@var{str1}, @var{str2}, @dots{})
58 @deftypefnx {} {@var{C} =} char (@var{cell_array})
59 Create a string array from one or more numeric matrices, character
60 matrices, or cell arrays.
61 
62 Arguments are concatenated vertically. The returned values are padded with
63 blanks as needed to make each row of the string array have the same length.
64 Empty input strings are significant and will concatenated in the output.
65 
66 For numerical input, each element is converted to the corresponding ASCII
67 character. A range error results if an input is outside the ASCII range
68 (0-255).
69 
70 For cell arrays, each element is concatenated separately. Cell arrays
71 converted through @code{char} can mostly be converted back with
72 @code{cellstr}. For example:
73 
74 @example
75 @group
76 char ([97, 98, 99], "", @{"98", "99", 100@}, "str1", ["ha", "lf"])
77  @result{} ["abc "
78  " "
79  "98 "
80  "99 "
81  "d "
82  "str1"
83  "half"]
84 @end group
85 @end example
86 @seealso{strvcat, cellstr}
87 @end deftypefn */)
88 {
89  octave_value retval;
90 
91  int nargin = args.length ();
92 
93  if (nargin == 0)
94  retval = "";
95  else if (nargin == 1)
96  retval = args(0).convert_to_str (true, true,
97  args(0).is_dq_string () ? '"' : '\'');
98  else
99  {
100  int n_elts = 0;
101 
102  int max_len = 0;
103 
104  std::queue<string_vector> args_as_strings;
105 
106  for (int i = 0; i < nargin; i++)
107  {
108  string_vector s = args(i).xstring_vector_value ("char: unable to convert some args to strings");
109 
110  if (s.numel () > 0)
111  n_elts += s.numel ();
112  else
113  n_elts += 1;
114 
115  int s_max_len = s.max_length ();
116 
117  if (s_max_len > max_len)
118  max_len = s_max_len;
119 
120  args_as_strings.push (s);
121  }
122 
123  string_vector result (n_elts);
124 
125  int k = 0;
126 
127  for (int i = 0; i < nargin; i++)
128  {
129  string_vector s = args_as_strings.front ();
130  args_as_strings.pop ();
131 
132  int n = s.numel ();
133 
134  if (n > 0)
135  {
136  for (int j = 0; j < n; j++)
137  {
138  std::string t = s[j];
139  int t_len = t.length ();
140 
141  if (max_len > t_len)
142  t += std::string (max_len - t_len, ' ');
143 
144  result[k++] = t;
145  }
146  }
147  else
148  result[k++] = std::string (max_len, ' ');
149  }
150 
151  retval = octave_value (result, '\'');
152  }
153 
154  return retval;
155 }
156 
157 /*
158 %!assert (char (), '')
159 %!assert (char (100), "d")
160 %!assert (char (100,100), ["d";"d"])
161 %!assert (char ({100,100}), ["d";"d"])
162 %!assert (char ([100,100]), ["dd"])
163 %!assert (char ({100,{100}}), ["d";"d"])
164 %!assert (char (100, [], 100), ["d";" ";"d"])
165 %!assert (char ({100, [], 100}), ["d";" ";"d"])
166 %!assert (char ({100,{100, {""}}}), ["d";"d";" "])
167 %!assert (char (["a ";"be"], {"c", 100}), ["a ";"be";"c ";"d "])
168 %!assert (char ("a", "bb", "ccc"), ["a  "; "bb "; "ccc"])
169 %!assert (char ([65, 83, 67, 73, 73]), "ASCII")
170 
171 %!test
172 %! x = char ("foo", "bar", "foobar");
173 %! assert (x(1,:), "foo   ");
174 %! assert (x(2,:), "bar   ");
175 %! assert (x(3,:), "foobar");
176 */
177 
178 DEFUN (strvcat, args, ,
179  doc: /* -*- texinfo -*-
180 @deftypefn {} {@var{C} =} strvcat (@var{A})
181 @deftypefnx {} {@var{C} =} strvcat (@var{A}, @dots{})
182 @deftypefnx {} {@var{C} =} strvcat (@var{str1}, @var{str2}, @dots{})
183 @deftypefnx {} {@var{C} =} strvcat (@var{cell_array})
184 Create a character array from one or more numeric matrices, character
185 matrices, or cell arrays.
186 
187 Arguments are concatenated vertically. The returned values are padded with
188 blanks as needed to make each row of the string array have the same length.
189 Unlike @code{char}, empty strings are removed and will not appear in the
190 output.
191 
192 For numerical input, each element is converted to the corresponding ASCII
193 character. A range error results if an input is outside the ASCII range
194 (0-255).
195 
196 For cell arrays, each element is concatenated separately. Cell arrays
197 converted through @code{strvcat} can mostly be converted back with
198 @code{cellstr}. For example:
199 
200 @example
201 @group
202 strvcat ([97, 98, 99], "", @{"98", "99", 100@}, "str1", ["ha", "lf"])
203  @result{} ["abc "
204  "98 "
205  "99 "
206  "d "
207  "str1"
208  "half"]
209 @end group
210 @end example
211 @seealso{char, strcat, cstrcat}
212 @end deftypefn */)
213 {
214  int nargin = args.length ();
215  int n_elts = 0;
216  std::size_t max_len = 0;
217  std::queue<string_vector> args_as_strings;
218 
219  for (int i = 0; i < nargin; i++)
220  {
221  string_vector s = args(i).xstring_vector_value ("strvcat: unable to convert some args to strings");
222 
223  std::size_t n = s.numel ();
224 
225  // do not count empty strings in calculation of number of elements
226  if (n > 0)
227  {
228  for (std::size_t j = 0; j < n; j++)
229  {
230  if (! s[j].empty ())
231  n_elts++;
232  }
233  }
234 
235  std::size_t s_max_len = s.max_length ();
236 
237  if (s_max_len > max_len)
238  max_len = s_max_len;
239 
240  args_as_strings.push (s);
241  }
242 
243  string_vector result (n_elts);
244 
245  octave_idx_type k = 0;
246 
247  for (int i = 0; i < nargin; i++)
248  {
249  string_vector s = args_as_strings.front ();
250  args_as_strings.pop ();
251 
252  std::size_t n = s.numel ();
253 
254  if (n > 0)
255  {
256  for (std::size_t j = 0; j < n; j++)
257  {
258  std::string t = s[j];
259  if (t.length () > 0)
260  {
261  std::size_t t_len = t.length ();
262 
263  if (max_len > t_len)
264  t += std::string (max_len - t_len, ' ');
265 
266  result[k++] = t;
267  }
268  }
269  }
270  }
271 
272  // Cannot use ovl. Relies on overloaded octave_value call.
273  return octave_value (result, '\'');
274 }
275 
276 /*
277 %!assert (strvcat (""), "")
278 %!assert (strvcat (100) == "d")
279 %!assert (strvcat (100,100), ["d";"d"])
280 %!assert (strvcat ({100,100}), ["d";"d"])
281 %!assert (strvcat ([100,100]), ["dd"])
282 %!assert (strvcat ({100,{100}}), ["d";"d"])
283 %!assert (strvcat (100, [], 100), ["d";"d"])
284 %!assert (strvcat ({100, [], 100}), ["d";"d"])
285 %!assert (strvcat ({100,{100, {""}}}), ["d";"d"])
286 %!assert (strvcat (["a ";"be"], {"c", 100}), ["a ";"be";"c ";"d "])
287 %!assert (strvcat ("a", "bb", "ccc"), ["a  "; "bb "; "ccc"])
288 %!assert (strvcat (), "")
289 */
290 
291 DEFUN (ischar, args, ,
292  doc: /* -*- texinfo -*-
293 @deftypefn {} {@var{tf} =} ischar (@var{x})
294 Return true if @var{x} is a character array.
295 @seealso{isfloat, isinteger, islogical, isnumeric, isstring, iscellstr, isa}
296 @end deftypefn */)
297 {
298  if (args.length () != 1)
299  print_usage ();
300 
301  return ovl (args(0).is_string ());
302 }
303 
304 /*
305 %!assert (ischar ("a"), true)
306 %!assert (ischar (["ab";"cd"]), true)
307 %!assert (ischar ({"ab"}), false)
308 %!assert (ischar (1), false)
309 %!assert (ischar ([1, 2]), false)
310 %!assert (ischar ([]), false)
311 %!assert (ischar ([1, 2; 3, 4]), false)
312 %!assert (ischar (""), true)
313 %!assert (ischar ("test"), true)
314 %!assert (ischar (["test"; "ing"]), true)
315 %!assert (ischar (struct ("foo", "bar")), false)
316 
317 %!error ischar ()
318 %!error ischar ("test", 1)
319 */
320 
321 static octave_value
322 do_strcmp_fcn (const octave_value& arg0, const octave_value& arg1,
323  octave_idx_type n, const char *fcn_name,
324  bool (*array_op) (const Array<char>&, const Array<char>&,
326  bool (*str_op) (const std::string&, const std::string&,
327  std::string::size_type))
328 
329 {
330  octave_value retval;
331 
332  bool s1_string = arg0.is_string ();
333  bool s1_cell = arg0.iscell ();
334  bool s2_string = arg1.is_string ();
335  bool s2_cell = arg1.iscell ();
336 
337  if (s1_string && s2_string)
338  retval = array_op (arg0.char_array_value (), arg1.char_array_value (), n);
339  else if ((s1_string && s2_cell) || (s1_cell && s2_string))
340  {
341  octave_value str_val, cell_val;
342 
343  if (s1_string)
344  {
345  str_val = arg0;
346  cell_val = arg1;
347  }
348  else
349  {
350  str_val = arg1;
351  cell_val = arg0;
352  }
353 
354  const Cell cell = cell_val.cell_value ();
355  const string_vector str = str_val.string_vector_value ();
356  octave_idx_type r = str.numel ();
357 
358  if (r == 0 || r == 1)
359  {
360  // Broadcast the string.
361 
362  boolNDArray output (cell_val.dims (), false);
363 
364  std::string s = (r == 0 ? "" : str[0]);
365 
366  if (cell_val.iscellstr ())
367  {
368  const Array<std::string> cellstr = cell_val.cellstr_value ();
369  for (octave_idx_type i = 0; i < cellstr.numel (); i++)
370  output(i) = str_op (cellstr(i), s, n);
371  }
372  else
373  {
374  // FIXME: should we warn here?
375  for (octave_idx_type i = 0; i < cell.numel (); i++)
376  {
377  if (cell(i).is_string ())
378  output(i) = str_op (cell(i).string_value (), s, n);
379  }
380  }
381 
382  retval = output;
383  }
384  else if (r > 1)
385  {
386  if (cell.numel () == 1)
387  {
388  // Broadcast the cell.
389 
390  const dim_vector dv (r, 1);
391  boolNDArray output (dv, false);
392 
393  if (cell(0).is_string ())
394  {
395  const std::string str2 = cell(0).string_value ();
396 
397  for (octave_idx_type i = 0; i < r; i++)
398  output(i) = str_op (str[i], str2, n);
399  }
400 
401  retval = output;
402  }
403  else
404  {
405  // Must match in all dimensions.
406 
407  boolNDArray output (cell.dims (), false);
408 
409  if (cell.numel () == r)
410  {
411  if (cell_val.iscellstr ())
412  {
413  const Array<std::string> cellstr
414  = cell_val.cellstr_value ();
415  for (octave_idx_type i = 0; i < cellstr.numel (); i++)
416  output(i) = str_op (str[i], cellstr(i), n);
417  }
418  else
419  {
420  // FIXME: should we warn here?
421  for (octave_idx_type i = 0; i < r; i++)
422  {
423  if (cell(i).is_string ())
424  output(i) = str_op (str[i],
425  cell(i).string_value (), n);
426  }
427  }
428 
429  retval = output;
430  }
431  else
432  retval = false;
433  }
434  }
435  }
436  else if (s1_cell && s2_cell)
437  {
438  octave_value cell1_val, cell2_val;
439  octave_idx_type r1 = arg0.numel (), r2;
440 
441  if (r1 == 1)
442  {
443  // Make the singleton cell2.
444 
445  cell1_val = arg1;
446  cell2_val = arg0;
447  }
448  else
449  {
450  cell1_val = arg0;
451  cell2_val = arg1;
452  }
453 
454  const Cell cell1 = cell1_val.cell_value ();
455  const Cell cell2 = cell2_val.cell_value ();
456  r1 = cell1.numel ();
457  r2 = cell2.numel ();
458 
459  const dim_vector size1 = cell1.dims ();
460  const dim_vector size2 = cell2.dims ();
461 
462  boolNDArray output (size1, false);
463 
464  if (r2 == 1)
465  {
466  // Broadcast cell2.
467 
468  if (cell2(0).is_string ())
469  {
470  const std::string str2 = cell2(0).string_value ();
471 
472  if (cell1_val.iscellstr ())
473  {
474  const Array<std::string> cellstr = cell1_val.cellstr_value ();
475  for (octave_idx_type i = 0; i < cellstr.numel (); i++)
476  output(i) = str_op (cellstr(i), str2, n);
477  }
478  else
479  {
480  // FIXME: should we warn here?
481  for (octave_idx_type i = 0; i < r1; i++)
482  {
483  if (cell1(i).is_string ())
484  {
485  const std::string str1 = cell1(i).string_value ();
486  output(i) = str_op (str1, str2, n);
487  }
488  }
489  }
490  }
491  }
492  else
493  {
494  if (size1 != size2)
495  error ("%s: nonconformant cell arrays", fcn_name);
496 
497  if (cell1.iscellstr () && cell2.iscellstr ())
498  {
499  const Array<std::string> cellstr1 = cell1_val.cellstr_value ();
500  const Array<std::string> cellstr2 = cell2_val.cellstr_value ();
501  for (octave_idx_type i = 0; i < r1; i++)
502  output (i) = str_op (cellstr1(i), cellstr2(i), n);
503  }
504  else
505  {
506  // FIXME: should we warn here?
507  for (octave_idx_type i = 0; i < r1; i++)
508  {
509  if (cell1(i).is_string () && cell2(i).is_string ())
510  {
511  const std::string str1 = cell1(i).string_value ();
512  const std::string str2 = cell2(i).string_value ();
513  output(i) = str_op (str1, str2, n);
514  }
515  }
516  }
517  }
518 
519  retval = output;
520  }
521  else
522  retval = false;
523 
524  return retval;
525 }
526 
527 
528 // These are required so that they match the same signature as strncmp
529 // and strncmpi and can therefore be used in do_strcmp_fcn.
530 
531 template <typename T, typename T_size_type>
532 static bool
533 strcmp_ignore_n (const T& s1, const T& s2, T_size_type)
534 { return string::strcmp (s1, s2); }
535 
536 template <typename T, typename T_size_type>
537 static bool
538 strcmpi_ignore_n (const T& s1, const T& s2, T_size_type)
539 { return string::strcmpi (s1, s2); }
540 
541 
542 DEFUN (strcmp, args, ,
543  doc: /* -*- texinfo -*-
544 @deftypefn {} {@var{tf} =} strcmp (@var{str1}, @var{str2})
545 Return 1 if the character strings @var{str1} and @var{str2} are the same,
546 and 0 otherwise.
547 
548 If either @var{str1} or @var{str2} is a cell array of strings, then an array
549 of the same size is returned, containing the values described above for
550 every member of the cell array. The other argument may also be a cell
551 array of strings (of the same size or with only one element), char matrix
552 or character string.
553 
554 @strong{Caution:} For compatibility with @sc{matlab}, Octave's strcmp
555 function returns 1 if the character strings are equal, and 0 otherwise.
556 This is just the opposite of the corresponding C library function.
557 @seealso{strcmpi, strncmp, strncmpi}
558 @end deftypefn */)
559 {
560  if (args.length () != 2)
561  print_usage ();
562 
563  return ovl (do_strcmp_fcn (args(0), args(1), 0, "strcmp",
564  strcmp_ignore_n, strcmp_ignore_n));
565 }
566 
567 /*
568 %!shared x
569 %! x = char (zeros (0, 2));
570 %!assert (strcmp ("", x), false)
571 %!assert (strcmp (x, ""), false)
572 %!assert (strcmp (x, x), true)
573 ## %!assert (strcmp ({""}, x), true)
574 ## %!assert (strcmp ({x}, ""), false)
575 ## %!assert (strcmp ({x}, x), true)
576 ## %!assert (strcmp ("", {x}), false)
577 ## %!assert (strcmp (x, {""}), false)
578 ## %!assert (strcmp (x, {x}), true)
579 ## %!assert (strcmp ({x; x}, ""), [false; false])
580 ## %!assert (strcmp ({x; x}, {""}), [false; false])
581 ## %!assert (strcmp ("", {x; x}), [false; false])
582 ## %!assert (strcmp ({""}, {x; x}), [false; false])
583 %!assert (strcmp ({"foo"}, x), false)
584 %!assert (strcmp ({"foo"}, "foo"), true)
585 %!assert (strcmp ({"foo"}, x), false)
586 %!assert (strcmp (x, {"foo"}), false)
587 %!assert (strcmp ("foo", {"foo"}), true)
588 %!assert (strcmp (x, {"foo"}), false)
589 %!shared y
590 %! y = char (zeros (2, 0));
591 %!assert (strcmp ("", y), false)
592 %!assert (strcmp (y, ""), false)
593 %!assert (strcmp (y, y), true)
594 %!assert (strcmp ({""}, y), [true; true])
595 %!assert (strcmp ({y}, ""), true)
596 %!assert (strcmp ({y}, y), [true; true])
597 %!assert (strcmp ("", {y}), true)
598 %!assert (strcmp (y, {""}), [true; true])
599 %!assert (strcmp (y, {y}), [true; true])
600 %!assert (strcmp ({y; y}, ""), [true; true])
601 %!assert (strcmp ({y; y}, {""}), [true; true])
602 %!assert (strcmp ("", {y; y}), [true; true])
603 %!assert (strcmp ({""}, {y; y}), [true; true])
604 %!assert (strcmp ({"foo"}, y), [false; false])
605 %!assert (strcmp ({"foo"}, y), [false; false])
606 %!assert (strcmp (y, {"foo"}), [false; false])
607 %!assert (strcmp (y, {"foo"}), [false; false])
608 %!assert (strcmp ("foobar", "foobar"), true)
609 %!assert (strcmp ("foobar", "fooBar"), false)
610 %!assert (strcmp ("fooba", "foobar"), false)
611 
612 %!error strcmp ()
613 %!error strcmp ("foo", "bar", 3)
614 */
615 
616 DEFUN (strncmp, args, ,
617  doc: /* -*- texinfo -*-
618 @deftypefn {} {@var{tf} =} strncmp (@var{str1}, @var{str2}, @var{n})
619 Return 1 if the first @var{n} characters of strings @var{str1} and @var{str2}
620 are the same, and 0 otherwise.
621 
622 @example
623 @group
624 strncmp ("abce", "abcd", 3)
625  @result{} 1
626 @end group
627 @end example
628 
629 If either @var{str1} or @var{str2} is a cell array of strings, then an array
630 of the same size is returned, containing the values described above for
631 every member of the cell array. The other argument may also be a cell
632 array of strings (of the same size or with only one element), char matrix
633 or character string.
634 
635 @example
636 @group
637 strncmp ("abce", @{"abcd", "bca", "abc"@}, 3)
638  @result{} [1, 0, 1]
639 @end group
640 @end example
641 
642 @strong{Caution:} For compatibility with @sc{matlab}, Octave's strncmp
643 function returns 1 if the character strings are equal, and 0 otherwise.
644 This is just the opposite of the corresponding C library function.
645 @seealso{strncmpi, strcmp, strcmpi}
646 @end deftypefn */)
647 {
648  if (args.length () != 3)
649  print_usage ();
650 
651  octave_idx_type n = args(2).idx_type_value ();
652 
653  if (n > 0)
654  return ovl (do_strcmp_fcn (args(0), args(1), n, "strncmp",
656  string::strncmp));
657  else
658  error ("strncmp: N must be greater than 0");
659 }
660 
661 /*
662 %!assert (strncmp ("abce", "abc", 3), true)
663 %!assert (strncmp ("abce", "aBc", 3), false)
664 %!assert (strncmp (100, 100, 1), false)
665 %!assert (strncmp ("abce", {"abcd", "bca", "abc"}, 3), logical ([1, 0, 1]))
666 %!assert (strncmp ("abc", {"abcd", "bca", "abc"}, 4), logical ([0, 0, 1]))
667 %!assert (strncmp ({"abcd", "bca", "abc"},"abce", 3), logical ([1, 0, 1]))
668 %!assert (strncmp ({"abcd", "bca", "abc"},{"abcd", "bca", "abe"}, 3),
669 %! logical ([1, 1, 0]))
670 %!assert (strncmp ("abc", {"abcd", 10}, 2), logical ([1, 0]))
671 
672 %!assert <*54373> (strncmp ("abc", "abc", 100))
673 
674 %!error strncmp ()
675 %!error strncmp ("abc", "def")
676 */
677 
678 DEFUNX ("strcmpi", Fstrcmpi, args, ,
679  doc: /* -*- texinfo -*-
680 @deftypefn {} {@var{tf} =} strcmpi (@var{str1}, @var{str2})
681 Return 1 if the character strings @var{str1} and @var{str2} are the same,
682 disregarding case of alphabetic characters, and 0 otherwise.
683 
684 If either @var{str1} or @var{str2} is a cell array of strings, then an array
685 of the same size is returned, containing the values described above for
686 every member of the cell array. The other argument may also be a cell
687 array of strings (of the same size or with only one element), char matrix
688 or character string.
689 
690 @strong{Caution:} For compatibility with @sc{matlab}, Octave's strcmp
691 function returns 1 if the character strings are equal, and 0 otherwise.
692 This is just the opposite of the corresponding C library function.
693 
694 @strong{Caution:} National alphabets are not supported.
695 @seealso{strcmp, strncmp, strncmpi}
696 @end deftypefn */)
697 {
698  if (args.length () != 2)
699  print_usage ();
700 
701  return ovl (do_strcmp_fcn (args(0), args(1), 0, "strcmpi",
702  strcmpi_ignore_n, strcmpi_ignore_n));
703 }
704 
705 /*
706 %!assert (strcmpi ("abc123", "ABC123"), true)
707 */
708 
709 DEFUNX ("strncmpi", Fstrncmpi, args, ,
710  doc: /* -*- texinfo -*-
711 @deftypefn {} {@var{tf} =} strncmpi (@var{str1}, @var{str2}, @var{n})
712 Return 1 if the first @var{n} character of @var{s1} and @var{s2} are the
713 same, disregarding case of alphabetic characters, and 0 otherwise.
714 
715 If either @var{str1} or @var{str2} is a cell array of strings, then an array
716 of the same size is returned, containing the values described above for
717 every member of the cell array. The other argument may also be a cell
718 array of strings (of the same size or with only one element), char matrix
719 or character string.
720 
721 @strong{Caution:} For compatibility with @sc{matlab}, Octave's strncmpi
722 function returns 1 if the character strings are equal, and 0 otherwise.
723 This is just the opposite of the corresponding C library function.
724 
725 @strong{Caution:} National alphabets are not supported.
726 @seealso{strncmp, strcmp, strcmpi}
727 @end deftypefn */)
728 {
729  if (args.length () != 3)
730  print_usage ();
731 
732  octave_idx_type n = args(2).idx_type_value ();
733 
734  if (n > 0)
735  return ovl (do_strcmp_fcn (args(0), args(1), n, "strncmpi",
738  else
739  error ("strncmpi: N must be greater than 0");
740 }
741 
742 /*
743 %!assert (strncmpi ("abc123", "ABC456", 3), true)
744 
745 %!assert <*54373> (strncmpi ("abc", "abC", 100))
746 */
747 
748 DEFUN (str2double, args, ,
749  doc: /* -*- texinfo -*-
750 @deftypefn {} {@var{d} =} str2double (@var{str})
751 Convert a string to a real or complex number.
752 
753 The string must be in one of the following formats where a and b are real
754 numbers and the complex unit is @qcode{'i'} or @qcode{'j'}:
755 
756 @itemize
757 @item a + bi
758 
759 @item a + b*i
760 
761 @item a + i*b
762 
763 @item bi + a
764 
765 @item b*i + a
766 
767 @item i*b + a
768 @end itemize
769 
770 If present, a and/or b are of the form @nospell{[+-]d[,.]d[[eE][+-]d]} where
771 the brackets indicate optional arguments and @qcode{'d'} indicates zero or
772 more digits. The special input values @code{Inf}, @code{NaN}, and @code{NA}
773 are also accepted.
774 
775 @var{str} may be a character string, character matrix, or cell array. For
776 character arrays the conversion is repeated for every row, and a double or
777 complex array is returned. Empty rows in @var{s} are deleted and not
778 returned in the numeric array. For cell arrays each character string
779 element is processed and a double or complex array of the same dimensions as
780 @var{str} is returned.
781 
782 For unconvertible scalar or character string input @code{str2double} returns
783 a NaN@. Similarly, for character array input @code{str2double} returns a
784 NaN for any row of @var{s} that could not be converted. For a cell array,
785 @code{str2double} returns a NaN for any element of @var{s} for which
786 conversion fails. Note that numeric elements in a mixed string/numeric
787 cell array are not strings and the conversion will fail for these elements
788 and return NaN.
789 
790 Programming Note: @code{str2double} can replace @code{str2num}, is more
791 efficient, and avoids the security risk of using @code{eval} on unknown data.
792 @seealso{str2num}
793 @end deftypefn */)
794 {
795  if (args.length () != 1)
796  print_usage ();
797 
798  octave_value retval;
799 
800  if (args(0).is_string ())
801  {
802  if (args(0).rows () == 0 || args(0).columns () == 0)
803  retval = Matrix (1, 1, numeric_limits<double>::NaN ());
804  else if (args(0).rows () == 1 && args(0).ndims () == 2)
805  retval = string::str2double (args(0).string_value ());
806  else
807  {
808  const string_vector sv = args(0).string_vector_value ();
809 
810  retval = sv.map<Complex> (string::str2double);
811  }
812  }
813  else if (args(0).iscell ())
814  {
815  const Cell cell = args(0).cell_value ();
816 
817  ComplexNDArray output (cell.dims (), numeric_limits<double>::NaN ());
818 
819  for (octave_idx_type i = 0; i < cell.numel (); i++)
820  {
821  if (cell(i).is_string ())
822  output(i) = string::str2double (cell(i).string_value ());
823  }
824  retval = output;
825  }
826  else
827  retval = Matrix (1, 1, numeric_limits<double>::NaN ());
828 
829  return retval;
830 }
831 
832 /*
833 %!assert (str2double ("1"), 1)
834 %!assert (str2double ("-.1e-5"), -1e-6)
835 %!testif ; ! __have_feature__ ("LLVM_LIBCXX")
836 %! assert (str2double (char ("1", "2 3", "4i")), [1; NaN; 4i]);
837 %!testif HAVE_LLVM_LIBCXX <47413>
838 %! ## Same test code as above, intended only for test statistics with libc++.
839 %! assert (str2double (char ("1", "2 3", "4i")), [1; NaN; 4i]);
840 %!assert (str2double ("1,222.5"), 1222.5)
841 %!assert (str2double ("i"), i)
842 %!assert (str2double ("2j"), 2i)
843 %!assert (str2double ("2 + j"), 2+j)
844 %!assert (str2double ("i*2 + 3"), 3+2i)
845 %!assert (str2double (".5*i + 3.5"), 3.5+0.5i)
846 %!assert (str2double ("1e-3 + i*.25"), 1e-3 + 0.25i)
847 %!assert (str2double (char ("2 + j","1.25e-3","-05")), [2+i; 1.25e-3; -5])
848 %!assert (str2double ({"2 + j","1.25e-3","-05"}), [2+i, 1.25e-3, -5])
849 %!assert (str2double (1), NaN)
850 %!assert (str2double ("1 2 3 4"), NaN)
851 %!assert (str2double ("Hello World"), NaN)
852 %!assert (str2double ("NaN"), NaN)
853 %!assert (str2double ("NA"), NA)
854 %!assert (str2double ("Inf"), Inf)
855 %!assert (str2double ("iNF"), Inf)
856 %!assert (str2double ("-Inf"), -Inf)
857 %!assert (str2double ("Inf*i"), complex (0, Inf))
858 %!assert (str2double ("iNF*i"), complex (0, Inf))
859 %!assert (str2double ("NaN + Inf*i"), complex (NaN, Inf))
860 %!assert (str2double ("Inf - Inf*i"), complex (Inf, -Inf))
861 %!assert (str2double ("-i*NaN - Inf"), complex (-Inf, -NaN))
862 %!testif ; ! __have_feature__ ("LLVM_LIBCXX")
863 %! assert (str2double ({"abc", "4i"}), [NaN + 0i, 4i]);
864 %!testif HAVE_LLVM_LIBCXX <47413>
865 %! assert (str2double ({"abc", "4i"}), [NaN + 0i, 4i]);
866 %!testif ; ! __have_feature__ ("LLVM_LIBCXX")
867 %! assert (str2double ({2, "4i"}), [NaN + 0i, 4i])
868 %!testif HAVE_LLVM_LIBCXX <47413>
869 %! assert (str2double ({2, "4i"}), [NaN + 0i, 4i])
870 %!assert (str2double (zeros (3,1,2)), NaN)
871 %!assert (str2double (''), NaN)
872 %!assert (str2double ([]), NaN)
873 %!assert (str2double (char (zeros (3,0))), NaN)
874 */
875 
876 DEFUN (__native2unicode__, args, ,
877  doc: /* -*- texinfo -*-
878 @deftypefn {} {@var{utf8_str} =} __native2unicode__ (@var{native_bytes}, @var{codepage})
879 Convert byte stream @var{native_bytes} to UTF-8 using @var{codepage}.
880 
881 @seealso{native2unicode, __unicode2native__}
882 @end deftypefn */)
883 {
884  if (args(0).is_string ())
885  return ovl (args(0));
886 
887  std::string tmp = args(1).string_value ();
888  const char *codepage
889  = (tmp.empty () ? octave_locale_charset_wrapper () : tmp.c_str ());
890 
891  charNDArray native_bytes = args(0).char_array_value ();
892 
893  const char *src = native_bytes.data ();
894  std::size_t srclen = native_bytes.numel ();
895 
896  std::size_t length;
897  uint8_t *utf8_str = nullptr;
898 
899  utf8_str = octave_u8_conv_from_encoding (codepage, src, srclen, &length);
900 
901  if (! utf8_str)
902  {
903  if (errno == ENOSYS)
904  error ("native2unicode: iconv() is not supported. Installing GNU "
905  "libiconv and then re-compiling Octave could fix this.");
906  else
907  error ("native2unicode: converting from codepage '%s' to UTF-8: %s",
908  codepage, std::strerror (errno));
909  }
910 
911  unwind_action free_utf8_str ([=] () { ::free (utf8_str); });
912 
913  octave_idx_type len = length;
914 
915  charNDArray retval (dim_vector (1, len));
916 
917  for (octave_idx_type i = 0; i < len; i++)
918  retval.xelem (i) = utf8_str[i];
919 
920  return ovl (retval);
921 }
922 
923 DEFUN (__unicode2native__, args, ,
924  doc: /* -*- texinfo -*-
925 @deftypefn {} {@var{native_bytes} =} __unicode2native__ (@var{utf8_str}, @var{codepage})
926 Convert UTF-8 string @var{utf8_str} to byte stream @var{native_bytes} using
927 @var{codepage}.
928 
929 @seealso{unicode2native, __native2unicode__}
930 @end deftypefn */)
931 {
932  std::string tmp = args(1).string_value ();
933  const char *codepage
934  = (tmp.empty () ? octave_locale_charset_wrapper () : tmp.c_str ());
935 
936  charNDArray utf8_str = args(0).char_array_value ();
937 
938  const uint8_t *src = reinterpret_cast<const uint8_t *> (utf8_str.data ());
939  std::size_t srclen = utf8_str.numel ();
940 
941  std::size_t length;
942  char *native_bytes = nullptr;
943 
944  native_bytes = octave_u8_conv_to_encoding (codepage, src, srclen, &length);
945 
946  if (! native_bytes)
947  {
948  if (errno == ENOSYS)
949  error ("unicode2native: iconv() is not supported. Installing GNU "
950  "libiconv and then re-compiling Octave could fix this.");
951  else
952  error ("unicode2native: converting from UTF-8 to codepage '%s': %s",
953  codepage, std::strerror (errno));
954  }
955 
956  unwind_action free_native_bytes ([=] () { ::free (native_bytes); });
957 
958  octave_idx_type len = length;
959 
960  uint8NDArray retval (dim_vector (1, len));
961 
962  for (octave_idx_type i = 0; i < len; i++)
963  retval.xelem (i) = native_bytes[i];
964 
965  return ovl (retval);
966 }
967 
968 DEFUN (__locale_charset__, , ,
969  doc: /* -*- texinfo -*-
970 @deftypefn {} {@var{charset} =} __locale_charset__ ()
971 Return the identifier for the charset used if the encoding is set to
972 @qcode{"locale"}.
973 @end deftypefn */)
974 {
975  const char *charset = octave_locale_charset_wrapper ();
976  std::string charset_str (charset);
977  return ovl (charset_str);
978 }
979 
980 DEFUN (unicode_idx, args, ,
981  doc: /* -*- texinfo -*-
982 @deftypefn {} {@var{idx} =} unicode_idx (@var{str})
983 Return an array with the indices for each UTF-8 encoded character in @var{str}.
984 
985 @example
986 @group
987 unicode_idx ("aäbc")
988  @result{} [1, 2, 2, 3, 4]
989 @end group
990 @end example
991 
992 @end deftypefn */)
993 {
994  if (args.length () != 1)
995  print_usage ();
996 
997  charNDArray str = args(0).xchar_array_value ("STR must be a string");
998  Array<octave_idx_type> p (dim_vector (str.ndims (), 1));
999  charNDArray str_p;
1000  if (str.ndims () > 1)
1001  {
1002  for (octave_idx_type i=0; i < str.ndims (); i++)
1003  p(i) = i;
1004  p(0) = 1;
1005  p(1) = 0;
1006  str_p = str.permute (p);
1007  }
1008 
1009  const uint8_t *src = reinterpret_cast<const uint8_t *> (str_p.data ());
1010  octave_idx_type srclen = str.numel ();
1011 
1012  NDArray idx (str_p.dims ());
1013 
1014  octave_idx_type u8_char_num = 1;
1015  for (octave_idx_type i = 0; i < srclen; u8_char_num++)
1016  {
1017  int mblen = octave_u8_strmblen_wrapper (src + i);
1018  if (mblen < 1)
1019  mblen = 1;
1020  for (octave_idx_type j = 0; j < mblen; j++)
1021  idx(i+j) = u8_char_num;
1022  i += mblen;
1023  }
1024 
1025  return ovl (str.ndims () > 1 ? idx.permute (p, true) : idx);
1026 }
1027 
1028 /*
1029 %!assert (unicode_idx (["aäou"; "Ä∞"]), [1 2 2 3 4; 5 5 6 6 6])
1030 */
1031 
1032 DEFUN (__unicode_length__, args, ,
1033  doc: /* -*- texinfo -*-
1034 @deftypefn {} {@var{len} =} __unicode_length__ (@var{str})
1035 Return number of Unicode code points in @var{str}.
1036 
1037 The input @var{str} must be a UTF-8 encoded character vector or cell string.
1038 
1039 @example
1040 @group
1041 length ("aäbc")
1042  @result{} 5
1043 __unicode_length__ ("aäbc")
1044  @result{} 4
1045 @end group
1046 @end example
1047 
1048 @end deftypefn */)
1049 {
1050  if (args.length () != 1)
1051  print_usage ();
1052 
1053  bool arg_char = args(0).is_char_matrix ();
1054 
1055  if (! arg_char && ! args(0).iscellstr ())
1056  error ("STR must be a character array or cell string.");
1057 
1058  octave_value_list retval;
1059 
1060  if (arg_char)
1061  {
1062  charNDArray str = args(0).char_array_value ();
1063  Array<octave_idx_type> p (dim_vector (str.ndims (), 1));
1064  if (str.ndims () > 1)
1065  {
1066  for (octave_idx_type i=0; i < str.ndims (); i++)
1067  p(i) = i;
1068  p(0) = 1;
1069  p(1) = 0;
1070  str = str.permute (p);
1071  }
1072 
1073  const uint8_t *src = reinterpret_cast<const uint8_t *> (str.data ());
1074  octave_idx_type mbsnlen = octave_u8_mbsnlen_wrapper (src, str.numel ());
1075 
1076  retval = ovl (mbsnlen);
1077  }
1078  else
1079  {
1080  const Array<std::string> cellstr = args(0).cellstr_value ();
1081  NDArray output (args(0).dims (), false);
1082  for (octave_idx_type i = 0; i < cellstr.numel (); i++)
1083  {
1084  const uint8_t *src
1085  = reinterpret_cast<const uint8_t *> (cellstr(i).c_str ());
1086  output(i) = octave_u8_mbsnlen_wrapper (src, cellstr(i).size ());
1087  }
1088 
1089  retval = ovl (output);
1090  }
1091 
1092  return retval;
1093 }
1094 
1095 /*
1096 %!assert (__unicode_length__ (""), 0)
1097 %!assert (__unicode_length__ ("aäbc"), 4)
1098 %!assert (__unicode_length__ (["aä"; "öo"]), 4)
1099 %!assert (__unicode_length__ ({"aäbc", "abc"}), [4, 3])
1100 */
1101 
1102 DEFUN (__u8_validate__, args, ,
1103  doc: /* -*- texinfo -*-
1104 @deftypefn {} {@var{out_str} =} __u8_validate__ (in_str, mode)
1105 Return string with valid UTF-8.
1106 
1107 On encountering invalid UTF-8 in @var{in_str}, the bytes are either replaced by
1108 the replacement character @qcode{"�"} (if @var{mode} is omitted or is the
1109 string @qcode{"replace"}) or interpreted as the Unicode code points
1110 U+0080–U+00FF with the same value as the byte (if @var{mode} is the string
1111 @qcode{"unicode"}), thus interpreting the bytes according to ISO-8859-1.
1112 @end deftypefn */)
1113 {
1114  int nargin = args.length ();
1115 
1116  if (nargin < 1 || nargin > 2)
1117  print_usage ();
1118 
1119  // Input check
1120  std::string in_str =
1121  args(0).xstring_value ("__u8_validate__: IN_STR must be a string");
1122 
1123  std::string mode = "replace";
1124  if (nargin == 2)
1125  mode = args(1).xstring_value ("__u8_validate__: MODE must be a string");
1126 
1127  string::u8_fallback_type fb_type;
1128  if (mode == "replace")
1129  fb_type = string::U8_REPLACEMENT_CHAR;
1130  else if (mode == "unicode")
1131  fb_type = string::U8_ISO_8859_1;
1132  else
1133  error (R"(__u8_validate__: MODE must be either "replace" or "unicode")");
1134 
1135  string::u8_validate ("__u8_validate__", in_str, fb_type);
1136 
1137  return ovl (in_str);
1138 }
1139 
1140 DEFUN (newline, args, ,
1141  doc: /* -*- texinfo -*-
1142 @deftypefn {} {@var{c} =} newline
1143 Return the character corresponding to a newline.
1144 
1145 This is equivalent to @qcode{"@backslashchar{}n"}.
1146 
1147 Example Code
1148 
1149 @example
1150 @group
1151 joined_string = [newline "line1" newline "line2"]
1152 @result{}
1153 line1
1154 line2
1155 @end group
1156 @end example
1157 
1158 @seealso{strcat, strjoin, strsplit}
1159 @end deftypefn */)
1160 {
1161  if (args.length () != 0)
1162  print_usage ();
1163 
1164  static octave_value_list retval = ovl ("\n");
1165 
1166  return retval;
1167 }
1168 
1169 /*
1170 %!assert (newline (), "\n")
1171 
1172 %!error newline (1)
1173 ## FIXME: The next error() test requires a semicolon at EOL until
1174 ## bug #59265 is resolved.
1175 %!error [a, b] = newline ();
1176 */
1177 
1178 DEFUN (list_in_columns, args, ,
1179  doc: /* -*- texinfo -*-
1180 @deftypefn {} {@var{str} =} list_in_columns (@var{arg}, @var{width}, @var{prefix})
1181 Return a string containing the elements of @var{arg} listed in columns with
1182 an overall maximum width of @var{width} and optional prefix @var{prefix}.
1183 
1184 The argument @var{arg} must be a cell array of character strings or a
1185 character array.
1186 
1187 If @var{width} is not specified or is an empty matrix, or less than or equal
1188 to zero, the width of the terminal screen is used. Newline characters are
1189 used to break the lines in the output string. For example:
1190 @c Set example in small font to prevent overfull line
1191 
1192 @smallexample
1193 @group
1194 list_in_columns (@{"abc", "def", "ghijkl", "mnop", "qrs", "tuv"@}, 20)
1195  @result{} abc mnop
1196  def qrs
1197  ghijkl tuv
1198 
1199 whos ans
1200  @result{}
1201  Variables in the current scope:
1202 
1203  Attr Name Size Bytes Class
1204  ==== ==== ==== ===== =====
1205  ans 1x37 37 char
1206 
1207  Total is 37 elements using 37 bytes
1208 @end group
1209 @end smallexample
1210 
1211 @seealso{terminal_size}
1212 @end deftypefn */)
1213 {
1214  int nargin = args.length ();
1215 
1216  if (nargin < 1 || nargin > 3)
1217  print_usage ();
1218 
1219  string_vector s = args(0).xstring_vector_value ("list_in_columns: ARG must be a cellstr or char array");
1220 
1221  int width = -1;
1222 
1223  if (nargin > 1 && ! args(1).isempty ())
1224  width = args(1).xint_value ("list_in_columns: WIDTH must be an integer");
1225 
1226  std::string prefix;
1227 
1228  if (nargin > 2)
1229  prefix = args(2).xstring_value ("list_in_columns: PREFIX must be a string");
1230 
1231  std::ostringstream buf;
1232 
1233  s.list_in_columns (buf, width, prefix);
1234 
1235  return ovl (buf.str ());
1236 }
1237 
1238 /*
1239 %!test
1240 %! input = {"abc", "def", "ghijkl", "mnop", "qrs", "tuv"};
1241 %! result = "abc mnop\ndef qrs\nghijkl tuv\n";
1242 %! assert (list_in_columns (input, 20), result);
1243 %!test
1244 %! input = char ("abc", "def", "ghijkl", "mnop", "qrs", "tuv");
1245 %! result = "abc mnop \ndef qrs \nghijkl tuv \n";
1246 %! assert (list_in_columns (input, 20), result);
1247 %!test
1248 %! input = char ("abc", "def", "ghijkl", "mnop", "qrs", "tuv");
1249 %! result = " abc mnop \n def qrs \n ghijkl tuv \n";
1250 %! assert (list_in_columns (input, 20, " "), result);
1251 
1252 %!error list_in_columns ()
1253 %!error list_in_columns (["abc", "def"], 20, 2)
1254 %!error list_in_columns (["abc", "def"], 20, " ", 3)
1255 %!error <list_in_columns: WIDTH must be an integer> list_in_columns (["abc", "def"], "a")
1256 */
1257 
1258 OCTAVE_END_NAMESPACE(octave)
#define NaN
Definition: Faddeeva.cc:261
int ndims() const
Size of the specified dimension.
Definition: Array.h:671
const T * data() const
Size of the specified dimension.
Definition: Array.h:663
Array< T, Alloc > permute(const Array< octave_idx_type > &vec, bool inv=false) const
Size of the specified dimension.
Definition: Array-base.cc:450
const dim_vector & dims() const
Return a const-reference so that dims ()(i) works efficiently.
Definition: Array.h:503
T & xelem(octave_idx_type n)
Size of the specified dimension.
Definition: Array.h:524
octave_idx_type numel() const
Number of elements in the array.
Definition: Array.h:414
Definition: Cell.h:43
bool iscellstr() const
Definition: Cell.cc:126
MArray< T > permute(const Array< octave_idx_type > &vec, bool inv=false) const
Definition: MArray.h:90
Definition: dMatrix.h:42
Vector representing the dimensions (size) of an Array.
Definition: dim-vector.h:94
Cell cell_value() const
bool is_string() const
Definition: ov.h:637
charNDArray char_array_value(bool frc_str_conv=false) const
Definition: ov.h:897
bool iscell() const
Definition: ov.h:604
octave_idx_type numel() const
Definition: ov.h:559
string_vector string_vector_value(bool pad=false) const
Definition: ov.h:977
octave_idx_type length() const
octave_value convert_to_str(bool pad=false, bool force=false, char type='\'') const
Definition: ov.h:1307
bool iscellstr() const
Definition: ov.h:607
Array< std::string > cellstr_value() const
Definition: ov.h:982
dim_vector dims() const
Definition: ov.h:541
octave_idx_type max_length() const
Definition: str-vec.h:79
std::ostream & list_in_columns(std::ostream &, int width=0, const std::string &prefix="") const
Definition: str-vec.cc:201
octave_idx_type numel() const
Definition: str-vec.h:100
Array< U > map(F fcn) const
Definition: str-vec.h:145
OCTAVE_BEGIN_NAMESPACE(octave) static octave_value daspk_fcn
void print_usage(void)
Definition: defun-int.h:72
#define DEFUN(name, args_name, nargout_name, doc)
Macro to define a builtin function.
Definition: defun.h:56
#define DEFUNX(name, fname, args_name, nargout_name, doc)
Macro to define a builtin function with certain internal name.
Definition: defun.h:85
void() error(const char *fmt,...)
Definition: error.cc:988
const char * octave_locale_charset_wrapper(void)
octave_idx_type n
Definition: mx-inlines.cc:761
T * r
Definition: mx-inlines.cc:781
std::complex< double > Complex
Definition: oct-cmplx.h:33
bool strncmp(const T &str_a, const T &str_b, const typename T::size_type n)
True if the first N characters are the same.
u8_fallback_type
Definition: oct-string.h:153
@ U8_ISO_8859_1
Definition: oct-string.h:155
@ U8_REPLACEMENT_CHAR
Definition: oct-string.h:154
bool strcmp(const T &str_a, const T &str_b)
Octave string utility functions.
Complex str2double(const std::string &str_arg)
bool strcmpi(const T &str_a, const T &str_b)
True if strings are the same, ignoring case.
unsigned int u8_validate(const std::string &who, std::string &in_string, const u8_fallback_type type=U8_REPLACEMENT_CHAR)
bool strncmpi(const T &str_a, const T &str_b, const typename T::size_type n)
True if the first N characters are the same, ignoring case.
void free(void *)
return octave_value(v1.char_array_value() . concat(v2.char_array_value(), ra_idx),((a1.is_sq_string()||a2.is_sq_string()) ? '\'' :'"'))
octave_value_list ovl(const OV_Args &... args)
Construct an octave_value_list with less typing.
Definition: ovl.h:219
octave_value_list Fstrcmpi(const octave_value_list &args, int)
Definition: strfns.cc:696
octave_value_list Fstrncmpi(const octave_value_list &args, int)
Definition: strfns.cc:727
uint8_t * octave_u8_conv_from_encoding(const char *fromcode, const char *src, size_t srclen, size_t *lengthp)
char * octave_u8_conv_to_encoding(const char *tocode, const uint8_t *src, size_t srclen, size_t *lengthp)
int octave_u8_strmblen_wrapper(const uint8_t *src)
size_t octave_u8_mbsnlen_wrapper(const uint8_t *src, size_t n)
F77_RET_T len
Definition: xerbla.cc:61