GNU Octave  9.1.0
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab
uniconv-wrappers.c
Go to the documentation of this file.
1 ////////////////////////////////////////////////////////////////////////
2 //
3 // Copyright (C) 2017-2024 The Octave Project Developers
4 //
5 // See the file COPYRIGHT.md in the top-level directory of this
6 // distribution or <https://octave.org/copyright/>.
7 //
8 // This file is part of Octave.
9 //
10 // Octave is free software: you can redistribute it and/or modify it
11 // under the terms of the GNU General Public License as published by
12 // the Free Software Foundation, either version 3 of the License, or
13 // (at your option) any later version.
14 //
15 // Octave is distributed in the hope that it will be useful, but
16 // WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 // GNU General Public License for more details.
19 //
20 // You should have received a copy of the GNU General Public License
21 // along with Octave; see the file COPYING. If not, see
22 // <https://www.gnu.org/licenses/>.
23 //
24 ////////////////////////////////////////////////////////////////////////
25 
26 // The conversion functions are provided by gnulib. We don't include
27 // gnulib headers directly in Octave's C++ source files to avoid
28 // problems that may be caused by the way that gnulib overrides standard
29 // library functions.
30 
31 #if defined (HAVE_CONFIG_H)
32 # include "config.h"
33 #endif
34 
35 #include <stdlib.h>
36 #include <string.h>
37 #include <wchar.h>
38 
39 #include "uniconv.h"
40 
41 #include "uniconv-wrappers.h"
42 
43 uint8_t *
44 octave_u8_conv_from_encoding (const char *fromcode, const char *src,
45  size_t srclen, size_t *lengthp)
46 {
47  return u8_conv_from_encoding (fromcode, iconveh_question_mark,
48  src, srclen, NULL, NULL, lengthp);
49 }
50 
51 static char *
52 octave_u8_conv_to_encoding_intern (const char *tocode,
53  enum iconv_ilseq_handler handler,
54  const uint8_t *src, size_t srclen,
55  size_t *offsets, size_t *lengthp)
56 {
57  // FIXME: It looks like the input to u8_conv_to_encoding must be at least
58  // four bytes and zero-terminated to work correctly. Zero-pad input.
59  // Should this be fixed in gnulib or iconv instead?
60  size_t minlen = 4;
61  size_t padlen = (srclen > minlen ? srclen : minlen);
62 
63  // Do not zero-terminate when the output encoding is a UTF encoding, i.e.,
64  // the surrogates are different than a byte.
65  if ((tocode[0] != 'u' && tocode[0] != 'U')
66  || (tocode[1] != 't' && tocode[1] != 'T')
67  || (tocode[2] != 'f' && tocode[2] != 'F'))
68  padlen++;
69 
70  uint8_t *u8_str = NULL;
71  const uint8_t *cu8_str;
72  if (srclen < padlen)
73  {
74  u8_str = (uint8_t *) malloc (padlen);
75  memcpy (u8_str, src, srclen);
76  for (size_t i_pad = 0; i_pad < padlen-srclen; i_pad++)
77  u8_str[srclen+i_pad] = 0;
78  cu8_str = u8_str;
79  }
80  else
81  cu8_str = src;
82 
83  // Convert from UTF-8 to output encoding
84  char *ret = u8_conv_to_encoding (tocode, handler, cu8_str, padlen,
85  offsets, NULL, lengthp);
86 
87  if (srclen < padlen)
88  free ((void *) u8_str);
89 
90  // FIXME: This assumes that "\0" is converted to a single byte. This might
91  // not be true for some exotic output encodings (like UTF-7?).
92  *lengthp = (*lengthp <= (padlen-srclen) ? 0 : *lengthp - (padlen-srclen));
93 
94  return ret;
95 }
96 
97 char *
98 octave_u8_conv_to_encoding (const char *tocode, const uint8_t *src,
99  size_t srclen, size_t *lengthp)
100 {
101  return octave_u8_conv_to_encoding_intern (tocode, iconveh_question_mark,
102  src, srclen, NULL, lengthp);
103 }
104 
105 char *
106 octave_u8_conv_to_encoding_strict (const char *tocode, const uint8_t *src,
107  size_t srclen, size_t *lengthp)
108 {
109  return octave_u8_conv_to_encoding_intern (tocode, iconveh_error,
110  src, srclen, NULL, lengthp);
111 }
112 
113 uint16_t *
114 octave_u16_conv_from_encoding (const char *fromcode, const char *src,
115  size_t srclen, size_t *lengthp)
116 {
117  return u16_conv_from_encoding (fromcode, iconveh_question_mark,
118  src, srclen, NULL, NULL, lengthp);
119 }
120 
121 uint16_t *
122 octave_u16_conv_from_encoding_strict (const char *fromcode, const char *src,
123  size_t srclen, size_t *lengthp)
124 {
125  return u16_conv_from_encoding (fromcode, iconveh_error,
126  src, srclen, NULL, NULL, lengthp);
127 }
128 
129 char *
130 octave_u16_conv_to_encoding (const char *tocode, const uint16_t *src,
131  size_t srclen, size_t *lengthp)
132 {
133  return u16_conv_to_encoding (tocode, iconveh_question_mark,
134  src, srclen, NULL, NULL, lengthp);
135 }
136 
137 char *
138 octave_u16_conv_to_encoding_strict (const char *tocode, const uint16_t *src,
139  size_t srclen, size_t *lengthp)
140 {
141  return u16_conv_to_encoding (tocode, iconveh_error,
142  src, srclen, NULL, NULL, lengthp);
143 }
144 
145 char *
146 octave_u32_conv_to_encoding_strict (const char *tocode, const uint32_t *src,
147  size_t srclen, size_t *lengthp)
148 {
149  return u32_conv_to_encoding (tocode, iconveh_error,
150  src, srclen, NULL, NULL, lengthp);
151 }
152 
153 uint8_t *
155  (const char *fromcode, const char *src, size_t srclen,
156  size_t *offsets, size_t *lengthp)
157 {
158  return u8_conv_from_encoding (fromcode, iconveh_question_mark,
159  src, srclen, offsets, NULL, lengthp);
160 }
161 
162 char *
164  (const char *tocode, const uint8_t *src, size_t srclen,
165  size_t *offsets, size_t *lengthp)
166 {
167  return octave_u8_conv_to_encoding_intern (tocode, iconveh_question_mark,
168  src, srclen, offsets, lengthp);
169 }
170 
171 char *
172 u8_from_wchar (const wchar_t *wc)
173 {
174  // Convert wide char array to multibyte UTF-8 char array
175  // The memory at the returned pointer must be freed after use.
176 
177  size_t srclen = wcslen (wc) * sizeof (wchar_t);
178  const char *src = (const char *) wc;
179 
180  size_t length = 0;
181  uint8_t *mbchar = u8_conv_from_encoding ("wchar_t", iconveh_question_mark,
182  src, srclen, NULL, NULL, &length);
183 
184  // result might not be 0 terminated
185  char *retval = malloc (length + 1);
186  if (retval)
187  {
188  memcpy (retval, mbchar, length);
189  free ((void *) mbchar);
190  retval[length] = 0; // 0 terminate string
191  }
192  else
193  free ((void *) mbchar);
194 
195  return retval;
196 }
197 
198 wchar_t *
199 u8_to_wchar (const char *u8)
200 {
201  // Convert multibyte UTF-8 char array to wide char array
202  // The memory at the returned pointer must be freed after use.
203 
204  size_t srclen = strlen (u8);
205  const uint8_t *src = (const uint8_t *) u8;
206 
207  size_t length = 0;
208 
209  char *wchar = u8_conv_to_encoding ("wchar_t", iconveh_question_mark,
210  src, srclen, NULL, NULL, &length);
211  // result might not be 0 terminated
212  wchar_t *retval = malloc (length + 1 * sizeof (wchar_t));
213  if (retval)
214  {
215  memcpy (retval, wchar, length);
216  free ((void *) wchar);
217  retval[length / sizeof (wchar_t)] = 0; // 0 terminate string
218  }
219 
220  else
221  free ((void *) wchar);
222 
223  return retval;
224 }
T::size_type strlen(const typename T::value_type *str)
Definition: oct-string.cc:88
void * malloc(unsigned)
void free(void *)
uint8_t * octave_u8_conv_from_encoding_offsets(const char *fromcode, const char *src, size_t srclen, size_t *offsets, size_t *lengthp)
char * octave_u8_conv_to_encoding_offsets(const char *tocode, const uint8_t *src, size_t srclen, size_t *offsets, size_t *lengthp)
uint16_t * octave_u16_conv_from_encoding(const char *fromcode, const char *src, size_t srclen, size_t *lengthp)
uint8_t * octave_u8_conv_from_encoding(const char *fromcode, const char *src, size_t srclen, size_t *lengthp)
char * octave_u32_conv_to_encoding_strict(const char *tocode, const uint32_t *src, size_t srclen, size_t *lengthp)
uint16_t * octave_u16_conv_from_encoding_strict(const char *fromcode, const char *src, size_t srclen, size_t *lengthp)
char * octave_u8_conv_to_encoding(const char *tocode, const uint8_t *src, size_t srclen, size_t *lengthp)
char * octave_u8_conv_to_encoding_strict(const char *tocode, const uint8_t *src, size_t srclen, size_t *lengthp)
wchar_t * u8_to_wchar(const char *u8)
char * octave_u16_conv_to_encoding(const char *tocode, const uint16_t *src, size_t srclen, size_t *lengthp)
char * octave_u16_conv_to_encoding_strict(const char *tocode, const uint16_t *src, size_t srclen, size_t *lengthp)
char * u8_from_wchar(const wchar_t *wc)