GNU Octave 11.1.0
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab
 
Loading...
Searching...
No Matches
uniconv-wrappers.c
Go to the documentation of this file.
1////////////////////////////////////////////////////////////////////////
2//
3// Copyright (C) 2017-2026 The Octave Project Developers
4//
5// See the file COPYRIGHT.md in the top-level directory of this
6// distribution or <https://octave.org/copyright/>.
7//
8// This file is part of Octave.
9//
10// Octave is free software: you can redistribute it and/or modify it
11// under the terms of the GNU General Public License as published by
12// the Free Software Foundation, either version 3 of the License, or
13// (at your option) any later version.
14//
15// Octave is distributed in the hope that it will be useful, but
16// WITHOUT ANY WARRANTY; without even the implied warranty of
17// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18// GNU General Public License for more details.
19//
20// You should have received a copy of the GNU General Public License
21// along with Octave; see the file COPYING. If not, see
22// <https://www.gnu.org/licenses/>.
23//
24////////////////////////////////////////////////////////////////////////
25
26// The conversion functions are provided by gnulib. We don't include
27// gnulib headers directly in Octave's C++ source files to avoid
28// problems that may be caused by the way that gnulib overrides standard
29// library functions.
30
31#if defined (HAVE_CONFIG_H)
32# include "config.h"
33#endif
34
35#include <stdlib.h>
36#include <string.h>
37#include <wchar.h>
38
39#include "uniconv.h"
40
41#include "uniconv-wrappers.h"
42
43uint8_t *
44octave_u8_conv_from_encoding (const char *fromcode, const char *src,
45 size_t srclen, size_t *lengthp)
46{
47 return u8_conv_from_encoding (fromcode, iconveh_question_mark,
48 src, srclen, NULL, NULL, lengthp);
49}
50
51static char *
52octave_u8_conv_to_encoding_intern (const char *tocode,
53 enum iconv_ilseq_handler handler,
54 const uint8_t *src, size_t srclen,
55 size_t *offsets, size_t *lengthp)
56{
57 // FIXME: It looks like the input to u8_conv_to_encoding must be at least
58 // four bytes and zero-terminated to work correctly. Zero-pad input.
59 // Should this be fixed in gnulib or iconv instead?
60 size_t minlen = 4;
61 size_t padlen = (srclen+1 > minlen ? srclen+1 : minlen);
62
63 // Take surrogate size into account for UTF-16 and UTF-32 output encodings.
64 // That is necessary to correctly remove the padding after the encoding
65 // conversion.
66 // FIXME: Are there other encodings that we should support for which the
67 // "encoding surrogate" size is different from one byte?
68 size_t surrogate_size = 1;
69 if (strlen (tocode) > 5)
70 {
71 if ((tocode[0] == 'u' || tocode[0] == 'U')
72 && (tocode[1] == 't' || tocode[1] == 'T')
73 && (tocode[2] == 'f' || tocode[2] == 'F')
74 && tocode[3] == '-')
75 {
76 if (tocode[4] == '1' && tocode[5] == '6')
77 surrogate_size = 2;
78 else if (tocode[4] == '3' && tocode[5] == '2')
79 surrogate_size = 4;
80 }
81 }
82
83 uint8_t *u8_str = (uint8_t *) malloc (padlen);
84 memcpy (u8_str, src, srclen);
85 for (size_t i_pad = 0; i_pad < padlen-srclen; i_pad++)
86 u8_str[srclen+i_pad] = 0;
87 const uint8_t *cu8_str = u8_str;
88
89 // Convert from UTF-8 to output encoding
90 char *ret = u8_conv_to_encoding (tocode, handler, cu8_str, padlen,
91 offsets, NULL, lengthp);
92
93 free ((void *) u8_str);
94
95 // FIXME: This assumes that "\0" is converted to one "encoding surrogate".
96 // This might not be true for some exotic output encodings (like
97 // UTF-7?).
98 *lengthp = (*lengthp <= (padlen-srclen) * surrogate_size
99 ? 0
100 : *lengthp - (padlen-srclen) * surrogate_size);
101
102 return ret;
103}
104
105char *
106octave_u8_conv_to_encoding (const char *tocode, const uint8_t *src,
107 size_t srclen, size_t *lengthp)
108{
109 return octave_u8_conv_to_encoding_intern (tocode, iconveh_question_mark,
110 src, srclen, NULL, lengthp);
111}
112
113char *
114octave_u8_conv_to_encoding_strict (const char *tocode, const uint8_t *src,
115 size_t srclen, size_t *lengthp)
116{
117 return octave_u8_conv_to_encoding_intern (tocode, iconveh_error,
118 src, srclen, NULL, lengthp);
119}
120
121uint16_t *
122octave_u16_conv_from_encoding (const char *fromcode, const char *src,
123 size_t srclen, size_t *lengthp)
124{
125 return u16_conv_from_encoding (fromcode, iconveh_question_mark,
126 src, srclen, NULL, NULL, lengthp);
127}
128
129uint16_t *
130octave_u16_conv_from_encoding_strict (const char *fromcode, const char *src,
131 size_t srclen, size_t *lengthp)
132{
133 return u16_conv_from_encoding (fromcode, iconveh_error,
134 src, srclen, NULL, NULL, lengthp);
135}
136
137char *
138octave_u16_conv_to_encoding (const char *tocode, const uint16_t *src,
139 size_t srclen, size_t *lengthp)
140{
141 return u16_conv_to_encoding (tocode, iconveh_question_mark,
142 src, srclen, NULL, NULL, lengthp);
143}
144
145char *
146octave_u16_conv_to_encoding_strict (const char *tocode, const uint16_t *src,
147 size_t srclen, size_t *lengthp)
148{
149 return u16_conv_to_encoding (tocode, iconveh_error,
150 src, srclen, NULL, NULL, lengthp);
151}
152
153char *
154octave_u32_conv_to_encoding_strict (const char *tocode, const uint32_t *src,
155 size_t srclen, size_t *lengthp)
156{
157 return u32_conv_to_encoding (tocode, iconveh_error,
158 src, srclen, NULL, NULL, lengthp);
159}
160
161uint8_t *
163 (const char *fromcode, const char *src, size_t srclen,
164 size_t *offsets, size_t *lengthp)
165{
166 return u8_conv_from_encoding (fromcode, iconveh_question_mark,
167 src, srclen, offsets, NULL, lengthp);
168}
169
170char *
172 (const char *tocode, const uint8_t *src, size_t srclen,
173 size_t *offsets, size_t *lengthp)
174{
175 return octave_u8_conv_to_encoding_intern (tocode, iconveh_question_mark,
176 src, srclen, offsets, lengthp);
177}
178
179char *
180u8_from_wchar (const wchar_t *wc)
181{
182 // Convert wide char array to multibyte UTF-8 char array
183 // The memory at the returned pointer must be freed after use.
184
185 size_t srclen = wcslen (wc) * sizeof (wchar_t);
186 const char *src = (const char *) wc;
187
188 size_t length = 0;
189 uint8_t *mbchar = u8_conv_from_encoding ("wchar_t", iconveh_question_mark,
190 src, srclen, NULL, NULL, &length);
191
192 // result might not be 0 terminated
193 char *retval = malloc (length + 1);
194 if (retval)
195 {
196 memcpy (retval, mbchar, length);
197 free ((void *) mbchar);
198 retval[length] = 0; // 0 terminate string
199 }
200 else
201 free ((void *) mbchar);
202
203 return retval;
204}
205
206wchar_t *
207u8_to_wchar (const char *u8)
208{
209 // Convert multibyte UTF-8 char array to wide char array
210 // The memory at the returned pointer must be freed after use.
211
212 size_t srclen = strlen (u8);
213 const uint8_t *src = (const uint8_t *) u8;
214
215 size_t length = 0;
216
217 char *wchar = u8_conv_to_encoding ("wchar_t", iconveh_question_mark,
218 src, srclen, NULL, NULL, &length);
219 // result might not be 0 terminated
220 wchar_t *retval = malloc (length + 1 * sizeof (wchar_t));
221 if (retval)
222 {
223 memcpy (retval, wchar, length);
224 free ((void *) wchar);
225 retval[length / sizeof (wchar_t)] = 0; // 0 terminate string
226 }
227
228 else
229 free ((void *) wchar);
230
231 return retval;
232}
T::size_type strlen(const typename T::value_type *str)
Definition oct-string.cc:95
void * malloc(unsigned)
void free(void *)
char * octave_u8_conv_to_encoding_strict(const char *tocode, const uint8_t *src, size_t srclen, size_t *lengthp)
wchar_t * u8_to_wchar(const char *u8)
char * octave_u8_conv_to_encoding(const char *tocode, const uint8_t *src, size_t srclen, size_t *lengthp)
uint8_t * octave_u8_conv_from_encoding(const char *fromcode, const char *src, size_t srclen, size_t *lengthp)
char * octave_u16_conv_to_encoding(const char *tocode, const uint16_t *src, size_t srclen, size_t *lengthp)
char * octave_u16_conv_to_encoding_strict(const char *tocode, const uint16_t *src, size_t srclen, size_t *lengthp)
uint8_t * octave_u8_conv_from_encoding_offsets(const char *fromcode, const char *src, size_t srclen, size_t *offsets, size_t *lengthp)
char * u8_from_wchar(const wchar_t *wc)
char * octave_u32_conv_to_encoding_strict(const char *tocode, const uint32_t *src, size_t srclen, size_t *lengthp)
char * octave_u8_conv_to_encoding_offsets(const char *tocode, const uint8_t *src, size_t srclen, size_t *offsets, size_t *lengthp)
uint16_t * octave_u16_conv_from_encoding(const char *fromcode, const char *src, size_t srclen, size_t *lengthp)
uint16_t * octave_u16_conv_from_encoding_strict(const char *fromcode, const char *src, size_t srclen, size_t *lengthp)