Mercurial > sdl-ios-xcode
comparison src/stdlib/SDL_iconv.c @ 1501:73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
author | Sam Lantinga <slouken@libsdl.org> |
---|---|
date | Mon, 13 Mar 2006 01:08:00 +0000 |
parents | |
children | d403a39389da |
comparison
equal
deleted
inserted
replaced
1500:f58c88a4dff5 | 1501:73dc5d39bbf8 |
---|---|
1 /* | |
2 SDL - Simple DirectMedia Layer | |
3 Copyright (C) 1997-2006 Sam Lantinga | |
4 | |
5 This library is free software; you can redistribute it and/or | |
6 modify it under the terms of the GNU Lesser General Public | |
7 License as published by the Free Software Foundation; either | |
8 version 2.1 of the License, or (at your option) any later version. | |
9 | |
10 This library is distributed in the hope that it will be useful, | |
11 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 Lesser General Public License for more details. | |
14 | |
15 You should have received a copy of the GNU Lesser General Public | |
16 License along with this library; if not, write to the Free Software | |
17 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
18 | |
19 Sam Lantinga | |
20 slouken@libsdl.org | |
21 */ | |
22 #include "SDL_config.h" | |
23 | |
24 /* This file contains portable iconv functions for SDL */ | |
25 | |
26 #include "SDL_stdinc.h" | |
27 #include "SDL_endian.h" | |
28 | |
29 #ifdef HAVE_ICONV | |
30 | |
31 #include <errno.h> | |
32 | |
33 size_t SDL_iconv(SDL_iconv_t cd, | |
34 char **inbuf, size_t *inbytesleft, | |
35 char **outbuf, size_t *outbytesleft) | |
36 { | |
37 size_t retCode = iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft); | |
38 if ( retCode == (size_t)-1 ) { | |
39 switch(errno) { | |
40 case E2BIG: | |
41 return SDL_ICONV_E2BIG; | |
42 case EILSEQ: | |
43 return SDL_ICONV_EILSEQ; | |
44 case EINVAL: | |
45 return SDL_ICONV_EINVAL; | |
46 default: | |
47 return SDL_ICONV_ERROR; | |
48 } | |
49 } | |
50 return retCode; | |
51 } | |
52 | |
53 #else | |
54 | |
/* Code point written as the byte order marker for "UTF-16"/"UTF-32" output */
#define UNICODE_BOM 0xFEFF

/* Substitutes stored when a character cannot be decoded or represented */
#define UNKNOWN_ASCII '?'
#define UNKNOWN_UNICODE 0xFFFD

/* Internal identifiers for the encodings the built-in converter handles.
   ENCODING_UNKNOWN marks a name that was not found in the lookup table. */
enum {
	ENCODING_UNKNOWN,
	ENCODING_ASCII,
	ENCODING_LATIN1,
	ENCODING_UTF8,
	ENCODING_UTF16, /* Needs byte order marker */
	ENCODING_UTF16BE,
	ENCODING_UTF16LE,
	ENCODING_UTF32, /* Needs byte order marker */
	ENCODING_UTF32BE,
	ENCODING_UTF32LE,
	ENCODING_UCS2, /* Native byte order assumed */
	ENCODING_UCS4, /* Native byte order assumed */
};
/* Aliases selecting the explicit-endian UTF-16/UTF-32 variant that matches
   the byte order SDL was configured for */
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
#define ENCODING_UTF16NATIVE ENCODING_UTF16BE
#define ENCODING_UTF32NATIVE ENCODING_UTF32BE
#else
#define ENCODING_UTF16NATIVE ENCODING_UTF16LE
#define ENCODING_UTF32NATIVE ENCODING_UTF32LE
#endif
81 | |
/* Conversion descriptor created by SDL_iconv_open(). Both fields hold
   ENCODING_* values; SDL_iconv() replaces the generic UTF-16/UTF-32 values
   with an explicit-endian variant once the byte order has been decided. */
struct _SDL_iconv_t
{
	int src_fmt; /* encoding the input is decoded from */
	int dst_fmt; /* encoding the output is written in */
};
87 | |
88 static struct { | |
89 const char *name; | |
90 int format; | |
91 } encodings[] = { | |
92 { "ASCII", ENCODING_ASCII }, | |
93 { "US-ASCII", ENCODING_ASCII }, | |
94 { "LATIN1", ENCODING_LATIN1 }, | |
95 { "ISO-8859-1", ENCODING_LATIN1 }, | |
96 { "UTF8", ENCODING_UTF8 }, | |
97 { "UTF-8", ENCODING_UTF8 }, | |
98 { "UTF16", ENCODING_UTF16 }, | |
99 { "UTF-16", ENCODING_UTF16 }, | |
100 { "UTF16BE", ENCODING_UTF16BE }, | |
101 { "UTF-16BE", ENCODING_UTF16BE }, | |
102 { "UTF16LE", ENCODING_UTF16LE }, | |
103 { "UTF-16LE", ENCODING_UTF16LE }, | |
104 { "UTF32", ENCODING_UTF32 }, | |
105 { "UTF-32", ENCODING_UTF32 }, | |
106 { "UTF32BE", ENCODING_UTF32BE }, | |
107 { "UTF-32BE", ENCODING_UTF32BE }, | |
108 { "UTF32LE", ENCODING_UTF32LE }, | |
109 { "UTF-32LE", ENCODING_UTF32LE }, | |
110 { "UCS2", ENCODING_UCS2 }, | |
111 { "UCS-2", ENCODING_UCS2 }, | |
112 { "UCS4", ENCODING_UCS4 }, | |
113 { "UCS-4", ENCODING_UCS4 }, | |
114 }; | |
115 | |
116 SDL_iconv_t SDL_iconv_open(const char *tocode, const char *fromcode) | |
117 { | |
118 int src_fmt = ENCODING_UNKNOWN; | |
119 int dst_fmt = ENCODING_UNKNOWN; | |
120 int i; | |
121 | |
122 for ( i = 0; i < SDL_arraysize(encodings); ++i ) { | |
123 if ( SDL_strcasecmp(fromcode, encodings[i].name) == 0 ) { | |
124 src_fmt = encodings[i].format; | |
125 if ( dst_fmt != ENCODING_UNKNOWN ) { | |
126 break; | |
127 } | |
128 } | |
129 if ( SDL_strcasecmp(tocode, encodings[i].name) == 0 ) { | |
130 dst_fmt = encodings[i].format; | |
131 if ( src_fmt != ENCODING_UNKNOWN ) { | |
132 break; | |
133 } | |
134 } | |
135 } | |
136 if ( src_fmt != ENCODING_UNKNOWN && dst_fmt != ENCODING_UNKNOWN ) { | |
137 SDL_iconv_t cd = (SDL_iconv_t)SDL_malloc(sizeof(*cd)); | |
138 if ( cd ) { | |
139 cd->src_fmt = src_fmt; | |
140 cd->dst_fmt = dst_fmt; | |
141 return cd; | |
142 } | |
143 } | |
144 return (SDL_iconv_t)-1; | |
145 } | |
146 | |
147 size_t SDL_iconv(SDL_iconv_t cd, | |
148 char **inbuf, size_t *inbytesleft, | |
149 char **outbuf, size_t *outbytesleft) | |
150 { | |
151 /* For simplicity, we'll convert everything to and from UCS-4 */ | |
152 char *src, *dst; | |
153 size_t srclen, dstlen; | |
154 Uint32 ch; | |
155 size_t total; | |
156 | |
157 if ( !inbuf || !*inbuf ) { | |
158 /* Reset the context */ | |
159 return 0; | |
160 } | |
161 if ( !outbuf || !*outbuf || !outbytesleft || !*outbytesleft ) { | |
162 return SDL_ICONV_E2BIG; | |
163 } | |
164 src = *inbuf; | |
165 srclen = (inbytesleft ? *inbytesleft : 0); | |
166 dst = *outbuf; | |
167 dstlen = *outbytesleft; | |
168 | |
169 switch ( cd->src_fmt ) { | |
170 case ENCODING_UTF16: | |
171 /* Scan for a byte order marker */ | |
172 { | |
173 Uint8 *p = (Uint8 *)src; | |
174 size_t n = srclen / 2; | |
175 while ( n ) { | |
176 if ( p[0] == 0xFF && p[1] == 0xFE ) { | |
177 cd->src_fmt = ENCODING_UTF16BE; | |
178 break; | |
179 } else if ( p[0] == 0xFE && p[1] == 0xFF ) { | |
180 cd->src_fmt = ENCODING_UTF16LE; | |
181 break; | |
182 } | |
183 p += 2; | |
184 --n; | |
185 } | |
186 if ( n == 0 ) { | |
187 /* We can't tell, default to host order */ | |
188 cd->src_fmt = ENCODING_UTF16NATIVE; | |
189 } | |
190 } | |
191 break; | |
192 case ENCODING_UTF32: | |
193 /* Scan for a byte order marker */ | |
194 { | |
195 Uint8 *p = (Uint8 *)src; | |
196 size_t n = srclen / 4; | |
197 while ( n ) { | |
198 if ( p[0] == 0xFF && p[1] == 0xFE && | |
199 p[2] == 0x00 && p[3] == 0x00 ) { | |
200 cd->src_fmt = ENCODING_UTF32BE; | |
201 break; | |
202 } else if ( p[0] == 0x00 && p[1] == 0x00 && | |
203 p[2] == 0xFE && p[3] == 0xFF ) { | |
204 cd->src_fmt = ENCODING_UTF32LE; | |
205 break; | |
206 } | |
207 p += 4; | |
208 --n; | |
209 } | |
210 if ( n == 0 ) { | |
211 /* We can't tell, default to host order */ | |
212 cd->src_fmt = ENCODING_UTF32NATIVE; | |
213 } | |
214 } | |
215 break; | |
216 } | |
217 | |
218 switch ( cd->dst_fmt ) { | |
219 case ENCODING_UTF16: | |
220 /* Default to host order, need to add byte order marker */ | |
221 if ( dstlen < 2 ) { | |
222 return SDL_ICONV_E2BIG; | |
223 } | |
224 *(Uint16 *)dst = UNICODE_BOM; | |
225 dst += 2; | |
226 dstlen -= 2; | |
227 cd->dst_fmt = ENCODING_UTF16NATIVE; | |
228 break; | |
229 case ENCODING_UTF32: | |
230 /* Default to host order, need to add byte order marker */ | |
231 if ( dstlen < 4 ) { | |
232 return SDL_ICONV_E2BIG; | |
233 } | |
234 *(Uint32 *)dst = UNICODE_BOM; | |
235 dst += 4; | |
236 dstlen -= 4; | |
237 cd->dst_fmt = ENCODING_UTF32NATIVE; | |
238 break; | |
239 } | |
240 | |
241 total = 0; | |
242 while ( srclen > 0 ) { | |
243 /* Decode a character */ | |
244 switch ( cd->src_fmt ) { | |
245 case ENCODING_ASCII: | |
246 { | |
247 Uint8 *p = (Uint8 *)src; | |
248 ch = (Uint32)(p[0] & 0x7F); | |
249 ++src; | |
250 --srclen; | |
251 } | |
252 break; | |
253 case ENCODING_LATIN1: | |
254 { | |
255 Uint8 *p = (Uint8 *)src; | |
256 ch = (Uint32)p[0]; | |
257 ++src; | |
258 --srclen; | |
259 } | |
260 break; | |
261 case ENCODING_UTF8: /* RFC 3629 */ | |
262 { | |
263 Uint8 *p = (Uint8 *)src; | |
264 size_t left = 0; | |
265 SDL_bool overlong = SDL_FALSE; | |
266 if ( p[0] >= 0xFC ) { | |
267 if ( (p[0] & 0xFE) != 0xFC ) { | |
268 /* Skip illegal sequences | |
269 return SDL_ICONV_EILSEQ; | |
270 */ | |
271 ch = UNKNOWN_UNICODE; | |
272 } else { | |
273 if ( p[0] == 0xFC ) { | |
274 overlong = SDL_TRUE; | |
275 } | |
276 ch = (Uint32)(p[0] & 0x01); | |
277 left = 5; | |
278 } | |
279 } else if ( p[0] >= 0xF8 ) { | |
280 if ( (p[0] & 0xFC) != 0xF8 ) { | |
281 /* Skip illegal sequences | |
282 return SDL_ICONV_EILSEQ; | |
283 */ | |
284 ch = UNKNOWN_UNICODE; | |
285 } else { | |
286 if ( p[0] == 0xF8 ) { | |
287 overlong = SDL_TRUE; | |
288 } | |
289 ch = (Uint32)(p[0] & 0x03); | |
290 left = 4; | |
291 } | |
292 } else if ( p[0] >= 0xF0 ) { | |
293 if ( (p[0] & 0xF8) != 0xF0 ) { | |
294 /* Skip illegal sequences | |
295 return SDL_ICONV_EILSEQ; | |
296 */ | |
297 ch = UNKNOWN_UNICODE; | |
298 } else { | |
299 if ( p[0] == 0xF0 ) { | |
300 overlong = SDL_TRUE; | |
301 } | |
302 ch = (Uint32)(p[0] & 0x07); | |
303 left = 3; | |
304 } | |
305 } else if ( p[0] >= 0xE0 ) { | |
306 if ( (p[0] & 0xF0) != 0xE0 ) { | |
307 /* Skip illegal sequences | |
308 return SDL_ICONV_EILSEQ; | |
309 */ | |
310 ch = UNKNOWN_UNICODE; | |
311 } else { | |
312 if ( p[0] == 0xE0 ) { | |
313 overlong = SDL_TRUE; | |
314 } | |
315 ch = (Uint32)(p[0] & 0x0F); | |
316 left = 2; | |
317 } | |
318 } else if ( p[0] >= 0xC0 ) { | |
319 if ( (p[0] & 0xE0) != 0xC0 ) { | |
320 /* Skip illegal sequences | |
321 return SDL_ICONV_EILSEQ; | |
322 */ | |
323 ch = UNKNOWN_UNICODE; | |
324 } else { | |
325 if ( (p[0] & 0xCE) == 0xC0 ) { | |
326 overlong = SDL_TRUE; | |
327 } | |
328 ch = (Uint32)(p[0] & 0x1F); | |
329 left = 1; | |
330 } | |
331 } else { | |
332 if ( (p[0] & 0x80) != 0x00 ) { | |
333 /* Skip illegal sequences | |
334 return SDL_ICONV_EILSEQ; | |
335 */ | |
336 ch = UNKNOWN_UNICODE; | |
337 } else { | |
338 ch = (Uint32)p[0]; | |
339 } | |
340 } | |
341 ++src; | |
342 --srclen; | |
343 if ( srclen < left ) { | |
344 return SDL_ICONV_EINVAL; | |
345 } | |
346 while ( left-- ) { | |
347 ++p; | |
348 if ( (p[0] & 0xC0) != 0x80 ) { | |
349 /* Skip illegal sequences | |
350 return SDL_ICONV_EILSEQ; | |
351 */ | |
352 ch = UNKNOWN_UNICODE; | |
353 break; | |
354 } | |
355 ch <<= 6; | |
356 ch |= (p[0] & 0x3F); | |
357 ++src; | |
358 --srclen; | |
359 } | |
360 if ( overlong ) { | |
361 /* Potential security risk | |
362 return SDL_ICONV_EILSEQ; | |
363 */ | |
364 ch = UNKNOWN_UNICODE; | |
365 } | |
366 if ( (ch >= 0xD800 && ch <= 0xDFFF) || | |
367 (ch == 0xFFFE || ch == 0xFFFF) ) { | |
368 /* Skip illegal sequences | |
369 return SDL_ICONV_EILSEQ; | |
370 */ | |
371 ch = UNKNOWN_UNICODE; | |
372 } | |
373 } | |
374 break; | |
375 case ENCODING_UTF16BE: /* RFC 2781 */ | |
376 { | |
377 Uint8 *p = (Uint8 *)src; | |
378 Uint16 W1, W2; | |
379 if ( srclen < 2 ) { | |
380 return SDL_ICONV_EINVAL; | |
381 } | |
382 W1 = ((Uint32)p[0] << 8) | | |
383 (Uint32)p[1]; | |
384 src += 2; | |
385 srclen -= 2; | |
386 if ( W1 < 0xD800 || W1 > 0xDFFF ) { | |
387 ch = (Uint32)W1; | |
388 break; | |
389 } | |
390 if ( W1 > 0xDBFF ) { | |
391 /* Skip illegal sequences | |
392 return SDL_ICONV_EILSEQ; | |
393 */ | |
394 ch = UNKNOWN_UNICODE; | |
395 break; | |
396 } | |
397 if ( srclen < 2 ) { | |
398 return SDL_ICONV_EINVAL; | |
399 } | |
400 p = src; | |
401 W2 = ((Uint32)p[0] << 8) | | |
402 (Uint32)p[1]; | |
403 src += 2; | |
404 srclen -= 2; | |
405 if ( W2 < 0xDC00 || W2 > 0xDFFF ) { | |
406 /* Skip illegal sequences | |
407 return SDL_ICONV_EILSEQ; | |
408 */ | |
409 ch = UNKNOWN_UNICODE; | |
410 break; | |
411 } | |
412 ch = (((Uint32)(W1 & 0x3FF) << 10) | | |
413 (Uint32)(W2 & 0x3FF)) + 0x10000; | |
414 } | |
415 break; | |
416 case ENCODING_UTF16LE: /* RFC 2781 */ | |
417 { | |
418 Uint8 *p = (Uint8 *)src; | |
419 Uint16 W1, W2; | |
420 if ( srclen < 2 ) { | |
421 return SDL_ICONV_EINVAL; | |
422 } | |
423 W1 = ((Uint32)p[1] << 8) | | |
424 (Uint32)p[0]; | |
425 src += 2; | |
426 srclen -= 2; | |
427 if ( W1 < 0xD800 || W1 > 0xDFFF ) { | |
428 ch = (Uint32)W1; | |
429 break; | |
430 } | |
431 if ( W1 > 0xDBFF ) { | |
432 /* Skip illegal sequences | |
433 return SDL_ICONV_EILSEQ; | |
434 */ | |
435 ch = UNKNOWN_UNICODE; | |
436 break; | |
437 } | |
438 if ( srclen < 2 ) { | |
439 return SDL_ICONV_EINVAL; | |
440 } | |
441 p = src; | |
442 W2 = ((Uint32)p[1] << 8) | | |
443 (Uint32)p[0]; | |
444 src += 2; | |
445 srclen -= 2; | |
446 if ( W2 < 0xDC00 || W2 > 0xDFFF ) { | |
447 /* Skip illegal sequences | |
448 return SDL_ICONV_EILSEQ; | |
449 */ | |
450 ch = UNKNOWN_UNICODE; | |
451 break; | |
452 } | |
453 ch = (((Uint32)(W1 & 0x3FF) << 10) | | |
454 (Uint32)(W2 & 0x3FF)) + 0x10000; | |
455 } | |
456 break; | |
457 case ENCODING_UTF32BE: | |
458 { | |
459 Uint8 *p = (Uint8 *)src; | |
460 if ( srclen < 4 ) { | |
461 return SDL_ICONV_EINVAL; | |
462 } | |
463 ch = ((Uint32)p[0] << 24) | | |
464 ((Uint32)p[1] << 16) | | |
465 ((Uint32)p[2] << 8) | | |
466 (Uint32)p[3]; | |
467 src += 4; | |
468 srclen -= 4; | |
469 } | |
470 break; | |
471 case ENCODING_UTF32LE: | |
472 { | |
473 Uint8 *p = (Uint8 *)src; | |
474 if ( srclen < 4 ) { | |
475 return SDL_ICONV_EINVAL; | |
476 } | |
477 ch = ((Uint32)p[3] << 24) | | |
478 ((Uint32)p[2] << 16) | | |
479 ((Uint32)p[1] << 8) | | |
480 (Uint32)p[0]; | |
481 src += 4; | |
482 srclen -= 4; | |
483 } | |
484 break; | |
485 case ENCODING_UCS2: | |
486 { | |
487 Uint16 *p = (Uint16 *)src; | |
488 if ( srclen < 2 ) { | |
489 return SDL_ICONV_EINVAL; | |
490 } | |
491 ch = *p; | |
492 src += 2; | |
493 srclen -= 2; | |
494 } | |
495 break; | |
496 case ENCODING_UCS4: | |
497 { | |
498 Uint32 *p = (Uint32 *)src; | |
499 if ( srclen < 4 ) { | |
500 return SDL_ICONV_EINVAL; | |
501 } | |
502 ch = *p; | |
503 src += 4; | |
504 srclen -= 4; | |
505 } | |
506 break; | |
507 } | |
508 | |
509 /* Encode a character */ | |
510 switch ( cd->dst_fmt ) { | |
511 case ENCODING_ASCII: | |
512 { | |
513 Uint8 *p = (Uint8 *)dst; | |
514 if ( dstlen < 1 ) { | |
515 return SDL_ICONV_E2BIG; | |
516 } | |
517 if ( ch > 0x7F ) { | |
518 *p = UNKNOWN_ASCII; | |
519 } else { | |
520 *p = (Uint8)ch; | |
521 } | |
522 ++dst; | |
523 --dstlen; | |
524 } | |
525 break; | |
526 case ENCODING_LATIN1: | |
527 { | |
528 Uint8 *p = (Uint8 *)dst; | |
529 if ( dstlen < 1 ) { | |
530 return SDL_ICONV_E2BIG; | |
531 } | |
532 if ( ch > 0xFF ) { | |
533 *p = UNKNOWN_ASCII; | |
534 } else { | |
535 *p = (Uint8)ch; | |
536 } | |
537 ++dst; | |
538 --dstlen; | |
539 } | |
540 break; | |
541 case ENCODING_UTF8: /* RFC 3629 */ | |
542 { | |
543 Uint8 *p = (Uint8 *)dst; | |
544 if ( ch > 0x7FFFFFFF ) { | |
545 ch = UNKNOWN_UNICODE; | |
546 } | |
547 if ( ch <= 0x7F ) { | |
548 if ( dstlen < 1 ) { | |
549 return SDL_ICONV_E2BIG; | |
550 } | |
551 *p = (Uint8)ch; | |
552 ++dst; | |
553 --dstlen; | |
554 } else if ( ch <= 0x7FF ) { | |
555 if ( dstlen < 2 ) { | |
556 return SDL_ICONV_E2BIG; | |
557 } | |
558 p[0] = 0xC0 | (Uint8)((ch >> 6) & 0x1F); | |
559 p[1] = 0x80 | (Uint8)(ch & 0x3F); | |
560 dst += 2; | |
561 dstlen -= 2; | |
562 } else if ( ch <= 0xFFFF ) { | |
563 if ( dstlen < 3 ) { | |
564 return SDL_ICONV_E2BIG; | |
565 } | |
566 p[0] = 0xE0 | (Uint8)((ch >> 12) & 0x0F); | |
567 p[1] = 0x80 | (Uint8)((ch >> 6) & 0x3F); | |
568 p[2] = 0x80 | (Uint8)(ch & 0x3F); | |
569 dst += 3; | |
570 dstlen -= 3; | |
571 } else if ( ch <= 0x1FFFFF ) { | |
572 if ( dstlen < 4 ) { | |
573 return SDL_ICONV_E2BIG; | |
574 } | |
575 p[0] = 0xF0 | (Uint8)((ch >> 18) & 0x07); | |
576 p[1] = 0x80 | (Uint8)((ch >> 12) & 0x3F); | |
577 p[2] = 0x80 | (Uint8)((ch >> 6) & 0x3F); | |
578 p[3] = 0x80 | (Uint8)(ch & 0x3F); | |
579 dst += 4; | |
580 dstlen -= 4; | |
581 } else if ( ch <= 0x3FFFFFF ) { | |
582 if ( dstlen < 5 ) { | |
583 return SDL_ICONV_E2BIG; | |
584 } | |
585 p[0] = 0xF8 | (Uint8)((ch >> 24) & 0x03); | |
586 p[1] = 0x80 | (Uint8)((ch >> 18) & 0x3F); | |
587 p[2] = 0x80 | (Uint8)((ch >> 12) & 0x3F); | |
588 p[3] = 0x80 | (Uint8)((ch >> 6) & 0x3F); | |
589 p[4] = 0x80 | (Uint8)(ch & 0x3F); | |
590 dst += 5; | |
591 dstlen -= 5; | |
592 } else { | |
593 if ( dstlen < 6 ) { | |
594 return SDL_ICONV_E2BIG; | |
595 } | |
596 p[0] = 0xFC | (Uint8)((ch >> 30) & 0x01); | |
597 p[1] = 0x80 | (Uint8)((ch >> 24) & 0x3F); | |
598 p[2] = 0x80 | (Uint8)((ch >> 18) & 0x3F); | |
599 p[3] = 0x80 | (Uint8)((ch >> 12) & 0x3F); | |
600 p[4] = 0x80 | (Uint8)((ch >> 6) & 0x3F); | |
601 p[5] = 0x80 | (Uint8)(ch & 0x3F); | |
602 dst += 6; | |
603 dstlen -= 6; | |
604 } | |
605 } | |
606 break; | |
607 case ENCODING_UTF16BE: /* RFC 2781 */ | |
608 { | |
609 Uint8 *p = (Uint8 *)dst; | |
610 if ( ch > 0x10FFFF ) { | |
611 ch = UNKNOWN_UNICODE; | |
612 } | |
613 if ( ch < 0x10000 ) { | |
614 if ( dstlen < 2 ) { | |
615 return SDL_ICONV_E2BIG; | |
616 } | |
617 p[0] = (Uint8)(ch >> 8); | |
618 p[1] = (Uint8)ch; | |
619 dst += 2; | |
620 dstlen -= 2; | |
621 } else { | |
622 Uint16 W1, W2; | |
623 if ( dstlen < 4 ) { | |
624 return SDL_ICONV_E2BIG; | |
625 } | |
626 ch = ch - 0x10000; | |
627 W1 = 0xD800 | (Uint16)((ch >> 10) & 0x3FF); | |
628 W2 = 0xDC00 | (Uint16)(ch & 0x3FF); | |
629 p[0] = (Uint8)(W1 >> 8); | |
630 p[1] = (Uint8)W1; | |
631 p[2] = (Uint8)(W2 >> 8); | |
632 p[3] = (Uint8)W2; | |
633 dst += 4; | |
634 dstlen -= 4; | |
635 } | |
636 } | |
637 break; | |
638 case ENCODING_UTF16LE: /* RFC 2781 */ | |
639 { | |
640 Uint8 *p = (Uint8 *)dst; | |
641 if ( ch > 0x10FFFF ) { | |
642 ch = UNKNOWN_UNICODE; | |
643 } | |
644 if ( ch < 0x10000 ) { | |
645 if ( dstlen < 2 ) { | |
646 return SDL_ICONV_E2BIG; | |
647 } | |
648 p[1] = (Uint8)(ch >> 8); | |
649 p[0] = (Uint8)ch; | |
650 dst += 2; | |
651 dstlen -= 2; | |
652 } else { | |
653 Uint16 W1, W2; | |
654 if ( dstlen < 4 ) { | |
655 return SDL_ICONV_E2BIG; | |
656 } | |
657 ch = ch - 0x10000; | |
658 W1 = 0xD800 | (Uint16)((ch >> 10) & 0x3FF); | |
659 W2 = 0xDC00 | (Uint16)(ch & 0x3FF); | |
660 p[1] = (Uint8)(W1 >> 8); | |
661 p[0] = (Uint8)W1; | |
662 p[3] = (Uint8)(W2 >> 8); | |
663 p[2] = (Uint8)W2; | |
664 dst += 4; | |
665 dstlen -= 4; | |
666 } | |
667 } | |
668 break; | |
669 case ENCODING_UTF32BE: | |
670 { | |
671 Uint8 *p = (Uint8 *)dst; | |
672 if ( ch > 0x7FFFFFFF ) { | |
673 ch = UNKNOWN_UNICODE; | |
674 } | |
675 if ( dstlen < 4 ) { | |
676 return SDL_ICONV_E2BIG; | |
677 } | |
678 p[0] = (Uint8)(ch >> 24); | |
679 p[1] = (Uint8)(ch >> 16); | |
680 p[2] = (Uint8)(ch >> 8); | |
681 p[3] = (Uint8)ch; | |
682 dst += 4; | |
683 dstlen -= 4; | |
684 } | |
685 break; | |
686 case ENCODING_UTF32LE: | |
687 { | |
688 Uint8 *p = (Uint8 *)dst; | |
689 if ( ch > 0x7FFFFFFF ) { | |
690 ch = UNKNOWN_UNICODE; | |
691 } | |
692 if ( dstlen < 4 ) { | |
693 return SDL_ICONV_E2BIG; | |
694 } | |
695 p[3] = (Uint8)(ch >> 24); | |
696 p[2] = (Uint8)(ch >> 16); | |
697 p[1] = (Uint8)(ch >> 8); | |
698 p[0] = (Uint8)ch; | |
699 dst += 4; | |
700 dstlen -= 4; | |
701 } | |
702 break; | |
703 case ENCODING_UCS2: | |
704 { | |
705 Uint16 *p = (Uint16 *)dst; | |
706 if ( ch > 0xFFFF ) { | |
707 ch = UNKNOWN_UNICODE; | |
708 } | |
709 if ( dstlen < 2 ) { | |
710 return SDL_ICONV_E2BIG; | |
711 } | |
712 *p = (Uint16)ch; | |
713 dst += 2; | |
714 dstlen -= 2; | |
715 } | |
716 break; | |
717 case ENCODING_UCS4: | |
718 { | |
719 Uint32 *p = (Uint32 *)dst; | |
720 if ( ch > 0x7FFFFFFF ) { | |
721 ch = UNKNOWN_UNICODE; | |
722 } | |
723 if ( dstlen < 4 ) { | |
724 return SDL_ICONV_E2BIG; | |
725 } | |
726 *p = ch; | |
727 dst += 4; | |
728 dstlen -= 4; | |
729 } | |
730 break; | |
731 } | |
732 | |
733 /* Update state */ | |
734 *inbuf = src; | |
735 *inbytesleft = srclen; | |
736 *outbuf = dst; | |
737 *outbytesleft = dstlen; | |
738 ++total; | |
739 } | |
740 return total; | |
741 } | |
742 | |
743 int SDL_iconv_close(SDL_iconv_t cd) | |
744 { | |
745 if ( cd && cd != (SDL_iconv_t)-1 ) { | |
746 SDL_free(cd); | |
747 } | |
748 return 0; | |
749 } | |
750 | |
751 #endif /* !HAVE_ICONV */ | |
752 | |
753 char *SDL_iconv_string(const char *tocode, const char *fromcode, char *inbuf, size_t inbytesleft) | |
754 { | |
755 SDL_iconv_t cd; | |
756 char *string; | |
757 size_t stringsize; | |
758 char *outbuf; | |
759 size_t outbytesleft; | |
760 size_t retCode = 0; | |
761 | |
762 cd = SDL_iconv_open(tocode, fromcode); | |
763 if ( cd == (SDL_iconv_t)-1 ) { | |
764 return NULL; | |
765 } | |
766 | |
767 stringsize = inbytesleft > 4 ? inbytesleft : 4; | |
768 string = SDL_malloc(stringsize); | |
769 if ( !string ) { | |
770 SDL_iconv_close(cd); | |
771 return NULL; | |
772 } | |
773 outbuf = string; | |
774 outbytesleft = stringsize; | |
775 SDL_memset(outbuf, 0, 4); | |
776 | |
777 while ( inbytesleft > 0 ) { | |
778 retCode = SDL_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft); | |
779 switch (retCode) { | |
780 case SDL_ICONV_E2BIG: | |
781 { | |
782 char *oldstring = string; | |
783 stringsize *= 2; | |
784 string = SDL_realloc(string, stringsize); | |
785 if ( !string ) { | |
786 SDL_iconv_close(cd); | |
787 return NULL; | |
788 } | |
789 outbuf = string + (outbuf - oldstring); | |
790 outbytesleft = stringsize - (outbuf - string); | |
791 SDL_memset(outbuf, 0, 4); | |
792 } | |
793 break; | |
794 case SDL_ICONV_EILSEQ: | |
795 /* Try skipping some input data - not perfect, but... */ | |
796 ++inbuf; | |
797 --inbytesleft; | |
798 break; | |
799 case SDL_ICONV_EINVAL: | |
800 case SDL_ICONV_ERROR: | |
801 /* We can't continue... */ | |
802 inbytesleft = 0; | |
803 break; | |
804 } | |
805 } | |
806 SDL_iconv_close(cd); | |
807 | |
808 return string; | |
809 } |