Mercurial > sdl-ios-xcode
changeset 1501:73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
author | Sam Lantinga <slouken@libsdl.org> |
---|---|
date | Mon, 13 Mar 2006 01:08:00 +0000 |
parents | f58c88a4dff5 |
children | d403a39389da |
files | configure.in include/SDL_config.h.in include/SDL_stdinc.h src/stdlib/SDL_iconv.c src/stdlib/SDL_string.c test/Makefile.in test/testiconv.c test/utf8.txt |
diffstat | 8 files changed, 958 insertions(+), 5 deletions(-) [+] |
line wrap: on
line diff
--- a/configure.in Sun Mar 12 01:47:23 2006 +0000 +++ b/configure.in Mon Mar 13 01:08:00 2006 +0000 @@ -94,7 +94,7 @@ dnl Check for C library headers AC_HEADER_STDC - AC_CHECK_HEADERS(sys/types.h stdio.h stdlib.h stddef.h stdarg.h malloc.h memory.h string.h strings.h inttypes.h stdint.h ctype.h math.h signal.h) + AC_CHECK_HEADERS(sys/types.h stdio.h stdlib.h stddef.h stdarg.h malloc.h memory.h string.h strings.h inttypes.h stdint.h ctype.h math.h iconv.h signal.h) dnl Check for typedefs, structures, etc. AC_TYPE_SIZE_T @@ -116,7 +116,7 @@ if test x$ac_cv_func_strtod = xyes; then AC_DEFINE(HAVE_STRTOD) fi - AC_CHECK_FUNCS(malloc calloc realloc free getenv putenv unsetenv qsort abs bcopy memset memcpy memmove strlen strlcpy strlcat strdup _strrev _strupr _strlwr strchr strrchr strstr itoa _ltoa _uitoa _ultoa strtol strtoul _i64toa _ui64toa strtoll strtoull atoi atof strcmp strncmp stricmp strcasecmp sscanf snprintf vsnprintf sigaction setjmp nanosleep) + AC_CHECK_FUNCS(malloc calloc realloc free getenv putenv unsetenv qsort abs bcopy memset memcpy memmove strlen strlcpy strlcat strdup _strrev _strupr _strlwr strchr strrchr strstr itoa _ltoa _uitoa _ultoa strtol strtoul _i64toa _ui64toa strtoll strtoull atoi atof strcmp strncmp stricmp strcasecmp strncasecmp sscanf snprintf vsnprintf iconv sigaction setjmp nanosleep) AC_CHECK_LIB(m, pow, [BUILD_LDFLAGS="$BUILD_LDFLAGS -lm"]) fi
--- a/include/SDL_config.h.in Sun Mar 12 01:47:23 2006 +0000 +++ b/include/SDL_config.h.in Mon Mar 13 01:08:00 2006 +0000 @@ -68,6 +68,7 @@ #undef HAVE_STDINT_H #undef HAVE_CTYPE_H #undef HAVE_MATH_H +#undef HAVE_ICONV_H #undef HAVE_SIGNAL_H #undef HAVE_ALTIVEC_H @@ -118,9 +119,11 @@ #undef HAVE_STRNCMP #undef HAVE_STRICMP #undef HAVE_STRCASECMP +#undef HAVE_STRNCASECMP #undef HAVE_SSCANF #undef HAVE_SNPRINTF #undef HAVE_VSNPRINTF +#undef HAVE_ICONV #undef HAVE_SIGACTION #undef HAVE_SETJMP #undef HAVE_NANOSLEEP
--- a/include/SDL_stdinc.h Sun Mar 12 01:47:23 2006 +0000 +++ b/include/SDL_stdinc.h Mon Mar 13 01:08:00 2006 +0000 @@ -70,6 +70,9 @@ #if HAVE_CTYPE_H # include <ctype.h> #endif +#if HAVE_ICONV_H +# include <iconv.h> +#endif /* The number of elements in an array */ #define SDL_arraysize(array) (sizeof(array)/sizeof(array[0])) @@ -518,6 +521,12 @@ extern DECLSPEC int SDLCALL SDL_strcasecmp(const char *str1, const char *str2); #endif +#if HAVE_STRNCASECMP +#define SDL_strncasecmp strncasecmp +#else +extern DECLSPEC int SDLCALL SDL_strncasecmp(const char *str1, const char *str2, size_t maxlen); +#endif + #if HAVE_SSCANF #define SDL_sscanf sscanf #else @@ -536,6 +545,32 @@ extern DECLSPEC int SDLCALL SDL_vsnprintf(char *text, size_t maxlen, const char *fmt, va_list ap); #endif +/* The SDL implementation of iconv() returns these error codes */ +#define SDL_ICONV_ERROR (size_t)-1 +#define SDL_ICONV_E2BIG (size_t)-2 +#define SDL_ICONV_EILSEQ (size_t)-3 +#define SDL_ICONV_EINVAL (size_t)-4 + +#if HAVE_ICONV +#define SDL_iconv_t iconv_t +#define SDL_iconv_open iconv_open +#define SDL_iconv_close iconv_close +extern DECLSPEC size_t SDLCALL SDL_iconv(SDL_iconv_t cd, char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft); +#else +typedef struct _SDL_iconv_t *SDL_iconv_t; +extern DECLSPEC SDL_iconv_t SDLCALL SDL_iconv_open(const char *tocode, const char *fromcode); +extern DECLSPEC int SDLCALL SDL_iconv_close(SDL_iconv_t cd); +extern DECLSPEC size_t SDLCALL SDL_iconv(SDL_iconv_t cd, char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft); +#endif +/* This function converts a string between encodings in one pass, returning a + string that must be freed with SDL_free() or NULL on error. +*/ +extern DECLSPEC char * SDLCALL SDL_iconv_string(const char *tocode, const char *fromcode, char *inbuf, size_t inbytesleft); +#define SDL_iconv_utf8_ascii(S) SDL_iconv_string("ASCII", "UTF-8", S, SDL_strlen(S)+1) +#define SDL_iconv_utf8_latin1(S) SDL_iconv_string("LATIN1", "UTF-8", S, SDL_strlen(S)+1) +#define SDL_iconv_utf8_ucs2(S) (Uint16 *)SDL_iconv_string("UCS-2", "UTF-8", S, SDL_strlen(S)+1) +#define SDL_iconv_utf8_ucs4(S) (Uint32 *)SDL_iconv_string("UCS-4", "UTF-8", S, SDL_strlen(S)+1) + /* Ends C function definitions when using C++ */ #ifdef __cplusplus }
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/stdlib/SDL_iconv.c Mon Mar 13 01:08:00 2006 +0000 @@ -0,0 +1,809 @@ +/* + SDL - Simple DirectMedia Layer + Copyright (C) 1997-2006 Sam Lantinga + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + Sam Lantinga + slouken@libsdl.org +*/ +#include "SDL_config.h" + +/* This file contains portable iconv functions for SDL */ + +#include "SDL_stdinc.h" +#include "SDL_endian.h" + +#ifdef HAVE_ICONV + +#include <errno.h> + +size_t SDL_iconv(SDL_iconv_t cd, + char **inbuf, size_t *inbytesleft, + char **outbuf, size_t *outbytesleft) +{ + size_t retCode = iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft); + if ( retCode == (size_t)-1 ) { + switch(errno) { + case E2BIG: + return SDL_ICONV_E2BIG; + case EILSEQ: + return SDL_ICONV_EILSEQ; + case EINVAL: + return SDL_ICONV_EINVAL; + default: + return SDL_ICONV_ERROR; + } + } + return retCode; +} + +#else + +#define UNICODE_BOM 0xFEFF + +#define UNKNOWN_ASCII '?' +#define UNKNOWN_UNICODE 0xFFFD + +enum { + ENCODING_UNKNOWN, + ENCODING_ASCII, + ENCODING_LATIN1, + ENCODING_UTF8, + ENCODING_UTF16, /* Needs byte order marker */ + ENCODING_UTF16BE, + ENCODING_UTF16LE, + ENCODING_UTF32, /* Needs byte order marker */ + ENCODING_UTF32BE, + ENCODING_UTF32LE, + ENCODING_UCS2, /* Native byte order assumed */ + ENCODING_UCS4, /* Native byte order assumed */ +}; +#if SDL_BYTEORDER == SDL_BIG_ENDIAN +#define ENCODING_UTF16NATIVE ENCODING_UTF16BE +#define ENCODING_UTF32NATIVE ENCODING_UTF32BE +#else +#define ENCODING_UTF16NATIVE ENCODING_UTF16LE +#define ENCODING_UTF32NATIVE ENCODING_UTF32LE +#endif + +struct _SDL_iconv_t +{ + int src_fmt; + int dst_fmt; +}; + +static struct { + const char *name; + int format; +} encodings[] = { + { "ASCII", ENCODING_ASCII }, + { "US-ASCII", ENCODING_ASCII }, + { "LATIN1", ENCODING_LATIN1 }, + { "ISO-8859-1", ENCODING_LATIN1 }, + { "UTF8", ENCODING_UTF8 }, + { "UTF-8", ENCODING_UTF8 }, + { "UTF16", ENCODING_UTF16 }, + { "UTF-16", ENCODING_UTF16 }, + { "UTF16BE", ENCODING_UTF16BE }, + { "UTF-16BE", ENCODING_UTF16BE }, + { "UTF16LE", ENCODING_UTF16LE }, + { "UTF-16LE", ENCODING_UTF16LE }, + { "UTF32", ENCODING_UTF32 }, + { "UTF-32", ENCODING_UTF32 }, + { "UTF32BE", ENCODING_UTF32BE }, + { "UTF-32BE", ENCODING_UTF32BE }, + { "UTF32LE", ENCODING_UTF32LE }, + { "UTF-32LE", ENCODING_UTF32LE }, + { "UCS2", ENCODING_UCS2 }, + { "UCS-2", ENCODING_UCS2 }, + { "UCS4", ENCODING_UCS4 }, + { "UCS-4", ENCODING_UCS4 }, +}; + +SDL_iconv_t SDL_iconv_open(const char *tocode, const char *fromcode) +{ + int src_fmt = ENCODING_UNKNOWN; + int dst_fmt = ENCODING_UNKNOWN; + int i; + + for ( i = 0; i < SDL_arraysize(encodings); ++i ) { + if ( SDL_strcasecmp(fromcode, encodings[i].name) == 0 ) { + src_fmt = encodings[i].format; + if ( dst_fmt != ENCODING_UNKNOWN ) { + break; + } + } + if ( SDL_strcasecmp(tocode, encodings[i].name) == 0 ) { + dst_fmt = encodings[i].format; + if ( src_fmt != ENCODING_UNKNOWN ) { + break; + } + } + } + if ( src_fmt != ENCODING_UNKNOWN && dst_fmt != ENCODING_UNKNOWN ) { + SDL_iconv_t cd = (SDL_iconv_t)SDL_malloc(sizeof(*cd)); + if ( cd ) { + cd->src_fmt = src_fmt; + cd->dst_fmt = dst_fmt; + return cd; + } + } + return (SDL_iconv_t)-1; +} + +size_t SDL_iconv(SDL_iconv_t cd, + char **inbuf, size_t *inbytesleft, + char **outbuf, size_t *outbytesleft) +{ + /* For simplicity, we'll convert everything to and from UCS-4 */ + char *src, *dst; + size_t srclen, dstlen; + Uint32 ch; + size_t total; + + if ( !inbuf || !*inbuf ) { + /* Reset the context */ + return 0; + } + if ( !outbuf || !*outbuf || !outbytesleft || !*outbytesleft ) { + return SDL_ICONV_E2BIG; + } + src = *inbuf; + srclen = (inbytesleft ? *inbytesleft : 0); + dst = *outbuf; + dstlen = *outbytesleft; + + switch ( cd->src_fmt ) { + case ENCODING_UTF16: + /* Scan for a byte order marker */ + { + Uint8 *p = (Uint8 *)src; + size_t n = srclen / 2; + while ( n ) { + if ( p[0] == 0xFF && p[1] == 0xFE ) { + cd->src_fmt = ENCODING_UTF16BE; + break; + } else if ( p[0] == 0xFE && p[1] == 0xFF ) { + cd->src_fmt = ENCODING_UTF16LE; + break; + } + p += 2; + --n; + } + if ( n == 0 ) { + /* We can't tell, default to host order */ + cd->src_fmt = ENCODING_UTF16NATIVE; + } + } + break; + case ENCODING_UTF32: + /* Scan for a byte order marker */ + { + Uint8 *p = (Uint8 *)src; + size_t n = srclen / 4; + while ( n ) { + if ( p[0] == 0xFF && p[1] == 0xFE && + p[2] == 0x00 && p[3] == 0x00 ) { + cd->src_fmt = ENCODING_UTF32BE; + break; + } else if ( p[0] == 0x00 && p[1] == 0x00 && + p[2] == 0xFE && p[3] == 0xFF ) { + cd->src_fmt = ENCODING_UTF32LE; + break; + } + p += 4; + --n; + } + if ( n == 0 ) { + /* We can't tell, default to host order */ + cd->src_fmt = ENCODING_UTF32NATIVE; + } + } + break; + } + + switch ( cd->dst_fmt ) { + case ENCODING_UTF16: + /* Default to host order, need to add byte order marker */ + if ( dstlen < 2 ) { + return SDL_ICONV_E2BIG; + } + *(Uint16 *)dst = UNICODE_BOM; + dst += 2; + dstlen -= 2; + cd->dst_fmt = ENCODING_UTF16NATIVE; + break; + case ENCODING_UTF32: + /* Default to host order, need to add byte order marker */ + if ( dstlen < 4 ) { + return SDL_ICONV_E2BIG; + } + *(Uint32 *)dst = UNICODE_BOM; + dst += 4; + dstlen -= 4; + cd->dst_fmt = ENCODING_UTF32NATIVE; + break; + } + + total = 0; + while ( srclen > 0 ) { + /* Decode a character */ + switch ( cd->src_fmt ) { + case ENCODING_ASCII: + { + Uint8 *p = (Uint8 *)src; + ch = (Uint32)(p[0] & 0x7F); + ++src; + --srclen; + } + break; + case ENCODING_LATIN1: + { + Uint8 *p = (Uint8 *)src; + ch = (Uint32)p[0]; + ++src; + --srclen; + } + break; + case ENCODING_UTF8: /* RFC 3629 */ + { + Uint8 *p = (Uint8 *)src; + size_t left = 0; + SDL_bool overlong = SDL_FALSE; + if ( p[0] >= 0xFC ) { + if ( (p[0] & 0xFE) != 0xFC ) { + /* Skip illegal sequences + return SDL_ICONV_EILSEQ; + */ + ch = UNKNOWN_UNICODE; + } else { + if ( p[0] == 0xFC ) { + overlong = SDL_TRUE; + } + ch = (Uint32)(p[0] & 0x01); + left = 5; + } + } else if ( p[0] >= 0xF8 ) { + if ( (p[0] & 0xFC) != 0xF8 ) { + /* Skip illegal sequences + return SDL_ICONV_EILSEQ; + */ + ch = UNKNOWN_UNICODE; + } else { + if ( p[0] == 0xF8 ) { + overlong = SDL_TRUE; + } + ch = (Uint32)(p[0] & 0x03); + left = 4; + } + } else if ( p[0] >= 0xF0 ) { + if ( (p[0] & 0xF8) != 0xF0 ) { + /* Skip illegal sequences + return SDL_ICONV_EILSEQ; + */ + ch = UNKNOWN_UNICODE; + } else { + if ( p[0] == 0xF0 ) { + overlong = SDL_TRUE; + } + ch = (Uint32)(p[0] & 0x07); + left = 3; + } + } else if ( p[0] >= 0xE0 ) { + if ( (p[0] & 0xF0) != 0xE0 ) { + /* Skip illegal sequences + return SDL_ICONV_EILSEQ; + */ + ch = UNKNOWN_UNICODE; + } else { + if ( p[0] == 0xE0 ) { + overlong = SDL_TRUE; + } + ch = (Uint32)(p[0] & 0x0F); + left = 2; + } + } else if ( p[0] >= 0xC0 ) { + if ( (p[0] & 0xE0) != 0xC0 ) { + /* Skip illegal sequences + return SDL_ICONV_EILSEQ; + */ + ch = UNKNOWN_UNICODE; + } else { + if ( (p[0] & 0xCE) == 0xC0 ) { + overlong = SDL_TRUE; + } + ch = (Uint32)(p[0] & 0x1F); + left = 1; + } + } else { + if ( (p[0] & 0x80) != 0x00 ) { + /* Skip illegal sequences + return SDL_ICONV_EILSEQ; + */ + ch = UNKNOWN_UNICODE; + } else { + ch = (Uint32)p[0]; + } + } + ++src; + --srclen; + if ( srclen < left ) { + return SDL_ICONV_EINVAL; + } + while ( left-- ) { + ++p; + if ( (p[0] & 0xC0) != 0x80 ) { + /* Skip illegal sequences + return SDL_ICONV_EILSEQ; + */ + ch = UNKNOWN_UNICODE; + break; + } + ch <<= 6; + ch |= (p[0] & 0x3F); + ++src; + --srclen; + } + if ( overlong ) { + /* Potential security risk + return SDL_ICONV_EILSEQ; + */ + ch = UNKNOWN_UNICODE; + } + if ( (ch >= 0xD800 && ch <= 0xDFFF) || + (ch == 0xFFFE || ch == 0xFFFF) ) { + /* Skip illegal sequences + return SDL_ICONV_EILSEQ; + */ + ch = UNKNOWN_UNICODE; + } + } + break; + case ENCODING_UTF16BE: /* RFC 2781 */ + { + Uint8 *p = (Uint8 *)src; + Uint16 W1, W2; + if ( srclen < 2 ) { + return SDL_ICONV_EINVAL; + } + W1 = ((Uint32)p[0] << 8) | + (Uint32)p[1]; + src += 2; + srclen -= 2; + if ( W1 < 0xD800 || W1 > 0xDFFF ) { + ch = (Uint32)W1; + break; + } + if ( W1 > 0xDBFF ) { + /* Skip illegal sequences + return SDL_ICONV_EILSEQ; + */ + ch = UNKNOWN_UNICODE; + break; + } + if ( srclen < 2 ) { + return SDL_ICONV_EINVAL; + } + p = src; + W2 = ((Uint32)p[0] << 8) | + (Uint32)p[1]; + src += 2; + srclen -= 2; + if ( W2 < 0xDC00 || W2 > 0xDFFF ) { + /* Skip illegal sequences + return SDL_ICONV_EILSEQ; + */ + ch = UNKNOWN_UNICODE; + break; + } + ch = (((Uint32)(W1 & 0x3FF) << 10) | + (Uint32)(W2 & 0x3FF)) + 0x10000; + } + break; + case ENCODING_UTF16LE: /* RFC 2781 */ + { + Uint8 *p = (Uint8 *)src; + Uint16 W1, W2; + if ( srclen < 2 ) { + return SDL_ICONV_EINVAL; + } + W1 = ((Uint32)p[1] << 8) | + (Uint32)p[0]; + src += 2; + srclen -= 2; + if ( W1 < 0xD800 || W1 > 0xDFFF ) { + ch = (Uint32)W1; + break; + } + if ( W1 > 0xDBFF ) { + /* Skip illegal sequences + return SDL_ICONV_EILSEQ; + */ + ch = UNKNOWN_UNICODE; + break; + } + if ( srclen < 2 ) { + return SDL_ICONV_EINVAL; + } + p = src; + W2 = ((Uint32)p[1] << 8) | + (Uint32)p[0]; + src += 2; + srclen -= 2; + if ( W2 < 0xDC00 || W2 > 0xDFFF ) { + /* Skip illegal sequences + return SDL_ICONV_EILSEQ; + */ + ch = UNKNOWN_UNICODE; + break; + } + ch = (((Uint32)(W1 & 0x3FF) << 10) | + (Uint32)(W2 & 0x3FF)) + 0x10000; + } + break; + case ENCODING_UTF32BE: + { + Uint8 *p = (Uint8 *)src; + if ( srclen < 4 ) { + return SDL_ICONV_EINVAL; + } + ch = ((Uint32)p[0] << 24) | + ((Uint32)p[1] << 16) | + ((Uint32)p[2] << 8) | + (Uint32)p[3]; + src += 4; + srclen -= 4; + } + break; + case ENCODING_UTF32LE: + { + Uint8 *p = (Uint8 *)src; + if ( srclen < 4 ) { + return SDL_ICONV_EINVAL; + } + ch = ((Uint32)p[3] << 24) | + ((Uint32)p[2] << 16) | + ((Uint32)p[1] << 8) | + (Uint32)p[0]; + src += 4; + srclen -= 4; + } + break; + case ENCODING_UCS2: + { + Uint16 *p = (Uint16 *)src; + if ( srclen < 2 ) { + return SDL_ICONV_EINVAL; + } + ch = *p; + src += 2; + srclen -= 2; + } + break; + case ENCODING_UCS4: + { + Uint32 *p = (Uint32 *)src; + if ( srclen < 4 ) { + return SDL_ICONV_EINVAL; + } + ch = *p; + src += 4; + srclen -= 4; + } + break; + } + + /* Encode a character */ + switch ( cd->dst_fmt ) { + case ENCODING_ASCII: + { + Uint8 *p = (Uint8 *)dst; + if ( dstlen < 1 ) { + return SDL_ICONV_E2BIG; + } + if ( ch > 0x7F ) { + *p = UNKNOWN_ASCII; + } else { + *p = (Uint8)ch; + } + ++dst; + --dstlen; + } + break; + case ENCODING_LATIN1: + { + Uint8 *p = (Uint8 *)dst; + if ( dstlen < 1 ) { + return SDL_ICONV_E2BIG; + } + if ( ch > 0xFF ) { + *p = UNKNOWN_ASCII; + } else { + *p = (Uint8)ch; + } + ++dst; + --dstlen; + } + break; + case ENCODING_UTF8: /* RFC 3629 */ + { + Uint8 *p = (Uint8 *)dst; + if ( ch > 0x7FFFFFFF ) { + ch = UNKNOWN_UNICODE; + } + if ( ch <= 0x7F ) { + if ( dstlen < 1 ) { + return SDL_ICONV_E2BIG; + } + *p = (Uint8)ch; + ++dst; + --dstlen; + } else if ( ch <= 0x7FF ) { + if ( dstlen < 2 ) { + return SDL_ICONV_E2BIG; + } + p[0] = 0xC0 | (Uint8)((ch >> 6) & 0x1F); + p[1] = 0x80 | (Uint8)(ch & 0x3F); + dst += 2; + dstlen -= 2; + } else if ( ch <= 0xFFFF ) { + if ( dstlen < 3 ) { + return SDL_ICONV_E2BIG; + } + p[0] = 0xE0 | (Uint8)((ch >> 12) & 0x0F); + p[1] = 0x80 | (Uint8)((ch >> 6) & 0x3F); + p[2] = 0x80 | (Uint8)(ch & 0x3F); + dst += 3; + dstlen -= 3; + } else if ( ch <= 0x1FFFFF ) { + if ( dstlen < 4 ) { + return SDL_ICONV_E2BIG; + } + p[0] = 0xF0 | (Uint8)((ch >> 18) & 0x07); + p[1] = 0x80 | (Uint8)((ch >> 12) & 0x3F); + p[2] = 0x80 | (Uint8)((ch >> 6) & 0x3F); + p[3] = 0x80 | (Uint8)(ch & 0x3F); + dst += 4; + dstlen -= 4; + } else if ( ch <= 0x3FFFFFF ) { + if ( dstlen < 5 ) { + return SDL_ICONV_E2BIG; + } + p[0] = 0xF8 | (Uint8)((ch >> 24) & 0x03); + p[1] = 0x80 | (Uint8)((ch >> 18) & 0x3F); + p[2] = 0x80 | (Uint8)((ch >> 12) & 0x3F); + p[3] = 0x80 | (Uint8)((ch >> 6) & 0x3F); + p[4] = 0x80 | (Uint8)(ch & 0x3F); + dst += 5; + dstlen -= 5; + } else { + if ( dstlen < 6 ) { + return SDL_ICONV_E2BIG; + } + p[0] = 0xFC | (Uint8)((ch >> 30) & 0x01); + p[1] = 0x80 | (Uint8)((ch >> 24) & 0x3F); + p[2] = 0x80 | (Uint8)((ch >> 18) & 0x3F); + p[3] = 0x80 | (Uint8)((ch >> 12) & 0x3F); + p[4] = 0x80 | (Uint8)((ch >> 6) & 0x3F); + p[5] = 0x80 | (Uint8)(ch & 0x3F); + dst += 6; + dstlen -= 6; + } + } + break; + case ENCODING_UTF16BE: /* RFC 2781 */ + { + Uint8 *p = (Uint8 *)dst; + if ( ch > 0x10FFFF ) { + ch = UNKNOWN_UNICODE; + } + if ( ch < 0x10000 ) { + if ( dstlen < 2 ) { + return SDL_ICONV_E2BIG; + } + p[0] = (Uint8)(ch >> 8); + p[1] = (Uint8)ch; + dst += 2; + dstlen -= 2; + } else { + Uint16 W1, W2; + if ( dstlen < 4 ) { + return SDL_ICONV_E2BIG; + } + ch = ch - 0x10000; + W1 = 0xD800 | (Uint16)((ch >> 10) & 0x3FF); + W2 = 0xDC00 | (Uint16)(ch & 0x3FF); + p[0] = (Uint8)(W1 >> 8); + p[1] = (Uint8)W1; + p[2] = (Uint8)(W2 >> 8); + p[3] = (Uint8)W2; + dst += 4; + dstlen -= 4; + } + } + break; + case ENCODING_UTF16LE: /* RFC 2781 */ + { + Uint8 *p = (Uint8 *)dst; + if ( ch > 0x10FFFF ) { + ch = UNKNOWN_UNICODE; + } + if ( ch < 0x10000 ) { + if ( dstlen < 2 ) { + return SDL_ICONV_E2BIG; + } + p[1] = (Uint8)(ch >> 8); + p[0] = (Uint8)ch; + dst += 2; + dstlen -= 2; + } else { + Uint16 W1, W2; + if ( dstlen < 4 ) { + return SDL_ICONV_E2BIG; + } + ch = ch - 0x10000; + W1 = 0xD800 | (Uint16)((ch >> 10) & 0x3FF); + W2 = 0xDC00 | (Uint16)(ch & 0x3FF); + p[1] = (Uint8)(W1 >> 8); + p[0] = (Uint8)W1; + p[3] = (Uint8)(W2 >> 8); + p[2] = (Uint8)W2; + dst += 4; + dstlen -= 4; + } + } + break; + case ENCODING_UTF32BE: + { + Uint8 *p = (Uint8 *)dst; + if ( ch > 0x7FFFFFFF ) { + ch = UNKNOWN_UNICODE; + } + if ( dstlen < 4 ) { + return SDL_ICONV_E2BIG; + } + p[0] = (Uint8)(ch >> 24); + p[1] = (Uint8)(ch >> 16); + p[2] = (Uint8)(ch >> 8); + p[3] = (Uint8)ch; + dst += 4; + dstlen -= 4; + } + break; + case ENCODING_UTF32LE: + { + Uint8 *p = (Uint8 *)dst; + if ( ch > 0x7FFFFFFF ) { + ch = UNKNOWN_UNICODE; + } + if ( dstlen < 4 ) { + return SDL_ICONV_E2BIG; + } + p[3] = (Uint8)(ch >> 24); + p[2] = (Uint8)(ch >> 16); + p[1] = (Uint8)(ch >> 8); + p[0] = (Uint8)ch; + dst += 4; + dstlen -= 4; + } + break; + case ENCODING_UCS2: + { + Uint16 *p = (Uint16 *)dst; + if ( ch > 0xFFFF ) { + ch = UNKNOWN_UNICODE; + } + if ( dstlen < 2 ) { + return SDL_ICONV_E2BIG; + } + *p = (Uint16)ch; + dst += 2; + dstlen -= 2; + } + break; + case ENCODING_UCS4: + { + Uint32 *p = (Uint32 *)dst; + if ( ch > 0x7FFFFFFF ) { + ch = UNKNOWN_UNICODE; + } + if ( dstlen < 4 ) { + return SDL_ICONV_E2BIG; + } + *p = ch; + dst += 4; + dstlen -= 4; + } + break; + } + + /* Update state */ + *inbuf = src; + *inbytesleft = srclen; + *outbuf = dst; + *outbytesleft = dstlen; + ++total; + } + return total; +} + +int SDL_iconv_close(SDL_iconv_t cd) +{ + if ( cd && cd != (SDL_iconv_t)-1 ) { + SDL_free(cd); + } + return 0; +} + +#endif /* !HAVE_ICONV */ + +char *SDL_iconv_string(const char *tocode, const char *fromcode, char *inbuf, size_t inbytesleft) +{ + SDL_iconv_t cd; + char *string; + size_t stringsize; + char *outbuf; + size_t outbytesleft; + size_t retCode = 0; + + cd = SDL_iconv_open(tocode, fromcode); + if ( cd == (SDL_iconv_t)-1 ) { + return NULL; + } + + stringsize = inbytesleft > 4 ? inbytesleft : 4; + string = SDL_malloc(stringsize); + if ( !string ) { + SDL_iconv_close(cd); + return NULL; + } + outbuf = string; + outbytesleft = stringsize; + SDL_memset(outbuf, 0, 4); + + while ( inbytesleft > 0 ) { + retCode = SDL_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft); + switch (retCode) { + case SDL_ICONV_E2BIG: + { + char *oldstring = string; + stringsize *= 2; + string = SDL_realloc(string, stringsize); + if ( !string ) { + SDL_iconv_close(cd); + return NULL; + } + outbuf = string + (outbuf - oldstring); + outbytesleft = stringsize - (outbuf - string); + SDL_memset(outbuf, 0, 4); + } + break; + case SDL_ICONV_EILSEQ: + /* Try skipping some input data - not perfect, but... */ + ++inbuf; + --inbytesleft; + break; + case SDL_ICONV_EINVAL: + case SDL_ICONV_ERROR: + /* We can't continue... */ + inbytesleft = 0; + break; + } + } + SDL_iconv_close(cd); + + return string; +}
--- a/src/stdlib/SDL_string.c Sun Mar 12 01:47:23 2006 +0000 +++ b/src/stdlib/SDL_string.c Mon Mar 13 01:08:00 2006 +0000 @@ -661,12 +661,12 @@ } #endif -#ifndef HAVE_STRCASECMP +#if !defined(HAVE_STRCASECMP) && !defined(HAVE_STRICMP) int SDL_strcasecmp(const char *str1, const char *str2) { char a = 0; char b = 0; - while (*str1 && *str2) { + while ( *str1 && *str2 ) { a = SDL_tolower(*str1); b = SDL_tolower(*str2); if ( a != b ) @@ -678,6 +678,24 @@ } #endif +#ifndef HAVE_STRNCASECMP +int SDL_strncasecmp(const char *str1, const char *str2, size_t maxlen) +{ + char a = 0; + char b = 0; + while ( *str1 && *str2 && maxlen ) { + a = SDL_tolower(*str1); + b = SDL_tolower(*str2); + if ( a != b ) + break; + ++str1; + ++str2; + --maxlen; + } + return (int)((unsigned char)a - (unsigned char)b); +} +#endif + #ifndef HAVE_SSCANF int SDL_sscanf(const char *text, const char *fmt, ...) {
--- a/test/Makefile.in Sun Mar 12 01:47:23 2006 +0000 +++ b/test/Makefile.in Mon Mar 13 01:08:00 2006 +0000 @@ -7,7 +7,7 @@ CFLAGS = @CFLAGS@ LIBS = @LIBS@ -TARGETS = checkkeys$(EXE) graywin$(EXE) loopwave$(EXE) testalpha$(EXE) testbitmap$(EXE) testblitspeed$(EXE) testcdrom$(EXE) testdyngl$(EXE) testerror$(EXE) testfile$(EXE) testgamma$(EXE) testgl$(EXE) testhread$(EXE) testjoystick$(EXE) testkeys$(EXE) testlock$(EXE) testoverlay2$(EXE) testoverlay$(EXE) testpalette$(EXE) testplatform$(EXE) testsem$(EXE) testsprite$(EXE) testtimer$(EXE) testver$(EXE) testvidinfo$(EXE) testwin$(EXE) testwm$(EXE) threadwin$(EXE) torturethread$(EXE) +TARGETS = checkkeys$(EXE) graywin$(EXE) loopwave$(EXE) testalpha$(EXE) testbitmap$(EXE) testblitspeed$(EXE) testcdrom$(EXE) testdyngl$(EXE) testerror$(EXE) testfile$(EXE) testgamma$(EXE) testgl$(EXE) testhread$(EXE) testiconv$(EXE) testjoystick$(EXE) testkeys$(EXE) testlock$(EXE) testoverlay2$(EXE) testoverlay$(EXE) testpalette$(EXE) testplatform$(EXE) testsem$(EXE) testsprite$(EXE) testtimer$(EXE) testver$(EXE) testvidinfo$(EXE) testwin$(EXE) testwm$(EXE) threadwin$(EXE) torturethread$(EXE) all: $(TARGETS) @@ -50,6 +50,9 @@ testhread$(EXE): $(srcdir)/testhread.c $(CC) -o $@ $? $(CFLAGS) $(LIBS) +testiconv$(EXE): $(srcdir)/testiconv.c + $(CC) -o $@ $? $(CFLAGS) $(LIBS) + testjoystick$(EXE): $(srcdir)/testjoystick.c $(CC) -o $@ $? $(CFLAGS) $(LIBS)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/testiconv.c Mon Mar 13 01:08:00 2006 +0000 @@ -0,0 +1,85 @@ + +#include <stdio.h> + +#include "SDL.h" + +static SDL_bool testutf16(char *data) +{ + Uint32 *p = (Uint32 *)data; + while(*p) { + if ( *p > 0x10FFFF ) { + return SDL_FALSE; + } + ++p; + } + return SDL_TRUE; +} + +static size_t widelen(char *data) +{ + size_t len = 0; + Uint32 *p = (Uint32 *)data; + while(*p++) { + ++len; + } + return len; +} + +int main(int argc, char *argv[]) +{ + const char * formats[] = { + "UTF8", + "UTF-8", + "UTF16BE", + "UTF-16BE", + "UTF16LE", + "UTF-16LE", + "UTF32BE", + "UTF-32BE", + "UTF32LE", + "UTF-32LE", + "UCS4", + "UCS-4", + }; + char buffer[BUFSIZ]; + char *ucs4; + char *test[2]; + int i, j, index = 0; + FILE *file; + int errors = 0; + + if ( !argv[1] ) { + argv[1] = "utf8.txt"; + } + file = fopen(argv[1], "rb"); + if ( !file ) { + fprintf(stderr, "Unable to open %s\n", argv[1]); + return (1); + } + + while ( fgets(buffer, sizeof(buffer), file) ) { + /* Convert to UCS-4 */ + ucs4 = SDL_iconv_string("UCS-4", "UTF-8", buffer, SDL_strlen(buffer)+1); + size_t len = (widelen(ucs4)+1)*4; + for ( i = 0; i < SDL_arraysize(formats); ++i ) { + if ( (SDL_strncasecmp(formats[i], "UTF16", 5) == 0 || + SDL_strncasecmp(formats[i], "UTF-16", 6) == 0) && + !testutf16(ucs4) ) { + continue; + } + test[0] = SDL_iconv_string(formats[i], "UCS-4", ucs4, len); + test[1] = SDL_iconv_string("UCS-4", formats[i], test[0], len); + if ( SDL_memcmp(test[1], ucs4, len) != 0 ) { + fprintf(stderr, "FAIL: %s\n", formats[i]); + ++errors; + } + SDL_free(test[0]); + SDL_free(test[1]); + } + test[0] = SDL_iconv_string("UTF-8", "UCS-4", ucs4, len); + SDL_free(ucs4); + fputs(test[0], stdout); + SDL_free(test[0]); + } + return (errors ? errors + 1 : 0); +}