From f89164b152b8f8c5861bcb7aa2dd0958b847a5b3 Mon Sep 17 00:00:00 2001 From: Sergii Pylypenko Date: Tue, 2 Jul 2019 22:12:26 +0300 Subject: [PATCH] xerces: replaced SDL_iconv with internal iconv to fix linking --- project/jni/xerces/Android.mk | 2 +- project/jni/xerces/iconv/iconv.h | 30 - .../jni/xerces/include/xercesc/iconv/iconv.h | 41 + .../IconvGNU/IconvGNUTransService.hpp | 2 +- project/jni/xerces/src/iconv.c | 859 ++++++++++++++++++ 5 files changed, 902 insertions(+), 32 deletions(-) delete mode 100644 project/jni/xerces/iconv/iconv.h create mode 100644 project/jni/xerces/include/xercesc/iconv/iconv.h create mode 100644 project/jni/xerces/src/iconv.c diff --git a/project/jni/xerces/Android.mk b/project/jni/xerces/Android.mk index 485b034ee..70b332ce2 100644 --- a/project/jni/xerces/Android.mk +++ b/project/jni/xerces/Android.mk @@ -7,7 +7,7 @@ XERCES_SUBDIRS := $(patsubst $(LOCAL_PATH)/%, %, $(shell find $(LOCAL_PATH)/src/ LOCAL_MODULE := xerces LOCAL_C_INCLUDES := $(LOCAL_PATH) $(LOCAL_PATH)/src $(LOCAL_PATH)/include \ - $(LOCAL_PATH)/.. $(LOCAL_PATH)/include/xercesc/util $(LOCAL_PATH)/include/xercesc/util/MsgLoaders/InMemory \ + $(LOCAL_PATH)/include/xercesc/util $(LOCAL_PATH)/include/xercesc/util/MsgLoaders/InMemory \ $(LOCAL_PATH)/include/xercesc/dom/ $(LOCAL_PATH)/include/xercesc/dom/impl \ $(LOCAL_PATH)/include/xercesc/validators/schema/identity $(LOCAL_PATH)/include/xercesc/util/Transcoders/IconvGNU/ \ $(LOCAL_PATH)/include/xercesc/sax diff --git a/project/jni/xerces/iconv/iconv.h b/project/jni/xerces/iconv/iconv.h deleted file mode 100644 index cae44285f..000000000 --- a/project/jni/xerces/iconv/iconv.h +++ /dev/null @@ -1,30 +0,0 @@ -// SDL_iconv >> iconv wrapper dummy -#ifndef DUMMY_ICONV_H -#define DUMMY_ICONV_H - -#undef HAVE_ICONV -#ifdef __cplusplus -extern "C" { -#endif - -// #include // We'll have to rebuild xerces if we'll include this file and change SDL version, so just provide declarations here - -typedef struct _SDL_iconv_t *SDL_iconv_t; - -extern SDL_iconv_t SDL_iconv_open(const char *tocode, const char *fromcode); -extern int SDL_iconv_close(SDL_iconv_t cd); -extern size_t SDL_iconv(SDL_iconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft); -extern char * SDL_iconv_string(const char *tocode, const char *fromcode, const char *inbuf, size_t inbytesleft); - -#define iconv_t SDL_iconv_t -#define iconv SDL_iconv -#define iconv_open SDL_iconv_open -#define iconv_close SDL_iconv_close - -#ifdef __cplusplus -} -#endif - -#define HAVE_ICONV 1 - -#endif diff --git a/project/jni/xerces/include/xercesc/iconv/iconv.h b/project/jni/xerces/include/xercesc/iconv/iconv.h new file mode 100644 index 000000000..4a4496aa4 --- /dev/null +++ b/project/jni/xerces/include/xercesc/iconv/iconv.h @@ -0,0 +1,41 @@ +// iconv wrapper +#ifndef XERCES_DUMMY_ICONV_H +#define XERCES_DUMMY_ICONV_H + +#undef HAVE_ICONV +#ifdef __cplusplus +extern "C" { +#endif + +#define XERCES_ICONV_ERROR (size_t)-1 +#define XERCES_ICONV_E2BIG (size_t)-2 +#define XERCES_ICONV_EILSEQ (size_t)-3 +#define XERCES_ICONV_EINVAL (size_t)-4 +/*@}*/ + + +#define XERCES_iconv_utf8_locale(S) XERCES_iconv_string("", "UTF-8", S, strlen(S)+1) +#define XERCES_iconv_utf8_ucs2(S) (Uint16 *)XERCES_iconv_string("UCS-2", "UTF-8", S, strlen(S)+1) +#define XERCES_iconv_utf8_ucs4(S) (Uint32 *)XERCES_iconv_string("UCS-4", "UTF-8", S, strlen(S)+1) + + + +typedef struct _XERCES_iconv_t *XERCES_iconv_t; + +extern XERCES_iconv_t XERCES_iconv_open(const char *tocode, const char *fromcode); +extern int XERCES_iconv_close(XERCES_iconv_t cd); +extern size_t XERCES_iconv(XERCES_iconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft); +extern char * XERCES_iconv_string(const char *tocode, const char *fromcode, const char *inbuf, size_t inbytesleft); + +#define iconv_t XERCES_iconv_t +#define iconv XERCES_iconv +#define iconv_open XERCES_iconv_open +#define iconv_close XERCES_iconv_close + +#ifdef __cplusplus +} +#endif + +#define HAVE_ICONV 1 + +#endif diff --git a/project/jni/xerces/include/xercesc/util/Transcoders/IconvGNU/IconvGNUTransService.hpp b/project/jni/xerces/include/xercesc/util/Transcoders/IconvGNU/IconvGNUTransService.hpp index d7457cb2b..c97587171 100644 --- a/project/jni/xerces/include/xercesc/util/Transcoders/IconvGNU/IconvGNUTransService.hpp +++ b/project/jni/xerces/include/xercesc/util/Transcoders/IconvGNU/IconvGNUTransService.hpp @@ -25,7 +25,7 @@ #include #include -#include +#include XERCES_CPP_NAMESPACE_BEGIN diff --git a/project/jni/xerces/src/iconv.c b/project/jni/xerces/src/iconv.c new file mode 100644 index 000000000..776f97ae6 --- /dev/null +++ b/project/jni/xerces/src/iconv.c @@ -0,0 +1,859 @@ +/* + SDL - Simple DirectMedia Layer + Copyright (C) 1997-2009 Sam Lantinga + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + Sam Lantinga + slouken@libsdl.org +*/ +#include +#include +#include +#include +#include + +/* This file contains portable iconv functions for SDL */ + +#include + +#define XERCES_arraysize(array) (sizeof(array)/sizeof(array[0])) + +typedef int8_t Sint8; +typedef uint8_t Uint8; +typedef int16_t Sint16; +typedef uint16_t Uint16; +typedef int32_t Sint32; +typedef uint32_t Uint32; + + +/* Lots of useful information on Unicode at: + http://www.cl.cam.ac.uk/~mgk25/unicode.html +*/ + +#define UNICODE_BOM 0xFEFF + +#define UNKNOWN_ASCII '?' +#define UNKNOWN_UNICODE 0xFFFD + +enum { + ENCODING_UNKNOWN, + ENCODING_ASCII, + ENCODING_LATIN1, + ENCODING_UTF8, + ENCODING_UTF16, /* Needs byte order marker */ + ENCODING_UTF16BE, + ENCODING_UTF16LE, + ENCODING_UTF32, /* Needs byte order marker */ + ENCODING_UTF32BE, + ENCODING_UTF32LE, + ENCODING_UCS2, /* Native byte order assumed */ + ENCODING_UCS4, /* Native byte order assumed */ +}; +#ifndef BYTE_ORDER +#error "BYTE_ORDER undefined" +#endif +#ifndef BIG_ENDIAN +#error "BIG_ENDIAN undefined" +#endif + +#if BYTE_ORDER == BIG_ENDIAN +#define ENCODING_UTF16NATIVE ENCODING_UTF16BE +#define ENCODING_UTF32NATIVE ENCODING_UTF32BE +#else +#define ENCODING_UTF16NATIVE ENCODING_UTF16LE +#define ENCODING_UTF32NATIVE ENCODING_UTF32LE +#endif + +struct _XERCES_iconv_t +{ + int src_fmt; + int dst_fmt; +}; + +static struct { + const char *name; + int format; +} encodings[] = { + { "ASCII", ENCODING_ASCII }, + { "US-ASCII", ENCODING_ASCII }, + { "8859-1", ENCODING_LATIN1 }, + { "ISO-8859-1", ENCODING_LATIN1 }, + { "UTF8", ENCODING_UTF8 }, + { "UTF-8", ENCODING_UTF8 }, + { "UTF16", ENCODING_UTF16 }, + { "UTF-16", ENCODING_UTF16 }, + { "UTF16BE", ENCODING_UTF16BE }, + { "UTF-16BE", ENCODING_UTF16BE }, + { "UTF16LE", ENCODING_UTF16LE }, + { "UTF-16LE", ENCODING_UTF16LE }, + { "UTF32", ENCODING_UTF32 }, + { "UTF-32", ENCODING_UTF32 }, + { "UTF32BE", ENCODING_UTF32BE }, + { "UTF-32BE", ENCODING_UTF32BE }, + { "UTF32LE", ENCODING_UTF32LE }, + { "UTF-32LE", ENCODING_UTF32LE }, + { "UCS2", ENCODING_UCS2 }, + { "UCS-2", ENCODING_UCS2 }, + { "UCS4", ENCODING_UCS4 }, + { "UCS-4", ENCODING_UCS4 }, +}; + +static const char *getlocale(char *buffer, size_t bufsize) +{ + const char *lang; + char *ptr; + + lang = getenv("LC_ALL"); + if ( !lang ) { + lang = getenv("LC_CTYPE"); + } + if ( !lang ) { + lang = getenv("LC_MESSAGES"); + } + if ( !lang ) { + lang = getenv("LANG"); + } + if ( !lang || !*lang || strcmp(lang, "C") == 0 ) { + lang = "ASCII"; + } + + /* We need to trim down strings like "en_US.UTF-8@blah" to "UTF-8" */ + ptr = strchr(lang, '.'); + if (ptr != NULL) { + lang = ptr + 1; + } + + strlcpy(buffer, lang, bufsize); + ptr = strchr(buffer, '@'); + if (ptr != NULL) { + *ptr = '\0'; /* chop end of string. */ + } + + return buffer; +} + +XERCES_iconv_t XERCES_iconv_open(const char *tocode, const char *fromcode) +{ + int src_fmt = ENCODING_UNKNOWN; + int dst_fmt = ENCODING_UNKNOWN; + int i; + char fromcode_buffer[64]; + char tocode_buffer[64]; + + if ( !fromcode || !*fromcode ) { + fromcode = getlocale(fromcode_buffer, sizeof(fromcode_buffer)); + } + if ( !tocode || !*tocode ) { + tocode = getlocale(tocode_buffer, sizeof(tocode_buffer)); + } + for ( i = 0; i < XERCES_arraysize(encodings); ++i ) { + if ( strcasecmp(fromcode, encodings[i].name) == 0 ) { + src_fmt = encodings[i].format; + if ( dst_fmt != ENCODING_UNKNOWN ) { + break; + } + } + if ( strcasecmp(tocode, encodings[i].name) == 0 ) { + dst_fmt = encodings[i].format; + if ( src_fmt != ENCODING_UNKNOWN ) { + break; + } + } + } + if ( src_fmt != ENCODING_UNKNOWN && dst_fmt != ENCODING_UNKNOWN ) { + XERCES_iconv_t cd = (XERCES_iconv_t)malloc(sizeof(*cd)); + if ( cd ) { + cd->src_fmt = src_fmt; + cd->dst_fmt = dst_fmt; + return cd; + } + } + return (XERCES_iconv_t)-1; +} + +size_t XERCES_iconv(XERCES_iconv_t cd, + const char **inbuf, size_t *inbytesleft, + char **outbuf, size_t *outbytesleft) +{ + /* For simplicity, we'll convert everything to and from UCS-4 */ + const char *src; + char *dst; + size_t srclen, dstlen; + Uint32 ch = 0; + size_t total; + + if ( !inbuf || !*inbuf ) { + /* Reset the context */ + return 0; + } + if ( !outbuf || !*outbuf || !outbytesleft || !*outbytesleft ) { + return XERCES_ICONV_E2BIG; + } + src = *inbuf; + srclen = (inbytesleft ? *inbytesleft : 0); + dst = *outbuf; + dstlen = *outbytesleft; + + switch ( cd->src_fmt ) { + case ENCODING_UTF16: + /* Scan for a byte order marker */ + { + Uint8 *p = (Uint8 *)src; + size_t n = srclen / 2; + while ( n ) { + if ( p[0] == 0xFF && p[1] == 0xFE ) { + cd->src_fmt = ENCODING_UTF16BE; + break; + } else if ( p[0] == 0xFE && p[1] == 0xFF ) { + cd->src_fmt = ENCODING_UTF16LE; + break; + } + p += 2; + --n; + } + if ( n == 0 ) { + /* We can't tell, default to host order */ + cd->src_fmt = ENCODING_UTF16NATIVE; + } + } + break; + case ENCODING_UTF32: + /* Scan for a byte order marker */ + { + Uint8 *p = (Uint8 *)src; + size_t n = srclen / 4; + while ( n ) { + if ( p[0] == 0xFF && p[1] == 0xFE && + p[2] == 0x00 && p[3] == 0x00 ) { + cd->src_fmt = ENCODING_UTF32BE; + break; + } else if ( p[0] == 0x00 && p[1] == 0x00 && + p[2] == 0xFE && p[3] == 0xFF ) { + cd->src_fmt = ENCODING_UTF32LE; + break; + } + p += 4; + --n; + } + if ( n == 0 ) { + /* We can't tell, default to host order */ + cd->src_fmt = ENCODING_UTF32NATIVE; + } + } + break; + } + + switch ( cd->dst_fmt ) { + case ENCODING_UTF16: + /* Default to host order, need to add byte order marker */ + if ( dstlen < 2 ) { + return XERCES_ICONV_E2BIG; + } + *(Uint16 *)dst = UNICODE_BOM; + dst += 2; + dstlen -= 2; + cd->dst_fmt = ENCODING_UTF16NATIVE; + break; + case ENCODING_UTF32: + /* Default to host order, need to add byte order marker */ + if ( dstlen < 4 ) { + return XERCES_ICONV_E2BIG; + } + *(Uint32 *)dst = UNICODE_BOM; + dst += 4; + dstlen -= 4; + cd->dst_fmt = ENCODING_UTF32NATIVE; + break; + } + + total = 0; + while ( srclen > 0 ) { + /* Decode a character */ + switch ( cd->src_fmt ) { + case ENCODING_ASCII: + { + Uint8 *p = (Uint8 *)src; + ch = (Uint32)(p[0] & 0x7F); + ++src; + --srclen; + } + break; + case ENCODING_LATIN1: + { + Uint8 *p = (Uint8 *)src; + ch = (Uint32)p[0]; + ++src; + --srclen; + } + break; + case ENCODING_UTF8: /* RFC 3629 */ + { + Uint8 *p = (Uint8 *)src; + size_t left = 0; + bool overlong = false; + if ( p[0] >= 0xFC ) { + if ( (p[0] & 0xFE) != 0xFC ) { + /* Skip illegal sequences + return XERCES_ICONV_EILSEQ; + */ + ch = UNKNOWN_UNICODE; + } else { + if ( p[0] == 0xFC ) { + overlong = true; + } + ch = (Uint32)(p[0] & 0x01); + left = 5; + } + } else if ( p[0] >= 0xF8 ) { + if ( (p[0] & 0xFC) != 0xF8 ) { + /* Skip illegal sequences + return XERCES_ICONV_EILSEQ; + */ + ch = UNKNOWN_UNICODE; + } else { + if ( p[0] == 0xF8 ) { + overlong = true; + } + ch = (Uint32)(p[0] & 0x03); + left = 4; + } + } else if ( p[0] >= 0xF0 ) { + if ( (p[0] & 0xF8) != 0xF0 ) { + /* Skip illegal sequences + return XERCES_ICONV_EILSEQ; + */ + ch = UNKNOWN_UNICODE; + } else { + if ( p[0] == 0xF0 ) { + overlong = true; + } + ch = (Uint32)(p[0] & 0x07); + left = 3; + } + } else if ( p[0] >= 0xE0 ) { + if ( (p[0] & 0xF0) != 0xE0 ) { + /* Skip illegal sequences + return XERCES_ICONV_EILSEQ; + */ + ch = UNKNOWN_UNICODE; + } else { + if ( p[0] == 0xE0 ) { + overlong = true; + } + ch = (Uint32)(p[0] & 0x0F); + left = 2; + } + } else if ( p[0] >= 0xC0 ) { + if ( (p[0] & 0xE0) != 0xC0 ) { + /* Skip illegal sequences + return XERCES_ICONV_EILSEQ; + */ + ch = UNKNOWN_UNICODE; + } else { + if ( (p[0] & 0xDE) == 0xC0 ) { + overlong = true; + } + ch = (Uint32)(p[0] & 0x1F); + left = 1; + } + } else { + if ( (p[0] & 0x80) != 0x00 ) { + /* Skip illegal sequences + return XERCES_ICONV_EILSEQ; + */ + ch = UNKNOWN_UNICODE; + } else { + ch = (Uint32)p[0]; + } + } + ++src; + --srclen; + if ( srclen < left ) { + return XERCES_ICONV_EINVAL; + } + while ( left-- ) { + ++p; + if ( (p[0] & 0xC0) != 0x80 ) { + /* Skip illegal sequences + return XERCES_ICONV_EILSEQ; + */ + ch = UNKNOWN_UNICODE; + break; + } + ch <<= 6; + ch |= (p[0] & 0x3F); + ++src; + --srclen; + } + if ( overlong ) { + /* Potential security risk + return XERCES_ICONV_EILSEQ; + */ + ch = UNKNOWN_UNICODE; + } + if ( (ch >= 0xD800 && ch <= 0xDFFF) || + (ch == 0xFFFE || ch == 0xFFFF) || + ch > 0x10FFFF ) { + /* Skip illegal sequences + return XERCES_ICONV_EILSEQ; + */ + ch = UNKNOWN_UNICODE; + } + } + break; + case ENCODING_UTF16BE: /* RFC 2781 */ + { + Uint8 *p = (Uint8 *)src; + Uint16 W1, W2; + if ( srclen < 2 ) { + return XERCES_ICONV_EINVAL; + } + W1 = ((Uint16)p[0] << 8) | + (Uint16)p[1]; + src += 2; + srclen -= 2; + if ( W1 < 0xD800 || W1 > 0xDFFF ) { + ch = (Uint32)W1; + break; + } + if ( W1 > 0xDBFF ) { + /* Skip illegal sequences + return XERCES_ICONV_EILSEQ; + */ + ch = UNKNOWN_UNICODE; + break; + } + if ( srclen < 2 ) { + return XERCES_ICONV_EINVAL; + } + p = (Uint8 *)src; + W2 = ((Uint16)p[0] << 8) | + (Uint16)p[1]; + src += 2; + srclen -= 2; + if ( W2 < 0xDC00 || W2 > 0xDFFF ) { + /* Skip illegal sequences + return XERCES_ICONV_EILSEQ; + */ + ch = UNKNOWN_UNICODE; + break; + } + ch = (((Uint32)(W1 & 0x3FF) << 10) | + (Uint32)(W2 & 0x3FF)) + 0x10000; + } + break; + case ENCODING_UTF16LE: /* RFC 2781 */ + { + Uint8 *p = (Uint8 *)src; + Uint16 W1, W2; + if ( srclen < 2 ) { + return XERCES_ICONV_EINVAL; + } + W1 = ((Uint16)p[1] << 8) | + (Uint16)p[0]; + src += 2; + srclen -= 2; + if ( W1 < 0xD800 || W1 > 0xDFFF ) { + ch = (Uint32)W1; + break; + } + if ( W1 > 0xDBFF ) { + /* Skip illegal sequences + return XERCES_ICONV_EILSEQ; + */ + ch = UNKNOWN_UNICODE; + break; + } + if ( srclen < 2 ) { + return XERCES_ICONV_EINVAL; + } + p = (Uint8 *)src; + W2 = ((Uint16)p[1] << 8) | + (Uint16)p[0]; + src += 2; + srclen -= 2; + if ( W2 < 0xDC00 || W2 > 0xDFFF ) { + /* Skip illegal sequences + return XERCES_ICONV_EILSEQ; + */ + ch = UNKNOWN_UNICODE; + break; + } + ch = (((Uint32)(W1 & 0x3FF) << 10) | + (Uint32)(W2 & 0x3FF)) + 0x10000; + } + break; + case ENCODING_UTF32BE: + { + Uint8 *p = (Uint8 *)src; + if ( srclen < 4 ) { + return XERCES_ICONV_EINVAL; + } + ch = ((Uint32)p[0] << 24) | + ((Uint32)p[1] << 16) | + ((Uint32)p[2] << 8) | + (Uint32)p[3]; + src += 4; + srclen -= 4; + } + break; + case ENCODING_UTF32LE: + { + Uint8 *p = (Uint8 *)src; + if ( srclen < 4 ) { + return XERCES_ICONV_EINVAL; + } + ch = ((Uint32)p[3] << 24) | + ((Uint32)p[2] << 16) | + ((Uint32)p[1] << 8) | + (Uint32)p[0]; + src += 4; + srclen -= 4; + } + break; + case ENCODING_UCS2: + { + Uint16 *p = (Uint16 *)src; + if ( srclen < 2 ) { + return XERCES_ICONV_EINVAL; + } + ch = *p; + src += 2; + srclen -= 2; + } + break; + case ENCODING_UCS4: + { + Uint32 *p = (Uint32 *)src; + if ( srclen < 4 ) { + return XERCES_ICONV_EINVAL; + } + ch = *p; + src += 4; + srclen -= 4; + } + break; + } + + /* Encode a character */ + switch ( cd->dst_fmt ) { + case ENCODING_ASCII: + { + Uint8 *p = (Uint8 *)dst; + if ( dstlen < 1 ) { + return XERCES_ICONV_E2BIG; + } + if ( ch > 0x7F ) { + *p = UNKNOWN_ASCII; + } else { + *p = (Uint8)ch; + } + ++dst; + --dstlen; + } + break; + case ENCODING_LATIN1: + { + Uint8 *p = (Uint8 *)dst; + if ( dstlen < 1 ) { + return XERCES_ICONV_E2BIG; + } + if ( ch > 0xFF ) { + *p = UNKNOWN_ASCII; + } else { + *p = (Uint8)ch; + } + ++dst; + --dstlen; + } + break; + case ENCODING_UTF8: /* RFC 3629 */ + { + Uint8 *p = (Uint8 *)dst; + if ( ch > 0x10FFFF ) { + ch = UNKNOWN_UNICODE; + } + if ( ch <= 0x7F ) { + if ( dstlen < 1 ) { + return XERCES_ICONV_E2BIG; + } + *p = (Uint8)ch; + ++dst; + --dstlen; + } else if ( ch <= 0x7FF ) { + if ( dstlen < 2 ) { + return XERCES_ICONV_E2BIG; + } + p[0] = 0xC0 | (Uint8)((ch >> 6) & 0x1F); + p[1] = 0x80 | (Uint8)(ch & 0x3F); + dst += 2; + dstlen -= 2; + } else if ( ch <= 0xFFFF ) { + if ( dstlen < 3 ) { + return XERCES_ICONV_E2BIG; + } + p[0] = 0xE0 | (Uint8)((ch >> 12) & 0x0F); + p[1] = 0x80 | (Uint8)((ch >> 6) & 0x3F); + p[2] = 0x80 | (Uint8)(ch & 0x3F); + dst += 3; + dstlen -= 3; + } else if ( ch <= 0x1FFFFF ) { + if ( dstlen < 4 ) { + return XERCES_ICONV_E2BIG; + } + p[0] = 0xF0 | (Uint8)((ch >> 18) & 0x07); + p[1] = 0x80 | (Uint8)((ch >> 12) & 0x3F); + p[2] = 0x80 | (Uint8)((ch >> 6) & 0x3F); + p[3] = 0x80 | (Uint8)(ch & 0x3F); + dst += 4; + dstlen -= 4; + } else if ( ch <= 0x3FFFFFF ) { + if ( dstlen < 5 ) { + return XERCES_ICONV_E2BIG; + } + p[0] = 0xF8 | (Uint8)((ch >> 24) & 0x03); + p[1] = 0x80 | (Uint8)((ch >> 18) & 0x3F); + p[2] = 0x80 | (Uint8)((ch >> 12) & 0x3F); + p[3] = 0x80 | (Uint8)((ch >> 6) & 0x3F); + p[4] = 0x80 | (Uint8)(ch & 0x3F); + dst += 5; + dstlen -= 5; + } else { + if ( dstlen < 6 ) { + return XERCES_ICONV_E2BIG; + } + p[0] = 0xFC | (Uint8)((ch >> 30) & 0x01); + p[1] = 0x80 | (Uint8)((ch >> 24) & 0x3F); + p[2] = 0x80 | (Uint8)((ch >> 18) & 0x3F); + p[3] = 0x80 | (Uint8)((ch >> 12) & 0x3F); + p[4] = 0x80 | (Uint8)((ch >> 6) & 0x3F); + p[5] = 0x80 | (Uint8)(ch & 0x3F); + dst += 6; + dstlen -= 6; + } + } + break; + case ENCODING_UTF16BE: /* RFC 2781 */ + { + Uint8 *p = (Uint8 *)dst; + if ( ch > 0x10FFFF ) { + ch = UNKNOWN_UNICODE; + } + if ( ch < 0x10000 ) { + if ( dstlen < 2 ) { + return XERCES_ICONV_E2BIG; + } + p[0] = (Uint8)(ch >> 8); + p[1] = (Uint8)ch; + dst += 2; + dstlen -= 2; + } else { + Uint16 W1, W2; + if ( dstlen < 4 ) { + return XERCES_ICONV_E2BIG; + } + ch = ch - 0x10000; + W1 = 0xD800 | (Uint16)((ch >> 10) & 0x3FF); + W2 = 0xDC00 | (Uint16)(ch & 0x3FF); + p[0] = (Uint8)(W1 >> 8); + p[1] = (Uint8)W1; + p[2] = (Uint8)(W2 >> 8); + p[3] = (Uint8)W2; + dst += 4; + dstlen -= 4; + } + } + break; + case ENCODING_UTF16LE: /* RFC 2781 */ + { + Uint8 *p = (Uint8 *)dst; + if ( ch > 0x10FFFF ) { + ch = UNKNOWN_UNICODE; + } + if ( ch < 0x10000 ) { + if ( dstlen < 2 ) { + return XERCES_ICONV_E2BIG; + } + p[1] = (Uint8)(ch >> 8); + p[0] = (Uint8)ch; + dst += 2; + dstlen -= 2; + } else { + Uint16 W1, W2; + if ( dstlen < 4 ) { + return XERCES_ICONV_E2BIG; + } + ch = ch - 0x10000; + W1 = 0xD800 | (Uint16)((ch >> 10) & 0x3FF); + W2 = 0xDC00 | (Uint16)(ch & 0x3FF); + p[1] = (Uint8)(W1 >> 8); + p[0] = (Uint8)W1; + p[3] = (Uint8)(W2 >> 8); + p[2] = (Uint8)W2; + dst += 4; + dstlen -= 4; + } + } + break; + case ENCODING_UTF32BE: + { + Uint8 *p = (Uint8 *)dst; + if ( ch > 0x10FFFF ) { + ch = UNKNOWN_UNICODE; + } + if ( dstlen < 4 ) { + return XERCES_ICONV_E2BIG; + } + p[0] = (Uint8)(ch >> 24); + p[1] = (Uint8)(ch >> 16); + p[2] = (Uint8)(ch >> 8); + p[3] = (Uint8)ch; + dst += 4; + dstlen -= 4; + } + break; + case ENCODING_UTF32LE: + { + Uint8 *p = (Uint8 *)dst; + if ( ch > 0x10FFFF ) { + ch = UNKNOWN_UNICODE; + } + if ( dstlen < 4 ) { + return XERCES_ICONV_E2BIG; + } + p[3] = (Uint8)(ch >> 24); + p[2] = (Uint8)(ch >> 16); + p[1] = (Uint8)(ch >> 8); + p[0] = (Uint8)ch; + dst += 4; + dstlen -= 4; + } + break; + case ENCODING_UCS2: + { + Uint16 *p = (Uint16 *)dst; + if ( ch > 0xFFFF ) { + ch = UNKNOWN_UNICODE; + } + if ( dstlen < 2 ) { + return XERCES_ICONV_E2BIG; + } + *p = (Uint16)ch; + dst += 2; + dstlen -= 2; + } + break; + case ENCODING_UCS4: + { + Uint32 *p = (Uint32 *)dst; + if ( ch > 0x7FFFFFFF ) { + ch = UNKNOWN_UNICODE; + } + if ( dstlen < 4 ) { + return XERCES_ICONV_E2BIG; + } + *p = ch; + dst += 4; + dstlen -= 4; + } + break; + } + + /* Update state */ + *inbuf = src; + *inbytesleft = srclen; + *outbuf = dst; + *outbytesleft = dstlen; + ++total; + } + return total; +} + +int XERCES_iconv_close(XERCES_iconv_t cd) +{ + if ( cd && cd != (XERCES_iconv_t)-1 ) { + free(cd); + } + return 0; +} + +char *XERCES_iconv_string(const char *tocode, const char *fromcode, const char *inbuf, size_t inbytesleft) +{ + XERCES_iconv_t cd; + char *string; + size_t stringsize; + char *outbuf; + size_t outbytesleft; + size_t retCode = 0; + + cd = XERCES_iconv_open(tocode, fromcode); + if ( cd == (XERCES_iconv_t)-1 ) { + /* See if we can recover here (fixes iconv on Solaris 11) */ + if ( !tocode || !*tocode ) { + tocode = "UTF-8"; + } + if ( !fromcode || !*fromcode ) { + fromcode = "UTF-8"; + } + cd = XERCES_iconv_open(tocode, fromcode); + } + if ( cd == (XERCES_iconv_t)-1 ) { + return NULL; + } + + stringsize = inbytesleft > 4 ? inbytesleft : 4; + string = malloc(stringsize); + if ( !string ) { + XERCES_iconv_close(cd); + return NULL; + } + outbuf = string; + outbytesleft = stringsize; + memset(outbuf, 0, 4); + + while ( inbytesleft > 0 ) { + retCode = XERCES_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft); + switch (retCode) { + case XERCES_ICONV_E2BIG: + { + char *oldstring = string; + stringsize *= 2; + string = realloc(string, stringsize); + if ( !string ) { + XERCES_iconv_close(cd); + return NULL; + } + outbuf = string + (outbuf - oldstring); + outbytesleft = stringsize - (outbuf - string); + memset(outbuf, 0, 4); + } + break; + case XERCES_ICONV_EILSEQ: + /* Try skipping some input data - not perfect, but... */ + ++inbuf; + --inbytesleft; + break; + case XERCES_ICONV_EINVAL: + case XERCES_ICONV_ERROR: + /* We can't continue... */ + inbytesleft = 0; + break; + } + } + XERCES_iconv_close(cd); + + return string; +}