xerces: replaced SDL_iconv with internal iconv to fix linking

This commit is contained in:
Sergii Pylypenko
2019-07-02 22:12:26 +03:00
parent dd184b184e
commit f89164b152
5 changed files with 902 additions and 32 deletions

View File

@@ -7,7 +7,7 @@ XERCES_SUBDIRS := $(patsubst $(LOCAL_PATH)/%, %, $(shell find $(LOCAL_PATH)/src/
# Name of the module these settings belong to.
LOCAL_MODULE := xerces
# Header search directories for the module, all relative to $(LOCAL_PATH).
# NOTE(review): the two continuation lines that differ only by the leading
# "$(LOCAL_PATH)/.." entry look like the removed and the added side of the
# diff hunk shown together - confirm only one of them is in the real file.
LOCAL_C_INCLUDES := $(LOCAL_PATH) $(LOCAL_PATH)/src $(LOCAL_PATH)/include \
$(LOCAL_PATH)/.. $(LOCAL_PATH)/include/xercesc/util $(LOCAL_PATH)/include/xercesc/util/MsgLoaders/InMemory \
$(LOCAL_PATH)/include/xercesc/util $(LOCAL_PATH)/include/xercesc/util/MsgLoaders/InMemory \
$(LOCAL_PATH)/include/xercesc/dom/ $(LOCAL_PATH)/include/xercesc/dom/impl \
$(LOCAL_PATH)/include/xercesc/validators/schema/identity $(LOCAL_PATH)/include/xercesc/util/Transcoders/IconvGNU/ \
$(LOCAL_PATH)/include/xercesc/sax

View File

@@ -1,30 +0,0 @@
// SDL_iconv >> iconv wrapper dummy
// Redirects the iconv names (iconv_t, iconv, iconv_open, iconv_close) to the
// SDL_iconv_* functions declared below.
#ifndef DUMMY_ICONV_H
#define DUMMY_ICONV_H
// Drop any earlier HAVE_ICONV definition; it is set to 1 again at the end of this header.
#undef HAVE_ICONV
#ifdef __cplusplus
extern "C" {
#endif
// #include <SDL.h> // We'll have to rebuild xerces if we'll include this file and change SDL version, so just provide declarations here
// Opaque conversion handle; the structure itself is not defined in this header.
typedef struct _SDL_iconv_t *SDL_iconv_t;
extern SDL_iconv_t SDL_iconv_open(const char *tocode, const char *fromcode);
extern int SDL_iconv_close(SDL_iconv_t cd);
extern size_t SDL_iconv(SDL_iconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft);
extern char * SDL_iconv_string(const char *tocode, const char *fromcode, const char *inbuf, size_t inbytesleft);
// From here on every use of the iconv names in the including file refers to the SDL functions.
#define iconv_t SDL_iconv_t
#define iconv SDL_iconv
#define iconv_open SDL_iconv_open
#define iconv_close SDL_iconv_close
#ifdef __cplusplus
}
#endif
#define HAVE_ICONV 1
#endif

View File

@@ -0,0 +1,41 @@
// iconv wrapper: routes the iconv names (iconv_t, iconv, iconv_open,
// iconv_close) to the XERCES_iconv_* functions declared below.
#ifndef XERCES_DUMMY_ICONV_H
#define XERCES_DUMMY_ICONV_H
#include <stddef.h> /* size_t, used by the declarations and error codes below */
/* Drop any earlier HAVE_ICONV definition; it is set to 1 again at the end. */
#undef HAVE_ICONV
#ifdef __cplusplus
extern "C" {
#endif
/* Error values returned by XERCES_iconv().  Each expansion is fully
   parenthesised so it behaves as a single operand in any expression
   (e.g. `sizeof XERCES_ICONV_E2BIG`). */
#define XERCES_ICONV_ERROR ((size_t)-1)
#define XERCES_ICONV_E2BIG ((size_t)-2)
#define XERCES_ICONV_EILSEQ ((size_t)-3)
#define XERCES_ICONV_EINVAL ((size_t)-4)
/* Convenience wrappers around XERCES_iconv_string() for UTF-8 input; the
   terminating NUL is included in the converted length.  The including file
   must provide strlen() and, for the UCS variants, the Uint16/Uint32 types:
   this header declares none of them. */
#define XERCES_iconv_utf8_locale(S) (XERCES_iconv_string("", "UTF-8", S, strlen(S)+1))
#define XERCES_iconv_utf8_ucs2(S) ((Uint16 *)XERCES_iconv_string("UCS-2", "UTF-8", S, strlen(S)+1))
#define XERCES_iconv_utf8_ucs4(S) ((Uint32 *)XERCES_iconv_string("UCS-4", "UTF-8", S, strlen(S)+1))
/* Opaque conversion handle; the structure is not defined in this header. */
typedef struct _XERCES_iconv_t *XERCES_iconv_t;
extern XERCES_iconv_t XERCES_iconv_open(const char *tocode, const char *fromcode);
extern int XERCES_iconv_close(XERCES_iconv_t cd);
extern size_t XERCES_iconv(XERCES_iconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft);
extern char * XERCES_iconv_string(const char *tocode, const char *fromcode, const char *inbuf, size_t inbytesleft);
/* From here on every use of the iconv names in the including file refers
   to the XERCES_* functions. */
#define iconv_t XERCES_iconv_t
#define iconv XERCES_iconv
#define iconv_open XERCES_iconv_open
#define iconv_close XERCES_iconv_close
#ifdef __cplusplus
}
#endif
#define HAVE_ICONV 1
#endif

View File

@@ -25,7 +25,7 @@
#include <xercesc/util/TransService.hpp>
#include <xercesc/util/Mutexes.hpp>
#include <iconv/iconv.h>
#include <xercesc/iconv/iconv.h>
XERCES_CPP_NAMESPACE_BEGIN

View File

@@ -0,0 +1,859 @@
/*
SDL - Simple DirectMedia Layer
Copyright (C) 1997-2009 Sam Lantinga
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
Sam Lantinga
slouken@libsdl.org
*/
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <endian.h>
#include <stdbool.h>
/* This file contains portable iconv functions for Xerces, adapted from SDL's portable iconv implementation */
#include <xercesc/iconv/iconv.h>
/* Number of elements of a real array (not of a pointer).  The argument is
   parenthesised so that any array-valued expression can be passed. */
#define XERCES_arraysize(array) (sizeof(array)/sizeof((array)[0]))
/* Fixed-width integer aliases used throughout this file. */
typedef int8_t Sint8;
typedef uint8_t Uint8;
typedef int16_t Sint16;
typedef uint16_t Uint16;
typedef int32_t Sint32;
typedef uint32_t Uint32;
/* Lots of useful information on Unicode at:
http://www.cl.cam.ac.uk/~mgk25/unicode.html
*/
/* Byte order mark code point. */
#define UNICODE_BOM 0xFEFF
/* Substitutes written when a character cannot be represented. */
#define UNKNOWN_ASCII '?'
#define UNKNOWN_UNICODE 0xFFFD
/* Identifiers of the character encodings handled by this file.
   ENCODING_UNKNOWN is the first enumerator and therefore has the value 0;
   it is used as the "no match" marker when encoding names are looked up. */
enum {
ENCODING_UNKNOWN,
ENCODING_ASCII,
ENCODING_LATIN1,
ENCODING_UTF8,
ENCODING_UTF16, /* Needs byte order marker */
ENCODING_UTF16BE,
ENCODING_UTF16LE,
ENCODING_UTF32, /* Needs byte order marker */
ENCODING_UTF32BE,
ENCODING_UTF32LE,
ENCODING_UCS2, /* Native byte order assumed */
ENCODING_UCS4, /* Native byte order assumed */
};
/* The native-endian aliases below need BYTE_ORDER and BIG_ENDIAN (expected
   from the headers included above); stop the build if either is missing. */
#ifndef BYTE_ORDER
#error "BYTE_ORDER undefined"
#endif
#ifndef BIG_ENDIAN
#error "BIG_ENDIAN undefined"
#endif
/* Map the "native byte order" UTF-16/UTF-32 variants onto the explicit
   big-endian or little-endian encoding of the build target. */
#if BYTE_ORDER == BIG_ENDIAN
#define ENCODING_UTF16NATIVE ENCODING_UTF16BE
#define ENCODING_UTF32NATIVE ENCODING_UTF32BE
#else
#define ENCODING_UTF16NATIVE ENCODING_UTF16LE
#define ENCODING_UTF32NATIVE ENCODING_UTF32LE
#endif
/* Conversion descriptor: the pair of encodings a handle converts between.
   Both fields hold one of the ENCODING_* values. */
struct _XERCES_iconv_t
{
int src_fmt; /* encoding of the input bytes */
int dst_fmt; /* encoding of the output bytes */
};
/* Lookup table from encoding name to ENCODING_* identifier.  Several
   spellings map to the same identifier.  The table is only ever read, so
   it is declared const. */
static const struct {
    const char *name;   /* encoding name as passed to XERCES_iconv_open() */
    int format;         /* matching ENCODING_* value */
} encodings[] = {
    { "ASCII", ENCODING_ASCII },
    { "US-ASCII", ENCODING_ASCII },
    { "8859-1", ENCODING_LATIN1 },
    { "ISO-8859-1", ENCODING_LATIN1 },
    { "UTF8", ENCODING_UTF8 },
    { "UTF-8", ENCODING_UTF8 },
    { "UTF16", ENCODING_UTF16 },
    { "UTF-16", ENCODING_UTF16 },
    { "UTF16BE", ENCODING_UTF16BE },
    { "UTF-16BE", ENCODING_UTF16BE },
    { "UTF16LE", ENCODING_UTF16LE },
    { "UTF-16LE", ENCODING_UTF16LE },
    { "UTF32", ENCODING_UTF32 },
    { "UTF-32", ENCODING_UTF32 },
    { "UTF32BE", ENCODING_UTF32BE },
    { "UTF-32BE", ENCODING_UTF32BE },
    { "UTF32LE", ENCODING_UTF32LE },
    { "UTF-32LE", ENCODING_UTF32LE },
    { "UCS2", ENCODING_UCS2 },
    { "UCS-2", ENCODING_UCS2 },
    { "UCS4", ENCODING_UCS4 },
    { "UCS-4", ENCODING_UCS4 },
};
/*
 * Derive a character-set name from the locale environment variables.
 *
 * LC_ALL, LC_CTYPE, LC_MESSAGES and LANG are consulted in that order; the
 * first one that is set to a non-empty value wins (an empty value counts
 * as "not set", so it cannot hide a later variable).  If none is usable,
 * or the value is "C", the result is "ASCII".  A value such as
 * "en_US.UTF-8@blah" is reduced to "UTF-8": everything up to and including
 * the first '.' and everything from the first '@' on is dropped.
 *
 * buffer   destination for the NUL-terminated name
 * bufsize  size of buffer in bytes; longer names are truncated
 *
 * Returns buffer.  When bufsize is 0 the buffer is left untouched.
 */
static const char *getlocale(char *buffer, size_t bufsize)
{
    static const char *const variables[] = {
        "LC_ALL", "LC_CTYPE", "LC_MESSAGES", "LANG"
    };
    const char *lang = NULL;
    const char *dot;
    char *at;
    size_t i;
    size_t len;

    if ( bufsize == 0 ) {
        /* No room even for the terminator. */
        return buffer;
    }

    for ( i = 0; i < sizeof(variables) / sizeof(variables[0]); ++i ) {
        const char *value = getenv(variables[i]);
        if ( value != NULL && *value != '\0' ) {
            lang = value;
            break;
        }
    }
    if ( lang == NULL || strcmp(lang, "C") == 0 ) {
        lang = "ASCII";
    }

    /* We need to trim down strings like "en_US.UTF-8@blah" to "UTF-8" */
    dot = strchr(lang, '.');
    if ( dot != NULL ) {
        lang = dot + 1;
    }

    /* Bounded copy that always terminates, using only standard C. */
    len = strlen(lang);
    if ( len >= bufsize ) {
        len = bufsize - 1;
    }
    memcpy(buffer, lang, len);
    buffer[len] = '\0';

    at = strchr(buffer, '@');
    if ( at != NULL ) {
        *at = '\0'; /* chop end of string. */
    }
    return buffer;
}
XERCES_iconv_t XERCES_iconv_open(const char *tocode, const char *fromcode)
{
int src_fmt = ENCODING_UNKNOWN;
int dst_fmt = ENCODING_UNKNOWN;
int i;
char fromcode_buffer[64];
char tocode_buffer[64];
if ( !fromcode || !*fromcode ) {
fromcode = getlocale(fromcode_buffer, sizeof(fromcode_buffer));
}
if ( !tocode || !*tocode ) {
tocode = getlocale(tocode_buffer, sizeof(tocode_buffer));
}
for ( i = 0; i < XERCES_arraysize(encodings); ++i ) {
if ( strcasecmp(fromcode, encodings[i].name) == 0 ) {
src_fmt = encodings[i].format;
if ( dst_fmt != ENCODING_UNKNOWN ) {
break;
}
}
if ( strcasecmp(tocode, encodings[i].name) == 0 ) {
dst_fmt = encodings[i].format;
if ( src_fmt != ENCODING_UNKNOWN ) {
break;
}
}
}
if ( src_fmt != ENCODING_UNKNOWN && dst_fmt != ENCODING_UNKNOWN ) {
XERCES_iconv_t cd = (XERCES_iconv_t)malloc(sizeof(*cd));
if ( cd ) {
cd->src_fmt = src_fmt;
cd->dst_fmt = dst_fmt;
return cd;
}
}
return (XERCES_iconv_t)-1;
}
/*
 * Convert characters from *inbuf to *outbuf, one at a time, using the
 * encodings stored in `cd`.
 *
 * cd            handle from XERCES_iconv_open(); its src_fmt/dst_fmt are
 *               updated in place when a generic "UTF-16"/"UTF-32" encoding
 *               is resolved to a concrete byte order.
 * inbuf         in/out: next input byte; NULL or *inbuf == NULL means
 *               "reset", which is a no-op returning 0.
 * inbytesleft   in/out: remaining input bytes (NULL is treated as 0).
 * outbuf        in/out: next output byte.
 * outbytesleft  in/out: remaining output space.
 *
 * After every successfully converted character the four in/out values are
 * advanced, so on an error return they describe the state just before the
 * character that failed.
 *
 * Returns the number of characters converted, or
 *   XERCES_ICONV_E2BIG   the output buffer is missing or too small,
 *   XERCES_ICONV_EINVAL  the input ends inside a multi-byte character,
 *   XERCES_ICONV_ERROR   the handle holds an encoding this function does
 *                        not know.
 * Malformed input is not an error: it is replaced by UNKNOWN_UNICODE
 * (or UNKNOWN_ASCII for the 8-bit targets).
 */
size_t XERCES_iconv(XERCES_iconv_t cd,
                    const char **inbuf, size_t *inbytesleft,
                    char **outbuf, size_t *outbytesleft)
{
    /* For simplicity, we'll convert everything to and from UCS-4 */
    const char *src;
    char *dst;
    size_t srclen, dstlen;
    Uint32 ch = 0;
    size_t total;

    if ( !inbuf || !*inbuf ) {
        /* Reset the context */
        return 0;
    }
    if ( !outbuf || !*outbuf || !outbytesleft || !*outbytesleft ) {
        return XERCES_ICONV_E2BIG;
    }
    src = *inbuf;
    srclen = (inbytesleft ? *inbytesleft : 0);
    dst = *outbuf;
    dstlen = *outbytesleft;

    /* Resolve a generic UTF-16/UTF-32 source to a concrete byte order. */
    switch ( cd->src_fmt ) {
    case ENCODING_UTF16:
        /* Scan for a byte order marker */
        {
            const Uint8 *p = (const Uint8 *)src;
            size_t n = srclen / 2;
            while ( n ) {
                /* U+FEFF is stored as FE FF in big-endian data and as
                   FF FE in little-endian data. */
                if ( p[0] == 0xFE && p[1] == 0xFF ) {
                    cd->src_fmt = ENCODING_UTF16BE;
                    break;
                } else if ( p[0] == 0xFF && p[1] == 0xFE ) {
                    cd->src_fmt = ENCODING_UTF16LE;
                    break;
                }
                p += 2;
                --n;
            }
            if ( n == 0 ) {
                /* We can't tell, default to host order */
                cd->src_fmt = ENCODING_UTF16NATIVE;
            }
        }
        break;
    case ENCODING_UTF32:
        /* Scan for a byte order marker */
        {
            const Uint8 *p = (const Uint8 *)src;
            size_t n = srclen / 4;
            while ( n ) {
                /* U+0000FEFF is 00 00 FE FF in big-endian data and
                   FF FE 00 00 in little-endian data. */
                if ( p[0] == 0x00 && p[1] == 0x00 &&
                     p[2] == 0xFE && p[3] == 0xFF ) {
                    cd->src_fmt = ENCODING_UTF32BE;
                    break;
                } else if ( p[0] == 0xFF && p[1] == 0xFE &&
                            p[2] == 0x00 && p[3] == 0x00 ) {
                    cd->src_fmt = ENCODING_UTF32LE;
                    break;
                }
                p += 4;
                --n;
            }
            if ( n == 0 ) {
                /* We can't tell, default to host order */
                cd->src_fmt = ENCODING_UTF32NATIVE;
            }
        }
        break;
    default:
        break;
    }

    /* A generic UTF-16/UTF-32 target is written in host order, preceded by
       a byte order marker.  The marker is committed to the caller's output
       position right away: the target format is switched to the native
       variant here, so the marker would not be written again on a retry. */
    switch ( cd->dst_fmt ) {
    case ENCODING_UTF16:
        if ( dstlen < 2 ) {
            return XERCES_ICONV_E2BIG;
        }
        {
            const Uint16 bom = UNICODE_BOM;
            memcpy(dst, &bom, sizeof(bom)); /* dst may be unaligned */
        }
        dst += 2;
        dstlen -= 2;
        cd->dst_fmt = ENCODING_UTF16NATIVE;
        *outbuf = dst;
        *outbytesleft = dstlen;
        break;
    case ENCODING_UTF32:
        if ( dstlen < 4 ) {
            return XERCES_ICONV_E2BIG;
        }
        {
            const Uint32 bom = UNICODE_BOM;
            memcpy(dst, &bom, sizeof(bom)); /* dst may be unaligned */
        }
        dst += 4;
        dstlen -= 4;
        cd->dst_fmt = ENCODING_UTF32NATIVE;
        *outbuf = dst;
        *outbytesleft = dstlen;
        break;
    default:
        break;
    }

    total = 0;
    while ( srclen > 0 ) {
        /* Decode a character */
        switch ( cd->src_fmt ) {
        case ENCODING_ASCII:
            {
                const Uint8 *p = (const Uint8 *)src;
                ch = (Uint32)(p[0] & 0x7F);
                ++src;
                --srclen;
            }
            break;
        case ENCODING_LATIN1:
            {
                const Uint8 *p = (const Uint8 *)src;
                ch = (Uint32)p[0];
                ++src;
                --srclen;
            }
            break;
        case ENCODING_UTF8: /* RFC 3629 */
            {
                const Uint8 *p = (const Uint8 *)src;
                size_t left = 0;       /* continuation bytes still expected */
                bool overlong = false;

                /* A sequence is overlong only when its lead byte carries no
                   payload bits AND the first continuation byte is too small;
                   the lead byte alone is not enough (E0 A0 80 is the valid
                   encoding of U+0800). */
                if ( p[0] >= 0xFC ) {
                    if ( (p[0] & 0xFE) != 0xFC ) {
                        /* Illegal lead byte: substitute instead of failing */
                        ch = UNKNOWN_UNICODE;
                    } else {
                        if ( p[0] == 0xFC && srclen > 1 && (p[1] & 0xFC) == 0x80 ) {
                            overlong = true;
                        }
                        ch = (Uint32)(p[0] & 0x01);
                        left = 5;
                    }
                } else if ( p[0] >= 0xF8 ) {
                    if ( (p[0] & 0xFC) != 0xF8 ) {
                        ch = UNKNOWN_UNICODE;
                    } else {
                        if ( p[0] == 0xF8 && srclen > 1 && (p[1] & 0xF8) == 0x80 ) {
                            overlong = true;
                        }
                        ch = (Uint32)(p[0] & 0x03);
                        left = 4;
                    }
                } else if ( p[0] >= 0xF0 ) {
                    if ( (p[0] & 0xF8) != 0xF0 ) {
                        ch = UNKNOWN_UNICODE;
                    } else {
                        if ( p[0] == 0xF0 && srclen > 1 && (p[1] & 0xF0) == 0x80 ) {
                            overlong = true;
                        }
                        ch = (Uint32)(p[0] & 0x07);
                        left = 3;
                    }
                } else if ( p[0] >= 0xE0 ) {
                    if ( (p[0] & 0xF0) != 0xE0 ) {
                        ch = UNKNOWN_UNICODE;
                    } else {
                        if ( p[0] == 0xE0 && srclen > 1 && (p[1] & 0xE0) == 0x80 ) {
                            overlong = true;
                        }
                        ch = (Uint32)(p[0] & 0x0F);
                        left = 2;
                    }
                } else if ( p[0] >= 0xC0 ) {
                    if ( (p[0] & 0xE0) != 0xC0 ) {
                        ch = UNKNOWN_UNICODE;
                    } else {
                        /* C0 and C1 can only start overlong sequences */
                        if ( (p[0] & 0xDE) == 0xC0 ) {
                            overlong = true;
                        }
                        ch = (Uint32)(p[0] & 0x1F);
                        left = 1;
                    }
                } else {
                    if ( (p[0] & 0x80) != 0x00 ) {
                        /* Stray continuation byte */
                        ch = UNKNOWN_UNICODE;
                    } else {
                        ch = (Uint32)p[0];
                    }
                }
                ++src;
                --srclen;
                if ( srclen < left ) {
                    return XERCES_ICONV_EINVAL;
                }
                while ( left-- ) {
                    ++p;
                    if ( (p[0] & 0xC0) != 0x80 ) {
                        /* Not a continuation byte: substitute and let the
                           next iteration look at this byte again */
                        ch = UNKNOWN_UNICODE;
                        break;
                    }
                    ch <<= 6;
                    ch |= (p[0] & 0x3F);
                    ++src;
                    --srclen;
                }
                if ( overlong ) {
                    /* Potential security risk */
                    ch = UNKNOWN_UNICODE;
                }
                if ( (ch >= 0xD800 && ch <= 0xDFFF) ||
                     (ch == 0xFFFE || ch == 0xFFFF) ||
                     ch > 0x10FFFF ) {
                    /* Surrogates, non-characters and out-of-range values */
                    ch = UNKNOWN_UNICODE;
                }
            }
            break;
        case ENCODING_UTF16BE: /* RFC 2781 */
        case ENCODING_UTF16LE:
            {
                const Uint8 *p = (const Uint8 *)src;
                /* Index of the high and low byte inside each 16-bit unit */
                const int hi = (cd->src_fmt == ENCODING_UTF16BE) ? 0 : 1;
                const int lo = 1 - hi;
                Uint16 W1, W2;

                if ( srclen < 2 ) {
                    return XERCES_ICONV_EINVAL;
                }
                W1 = (Uint16)(((Uint16)p[hi] << 8) | (Uint16)p[lo]);
                src += 2;
                srclen -= 2;
                if ( W1 < 0xD800 || W1 > 0xDFFF ) {
                    ch = (Uint32)W1;
                    break;
                }
                if ( W1 > 0xDBFF ) {
                    /* Low surrogate without a preceding high surrogate */
                    ch = UNKNOWN_UNICODE;
                    break;
                }
                if ( srclen < 2 ) {
                    return XERCES_ICONV_EINVAL;
                }
                p = (const Uint8 *)src;
                W2 = (Uint16)(((Uint16)p[hi] << 8) | (Uint16)p[lo]);
                src += 2;
                srclen -= 2;
                if ( W2 < 0xDC00 || W2 > 0xDFFF ) {
                    /* High surrogate not followed by a low surrogate */
                    ch = UNKNOWN_UNICODE;
                    break;
                }
                ch = (((Uint32)(W1 & 0x3FF) << 10) |
                      (Uint32)(W2 & 0x3FF)) + 0x10000;
            }
            break;
        case ENCODING_UTF32BE:
        case ENCODING_UTF32LE:
            {
                const Uint8 *p = (const Uint8 *)src;
                if ( srclen < 4 ) {
                    return XERCES_ICONV_EINVAL;
                }
                if ( cd->src_fmt == ENCODING_UTF32BE ) {
                    ch = ((Uint32)p[0] << 24) |
                         ((Uint32)p[1] << 16) |
                         ((Uint32)p[2] << 8) |
                          (Uint32)p[3];
                } else {
                    ch = ((Uint32)p[3] << 24) |
                         ((Uint32)p[2] << 16) |
                         ((Uint32)p[1] << 8) |
                          (Uint32)p[0];
                }
                src += 4;
                srclen -= 4;
            }
            break;
        case ENCODING_UCS2:
            {
                Uint16 unit;
                if ( srclen < 2 ) {
                    return XERCES_ICONV_EINVAL;
                }
                /* Host byte order; memcpy because src may be unaligned */
                memcpy(&unit, src, sizeof(unit));
                ch = unit;
                src += 2;
                srclen -= 2;
            }
            break;
        case ENCODING_UCS4:
            {
                Uint32 unit;
                if ( srclen < 4 ) {
                    return XERCES_ICONV_EINVAL;
                }
                /* Host byte order; memcpy because src may be unaligned */
                memcpy(&unit, src, sizeof(unit));
                ch = unit;
                src += 4;
                srclen -= 4;
            }
            break;
        default:
            /* No decoder for this format: nothing would be consumed, so
               stop instead of looping. */
            return XERCES_ICONV_ERROR;
        }

        /* Encode a character */
        switch ( cd->dst_fmt ) {
        case ENCODING_ASCII:
            {
                Uint8 *p = (Uint8 *)dst;
                if ( dstlen < 1 ) {
                    return XERCES_ICONV_E2BIG;
                }
                if ( ch > 0x7F ) {
                    *p = UNKNOWN_ASCII;
                } else {
                    *p = (Uint8)ch;
                }
                ++dst;
                --dstlen;
            }
            break;
        case ENCODING_LATIN1:
            {
                Uint8 *p = (Uint8 *)dst;
                if ( dstlen < 1 ) {
                    return XERCES_ICONV_E2BIG;
                }
                if ( ch > 0xFF ) {
                    *p = UNKNOWN_ASCII;
                } else {
                    *p = (Uint8)ch;
                }
                ++dst;
                --dstlen;
            }
            break;
        case ENCODING_UTF8: /* RFC 3629 */
            {
                Uint8 *p = (Uint8 *)dst;
                if ( ch > 0x10FFFF ) {
                    ch = UNKNOWN_UNICODE;
                }
                /* ch is at most 0x10FFFF here, so four bytes always suffice */
                if ( ch <= 0x7F ) {
                    if ( dstlen < 1 ) {
                        return XERCES_ICONV_E2BIG;
                    }
                    *p = (Uint8)ch;
                    ++dst;
                    --dstlen;
                } else if ( ch <= 0x7FF ) {
                    if ( dstlen < 2 ) {
                        return XERCES_ICONV_E2BIG;
                    }
                    p[0] = 0xC0 | (Uint8)((ch >> 6) & 0x1F);
                    p[1] = 0x80 | (Uint8)(ch & 0x3F);
                    dst += 2;
                    dstlen -= 2;
                } else if ( ch <= 0xFFFF ) {
                    if ( dstlen < 3 ) {
                        return XERCES_ICONV_E2BIG;
                    }
                    p[0] = 0xE0 | (Uint8)((ch >> 12) & 0x0F);
                    p[1] = 0x80 | (Uint8)((ch >> 6) & 0x3F);
                    p[2] = 0x80 | (Uint8)(ch & 0x3F);
                    dst += 3;
                    dstlen -= 3;
                } else {
                    if ( dstlen < 4 ) {
                        return XERCES_ICONV_E2BIG;
                    }
                    p[0] = 0xF0 | (Uint8)((ch >> 18) & 0x07);
                    p[1] = 0x80 | (Uint8)((ch >> 12) & 0x3F);
                    p[2] = 0x80 | (Uint8)((ch >> 6) & 0x3F);
                    p[3] = 0x80 | (Uint8)(ch & 0x3F);
                    dst += 4;
                    dstlen -= 4;
                }
            }
            break;
        case ENCODING_UTF16BE: /* RFC 2781 */
        case ENCODING_UTF16LE:
            {
                Uint8 *p = (Uint8 *)dst;
                /* Index of the high and low byte inside each 16-bit unit */
                const int hi = (cd->dst_fmt == ENCODING_UTF16BE) ? 0 : 1;
                const int lo = 1 - hi;

                if ( ch > 0x10FFFF ) {
                    ch = UNKNOWN_UNICODE;
                }
                if ( ch < 0x10000 ) {
                    if ( dstlen < 2 ) {
                        return XERCES_ICONV_E2BIG;
                    }
                    p[hi] = (Uint8)(ch >> 8);
                    p[lo] = (Uint8)ch;
                    dst += 2;
                    dstlen -= 2;
                } else {
                    /* Split into a surrogate pair */
                    Uint16 W1, W2;
                    if ( dstlen < 4 ) {
                        return XERCES_ICONV_E2BIG;
                    }
                    ch = ch - 0x10000;
                    W1 = 0xD800 | (Uint16)((ch >> 10) & 0x3FF);
                    W2 = 0xDC00 | (Uint16)(ch & 0x3FF);
                    p[hi] = (Uint8)(W1 >> 8);
                    p[lo] = (Uint8)W1;
                    p[2 + hi] = (Uint8)(W2 >> 8);
                    p[2 + lo] = (Uint8)W2;
                    dst += 4;
                    dstlen -= 4;
                }
            }
            break;
        case ENCODING_UTF32BE:
        case ENCODING_UTF32LE:
            {
                Uint8 *p = (Uint8 *)dst;
                if ( ch > 0x10FFFF ) {
                    ch = UNKNOWN_UNICODE;
                }
                if ( dstlen < 4 ) {
                    return XERCES_ICONV_E2BIG;
                }
                if ( cd->dst_fmt == ENCODING_UTF32BE ) {
                    p[0] = (Uint8)(ch >> 24);
                    p[1] = (Uint8)(ch >> 16);
                    p[2] = (Uint8)(ch >> 8);
                    p[3] = (Uint8)ch;
                } else {
                    p[3] = (Uint8)(ch >> 24);
                    p[2] = (Uint8)(ch >> 16);
                    p[1] = (Uint8)(ch >> 8);
                    p[0] = (Uint8)ch;
                }
                dst += 4;
                dstlen -= 4;
            }
            break;
        case ENCODING_UCS2:
            {
                Uint16 unit;
                if ( ch > 0xFFFF ) {
                    ch = UNKNOWN_UNICODE;
                }
                if ( dstlen < 2 ) {
                    return XERCES_ICONV_E2BIG;
                }
                /* Host byte order; memcpy because dst may be unaligned */
                unit = (Uint16)ch;
                memcpy(dst, &unit, sizeof(unit));
                dst += 2;
                dstlen -= 2;
            }
            break;
        case ENCODING_UCS4:
            {
                if ( ch > 0x7FFFFFFF ) {
                    ch = UNKNOWN_UNICODE;
                }
                if ( dstlen < 4 ) {
                    return XERCES_ICONV_E2BIG;
                }
                /* Host byte order; memcpy because dst may be unaligned */
                memcpy(dst, &ch, sizeof(ch));
                dst += 4;
                dstlen -= 4;
            }
            break;
        default:
            /* No encoder for this format */
            return XERCES_ICONV_ERROR;
        }

        /* Update state: the character is now fully converted */
        *inbuf = src;
        *inbytesleft = srclen;
        *outbuf = dst;
        *outbytesleft = dstlen;
        ++total;
    }
    return total;
}
/*
 * Release a handle obtained from XERCES_iconv_open().
 *
 * NULL and the failure value (XERCES_iconv_t)-1 are accepted and ignored.
 * Always returns 0.
 */
int XERCES_iconv_close(XERCES_iconv_t cd)
{
    const int is_real_handle = (cd != NULL) && (cd != (XERCES_iconv_t)-1);

    if ( is_real_handle ) {
        free(cd);
    }
    return 0;
}
/*
 * Convert a whole buffer from `fromcode` to `tocode` into newly allocated
 * memory.
 *
 * tocode, fromcode  encoding names as for XERCES_iconv_open(); if opening
 *                   fails, NULL/empty names are replaced by "UTF-8" and the
 *                   open is tried once more.
 * inbuf             input bytes (NULL is treated as empty input).
 * inbytesleft       number of input bytes.  The output is only terminated
 *                   by what is converted, so include the input's
 *                   terminator in this count to get a terminated result.
 *
 * Returns a buffer allocated with malloc()/realloc() that the caller must
 * free(), or NULL if no converter could be opened or memory ran out.  If
 * the converter reports EINVAL or a generic error, conversion stops and
 * the output produced so far is returned.
 */
char *XERCES_iconv_string(const char *tocode, const char *fromcode, const char *inbuf, size_t inbytesleft)
{
    XERCES_iconv_t cd;
    char *string;
    size_t stringsize;
    char *outbuf;
    size_t outbytesleft;
    size_t retCode;

    cd = XERCES_iconv_open(tocode, fromcode);
    if ( cd == (XERCES_iconv_t)-1 ) {
        /* See if we can recover here (fixes iconv on Solaris 11) */
        if ( !tocode || !*tocode ) {
            tocode = "UTF-8";
        }
        if ( !fromcode || !*fromcode ) {
            fromcode = "UTF-8";
        }
        cd = XERCES_iconv_open(tocode, fromcode);
    }
    if ( cd == (XERCES_iconv_t)-1 ) {
        return NULL;
    }

    if ( !inbuf ) {
        /* XERCES_iconv() treats a NULL input as a reset and consumes
           nothing, so the loop below would never finish. */
        inbytesleft = 0;
    }

    stringsize = inbytesleft > 4 ? inbytesleft : 4;
    string = malloc(stringsize);
    if ( !string ) {
        XERCES_iconv_close(cd);
        return NULL;
    }
    outbuf = string;
    outbytesleft = stringsize;
    /* Pre-clear up to one UCS-4 unit so an empty result is terminated */
    memset(outbuf, 0, 4);

    while ( inbytesleft > 0 ) {
        retCode = XERCES_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
        switch (retCode) {
        case XERCES_ICONV_E2BIG:
            {
                /* Remember the write offset before the block may move */
                const size_t used = (size_t)(outbuf - string);
                char *grown = NULL;

                if ( stringsize <= SIZE_MAX / 2 ) {
                    grown = realloc(string, stringsize * 2);
                }
                if ( !grown ) {
                    /* realloc() leaves the old block allocated on failure */
                    free(string);
                    XERCES_iconv_close(cd);
                    return NULL;
                }
                stringsize *= 2;
                string = grown;
                outbuf = string + used;
                outbytesleft = stringsize - used;
                memset(outbuf, 0, 4);
            }
            break;
        case XERCES_ICONV_EILSEQ:
            /* Try skipping some input data - not perfect, but... */
            ++inbuf;
            --inbytesleft;
            break;
        case XERCES_ICONV_EINVAL:
        case XERCES_ICONV_ERROR:
            /* We can't continue... */
            inbytesleft = 0;
            break;
        default:
            /* Characters were converted; keep going */
            break;
        }
    }
    XERCES_iconv_close(cd);
    return string;
}