diff options
-rw-r--r-- | encoding.c | 563 | ||||
-rw-r--r-- | include/libxml/encoding.h | 9 |
2 files changed, 194 insertions, 378 deletions
@@ -171,7 +171,9 @@ closeIcuConverter(uconv_t *conv) * * Take a block of ASCII chars in and try to convert it to an UTF-8 * block of chars out. - * Returns 0 if success, or -1 otherwise + * + * Returns the number of bytes written or an XML_ENC_ERR code. + * * The value of @inlen after return is the number of octets consumed * if the return value is positive, else unpredictable. * The value of @outlen after return is the number of octets produced. @@ -197,7 +199,7 @@ asciiToUTF8(unsigned char* out, int *outlen, } else { *outlen = out - outstart; *inlen = processed - base; - return(-2); + return(XML_ENC_ERR_INPUT); } processed = (const unsigned char*) in; @@ -218,7 +220,8 @@ asciiToUTF8(unsigned char* out, int *outlen, * Take a block of UTF-8 chars in and try to convert it to an ASCII * block of chars out. * - * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise + * Returns the number of bytes written or an XML_ENC_ERR code. + * * The value of @inlen after return is the number of octets consumed * if the return value is positive, else unpredictable. * The value of @outlen after return is the number of octets produced. @@ -234,7 +237,8 @@ UTF8Toascii(unsigned char* out, int *outlen, unsigned int c, d; int trailing; - if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1); + if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) + return(XML_ENC_ERR_INTERNAL); if (in == NULL) { /* * initialization nothing to do @@ -252,7 +256,7 @@ UTF8Toascii(unsigned char* out, int *outlen, /* trailing byte in leading position */ *outlen = out - outstart; *inlen = processed - instart; - return(-2); + return(XML_ENC_ERR_INPUT); } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; } else if (d < 0xF0) { c= d & 0x0F; trailing= 2; } else if (d < 0xF8) { c= d & 0x07; trailing= 3; } @@ -260,7 +264,7 @@ UTF8Toascii(unsigned char* out, int *outlen, /* no chance for this in Ascii */ *outlen = out - outstart; *inlen = processed - instart; - return(-2); + return(XML_ENC_ERR_INPUT); } if (inend - in < trailing) { @@ -283,7 +287,7 @@ UTF8Toascii(unsigned char* out, int *outlen, /* no chance for this in Ascii */ *outlen = out - outstart; *inlen = processed - instart; - return(-2); + return(XML_ENC_ERR_INPUT); } processed = in; } @@ -302,7 +306,9 @@ UTF8Toascii(unsigned char* out, int *outlen, * * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8 * block of chars out. - * Returns the number of bytes written if success, or -1 otherwise + * + * Returns the number of bytes written or an XML_ENC_ERR code. + * * The value of @inlen after return is the number of octets consumed * if the return value is positive, else unpredictable. * The value of @outlen after return is the number of octets produced. @@ -317,7 +323,7 @@ isolat1ToUTF8(unsigned char* out, int *outlen, const unsigned char* instop; if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL)) - return(-1); + return(XML_ENC_ERR_INTERNAL); outend = out + *outlen; inend = in + (*inlen); @@ -351,7 +357,8 @@ isolat1ToUTF8(unsigned char* out, int *outlen, * * No op copy operation for UTF8 handling. * - * Returns the number of bytes written, or -1 if lack of space. + * Returns the number of bytes written or an XML_ENC_ERR code. + * * The value of *inlen after return is the number of octets consumed * if the return value is positive, else unpredictable. */ @@ -362,7 +369,7 @@ UTF8ToUTF8(unsigned char* out, int *outlen, int len; if ((out == NULL) || (outlen == NULL) || (inlenb == NULL)) - return(-1); + return(XML_ENC_ERR_INTERNAL); if (inb == NULL) { /* inb == NULL means output is initialized. */ *outlen = 0; @@ -375,7 +382,7 @@ UTF8ToUTF8(unsigned char* out, int *outlen, len = *outlen; } if (len < 0) - return(-1); + return(XML_ENC_ERR_INTERNAL); /* * FIXME: Conversion functions must assure valid UTF-8, so we have @@ -401,8 +408,8 @@ UTF8ToUTF8(unsigned char* out, int *outlen, * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1 * block of chars out. * - * Returns the number of bytes written if success, -2 if the transcoding fails, - or -1 otherwise + * Returns the number of bytes written or an XML_ENC_ERR code. + * * The value of @inlen after return is the number of octets consumed * if the return value is positive, else unpredictable. * The value of @outlen after return is the number of octets produced. @@ -418,7 +425,8 @@ UTF8Toisolat1(unsigned char* out, int *outlen, unsigned int c, d; int trailing; - if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1); + if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) + return(XML_ENC_ERR_INTERNAL); if (in == NULL) { /* * initialization nothing to do @@ -436,7 +444,7 @@ UTF8Toisolat1(unsigned char* out, int *outlen, /* trailing byte in leading position */ *outlen = out - outstart; *inlen = processed - instart; - return(-2); + return(XML_ENC_ERR_INPUT); } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; } else if (d < 0xF0) { c= d & 0x0F; trailing= 2; } else if (d < 0xF8) { c= d & 0x07; trailing= 3; } @@ -444,7 +452,7 @@ UTF8Toisolat1(unsigned char* out, int *outlen, /* no chance for this in IsoLat1 */ *outlen = out - outstart; *inlen = processed - instart; - return(-2); + return(XML_ENC_ERR_INPUT); } if (inend - in < trailing) { @@ -457,7 +465,7 @@ UTF8Toisolat1(unsigned char* out, int *outlen, if (((d= *in++) & 0xC0) != 0x80) { *outlen = out - outstart; *inlen = processed - instart; - return(-2); + return(XML_ENC_ERR_INPUT); } c <<= 6; c |= d & 0x3F; @@ -472,7 +480,7 @@ UTF8Toisolat1(unsigned char* out, int *outlen, /* no chance for this in IsoLat1 */ *outlen = out - outstart; *inlen = processed - instart; - return(-2); + return(XML_ENC_ERR_INPUT); } processed = in; } @@ -494,10 +502,10 @@ UTF8Toisolat1(unsigned char* out, int *outlen, * is the same between the native type of this machine and the * inputed one. * - * Returns the number of bytes written, or -1 if lack of space, or -2 - * if the transcoding fails (if *in is not a valid utf16 string) - * The value of *inlen after return is the number of octets consumed - * if the return value is positive, else unpredictable. + * Returns the number of bytes written or an XML_ENC_ERR code. + * + * The value of *inlen after return is the number of octets consumed + * if the return value is positive, else unpredictable. */ static int UTF16LEToUTF8(unsigned char* out, int *outlen, @@ -551,7 +559,7 @@ UTF16LEToUTF8(unsigned char* out, int *outlen, else { *outlen = out - outstart; *inlenb = processed - inb; - return(-2); + return(XML_ENC_ERR_INPUT); } } @@ -586,8 +594,7 @@ UTF16LEToUTF8(unsigned char* out, int *outlen, * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE * block of chars out. * - * Returns the number of bytes written, or -1 if lack of space, or -2 - * if the transcoding failed. + * Returns the number of bytes written or an XML_ENC_ERR code. */ static int UTF8ToUTF16LE(unsigned char* outb, int *outlen, @@ -605,7 +612,8 @@ UTF8ToUTF16LE(unsigned char* outb, int *outlen, unsigned short tmp1, tmp2; /* UTF16LE encoding has no BOM */ - if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1); + if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) + return(XML_ENC_ERR_INTERNAL); if (in == NULL) { *outlen = 0; *inlen = 0; @@ -620,7 +628,7 @@ UTF8ToUTF16LE(unsigned char* outb, int *outlen, /* trailing byte in leading position */ *outlen = (out - outstart) * 2; *inlen = processed - instart; - return(-2); + return(XML_ENC_ERR_INPUT); } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; } else if (d < 0xF0) { c= d & 0x0F; trailing= 2; } else if (d < 0xF8) { c= d & 0x07; trailing= 3; } @@ -628,7 +636,7 @@ UTF8ToUTF16LE(unsigned char* outb, int *outlen, /* no chance for this in UTF-16 */ *outlen = (out - outstart) * 2; *inlen = processed - instart; - return(-2); + return(XML_ENC_ERR_INPUT); } if (inend - in < trailing) { @@ -695,8 +703,7 @@ UTF8ToUTF16LE(unsigned char* outb, int *outlen, * Take a block of UTF-8 chars in and try to convert it to an UTF-16 * block of chars out. * - * Returns the number of bytes written, or -1 if lack of space, or -2 - * if the transcoding failed. + * Returns the number of bytes written or an XML_ENC_ERR code. */ static int UTF8ToUTF16(unsigned char* outb, int *outlen, @@ -737,10 +744,10 @@ UTF8ToUTF16(unsigned char* outb, int *outlen, * is the same between the native type of this machine and the * inputed one. * - * Returns the number of bytes written, or -1 if lack of space, or -2 - * if the transcoding fails (if *in is not a valid utf16 string) + * Returns the number of bytes written or an XML_ENC_ERR code. + * * The value of *inlen after return is the number of octets consumed - * if the return value is positive, else unpredictable. + * if the return value is positive, else unpredictable. */ static int UTF16BEToUTF8(unsigned char* out, int *outlen, @@ -794,7 +801,7 @@ UTF16BEToUTF8(unsigned char* out, int *outlen, else { *outlen = out - outstart; *inlenb = processed - inb; - return(-2); + return(XML_ENC_ERR_INPUT); } } @@ -829,8 +836,7 @@ UTF16BEToUTF8(unsigned char* out, int *outlen, * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE * block of chars out. * - * Returns the number of byte written, or -1 by lack of space, or -2 - * if the transcoding failed. + * Returns the number of bytes written or an XML_ENC_ERR code. */ static int UTF8ToUTF16BE(unsigned char* outb, int *outlen, @@ -848,7 +854,8 @@ UTF8ToUTF16BE(unsigned char* outb, int *outlen, unsigned short tmp1, tmp2; /* UTF-16BE has no BOM */ - if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1); + if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) + return(XML_ENC_ERR_INTERNAL); if (in == NULL) { *outlen = 0; *inlen = 0; @@ -863,7 +870,7 @@ UTF8ToUTF16BE(unsigned char* outb, int *outlen, /* trailing byte in leading position */ *outlen = out - outstart; *inlen = processed - instart; - return(-2); + return(XML_ENC_ERR_INPUT); } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; } else if (d < 0xF0) { c= d & 0x0F; trailing= 2; } else if (d < 0xF8) { c= d & 0x07; trailing= 3; } @@ -871,7 +878,7 @@ UTF8ToUTF16BE(unsigned char* outb, int *outlen, /* no chance for this in UTF-16 */ *outlen = out - outstart; *inlen = processed - instart; - return(-2); + return(XML_ENC_ERR_INPUT); } if (inend - in < trailing) { @@ -1890,11 +1897,7 @@ xmlFindCharEncodingHandler(const char *name) { * @in: a pointer to an array of input bytes * @inlen: the length of @in * - * Returns 0 if success, or - * -1 by lack of space, or - * -2 if the transcoding fails (for *in is not valid utf8 string or - * the result of transformation can't fit into the encoding we want), or - * -3 if there the last byte can't form a single output char. + * Returns an XML_ENC_ERR code. * * The value of @inlen after return is the number of octets consumed * as the return value is positive, else unpredictable. @@ -1910,7 +1913,7 @@ xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen, if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) { if (outlen != NULL) *outlen = 0; - return(-1); + return(XML_ENC_ERR_INTERNAL); } icv_inlen = *inlen; icv_outlen = *outlen; @@ -1920,27 +1923,16 @@ xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen, ret = iconv(cd, (void *) &icv_in, &icv_inlen, &icv_out, &icv_outlen); *inlen -= icv_inlen; *outlen -= icv_outlen; - if ((icv_inlen != 0) || (ret == (size_t) -1)) { -#ifdef EILSEQ - if (errno == EILSEQ) { - return -2; - } else -#endif -#ifdef E2BIG - if (errno == E2BIG) { - return -1; - } else -#endif -#ifdef EINVAL - if (errno == EINVAL) { - return -3; - } else -#endif - { - return -3; - } + if (ret == (size_t) -1) { + if (errno == EILSEQ) + return(XML_ENC_ERR_INPUT); + if (errno == E2BIG) + return(XML_ENC_ERR_SPACE); + if (errno == EINVAL) + return(XML_ENC_ERR_PARTIAL); + return(XML_ENC_ERR_INTERNAL); } - return 0; + return(XML_ENC_ERR_SUCCESS); } #endif /* LIBXML_ICONV_ENABLED */ @@ -1961,11 +1953,7 @@ xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen, * @inlen: the length of @in * @flush: if true, indicates end of input * - * Returns 0 if success, or - * -1 by lack of space, or - * -2 if the transcoding fails (for *in is not valid utf8 string or - * the result of transformation can't fit into the encoding we want), or - * -3 if there the last byte can't form a single output char. + * Returns an XML_ENC_ERR code. * * The value of @inlen after return is the number of octets consumed * as the return value is positive, else unpredictable. @@ -1980,7 +1968,7 @@ xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen, if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) { if (outlen != NULL) *outlen = 0; - return(-1); + return(XML_ENC_ERR_INTERNAL); } if (toUnicode) { @@ -2002,13 +1990,13 @@ xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen, /* reset pivot buf if this is the last call for input (flush==TRUE) */ if (flush) cd->pivot_source = cd->pivot_target = cd->pivot_buf; - return 0; + return(XML_ENC_ERR_SUCCESS); } if (err == U_BUFFER_OVERFLOW_ERROR) - return -1; + return(XML_ENC_ERR_SPACE); if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND) - return -2; - return -3; + return(XML_ENC_ERR_INPUT); + return(XML_ENC_ERR_PARTIAL); } #endif /* LIBXML_ICU_ENABLED */ @@ -2027,11 +2015,7 @@ xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen, * @inlen: the length of @in * @flush: flush (ICU-related) * - * Returns 0 if success, or - * -1 by lack of space, or - * -2 if the transcoding fails (for *in is not valid utf8 string or - * the result of transformation can't fit into the encoding we want), or - * -3 if there the last byte can't form a single output char. + * Returns an XML_ENC_ERR code. * * The value of @inlen after return is the number of octets consumed * as the return value is 0, else unpredictable. @@ -2046,7 +2030,7 @@ xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out, if (handler->input != NULL) { ret = handler->input(out, outlen, in, inlen); if (ret > 0) - ret = 0; + ret = XML_ENC_ERR_SUCCESS; } #ifdef LIBXML_ICONV_ENABLED else if (handler->iconv_in != NULL) { @@ -2062,9 +2046,13 @@ xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out, else { *outlen = 0; *inlen = 0; - ret = -2; + ret = XML_ENC_ERR_INTERNAL; } + /* Ignore space and partial errors when reading. */ + if ((ret == XML_ENC_ERR_SPACE) || (ret == XML_ENC_ERR_PARTIAL)) + ret = XML_ENC_ERR_SUCCESS; + return(ret); } @@ -2076,12 +2064,7 @@ xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out, * @in: a pointer to an array of input bytes * @inlen: the length of @in * - * Returns 0 if success, or - * -1 by lack of space, or - * -2 if the transcoding fails (for *in is not valid utf8 string or - * the result of transformation can't fit into the encoding we want), or - * -3 if there the last byte can't form a single output char. - * -4 if no output function was found. + * Returns an XML_ENC_ERR code. * * The value of @inlen after return is the number of octets consumed * as the return value is 0, else unpredictable. @@ -2095,7 +2078,7 @@ xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out, if (handler->output != NULL) { ret = handler->output(out, outlen, in, inlen); if (ret > 0) - ret = 0; + ret = XML_ENC_ERR_SUCCESS; } #ifdef LIBXML_ICONV_ENABLED else if (handler->iconv_out != NULL) { @@ -2111,15 +2094,19 @@ xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out, else { *outlen = 0; *inlen = 0; - ret = -4; + ret = XML_ENC_ERR_INTERNAL; } + /* We shouldn't generate partial sequences when writing. */ + if (ret == XML_ENC_ERR_PARTIAL) + ret = XML_ENC_ERR_INTERNAL; + return(ret); } /** * xmlCharEncFirstLine: - * @handler: char encoding transformation data structure + * @handler: char encoding transformation data structure * @out: an xmlBuffer for the output. * @in: an xmlBuffer for the input * @@ -2138,10 +2125,7 @@ xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out, * * Generic front-end for the encoding handler on parser input * - * Returns the number of byte written if success, or - * -1 general error - * -2 if the transcoding fails (for *in is not valid utf8 string or - * the result of transformation can't fit into the encoding we want), or + * Returns the number of bytes written or an XML_ENC_ERR code. */ int xmlCharEncInput(xmlParserInputBufferPtr input, int flush) @@ -2156,7 +2140,7 @@ xmlCharEncInput(xmlParserInputBufferPtr input, int flush) if ((input == NULL) || (input->encoder == NULL) || (input->buffer == NULL) || (input->raw == NULL)) - return (-1); + return(XML_ENC_ERR_INTERNAL); out = input->buffer; in = input->raw; @@ -2168,7 +2152,7 @@ xmlCharEncInput(xmlParserInputBufferPtr input, int flush) written = xmlBufAvail(out); if (toconv * 2 >= written) { if (xmlBufGrow(out, toconv * 2) < 0) - return (-1); + return(XML_ENC_ERR_MEMORY); written = xmlBufAvail(out); } if ((written > 128 * 1024) && (flush == 0)) @@ -2180,49 +2164,7 @@ xmlCharEncInput(xmlParserInputBufferPtr input, int flush) xmlBufContent(in), &c_in, flush); xmlBufShrink(in, c_in); xmlBufAddLen(out, c_out); - if (ret == -1) - ret = -3; - switch (ret) { - case 0: -#ifdef DEBUG_ENCODING - xmlGenericError(xmlGenericErrorContext, - "converted %d bytes to %d bytes of input\n", - c_in, c_out); -#endif - break; - case -1: -#ifdef DEBUG_ENCODING - xmlGenericError(xmlGenericErrorContext, - "converted %d bytes to %d bytes of input, %d left\n", - c_in, c_out, (int)xmlBufUse(in)); -#endif - break; - case -3: -#ifdef DEBUG_ENCODING - xmlGenericError(xmlGenericErrorContext, - "converted %d bytes to %d bytes of input, %d left\n", - c_in, c_out, (int)xmlBufUse(in)); -#endif - break; - case -2: { - char buf[50]; - const xmlChar *content = xmlBufContent(in); - - snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X", - content[0], content[1], - content[2], content[3]); - buf[49] = 0; - xmlEncodingErr(XML_I18N_CONV_FAILED, - "input conversion failed due to input error, bytes %s\n", - buf); - } - } - /* - * Ignore when input buffer is not on a boundary - */ - if (ret == -3) - ret = 0; return (c_out? c_out : ret); } @@ -2234,10 +2176,7 @@ xmlCharEncInput(xmlParserInputBufferPtr input, int flush) * * Generic front-end for the encoding handler input function * - * Returns the number of byte written if success, or - * -1 general error - * -2 if the transcoding fails (for *in is not valid utf8 string or - * the result of transformation can't fit into the encoding we want), or + * Returns the number of bytes written or an XML_ENC_ERR code. */ int xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out, @@ -2248,11 +2187,11 @@ xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out, int toconv; if (handler == NULL) - return (-1); + return(XML_ENC_ERR_INTERNAL); if (out == NULL) - return (-1); + return(XML_ENC_ERR_INTERNAL); if (in == NULL) - return (-1); + return(XML_ENC_ERR_INTERNAL); toconv = in->use; if (toconv == 0) @@ -2267,48 +2206,7 @@ xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out, xmlBufferShrink(in, toconv); out->use += written; out->content[out->use] = 0; - if (ret == -1) - ret = -3; - switch (ret) { - case 0: -#ifdef DEBUG_ENCODING - xmlGenericError(xmlGenericErrorContext, - "converted %d bytes to %d bytes of input\n", - toconv, written); -#endif - break; - case -1: -#ifdef DEBUG_ENCODING - xmlGenericError(xmlGenericErrorContext, - "converted %d bytes to %d bytes of input, %d left\n", - toconv, written, in->use); -#endif - break; - case -3: -#ifdef DEBUG_ENCODING - xmlGenericError(xmlGenericErrorContext, - "converted %d bytes to %d bytes of input, %d left\n", - toconv, written, in->use); -#endif - break; - case -2: { - char buf[50]; - - snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X", - in->content[0], in->content[1], - in->content[2], in->content[3]); - buf[49] = 0; - xmlEncodingErr(XML_I18N_CONV_FAILED, - "input conversion failed due to input error, bytes %s\n", - buf); - } - } - /* - * Ignore when input buffer is not on a boundary - */ - if (ret == -3) - ret = 0; return (written? written : ret); } @@ -2325,10 +2223,7 @@ xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out, * In case of UTF8 sequence conversion errors for the given encoder, * the content will be automatically remapped to a CharRef sequence. * - * Returns the number of byte written if success, or - * -1 general error - * -2 if the transcoding fails (for *in is not valid utf8 string or - * the result of transformation can't fit into the encoding we want), or + * Returns the number of bytes written or an XML_ENC_ERR code. */ int xmlCharEncOutput(xmlOutputBufferPtr output, int init) @@ -2344,7 +2239,7 @@ xmlCharEncOutput(xmlOutputBufferPtr output, int init) if ((output == NULL) || (output->encoder == NULL) || (output->buffer == NULL) || (output->conv == NULL)) - return (-1); + return(XML_ENC_ERR_INTERNAL); out = output->conv; in = output->buffer; @@ -2391,92 +2286,50 @@ retry: xmlBufShrink(in, c_in); xmlBufAddLen(out, c_out); writtentot += c_out; - if (ret == -1) { - if (c_out > 0) { - /* Can be a limitation of iconv or uconv */ - goto retry; - } - ret = -3; - } + + if (ret == XML_ENC_ERR_SPACE) + goto retry; /* * Attempt to handle error cases */ - switch (ret) { - case 0: -#ifdef DEBUG_ENCODING - xmlGenericError(xmlGenericErrorContext, - "converted %d bytes to %d bytes of output\n", - c_in, c_out); -#endif - break; - case -1: -#ifdef DEBUG_ENCODING - xmlGenericError(xmlGenericErrorContext, - "output conversion failed by lack of space\n"); -#endif - break; - case -3: -#ifdef DEBUG_ENCODING - xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n", - c_in, c_out, (int) xmlBufUse(in)); -#endif - break; - case -4: - xmlEncodingErr(XML_I18N_NO_OUTPUT, - "xmlCharEncOutFunc: no output function !\n", NULL); - ret = -1; - break; - case -2: { - xmlChar charref[20]; - int len = xmlBufUse(in); - xmlChar *content = xmlBufContent(in); - int cur, charrefLen; - - cur = xmlGetUTF8Char(content, &len); - if (cur <= 0) - break; + if (ret == XML_ENC_ERR_INPUT) { + xmlChar charref[20]; + int len = xmlBufUse(in); + xmlChar *content = xmlBufContent(in); + int cur, charrefLen; + + cur = xmlGetUTF8Char(content, &len); + if (cur <= 0) + return(ret); #ifdef DEBUG_ENCODING - xmlGenericError(xmlGenericErrorContext, - "handling output conversion error\n"); - xmlGenericError(xmlGenericErrorContext, - "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", - content[0], content[1], - content[2], content[3]); + xmlGenericError(xmlGenericErrorContext, + "handling output conversion error\n"); + xmlGenericError(xmlGenericErrorContext, + "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", + content[0], content[1], + content[2], content[3]); #endif - /* - * Removes the UTF8 sequence, and replace it by a charref - * and continue the transcoding phase, hoping the error - * did not mangle the encoder state. - */ - charrefLen = snprintf((char *) &charref[0], sizeof(charref), - "&#%d;", cur); - xmlBufShrink(in, len); - xmlBufGrow(out, charrefLen * 4); - c_out = xmlBufAvail(out); - c_in = charrefLen; - ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out, - charref, &c_in); - - if ((ret < 0) || (c_in != charrefLen)) { - char buf[50]; - - snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X", - content[0], content[1], - content[2], content[3]); - buf[49] = 0; - xmlEncodingErr(XML_I18N_CONV_FAILED, - "output conversion failed due to conv error, bytes %s\n", - buf); - content[0] = ' '; - break; - } + /* + * Removes the UTF8 sequence, and replace it by a charref + * and continue the transcoding phase, hoping the error + * did not mangle the encoder state. + */ + charrefLen = snprintf((char *) &charref[0], sizeof(charref), + "&#%d;", cur); + xmlBufShrink(in, len); + xmlBufGrow(out, charrefLen * 4); + c_out = xmlBufAvail(out); + c_in = charrefLen; + ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out, + charref, &c_in); + if ((ret < 0) || (c_in != charrefLen)) + return(XML_ENC_ERR_INTERNAL); - xmlBufAddLen(out, c_out); - writtentot += c_out; - goto retry; - } + xmlBufAddLen(out, c_out); + writtentot += c_out; + goto retry; } return(writtentot ? writtentot : ret); } @@ -2495,10 +2348,7 @@ retry: * In case of UTF8 sequence conversion errors for the given encoder, * the content will be automatically remapped to a CharRef sequence. * - * Returns the number of byte written if success, or - * -1 general error - * -2 if the transcoding fails (for *in is not valid utf8 string or - * the result of transformation can't fit into the encoding we want), or + * Returns the number of bytes written or an XML_ENC_ERR code. */ int xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out, @@ -2508,8 +2358,8 @@ xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out, int writtentot = 0; int toconv; - if (handler == NULL) return(-1); - if (out == NULL) return(-1); + if (handler == NULL) return(XML_ENC_ERR_INTERNAL); + if (out == NULL) return(XML_ENC_ERR_INTERNAL); retry: @@ -2551,93 +2401,51 @@ retry: out->use += written; writtentot += written; out->content[out->use] = 0; - if (ret == -1) { - if (written > 0) { - /* Can be a limitation of iconv or uconv */ - goto retry; - } - ret = -3; - } + + if (ret == XML_ENC_ERR_SPACE) + goto retry; /* * Attempt to handle error cases */ - switch (ret) { - case 0: -#ifdef DEBUG_ENCODING - xmlGenericError(xmlGenericErrorContext, - "converted %d bytes to %d bytes of output\n", - toconv, written); -#endif - break; - case -1: -#ifdef DEBUG_ENCODING - xmlGenericError(xmlGenericErrorContext, - "output conversion failed by lack of space\n"); -#endif - break; - case -3: -#ifdef DEBUG_ENCODING - xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n", - toconv, written, in->use); -#endif - break; - case -4: - xmlEncodingErr(XML_I18N_NO_OUTPUT, - "xmlCharEncOutFunc: no output function !\n", NULL); - ret = -1; - break; - case -2: { - xmlChar charref[20]; - int len = in->use; - const xmlChar *utf = (const xmlChar *) in->content; - int cur, charrefLen; - - cur = xmlGetUTF8Char(utf, &len); - if (cur <= 0) - break; + if (ret == XML_ENC_ERR_INPUT) { + xmlChar charref[20]; + int len = in->use; + const xmlChar *utf = (const xmlChar *) in->content; + int cur, charrefLen; + + cur = xmlGetUTF8Char(utf, &len); + if (cur <= 0) + return(ret); #ifdef DEBUG_ENCODING - xmlGenericError(xmlGenericErrorContext, - "handling output conversion error\n"); - xmlGenericError(xmlGenericErrorContext, - "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", - in->content[0], in->content[1], - in->content[2], in->content[3]); + xmlGenericError(xmlGenericErrorContext, + "handling output conversion error\n"); + xmlGenericError(xmlGenericErrorContext, + "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", + in->content[0], in->content[1], + in->content[2], in->content[3]); #endif - /* - * Removes the UTF8 sequence, and replace it by a charref - * and continue the transcoding phase, hoping the error - * did not mangle the encoder state. - */ - charrefLen = snprintf((char *) &charref[0], sizeof(charref), - "&#%d;", cur); - xmlBufferShrink(in, len); - xmlBufferGrow(out, charrefLen * 4); - written = out->size - out->use - 1; - toconv = charrefLen; - ret = xmlEncOutputChunk(handler, &out->content[out->use], &written, - charref, &toconv); - - if ((ret < 0) || (toconv != charrefLen)) { - char buf[50]; - - snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X", - in->content[0], in->content[1], - in->content[2], in->content[3]); - buf[49] = 0; - xmlEncodingErr(XML_I18N_CONV_FAILED, - "output conversion failed due to conv error, bytes %s\n", - buf); - in->content[0] = ' '; - break; - } + /* + * Removes the UTF8 sequence, and replace it by a charref + * and continue the transcoding phase, hoping the error + * did not mangle the encoder state. + */ + charrefLen = snprintf((char *) &charref[0], sizeof(charref), + "&#%d;", cur); + xmlBufferShrink(in, len); + xmlBufferGrow(out, charrefLen * 4); + written = out->size - out->use - 1; + toconv = charrefLen; + ret = xmlEncOutputChunk(handler, &out->content[out->use], &written, + charref, &toconv); + if ((ret < 0) || (toconv != charrefLen)) + return(XML_ENC_ERR_INTERNAL); - out->use += written; - writtentot += written; - out->content[out->use] = 0; - goto retry; - } + out->use += written; + writtentot += written; + out->content[out->use] = 0; + goto retry; } return(writtentot ? writtentot : ret); } @@ -2761,15 +2569,11 @@ xmlByteConsumed(xmlParserCtxtPtr ctxt) { written = 32000; ret = xmlEncOutputChunk(handler, &convbuf[0], &written, cur, &toconv); - if (ret < 0) { - if (written > 0) - ret = -2; - else - return(-1); - } + if ((ret != XML_ENC_ERR_SUCCESS) && (ret != XML_ENC_ERR_SPACE)) + return(-1); unused += written; cur += toconv; - } while (ret == -2); + } while (ret == XML_ENC_ERR_SPACE); } if (in->buf->rawconsumed < unused) return(-1); @@ -2792,9 +2596,10 @@ xmlByteConsumed(xmlParserCtxtPtr ctxt) { * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-* * block of chars out. * - * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise + * Returns the number of bytes written or an XML_ENC_ERR code. + * * The value of @inlen after return is the number of octets consumed - * as the return value is positive, else unpredictable. + * as the return value is positive, else unpredictable. * The value of @outlen after return is the number of octets consumed. */ static int @@ -2808,7 +2613,7 @@ UTF8ToISO8859x(unsigned char* out, int *outlen, if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (xlattable == NULL)) - return(-1); + return(XML_ENC_ERR_INTERNAL); if (in == NULL) { /* * initialization nothing to do @@ -2826,21 +2631,21 @@ UTF8ToISO8859x(unsigned char* out, int *outlen, /* trailing byte in leading position */ *outlen = out - outstart; *inlen = processed - instart; - return(-2); + return(XML_ENC_ERR_INPUT); } else if (d < 0xE0) { unsigned char c; if (!(in < inend)) { /* trailing byte not in input buffer */ *outlen = out - outstart; *inlen = processed - instart; - return(-3); + return(XML_ENC_ERR_PARTIAL); } c = *in++; if ((c & 0xC0) != 0x80) { /* not a trailing byte */ *outlen = out - outstart; *inlen = processed - instart; - return(-2); + return(XML_ENC_ERR_INPUT); } c = c & 0x3F; d = d & 0x1F; @@ -2849,7 +2654,7 @@ UTF8ToISO8859x(unsigned char* out, int *outlen, /* not in character set */ *outlen = out - outstart; *inlen = processed - instart; - return(-2); + return(XML_ENC_ERR_INPUT); } *out++ = d; } else if (d < 0xF0) { @@ -2859,21 +2664,21 @@ UTF8ToISO8859x(unsigned char* out, int *outlen, /* trailing bytes not in input buffer */ *outlen = out - outstart; *inlen = processed - instart; - return(-3); + return(XML_ENC_ERR_PARTIAL); } c1 = *in++; if ((c1 & 0xC0) != 0x80) { /* not a trailing byte (c1) */ *outlen = out - outstart; *inlen = processed - instart; - return(-2); + return(XML_ENC_ERR_INPUT); } c2 = *in++; if ((c2 & 0xC0) != 0x80) { /* not a trailing byte (c2) */ *outlen = out - outstart; *inlen = processed - instart; - return(-2); + return(XML_ENC_ERR_INPUT); } c1 = c1 & 0x3F; c2 = c2 & 0x3F; @@ -2884,14 +2689,14 @@ UTF8ToISO8859x(unsigned char* out, int *outlen, /* not in character set */ *outlen = out - outstart; *inlen = processed - instart; - return(-2); + return(XML_ENC_ERR_INPUT); } *out++ = d; } else { /* cannot transcode >= U+010000 */ *outlen = out - outstart; *inlen = processed - instart; - return(-2); + return(XML_ENC_ERR_INPUT); } processed = in; } @@ -2909,7 +2714,9 @@ UTF8ToISO8859x(unsigned char* out, int *outlen, * * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8 * block of chars out. - * Returns 0 if success, or -1 otherwise + * + * Returns the number of bytes written or an XML_ENC_ERR code. + * * The value of @inlen after return is the number of octets consumed * The value of @outlen after return is the number of octets produced. */ @@ -2926,7 +2733,7 @@ ISO8859xToUTF8(unsigned char* out, int *outlen, if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL) || (unicodetable == NULL)) - return(-1); + return(XML_ENC_ERR_INTERNAL); outend = out + *outlen; inend = in + *inlen; instop = inend; @@ -2938,7 +2745,7 @@ ISO8859xToUTF8(unsigned char* out, int *outlen, /* undefined code point */ *outlen = out - outstart; *inlen = in - instart; - return (-1); + return(XML_ENC_ERR_INPUT); } if (c < 0x800) { *out++ = ((c >> 6) & 0x1F) | 0xC0; diff --git a/include/libxml/encoding.h b/include/libxml/encoding.h index 67add3b0..91fe87d7 100644 --- a/include/libxml/encoding.h +++ b/include/libxml/encoding.h @@ -32,6 +32,15 @@ extern "C" { #endif +typedef enum { + XML_ENC_ERR_SUCCESS = 0, + XML_ENC_ERR_SPACE = -1, + XML_ENC_ERR_INPUT = -2, + XML_ENC_ERR_PARTIAL = -3, + XML_ENC_ERR_INTERNAL = -4, + XML_ENC_ERR_MEMORY = -5 +} xmlCharEncError; + /* * xmlCharEncoding: * |