summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Nick Wellnhofer <wellnhofer@aevum.de> 2023-02-22 17:11:20 +0100
committer Nick Wellnhofer <wellnhofer@aevum.de> 2023-04-30 16:43:29 +0200
commit 3ff6abbf58ac4b182037b18810376ed12fca0ba3 (patch)
tree 8081169761f535ff28583cc646dc052054da54ec
parent b463b38bcd5a2a797c332b180723b6c26a213311 (diff)
download libxml2-3ff6abbf58ac4b182037b18810376ed12fca0ba3.tar.gz
encoding: Rework error codes
Use an enum instead of magic numbers. Fix a few error codes. Simplify handling of "space" and "partial" errors. See #506.
-rw-r--r-- encoding.c 563
-rw-r--r-- include/libxml/encoding.h 9
2 files changed, 194 insertions, 378 deletions
diff --git a/encoding.c b/encoding.c
index 52bc15ac..6c2aa32e 100644
--- a/encoding.c
+++ b/encoding.c
@@ -171,7 +171,9 @@ closeIcuConverter(uconv_t *conv)
*
* Take a block of ASCII chars in and try to convert it to an UTF-8
* block of chars out.
- * Returns 0 if success, or -1 otherwise
+ *
+ * Returns the number of bytes written or an XML_ENC_ERR code.
+ *
* The value of @inlen after return is the number of octets consumed
* if the return value is positive, else unpredictable.
* The value of @outlen after return is the number of octets produced.
@@ -197,7 +199,7 @@ asciiToUTF8(unsigned char* out, int *outlen,
} else {
*outlen = out - outstart;
*inlen = processed - base;
- return(-2);
+ return(XML_ENC_ERR_INPUT);
}
processed = (const unsigned char*) in;
@@ -218,7 +220,8 @@ asciiToUTF8(unsigned char* out, int *outlen,
* Take a block of UTF-8 chars in and try to convert it to an ASCII
* block of chars out.
*
- * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
+ * Returns the number of bytes written or an XML_ENC_ERR code.
+ *
* The value of @inlen after return is the number of octets consumed
* if the return value is positive, else unpredictable.
* The value of @outlen after return is the number of octets produced.
@@ -234,7 +237,8 @@ UTF8Toascii(unsigned char* out, int *outlen,
unsigned int c, d;
int trailing;
- if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
+ if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
+ return(XML_ENC_ERR_INTERNAL);
if (in == NULL) {
/*
* initialization nothing to do
@@ -252,7 +256,7 @@ UTF8Toascii(unsigned char* out, int *outlen,
/* trailing byte in leading position */
*outlen = out - outstart;
*inlen = processed - instart;
- return(-2);
+ return(XML_ENC_ERR_INPUT);
} else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
@@ -260,7 +264,7 @@ UTF8Toascii(unsigned char* out, int *outlen,
/* no chance for this in Ascii */
*outlen = out - outstart;
*inlen = processed - instart;
- return(-2);
+ return(XML_ENC_ERR_INPUT);
}
if (inend - in < trailing) {
@@ -283,7 +287,7 @@ UTF8Toascii(unsigned char* out, int *outlen,
/* no chance for this in Ascii */
*outlen = out - outstart;
*inlen = processed - instart;
- return(-2);
+ return(XML_ENC_ERR_INPUT);
}
processed = in;
}
@@ -302,7 +306,9 @@ UTF8Toascii(unsigned char* out, int *outlen,
*
* Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
* block of chars out.
- * Returns the number of bytes written if success, or -1 otherwise
+ *
+ * Returns the number of bytes written or an XML_ENC_ERR code.
+ *
* The value of @inlen after return is the number of octets consumed
* if the return value is positive, else unpredictable.
* The value of @outlen after return is the number of octets produced.
@@ -317,7 +323,7 @@ isolat1ToUTF8(unsigned char* out, int *outlen,
const unsigned char* instop;
if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
- return(-1);
+ return(XML_ENC_ERR_INTERNAL);
outend = out + *outlen;
inend = in + (*inlen);
@@ -351,7 +357,8 @@ isolat1ToUTF8(unsigned char* out, int *outlen,
*
* No op copy operation for UTF8 handling.
*
- * Returns the number of bytes written, or -1 if lack of space.
+ * Returns the number of bytes written or an XML_ENC_ERR code.
+ *
* The value of *inlen after return is the number of octets consumed
* if the return value is positive, else unpredictable.
*/
@@ -362,7 +369,7 @@ UTF8ToUTF8(unsigned char* out, int *outlen,
int len;
if ((out == NULL) || (outlen == NULL) || (inlenb == NULL))
- return(-1);
+ return(XML_ENC_ERR_INTERNAL);
if (inb == NULL) {
/* inb == NULL means output is initialized. */
*outlen = 0;
@@ -375,7 +382,7 @@ UTF8ToUTF8(unsigned char* out, int *outlen,
len = *outlen;
}
if (len < 0)
- return(-1);
+ return(XML_ENC_ERR_INTERNAL);
/*
* FIXME: Conversion functions must assure valid UTF-8, so we have
@@ -401,8 +408,8 @@ UTF8ToUTF8(unsigned char* out, int *outlen,
* Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
* block of chars out.
*
- * Returns the number of bytes written if success, -2 if the transcoding fails,
- or -1 otherwise
+ * Returns the number of bytes written or an XML_ENC_ERR code.
+ *
* The value of @inlen after return is the number of octets consumed
* if the return value is positive, else unpredictable.
* The value of @outlen after return is the number of octets produced.
@@ -418,7 +425,8 @@ UTF8Toisolat1(unsigned char* out, int *outlen,
unsigned int c, d;
int trailing;
- if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
+ if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
+ return(XML_ENC_ERR_INTERNAL);
if (in == NULL) {
/*
* initialization nothing to do
@@ -436,7 +444,7 @@ UTF8Toisolat1(unsigned char* out, int *outlen,
/* trailing byte in leading position */
*outlen = out - outstart;
*inlen = processed - instart;
- return(-2);
+ return(XML_ENC_ERR_INPUT);
} else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
@@ -444,7 +452,7 @@ UTF8Toisolat1(unsigned char* out, int *outlen,
/* no chance for this in IsoLat1 */
*outlen = out - outstart;
*inlen = processed - instart;
- return(-2);
+ return(XML_ENC_ERR_INPUT);
}
if (inend - in < trailing) {
@@ -457,7 +465,7 @@ UTF8Toisolat1(unsigned char* out, int *outlen,
if (((d= *in++) & 0xC0) != 0x80) {
*outlen = out - outstart;
*inlen = processed - instart;
- return(-2);
+ return(XML_ENC_ERR_INPUT);
}
c <<= 6;
c |= d & 0x3F;
@@ -472,7 +480,7 @@ UTF8Toisolat1(unsigned char* out, int *outlen,
/* no chance for this in IsoLat1 */
*outlen = out - outstart;
*inlen = processed - instart;
- return(-2);
+ return(XML_ENC_ERR_INPUT);
}
processed = in;
}
@@ -494,10 +502,10 @@ UTF8Toisolat1(unsigned char* out, int *outlen,
* is the same between the native type of this machine and the
* inputed one.
*
- * Returns the number of bytes written, or -1 if lack of space, or -2
- * if the transcoding fails (if *in is not a valid utf16 string)
- * The value of *inlen after return is the number of octets consumed
- * if the return value is positive, else unpredictable.
+ * Returns the number of bytes written or an XML_ENC_ERR code.
+ *
+ * The value of *inlen after return is the number of octets consumed
+ * if the return value is positive, else unpredictable.
*/
static int
UTF16LEToUTF8(unsigned char* out, int *outlen,
@@ -551,7 +559,7 @@ UTF16LEToUTF8(unsigned char* out, int *outlen,
else {
*outlen = out - outstart;
*inlenb = processed - inb;
- return(-2);
+ return(XML_ENC_ERR_INPUT);
}
}
@@ -586,8 +594,7 @@ UTF16LEToUTF8(unsigned char* out, int *outlen,
* Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
* block of chars out.
*
- * Returns the number of bytes written, or -1 if lack of space, or -2
- * if the transcoding failed.
+ * Returns the number of bytes written or an XML_ENC_ERR code.
*/
static int
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
@@ -605,7 +612,8 @@ UTF8ToUTF16LE(unsigned char* outb, int *outlen,
unsigned short tmp1, tmp2;
/* UTF16LE encoding has no BOM */
- if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
+ if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
+ return(XML_ENC_ERR_INTERNAL);
if (in == NULL) {
*outlen = 0;
*inlen = 0;
@@ -620,7 +628,7 @@ UTF8ToUTF16LE(unsigned char* outb, int *outlen,
/* trailing byte in leading position */
*outlen = (out - outstart) * 2;
*inlen = processed - instart;
- return(-2);
+ return(XML_ENC_ERR_INPUT);
} else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
@@ -628,7 +636,7 @@ UTF8ToUTF16LE(unsigned char* outb, int *outlen,
/* no chance for this in UTF-16 */
*outlen = (out - outstart) * 2;
*inlen = processed - instart;
- return(-2);
+ return(XML_ENC_ERR_INPUT);
}
if (inend - in < trailing) {
@@ -695,8 +703,7 @@ UTF8ToUTF16LE(unsigned char* outb, int *outlen,
* Take a block of UTF-8 chars in and try to convert it to an UTF-16
* block of chars out.
*
- * Returns the number of bytes written, or -1 if lack of space, or -2
- * if the transcoding failed.
+ * Returns the number of bytes written or an XML_ENC_ERR code.
*/
static int
UTF8ToUTF16(unsigned char* outb, int *outlen,
@@ -737,10 +744,10 @@ UTF8ToUTF16(unsigned char* outb, int *outlen,
* is the same between the native type of this machine and the
* inputed one.
*
- * Returns the number of bytes written, or -1 if lack of space, or -2
- * if the transcoding fails (if *in is not a valid utf16 string)
+ * Returns the number of bytes written or an XML_ENC_ERR code.
+ *
* The value of *inlen after return is the number of octets consumed
- * if the return value is positive, else unpredictable.
+ * if the return value is positive, else unpredictable.
*/
static int
UTF16BEToUTF8(unsigned char* out, int *outlen,
@@ -794,7 +801,7 @@ UTF16BEToUTF8(unsigned char* out, int *outlen,
else {
*outlen = out - outstart;
*inlenb = processed - inb;
- return(-2);
+ return(XML_ENC_ERR_INPUT);
}
}
@@ -829,8 +836,7 @@ UTF16BEToUTF8(unsigned char* out, int *outlen,
* Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
* block of chars out.
*
- * Returns the number of byte written, or -1 by lack of space, or -2
- * if the transcoding failed.
+ * Returns the number of bytes written or an XML_ENC_ERR code.
*/
static int
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
@@ -848,7 +854,8 @@ UTF8ToUTF16BE(unsigned char* outb, int *outlen,
unsigned short tmp1, tmp2;
/* UTF-16BE has no BOM */
- if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
+ if ((outb == NULL) || (outlen == NULL) || (inlen == NULL))
+ return(XML_ENC_ERR_INTERNAL);
if (in == NULL) {
*outlen = 0;
*inlen = 0;
@@ -863,7 +870,7 @@ UTF8ToUTF16BE(unsigned char* outb, int *outlen,
/* trailing byte in leading position */
*outlen = out - outstart;
*inlen = processed - instart;
- return(-2);
+ return(XML_ENC_ERR_INPUT);
} else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
@@ -871,7 +878,7 @@ UTF8ToUTF16BE(unsigned char* outb, int *outlen,
/* no chance for this in UTF-16 */
*outlen = out - outstart;
*inlen = processed - instart;
- return(-2);
+ return(XML_ENC_ERR_INPUT);
}
if (inend - in < trailing) {
@@ -1890,11 +1897,7 @@ xmlFindCharEncodingHandler(const char *name) {
* @in: a pointer to an array of input bytes
* @inlen: the length of @in
*
- * Returns 0 if success, or
- * -1 by lack of space, or
- * -2 if the transcoding fails (for *in is not valid utf8 string or
- * the result of transformation can't fit into the encoding we want), or
- * -3 if there the last byte can't form a single output char.
+ * Returns an XML_ENC_ERR code.
*
* The value of @inlen after return is the number of octets consumed
* as the return value is positive, else unpredictable.
@@ -1910,7 +1913,7 @@ xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
if (outlen != NULL) *outlen = 0;
- return(-1);
+ return(XML_ENC_ERR_INTERNAL);
}
icv_inlen = *inlen;
icv_outlen = *outlen;
@@ -1920,27 +1923,16 @@ xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
ret = iconv(cd, (void *) &icv_in, &icv_inlen, &icv_out, &icv_outlen);
*inlen -= icv_inlen;
*outlen -= icv_outlen;
- if ((icv_inlen != 0) || (ret == (size_t) -1)) {
-#ifdef EILSEQ
- if (errno == EILSEQ) {
- return -2;
- } else
-#endif
-#ifdef E2BIG
- if (errno == E2BIG) {
- return -1;
- } else
-#endif
-#ifdef EINVAL
- if (errno == EINVAL) {
- return -3;
- } else
-#endif
- {
- return -3;
- }
+ if (ret == (size_t) -1) {
+ if (errno == EILSEQ)
+ return(XML_ENC_ERR_INPUT);
+ if (errno == E2BIG)
+ return(XML_ENC_ERR_SPACE);
+ if (errno == EINVAL)
+ return(XML_ENC_ERR_PARTIAL);
+ return(XML_ENC_ERR_INTERNAL);
}
- return 0;
+ return(XML_ENC_ERR_SUCCESS);
}
#endif /* LIBXML_ICONV_ENABLED */
@@ -1961,11 +1953,7 @@ xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
* @inlen: the length of @in
* @flush: if true, indicates end of input
*
- * Returns 0 if success, or
- * -1 by lack of space, or
- * -2 if the transcoding fails (for *in is not valid utf8 string or
- * the result of transformation can't fit into the encoding we want), or
- * -3 if there the last byte can't form a single output char.
+ * Returns an XML_ENC_ERR code.
*
* The value of @inlen after return is the number of octets consumed
* as the return value is positive, else unpredictable.
@@ -1980,7 +1968,7 @@ xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
if (outlen != NULL) *outlen = 0;
- return(-1);
+ return(XML_ENC_ERR_INTERNAL);
}
if (toUnicode) {
@@ -2002,13 +1990,13 @@ xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
/* reset pivot buf if this is the last call for input (flush==TRUE) */
if (flush)
cd->pivot_source = cd->pivot_target = cd->pivot_buf;
- return 0;
+ return(XML_ENC_ERR_SUCCESS);
}
if (err == U_BUFFER_OVERFLOW_ERROR)
- return -1;
+ return(XML_ENC_ERR_SPACE);
if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND)
- return -2;
- return -3;
+ return(XML_ENC_ERR_INPUT);
+ return(XML_ENC_ERR_PARTIAL);
}
#endif /* LIBXML_ICU_ENABLED */
@@ -2027,11 +2015,7 @@ xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
* @inlen: the length of @in
* @flush: flush (ICU-related)
*
- * Returns 0 if success, or
- * -1 by lack of space, or
- * -2 if the transcoding fails (for *in is not valid utf8 string or
- * the result of transformation can't fit into the encoding we want), or
- * -3 if there the last byte can't form a single output char.
+ * Returns an XML_ENC_ERR code.
*
* The value of @inlen after return is the number of octets consumed
* as the return value is 0, else unpredictable.
@@ -2046,7 +2030,7 @@ xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
if (handler->input != NULL) {
ret = handler->input(out, outlen, in, inlen);
if (ret > 0)
- ret = 0;
+ ret = XML_ENC_ERR_SUCCESS;
}
#ifdef LIBXML_ICONV_ENABLED
else if (handler->iconv_in != NULL) {
@@ -2062,9 +2046,13 @@ xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
else {
*outlen = 0;
*inlen = 0;
- ret = -2;
+ ret = XML_ENC_ERR_INTERNAL;
}
+ /* Ignore space and partial errors when reading. */
+ if ((ret == XML_ENC_ERR_SPACE) || (ret == XML_ENC_ERR_PARTIAL))
+ ret = XML_ENC_ERR_SUCCESS;
+
return(ret);
}
@@ -2076,12 +2064,7 @@ xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
* @in: a pointer to an array of input bytes
* @inlen: the length of @in
*
- * Returns 0 if success, or
- * -1 by lack of space, or
- * -2 if the transcoding fails (for *in is not valid utf8 string or
- * the result of transformation can't fit into the encoding we want), or
- * -3 if there the last byte can't form a single output char.
- * -4 if no output function was found.
+ * Returns an XML_ENC_ERR code.
*
* The value of @inlen after return is the number of octets consumed
* as the return value is 0, else unpredictable.
@@ -2095,7 +2078,7 @@ xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
if (handler->output != NULL) {
ret = handler->output(out, outlen, in, inlen);
if (ret > 0)
- ret = 0;
+ ret = XML_ENC_ERR_SUCCESS;
}
#ifdef LIBXML_ICONV_ENABLED
else if (handler->iconv_out != NULL) {
@@ -2111,15 +2094,19 @@ xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
else {
*outlen = 0;
*inlen = 0;
- ret = -4;
+ ret = XML_ENC_ERR_INTERNAL;
}
+ /* We shouldn't generate partial sequences when writing. */
+ if (ret == XML_ENC_ERR_PARTIAL)
+ ret = XML_ENC_ERR_INTERNAL;
+
return(ret);
}
/**
* xmlCharEncFirstLine:
- * @handler: char encoding transformation data structure
+ * @handler: char encoding transformation data structure
* @out: an xmlBuffer for the output.
* @in: an xmlBuffer for the input
*
@@ -2138,10 +2125,7 @@ xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
*
* Generic front-end for the encoding handler on parser input
*
- * Returns the number of byte written if success, or
- * -1 general error
- * -2 if the transcoding fails (for *in is not valid utf8 string or
- * the result of transformation can't fit into the encoding we want), or
+ * Returns the number of bytes written or an XML_ENC_ERR code.
*/
int
xmlCharEncInput(xmlParserInputBufferPtr input, int flush)
@@ -2156,7 +2140,7 @@ xmlCharEncInput(xmlParserInputBufferPtr input, int flush)
if ((input == NULL) || (input->encoder == NULL) ||
(input->buffer == NULL) || (input->raw == NULL))
- return (-1);
+ return(XML_ENC_ERR_INTERNAL);
out = input->buffer;
in = input->raw;
@@ -2168,7 +2152,7 @@ xmlCharEncInput(xmlParserInputBufferPtr input, int flush)
written = xmlBufAvail(out);
if (toconv * 2 >= written) {
if (xmlBufGrow(out, toconv * 2) < 0)
- return (-1);
+ return(XML_ENC_ERR_MEMORY);
written = xmlBufAvail(out);
}
if ((written > 128 * 1024) && (flush == 0))
@@ -2180,49 +2164,7 @@ xmlCharEncInput(xmlParserInputBufferPtr input, int flush)
xmlBufContent(in), &c_in, flush);
xmlBufShrink(in, c_in);
xmlBufAddLen(out, c_out);
- if (ret == -1)
- ret = -3;
- switch (ret) {
- case 0:
-#ifdef DEBUG_ENCODING
- xmlGenericError(xmlGenericErrorContext,
- "converted %d bytes to %d bytes of input\n",
- c_in, c_out);
-#endif
- break;
- case -1:
-#ifdef DEBUG_ENCODING
- xmlGenericError(xmlGenericErrorContext,
- "converted %d bytes to %d bytes of input, %d left\n",
- c_in, c_out, (int)xmlBufUse(in));
-#endif
- break;
- case -3:
-#ifdef DEBUG_ENCODING
- xmlGenericError(xmlGenericErrorContext,
- "converted %d bytes to %d bytes of input, %d left\n",
- c_in, c_out, (int)xmlBufUse(in));
-#endif
- break;
- case -2: {
- char buf[50];
- const xmlChar *content = xmlBufContent(in);
-
- snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
- content[0], content[1],
- content[2], content[3]);
- buf[49] = 0;
- xmlEncodingErr(XML_I18N_CONV_FAILED,
- "input conversion failed due to input error, bytes %s\n",
- buf);
- }
- }
- /*
- * Ignore when input buffer is not on a boundary
- */
- if (ret == -3)
- ret = 0;
return (c_out? c_out : ret);
}
@@ -2234,10 +2176,7 @@ xmlCharEncInput(xmlParserInputBufferPtr input, int flush)
*
* Generic front-end for the encoding handler input function
*
- * Returns the number of byte written if success, or
- * -1 general error
- * -2 if the transcoding fails (for *in is not valid utf8 string or
- * the result of transformation can't fit into the encoding we want), or
+ * Returns the number of bytes written or an XML_ENC_ERR code.
*/
int
xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
@@ -2248,11 +2187,11 @@ xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
int toconv;
if (handler == NULL)
- return (-1);
+ return(XML_ENC_ERR_INTERNAL);
if (out == NULL)
- return (-1);
+ return(XML_ENC_ERR_INTERNAL);
if (in == NULL)
- return (-1);
+ return(XML_ENC_ERR_INTERNAL);
toconv = in->use;
if (toconv == 0)
@@ -2267,48 +2206,7 @@ xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
xmlBufferShrink(in, toconv);
out->use += written;
out->content[out->use] = 0;
- if (ret == -1)
- ret = -3;
- switch (ret) {
- case 0:
-#ifdef DEBUG_ENCODING
- xmlGenericError(xmlGenericErrorContext,
- "converted %d bytes to %d bytes of input\n",
- toconv, written);
-#endif
- break;
- case -1:
-#ifdef DEBUG_ENCODING
- xmlGenericError(xmlGenericErrorContext,
- "converted %d bytes to %d bytes of input, %d left\n",
- toconv, written, in->use);
-#endif
- break;
- case -3:
-#ifdef DEBUG_ENCODING
- xmlGenericError(xmlGenericErrorContext,
- "converted %d bytes to %d bytes of input, %d left\n",
- toconv, written, in->use);
-#endif
- break;
- case -2: {
- char buf[50];
-
- snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
- in->content[0], in->content[1],
- in->content[2], in->content[3]);
- buf[49] = 0;
- xmlEncodingErr(XML_I18N_CONV_FAILED,
- "input conversion failed due to input error, bytes %s\n",
- buf);
- }
- }
- /*
- * Ignore when input buffer is not on a boundary
- */
- if (ret == -3)
- ret = 0;
return (written? written : ret);
}
@@ -2325,10 +2223,7 @@ xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
* In case of UTF8 sequence conversion errors for the given encoder,
* the content will be automatically remapped to a CharRef sequence.
*
- * Returns the number of byte written if success, or
- * -1 general error
- * -2 if the transcoding fails (for *in is not valid utf8 string or
- * the result of transformation can't fit into the encoding we want), or
+ * Returns the number of bytes written or an XML_ENC_ERR code.
*/
int
xmlCharEncOutput(xmlOutputBufferPtr output, int init)
@@ -2344,7 +2239,7 @@ xmlCharEncOutput(xmlOutputBufferPtr output, int init)
if ((output == NULL) || (output->encoder == NULL) ||
(output->buffer == NULL) || (output->conv == NULL))
- return (-1);
+ return(XML_ENC_ERR_INTERNAL);
out = output->conv;
in = output->buffer;
@@ -2391,92 +2286,50 @@ retry:
xmlBufShrink(in, c_in);
xmlBufAddLen(out, c_out);
writtentot += c_out;
- if (ret == -1) {
- if (c_out > 0) {
- /* Can be a limitation of iconv or uconv */
- goto retry;
- }
- ret = -3;
- }
+
+ if (ret == XML_ENC_ERR_SPACE)
+ goto retry;
/*
* Attempt to handle error cases
*/
- switch (ret) {
- case 0:
-#ifdef DEBUG_ENCODING
- xmlGenericError(xmlGenericErrorContext,
- "converted %d bytes to %d bytes of output\n",
- c_in, c_out);
-#endif
- break;
- case -1:
-#ifdef DEBUG_ENCODING
- xmlGenericError(xmlGenericErrorContext,
- "output conversion failed by lack of space\n");
-#endif
- break;
- case -3:
-#ifdef DEBUG_ENCODING
- xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
- c_in, c_out, (int) xmlBufUse(in));
-#endif
- break;
- case -4:
- xmlEncodingErr(XML_I18N_NO_OUTPUT,
- "xmlCharEncOutFunc: no output function !\n", NULL);
- ret = -1;
- break;
- case -2: {
- xmlChar charref[20];
- int len = xmlBufUse(in);
- xmlChar *content = xmlBufContent(in);
- int cur, charrefLen;
-
- cur = xmlGetUTF8Char(content, &len);
- if (cur <= 0)
- break;
+ if (ret == XML_ENC_ERR_INPUT) {
+ xmlChar charref[20];
+ int len = xmlBufUse(in);
+ xmlChar *content = xmlBufContent(in);
+ int cur, charrefLen;
+
+ cur = xmlGetUTF8Char(content, &len);
+ if (cur <= 0)
+ return(ret);
#ifdef DEBUG_ENCODING
- xmlGenericError(xmlGenericErrorContext,
- "handling output conversion error\n");
- xmlGenericError(xmlGenericErrorContext,
- "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
- content[0], content[1],
- content[2], content[3]);
+ xmlGenericError(xmlGenericErrorContext,
+ "handling output conversion error\n");
+ xmlGenericError(xmlGenericErrorContext,
+ "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
+ content[0], content[1],
+ content[2], content[3]);
#endif
- /*
- * Removes the UTF8 sequence, and replace it by a charref
- * and continue the transcoding phase, hoping the error
- * did not mangle the encoder state.
- */
- charrefLen = snprintf((char *) &charref[0], sizeof(charref),
- "&#%d;", cur);
- xmlBufShrink(in, len);
- xmlBufGrow(out, charrefLen * 4);
- c_out = xmlBufAvail(out);
- c_in = charrefLen;
- ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
- charref, &c_in);
-
- if ((ret < 0) || (c_in != charrefLen)) {
- char buf[50];
-
- snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
- content[0], content[1],
- content[2], content[3]);
- buf[49] = 0;
- xmlEncodingErr(XML_I18N_CONV_FAILED,
- "output conversion failed due to conv error, bytes %s\n",
- buf);
- content[0] = ' ';
- break;
- }
+ /*
+ * Removes the UTF8 sequence, and replace it by a charref
+ * and continue the transcoding phase, hoping the error
+ * did not mangle the encoder state.
+ */
+ charrefLen = snprintf((char *) &charref[0], sizeof(charref),
+ "&#%d;", cur);
+ xmlBufShrink(in, len);
+ xmlBufGrow(out, charrefLen * 4);
+ c_out = xmlBufAvail(out);
+ c_in = charrefLen;
+ ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
+ charref, &c_in);
+ if ((ret < 0) || (c_in != charrefLen))
+ return(XML_ENC_ERR_INTERNAL);
- xmlBufAddLen(out, c_out);
- writtentot += c_out;
- goto retry;
- }
+ xmlBufAddLen(out, c_out);
+ writtentot += c_out;
+ goto retry;
}
return(writtentot ? writtentot : ret);
}
@@ -2495,10 +2348,7 @@ retry:
* In case of UTF8 sequence conversion errors for the given encoder,
* the content will be automatically remapped to a CharRef sequence.
*
- * Returns the number of byte written if success, or
- * -1 general error
- * -2 if the transcoding fails (for *in is not valid utf8 string or
- * the result of transformation can't fit into the encoding we want), or
+ * Returns the number of bytes written or an XML_ENC_ERR code.
*/
int
xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
@@ -2508,8 +2358,8 @@ xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
int writtentot = 0;
int toconv;
- if (handler == NULL) return(-1);
- if (out == NULL) return(-1);
+ if (handler == NULL) return(XML_ENC_ERR_INTERNAL);
+ if (out == NULL) return(XML_ENC_ERR_INTERNAL);
retry:
@@ -2551,93 +2401,51 @@ retry:
out->use += written;
writtentot += written;
out->content[out->use] = 0;
- if (ret == -1) {
- if (written > 0) {
- /* Can be a limitation of iconv or uconv */
- goto retry;
- }
- ret = -3;
- }
+
+ if (ret == XML_ENC_ERR_SPACE)
+ goto retry;
/*
* Attempt to handle error cases
*/
- switch (ret) {
- case 0:
-#ifdef DEBUG_ENCODING
- xmlGenericError(xmlGenericErrorContext,
- "converted %d bytes to %d bytes of output\n",
- toconv, written);
-#endif
- break;
- case -1:
-#ifdef DEBUG_ENCODING
- xmlGenericError(xmlGenericErrorContext,
- "output conversion failed by lack of space\n");
-#endif
- break;
- case -3:
-#ifdef DEBUG_ENCODING
- xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
- toconv, written, in->use);
-#endif
- break;
- case -4:
- xmlEncodingErr(XML_I18N_NO_OUTPUT,
- "xmlCharEncOutFunc: no output function !\n", NULL);
- ret = -1;
- break;
- case -2: {
- xmlChar charref[20];
- int len = in->use;
- const xmlChar *utf = (const xmlChar *) in->content;
- int cur, charrefLen;
-
- cur = xmlGetUTF8Char(utf, &len);
- if (cur <= 0)
- break;
+ if (ret == XML_ENC_ERR_INPUT) {
+ xmlChar charref[20];
+ int len = in->use;
+ const xmlChar *utf = (const xmlChar *) in->content;
+ int cur, charrefLen;
+
+ cur = xmlGetUTF8Char(utf, &len);
+ if (cur <= 0)
+ return(ret);
#ifdef DEBUG_ENCODING
- xmlGenericError(xmlGenericErrorContext,
- "handling output conversion error\n");
- xmlGenericError(xmlGenericErrorContext,
- "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
- in->content[0], in->content[1],
- in->content[2], in->content[3]);
+ xmlGenericError(xmlGenericErrorContext,
+ "handling output conversion error\n");
+ xmlGenericError(xmlGenericErrorContext,
+ "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
+ in->content[0], in->content[1],
+ in->content[2], in->content[3]);
#endif
- /*
- * Removes the UTF8 sequence, and replace it by a charref
- * and continue the transcoding phase, hoping the error
- * did not mangle the encoder state.
- */
- charrefLen = snprintf((char *) &charref[0], sizeof(charref),
- "&#%d;", cur);
- xmlBufferShrink(in, len);
- xmlBufferGrow(out, charrefLen * 4);
- written = out->size - out->use - 1;
- toconv = charrefLen;
- ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
- charref, &toconv);
-
- if ((ret < 0) || (toconv != charrefLen)) {
- char buf[50];
-
- snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
- in->content[0], in->content[1],
- in->content[2], in->content[3]);
- buf[49] = 0;
- xmlEncodingErr(XML_I18N_CONV_FAILED,
- "output conversion failed due to conv error, bytes %s\n",
- buf);
- in->content[0] = ' ';
- break;
- }
+ /*
+ * Removes the UTF8 sequence, and replace it by a charref
+ * and continue the transcoding phase, hoping the error
+ * did not mangle the encoder state.
+ */
+ charrefLen = snprintf((char *) &charref[0], sizeof(charref),
+ "&#%d;", cur);
+ xmlBufferShrink(in, len);
+ xmlBufferGrow(out, charrefLen * 4);
+ written = out->size - out->use - 1;
+ toconv = charrefLen;
+ ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
+ charref, &toconv);
+ if ((ret < 0) || (toconv != charrefLen))
+ return(XML_ENC_ERR_INTERNAL);
- out->use += written;
- writtentot += written;
- out->content[out->use] = 0;
- goto retry;
- }
+ out->use += written;
+ writtentot += written;
+ out->content[out->use] = 0;
+ goto retry;
}
return(writtentot ? writtentot : ret);
}
@@ -2761,15 +2569,11 @@ xmlByteConsumed(xmlParserCtxtPtr ctxt) {
written = 32000;
ret = xmlEncOutputChunk(handler, &convbuf[0], &written,
cur, &toconv);
- if (ret < 0) {
- if (written > 0)
- ret = -2;
- else
- return(-1);
- }
+ if ((ret != XML_ENC_ERR_SUCCESS) && (ret != XML_ENC_ERR_SPACE))
+ return(-1);
unused += written;
cur += toconv;
- } while (ret == -2);
+ } while (ret == XML_ENC_ERR_SPACE);
}
if (in->buf->rawconsumed < unused)
return(-1);
@@ -2792,9 +2596,10 @@ xmlByteConsumed(xmlParserCtxtPtr ctxt) {
* Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
* block of chars out.
*
- * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
+ * Returns the number of bytes written or an XML_ENC_ERR code.
+ *
* The value of @inlen after return is the number of octets consumed
- * as the return value is positive, else unpredictable.
+ * as the return value is positive, else unpredictable.
* The value of @outlen after return is the number of octets consumed.
*/
static int
@@ -2808,7 +2613,7 @@ UTF8ToISO8859x(unsigned char* out, int *outlen,
if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
(xlattable == NULL))
- return(-1);
+ return(XML_ENC_ERR_INTERNAL);
if (in == NULL) {
/*
* initialization nothing to do
@@ -2826,21 +2631,21 @@ UTF8ToISO8859x(unsigned char* out, int *outlen,
/* trailing byte in leading position */
*outlen = out - outstart;
*inlen = processed - instart;
- return(-2);
+ return(XML_ENC_ERR_INPUT);
} else if (d < 0xE0) {
unsigned char c;
if (!(in < inend)) {
/* trailing byte not in input buffer */
*outlen = out - outstart;
*inlen = processed - instart;
- return(-3);
+ return(XML_ENC_ERR_PARTIAL);
}
c = *in++;
if ((c & 0xC0) != 0x80) {
/* not a trailing byte */
*outlen = out - outstart;
*inlen = processed - instart;
- return(-2);
+ return(XML_ENC_ERR_INPUT);
}
c = c & 0x3F;
d = d & 0x1F;
@@ -2849,7 +2654,7 @@ UTF8ToISO8859x(unsigned char* out, int *outlen,
/* not in character set */
*outlen = out - outstart;
*inlen = processed - instart;
- return(-2);
+ return(XML_ENC_ERR_INPUT);
}
*out++ = d;
} else if (d < 0xF0) {
@@ -2859,21 +2664,21 @@ UTF8ToISO8859x(unsigned char* out, int *outlen,
/* trailing bytes not in input buffer */
*outlen = out - outstart;
*inlen = processed - instart;
- return(-3);
+ return(XML_ENC_ERR_PARTIAL);
}
c1 = *in++;
if ((c1 & 0xC0) != 0x80) {
/* not a trailing byte (c1) */
*outlen = out - outstart;
*inlen = processed - instart;
- return(-2);
+ return(XML_ENC_ERR_INPUT);
}
c2 = *in++;
if ((c2 & 0xC0) != 0x80) {
/* not a trailing byte (c2) */
*outlen = out - outstart;
*inlen = processed - instart;
- return(-2);
+ return(XML_ENC_ERR_INPUT);
}
c1 = c1 & 0x3F;
c2 = c2 & 0x3F;
@@ -2884,14 +2689,14 @@ UTF8ToISO8859x(unsigned char* out, int *outlen,
/* not in character set */
*outlen = out - outstart;
*inlen = processed - instart;
- return(-2);
+ return(XML_ENC_ERR_INPUT);
}
*out++ = d;
} else {
/* cannot transcode >= U+010000 */
*outlen = out - outstart;
*inlen = processed - instart;
- return(-2);
+ return(XML_ENC_ERR_INPUT);
}
processed = in;
}
@@ -2909,7 +2714,9 @@ UTF8ToISO8859x(unsigned char* out, int *outlen,
*
* Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
* block of chars out.
- * Returns 0 if success, or -1 otherwise
+ *
+ * Returns the number of bytes written or an XML_ENC_ERR code.
+ *
* The value of @inlen after return is the number of octets consumed
* The value of @outlen after return is the number of octets produced.
*/
@@ -2926,7 +2733,7 @@ ISO8859xToUTF8(unsigned char* out, int *outlen,
if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
(in == NULL) || (unicodetable == NULL))
- return(-1);
+ return(XML_ENC_ERR_INTERNAL);
outend = out + *outlen;
inend = in + *inlen;
instop = inend;
@@ -2938,7 +2745,7 @@ ISO8859xToUTF8(unsigned char* out, int *outlen,
/* undefined code point */
*outlen = out - outstart;
*inlen = in - instart;
- return (-1);
+ return(XML_ENC_ERR_INPUT);
}
if (c < 0x800) {
*out++ = ((c >> 6) & 0x1F) | 0xC0;
diff --git a/include/libxml/encoding.h b/include/libxml/encoding.h
index 67add3b0..91fe87d7 100644
--- a/include/libxml/encoding.h
+++ b/include/libxml/encoding.h
@@ -32,6 +32,15 @@
extern "C" {
#endif
+typedef enum {
+ XML_ENC_ERR_SUCCESS = 0,
+ XML_ENC_ERR_SPACE = -1,
+ XML_ENC_ERR_INPUT = -2,
+ XML_ENC_ERR_PARTIAL = -3,
+ XML_ENC_ERR_INTERNAL = -4,
+ XML_ENC_ERR_MEMORY = -5
+} xmlCharEncError;
+
/*
* xmlCharEncoding:
*