diff options
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r-- | Objects/unicodeobject.c | 22 |
1 files changed, 20 insertions, 2 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 9ccd06eebb..a246756d94 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -4429,7 +4429,10 @@ unicode_decode_call_errorhandler_writer( Py_ssize_t insize; Py_ssize_t newpos; Py_ssize_t replen; + Py_ssize_t remain; PyObject *inputobj = NULL; + int need_to_grow = 0; + const char *new_inptr; if (*errorHandler == NULL) { *errorHandler = PyCodec_LookupError(errors); @@ -4463,6 +4466,7 @@ unicode_decode_call_errorhandler_writer( if (!PyBytes_Check(inputobj)) { PyErr_Format(PyExc_TypeError, "exception attribute object must be bytes"); } + remain = *inend - *input - *endinpos; *input = PyBytes_AS_STRING(inputobj); insize = PyBytes_GET_SIZE(inputobj); *inend = *input + insize; @@ -4482,6 +4486,19 @@ unicode_decode_call_errorhandler_writer( replen = PyUnicode_GET_LENGTH(repunicode); if (replen > 1) { writer->min_length += replen - 1; + need_to_grow = 1; + } + new_inptr = *input + newpos; + if (*inend - new_inptr > remain) { + /* We don't know the decoding algorithm here so we make the worst + assumption that one byte decodes to one unicode character. + If unfortunately one byte could decode to more unicode characters, + the decoder may write out-of-bound then. Is it possible for the + algorithms using this function? */ + writer->min_length += *inend - new_inptr - remain; + need_to_grow = 1; + } + if (need_to_grow) { writer->overallocate = 1; if (_PyUnicodeWriter_Prepare(writer, writer->min_length, PyUnicode_MAX_CHAR_VALUE(repunicode)) == -1) @@ -4491,7 +4508,7 @@ unicode_decode_call_errorhandler_writer( goto onError; *endinpos = newpos; - *inptr = *input + newpos; + *inptr = new_inptr; /* we made it! */ Py_XDECREF(restuple); @@ -5663,7 +5680,8 @@ PyUnicode_DecodeUTF16Stateful(const char *s, #endif /* Note: size will always be longer than the resulting Unicode - character count */ + character count normally. Error handler will take care of + resizing when needed. */ _PyUnicodeWriter_Init(&writer); writer.min_length = (e - q + 1) / 2; if (_PyUnicodeWriter_Prepare(&writer, writer.min_length, 127) == -1) |