From 9f4b1e9c50da83b51a4b0c7ee7d7dc3ef94a0cf6 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 10 Nov 2011 20:56:30 +0100 Subject: Fix and deprecate the unicode_internal codec unicode_internal codec uses Py_UNICODE instead of the real internal representation (PEP 393: Py_UCS1, Py_UCS2 or Py_UCS4) for backward compatibility. --- Objects/unicodeobject.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) (limited to 'Objects/unicodeobject.c') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 61534b48d5..3f580b5ff6 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -6237,6 +6237,11 @@ _PyUnicode_DecodeUnicodeInternal(const char *s, PyObject *errorHandler = NULL; PyObject *exc = NULL; + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "unicode_internal codecs has been deprecated", + 1)) + return NULL; + /* XXX overflow detection missing */ v = PyUnicode_New((size+Py_UNICODE_SIZE-1)/ Py_UNICODE_SIZE, 127); if (v == NULL) @@ -6270,15 +6275,26 @@ _PyUnicode_DecodeUnicodeInternal(const char *s, errors, &errorHandler, "unicode_internal", reason, &starts, &end, &startinpos, &endinpos, &exc, &s, - &v, &outpos)) { + &v, &outpos)) goto onError; - } + continue; } - else { - if (unicode_putchar(&v, &outpos, ch) < 0) - goto onError; - s += Py_UNICODE_SIZE; + + s += Py_UNICODE_SIZE; +#ifndef Py_UNICODE_WIDE + if (ch >= 0xD800 && ch <= 0xDBFF && s < end) + { + Py_UCS4 ch2 = *(Py_UNICODE*)s; + if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) + { + ch = (((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000; + s += Py_UNICODE_SIZE; + } } +#endif + + if (unicode_putchar(&v, &outpos, ch) < 0) + goto onError; } if (PyUnicode_Resize(&v, outpos) < 0) -- cgit v1.2.1