diff options
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r-- | Objects/unicodeobject.c | 750 |
1 files changed, 609 insertions, 141 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 5f56cf7db0..9d11546cb3 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -429,6 +429,10 @@ _PyUnicode_CheckConsistency(void *op, int check_content) } #endif +#ifdef HAVE_MBCS +static OSVERSIONINFOEX winver; +#endif + /* --- Bloom Filters ----------------------------------------------------- */ /* stuff to implement simple "bloom filters" for Unicode characters. @@ -6896,130 +6900,307 @@ PyUnicode_AsASCIIString(PyObject *unicode) #define NEED_RETRY #endif -/* XXX This code is limited to "true" double-byte encodings, as - a) it assumes an incomplete character consists of a single byte, and - b) IsDBCSLeadByte (probably) does not work for non-DBCS multi-byte - encodings, see IsDBCSLeadByteEx documentation. */ +#ifndef WC_ERR_INVALID_CHARS +# define WC_ERR_INVALID_CHARS 0x0080 +#endif + +static char* +code_page_name(UINT code_page, PyObject **obj) +{ + *obj = NULL; + if (code_page == CP_ACP) + return "mbcs"; + if (code_page == CP_UTF7) + return "CP_UTF7"; + if (code_page == CP_UTF8) + return "CP_UTF8"; + + *obj = PyBytes_FromFormat("cp%u", code_page); + if (*obj == NULL) + return NULL; + return PyBytes_AS_STRING(*obj); +} static int -is_dbcs_lead_byte(const char *s, int offset) +is_dbcs_lead_byte(UINT code_page, const char *s, int offset) { const char *curr = s + offset; + const char *prev; - if (IsDBCSLeadByte(*curr)) { - const char *prev = CharPrev(s, curr); - return (prev == curr) || !IsDBCSLeadByte(*prev) || (curr - prev == 2); - } + if (!IsDBCSLeadByteEx(code_page, *curr)) + return 0; + + prev = CharPrevExA(code_page, s, curr, 0); + if (prev == curr) + return 1; + /* FIXME: This code is limited to "true" double-byte encodings, + as it assumes an incomplete character consists of a single + byte. */ + if (curr - prev == 2) + return 1; + if (!IsDBCSLeadByteEx(code_page, *prev)) + return 1; return 0; } +static DWORD +decode_code_page_flags(UINT code_page) +{ + if (code_page == CP_UTF7) { + /* The CP_UTF7 decoder only supports flags=0 */ + return 0; + } + else + return MB_ERR_INVALID_CHARS; +} + /* - * Decode MBCS string into unicode object. If 'final' is set, converts - * trailing lead-byte too. Returns consumed size if succeed, -1 otherwise. + * Decode a byte string from a Windows code page into unicode object in strict + * mode. + * + * Returns consumed size if succeed, returns -2 on decode error, or raise a + * WindowsError and returns -1 on other error. */ static int -decode_mbcs(PyUnicodeObject **v, - const char *s, /* MBCS string */ - int size, /* sizeof MBCS string */ - int final, - const char *errors) +decode_code_page_strict(UINT code_page, + PyUnicodeObject **v, + const char *in, + int insize) { - Py_UNICODE *p; - Py_ssize_t n; - DWORD usize; - DWORD flags; + const DWORD flags = decode_code_page_flags(code_page); + Py_UNICODE *out; + DWORD outsize; - assert(size >= 0); + /* First get the size of the result */ + assert(insize > 0); + outsize = MultiByteToWideChar(code_page, flags, in, insize, NULL, 0); + if (outsize <= 0) + goto error; - /* check and handle 'errors' arg */ - if (errors==NULL || strcmp(errors, "strict")==0) - flags = MB_ERR_INVALID_CHARS; - else if (strcmp(errors, "ignore")==0) - flags = 0; + if (*v == NULL) { + /* Create unicode object */ + *v = _PyUnicode_New(outsize); + if (*v == NULL) + return -1; + out = PyUnicode_AS_UNICODE(*v); + } else { - PyErr_Format(PyExc_ValueError, - "mbcs encoding does not support errors='%s'", - errors); - return -1; + /* Extend unicode object */ + Py_ssize_t n = PyUnicode_GET_SIZE(*v); + if (PyUnicode_Resize((PyObject**)v, n + outsize) < 0) + return -1; + out = PyUnicode_AS_UNICODE(*v) + n; } - /* Skip trailing lead-byte unless 'final' is set */ - if (!final && size >= 1 && is_dbcs_lead_byte(s, size - 1)) - --size; + /* Do the conversion */ + outsize = MultiByteToWideChar(code_page, flags, in, insize, out, outsize); + if (outsize <= 0) + goto error; + return insize; - /* First get the size of the result */ - if (size > 0) { - usize = MultiByteToWideChar(CP_ACP, flags, s, size, NULL, 0); - if (usize==0) - goto mbcs_decode_error; - } else - usize = 0; +error: + if (GetLastError() == ERROR_NO_UNICODE_TRANSLATION) + return -2; + PyErr_SetFromWindowsErr(0); + return -1; +} + +/* + * Decode a byte string from a code page into unicode object with an error + * handler. + * + * Returns consumed size if succeed, or raise a WindowsError or + * UnicodeDecodeError exception and returns -1 on error. + */ +static int +decode_code_page_errors(UINT code_page, + PyUnicodeObject **v, + const char *in, + int size, + const char *errors) +{ + const char *startin = in; + const char *endin = in + size; + const DWORD flags = decode_code_page_flags(code_page); + /* Ideally, we should get reason from FormatMessage. This is the Windows + 2000 English version of the message. */ + const char *reason = "No mapping for the Unicode character exists " + "in the target code page."; + /* each step cannot decode more than 1 character, but a character can be + represented as a surrogate pair */ + wchar_t buffer[2], *startout, *out; + int insize, outsize; + PyObject *errorHandler = NULL; + PyObject *exc = NULL; + PyObject *encoding_obj = NULL; + char *encoding; + DWORD err; + int ret = -1; + + assert(size > 0); + + encoding = code_page_name(code_page, &encoding_obj); + if (encoding == NULL) + return -1; + + if (errors == NULL || strcmp(errors, "strict") == 0) { + /* The last error was ERROR_NO_UNICODE_TRANSLATION, then we raise a + UnicodeDecodeError. */ + make_decode_exception(&exc, encoding, in, size, 0, 0, reason); + if (exc != NULL) { + PyCodec_StrictErrors(exc); + Py_CLEAR(exc); + } + goto error; + } if (*v == NULL) { /* Create unicode object */ - *v = _PyUnicode_New(usize); + if (size > PY_SSIZE_T_MAX / (Py_ssize_t)Py_ARRAY_LENGTH(buffer)) { + PyErr_NoMemory(); + goto error; + } + *v = _PyUnicode_New(size * Py_ARRAY_LENGTH(buffer)); if (*v == NULL) - return -1; - n = 0; + goto error; + startout = PyUnicode_AS_UNICODE(*v); } else { /* Extend unicode object */ - n = PyUnicode_GET_SIZE(*v); - if (PyUnicode_Resize((PyObject**)v, n + usize) < 0) - return -1; + Py_ssize_t n = PyUnicode_GET_SIZE(*v); + if (size > (PY_SSIZE_T_MAX - n) / (Py_ssize_t)Py_ARRAY_LENGTH(buffer)) { + PyErr_NoMemory(); + goto error; + } + if (PyUnicode_Resize((PyObject**)v, n + size * Py_ARRAY_LENGTH(buffer)) < 0) + goto error; + startout = PyUnicode_AS_UNICODE(*v) + n; } - /* Do the conversion */ - if (usize > 0) { - p = PyUnicode_AS_UNICODE(*v) + n; - if (0 == MultiByteToWideChar(CP_ACP, flags, s, size, p, usize)) { - goto mbcs_decode_error; + /* Decode the byte string character per character */ + out = startout; + while (in < endin) + { + /* Decode a character */ + insize = 1; + do + { + outsize = MultiByteToWideChar(code_page, flags, + in, insize, + buffer, Py_ARRAY_LENGTH(buffer)); + if (outsize > 0) + break; + err = GetLastError(); + if (err != ERROR_NO_UNICODE_TRANSLATION + && err != ERROR_INSUFFICIENT_BUFFER) + { + PyErr_SetFromWindowsErr(0); + goto error; + } + insize++; + } + /* 4=maximum length of a UTF-8 sequence */ + while (insize <= 4 && (in + insize) <= endin); + + if (outsize <= 0) { + Py_ssize_t startinpos, endinpos, outpos; + + startinpos = in - startin; + endinpos = startinpos + 1; + outpos = out - PyUnicode_AS_UNICODE(*v); + if (unicode_decode_call_errorhandler( + errors, &errorHandler, + encoding, reason, + &startin, &endin, &startinpos, &endinpos, &exc, &in, + v, &outpos, &out)) + { + goto error; + } + } + else { + in += insize; + memcpy(out, buffer, outsize * sizeof(wchar_t)); + out += outsize; } } - return size; -mbcs_decode_error: - /* If the last error was ERROR_NO_UNICODE_TRANSLATION, then - we raise a UnicodeDecodeError - else it is a 'generic' - windows error - */ - if (GetLastError()==ERROR_NO_UNICODE_TRANSLATION) { - /* Ideally, we should get reason from FormatMessage - this - is the Windows 2000 English version of the message - */ - PyObject *exc = NULL; - const char *reason = "No mapping for the Unicode character exists " - "in the target multi-byte code page."; - make_decode_exception(&exc, "mbcs", s, size, 0, 0, reason); - if (exc != NULL) { - PyCodec_StrictErrors(exc); - Py_DECREF(exc); + /* write a NUL character at the end */ + *out = 0; + + /* Extend unicode object */ + outsize = out - startout; + assert(outsize <= PyUnicode_WSTR_LENGTH(*v)); + if (PyUnicode_Resize((PyObject**)v, outsize) < 0) + goto error; + ret = 0; + +error: + Py_XDECREF(encoding_obj); + Py_XDECREF(errorHandler); + Py_XDECREF(exc); + return ret; +} + +/* + * Decode a byte string from a Windows code page into unicode object. If + * 'final' is set, converts trailing lead-byte too. + * + * Returns consumed size if succeed, or raise a WindowsError or + * UnicodeDecodeError exception and returns -1 on error. + */ +static int +decode_code_page(UINT code_page, + PyUnicodeObject **v, + const char *s, int size, + int final, const char *errors) +{ + int done; + + /* Skip trailing lead-byte unless 'final' is set */ + if (size == 0) { + if (*v == NULL) { + Py_INCREF(unicode_empty); + *v = (PyUnicodeObject*)unicode_empty; + if (*v == NULL) + return -1; } - } else { - PyErr_SetFromWindowsErrWithFilename(0, NULL); + return 0; } - return -1; + + if (!final && is_dbcs_lead_byte(code_page, s, size - 1)) + --size; + + done = decode_code_page_strict(code_page, v, s, size); + if (done == -2) + done = decode_code_page_errors(code_page, v, s, size, errors); + return done; } -PyObject * -PyUnicode_DecodeMBCSStateful(const char *s, - Py_ssize_t size, - const char *errors, - Py_ssize_t *consumed) +static PyObject * +decode_code_page_stateful(int code_page, + const char *s, + Py_ssize_t size, + const char *errors, + Py_ssize_t *consumed) { PyUnicodeObject *v = NULL; int done; + if (code_page < 0) { + PyErr_SetString(PyExc_ValueError, "invalid code page number"); + return NULL; + } + if (consumed) *consumed = 0; #ifdef NEED_RETRY retry: if (size > INT_MAX) - done = decode_mbcs(&v, s, INT_MAX, 0, errors); + done = decode_code_page(code_page, &v, s, INT_MAX, 0, errors); else #endif - done = decode_mbcs(&v, s, (int)size, !consumed, errors); + done = decode_code_page(code_page, &v, s, (int)size, !consumed, errors); if (done < 0) { Py_XDECREF(v); @@ -7036,6 +7217,7 @@ PyUnicode_DecodeMBCSStateful(const char *s, goto retry; } #endif + #ifndef DONT_MAKE_RESULT_READY if (_PyUnicode_READY_REPLACE(&v)) { Py_DECREF(v); @@ -7047,6 +7229,25 @@ PyUnicode_DecodeMBCSStateful(const char *s, } PyObject * +PyUnicode_DecodeCodePageStateful(int code_page, + const char *s, + Py_ssize_t size, + const char *errors, + Py_ssize_t *consumed) +{ + return decode_code_page_stateful(code_page, s, size, errors, consumed); +} + +PyObject * +PyUnicode_DecodeMBCSStateful(const char *s, + Py_ssize_t size, + const char *errors, + Py_ssize_t *consumed) +{ + return decode_code_page_stateful(CP_ACP, s, size, errors, consumed); +} + +PyObject * PyUnicode_DecodeMBCS(const char *s, Py_ssize_t size, const char *errors) @@ -7054,105 +7255,342 @@ PyUnicode_DecodeMBCS(const char *s, return PyUnicode_DecodeMBCSStateful(s, size, errors, NULL); } +static DWORD +encode_code_page_flags(UINT code_page, const char *errors) +{ + if (code_page == CP_UTF8) { + if (winver.dwMajorVersion >= 6) + /* CP_UTF8 supports WC_ERR_INVALID_CHARS on Windows Vista + and later */ + return WC_ERR_INVALID_CHARS; + else + /* CP_UTF8 only supports flags=0 on Windows older than Vista */ + return 0; + } + else if (code_page == CP_UTF7) { + /* CP_UTF7 only supports flags=0 */ + return 0; + } + else { + if (errors != NULL && strcmp(errors, "replace") == 0) + return 0; + else + return WC_NO_BEST_FIT_CHARS; + } +} + /* - * Convert unicode into string object (MBCS). - * Returns 0 if succeed, -1 otherwise. + * Encode a Unicode string to a Windows code page into a byte string in strict + * mode. + * + * Returns consumed characters if succeed, returns -2 on encode error, or raise + * a WindowsError and returns -1 on other error. */ static int -encode_mbcs(PyObject **repr, - const Py_UNICODE *p, /* unicode */ - int size, /* size of unicode */ - const char* errors) +encode_code_page_strict(UINT code_page, PyObject **outbytes, + const Py_UNICODE *p, const int size, + const char* errors) { BOOL usedDefaultChar = FALSE; - BOOL *pusedDefaultChar; - int mbcssize; - Py_ssize_t n; + BOOL *pusedDefaultChar = &usedDefaultChar; + int outsize; PyObject *exc = NULL; - DWORD flags; + const DWORD flags = encode_code_page_flags(code_page, NULL); + char *out; - assert(size >= 0); + assert(size > 0); - /* check and handle 'errors' arg */ - if (errors==NULL || strcmp(errors, "strict")==0) { - flags = WC_NO_BEST_FIT_CHARS; + if (code_page != CP_UTF8 && code_page != CP_UTF7) pusedDefaultChar = &usedDefaultChar; - } else if (strcmp(errors, "replace")==0) { - flags = 0; + else pusedDefaultChar = NULL; - } else { - PyErr_Format(PyExc_ValueError, - "mbcs encoding does not support errors='%s'", - errors); - return -1; - } /* First get the size of the result */ - if (size > 0) { - mbcssize = WideCharToMultiByte(CP_ACP, flags, p, size, NULL, 0, - NULL, pusedDefaultChar); - if (mbcssize == 0) { - PyErr_SetFromWindowsErrWithFilename(0, NULL); - return -1; - } - /* If we used a default char, then we failed! */ - if (pusedDefaultChar && *pusedDefaultChar) - goto mbcs_encode_error; - } else { - mbcssize = 0; - } + outsize = WideCharToMultiByte(code_page, flags, + p, size, + NULL, 0, + NULL, pusedDefaultChar); + if (outsize <= 0) + goto error; + /* If we used a default char, then we failed! */ + if (pusedDefaultChar && *pusedDefaultChar) + return -2; - if (*repr == NULL) { + if (*outbytes == NULL) { /* Create string object */ - *repr = PyBytes_FromStringAndSize(NULL, mbcssize); - if (*repr == NULL) + *outbytes = PyBytes_FromStringAndSize(NULL, outsize); + if (*outbytes == NULL) return -1; - n = 0; + out = PyBytes_AS_STRING(*outbytes); } else { /* Extend string object */ - n = PyBytes_Size(*repr); - if (_PyBytes_Resize(repr, n + mbcssize) < 0) + const Py_ssize_t n = PyBytes_Size(*outbytes); + if (outsize > PY_SSIZE_T_MAX - n) { + PyErr_NoMemory(); return -1; + } + if (_PyBytes_Resize(outbytes, n + outsize) < 0) + return -1; + out = PyBytes_AS_STRING(*outbytes) + n; } /* Do the conversion */ - if (size > 0) { - char *s = PyBytes_AS_STRING(*repr) + n; - if (0 == WideCharToMultiByte(CP_ACP, flags, p, size, s, mbcssize, - NULL, pusedDefaultChar)) { - PyErr_SetFromWindowsErrWithFilename(0, NULL); - return -1; + outsize = WideCharToMultiByte(code_page, flags, + p, size, + out, outsize, + NULL, pusedDefaultChar); + if (outsize <= 0) + goto error; + if (pusedDefaultChar && *pusedDefaultChar) + return -2; + return 0; + +error: + if (GetLastError() == ERROR_NO_UNICODE_TRANSLATION) + return -2; + PyErr_SetFromWindowsErr(0); + return -1; +} + +/* + * Encode a Unicode string to a Windows code page into a byte string using a + * error handler. + * + * Returns consumed characters if succeed, or raise a WindowsError and returns + * -1 on other error. + */ +static int +encode_code_page_errors(UINT code_page, PyObject **outbytes, + const Py_UNICODE *in, const int insize, + const char* errors) +{ + const DWORD flags = encode_code_page_flags(code_page, errors); + const Py_UNICODE *startin = in; + const Py_UNICODE *endin = in + insize; + /* Ideally, we should get reason from FormatMessage. This is the Windows + 2000 English version of the message. */ + const char *reason = "invalid character"; + /* 4=maximum length of a UTF-8 sequence */ + char buffer[4]; + BOOL usedDefaultChar = FALSE, *pusedDefaultChar; + Py_ssize_t outsize; + char *out; + int charsize; + PyObject *errorHandler = NULL; + PyObject *exc = NULL; + PyObject *encoding_obj = NULL; + char *encoding; + int err; + Py_ssize_t startpos, newpos, newoutsize; + PyObject *rep; + int ret = -1; + + assert(insize > 0); + + encoding = code_page_name(code_page, &encoding_obj); + if (encoding == NULL) + return -1; + + if (errors == NULL || strcmp(errors, "strict") == 0) { + /* The last error was ERROR_NO_UNICODE_TRANSLATION, + then we raise a UnicodeEncodeError. */ + make_encode_exception(&exc, encoding, in, insize, 0, 0, reason); + if (exc != NULL) { + PyCodec_StrictErrors(exc); + Py_DECREF(exc); } - if (pusedDefaultChar && *pusedDefaultChar) - goto mbcs_encode_error; + Py_XDECREF(encoding_obj); + return -1; } - return 0; -mbcs_encode_error: - raise_encode_exception(&exc, "mbcs", p, size, 0, 0, "invalid character"); + if (code_page != CP_UTF8 && code_page != CP_UTF7) + pusedDefaultChar = &usedDefaultChar; + else + pusedDefaultChar = NULL; + + if (Py_ARRAY_LENGTH(buffer) > PY_SSIZE_T_MAX / insize) { + PyErr_NoMemory(); + goto error; + } + outsize = insize * Py_ARRAY_LENGTH(buffer); + + if (*outbytes == NULL) { + /* Create string object */ + *outbytes = PyBytes_FromStringAndSize(NULL, outsize); + if (*outbytes == NULL) + goto error; + out = PyBytes_AS_STRING(*outbytes); + } + else { + /* Extend string object */ + Py_ssize_t n = PyBytes_Size(*outbytes); + if (n > PY_SSIZE_T_MAX - outsize) { + PyErr_NoMemory(); + goto error; + } + if (_PyBytes_Resize(outbytes, n + outsize) < 0) + goto error; + out = PyBytes_AS_STRING(*outbytes) + n; + } + + /* Encode the string character per character */ + while (in < endin) + { + if ((in + 2) <= endin + && 0xD800 <= in[0] && in[0] <= 0xDBFF + && 0xDC00 <= in[1] && in[1] <= 0xDFFF) + charsize = 2; + else + charsize = 1; + + outsize = WideCharToMultiByte(code_page, flags, + in, charsize, + buffer, Py_ARRAY_LENGTH(buffer), + NULL, pusedDefaultChar); + if (outsize > 0) { + if (pusedDefaultChar == NULL || !(*pusedDefaultChar)) + { + in += charsize; + memcpy(out, buffer, outsize); + out += outsize; + continue; + } + } + else if (GetLastError() != ERROR_NO_UNICODE_TRANSLATION) { + PyErr_SetFromWindowsErr(0); + goto error; + } + + charsize = Py_MAX(charsize - 1, 1); + startpos = in - startin; + rep = unicode_encode_call_errorhandler( + errors, &errorHandler, encoding, reason, + startin, insize, &exc, + startpos, startpos + charsize, &newpos); + if (rep == NULL) + goto error; + in = startin + newpos; + + if (PyBytes_Check(rep)) { + outsize = PyBytes_GET_SIZE(rep); + if (outsize != 1) { + Py_ssize_t offset = out - PyBytes_AS_STRING(*outbytes); + newoutsize = PyBytes_GET_SIZE(*outbytes) + (outsize - 1); + if (_PyBytes_Resize(outbytes, newoutsize) < 0) { + Py_DECREF(rep); + goto error; + } + out = PyBytes_AS_STRING(*outbytes) + offset; + } + memcpy(out, PyBytes_AS_STRING(rep), outsize); + out += outsize; + } + else { + Py_ssize_t i; + enum PyUnicode_Kind kind; + void *data; + + if (PyUnicode_READY(rep) < 0) { + Py_DECREF(rep); + goto error; + } + + outsize = PyUnicode_GET_LENGTH(rep); + if (outsize != 1) { + Py_ssize_t offset = out - PyBytes_AS_STRING(*outbytes); + newoutsize = PyBytes_GET_SIZE(*outbytes) + (outsize - 1); + if (_PyBytes_Resize(outbytes, newoutsize) < 0) { + Py_DECREF(rep); + goto error; + } + out = PyBytes_AS_STRING(*outbytes) + offset; + } + kind = PyUnicode_KIND(rep); + data = PyUnicode_DATA(rep); + for (i=0; i < outsize; i++) { + Py_UCS4 ch = PyUnicode_READ(kind, data, i); + if (ch > 127) { + raise_encode_exception(&exc, + encoding, + startin, insize, + startpos, startpos + charsize, + "unable to encode error handler result to ASCII"); + Py_DECREF(rep); + goto error; + } + *out = (unsigned char)ch; + out++; + } + } + Py_DECREF(rep); + } + /* write a NUL byte */ + *out = 0; + outsize = out - PyBytes_AS_STRING(*outbytes); + assert(outsize <= PyBytes_GET_SIZE(*outbytes)); + if (_PyBytes_Resize(outbytes, outsize) < 0) + goto error; + ret = 0; + +error: + Py_XDECREF(encoding_obj); + Py_XDECREF(errorHandler); Py_XDECREF(exc); - return -1; + return ret; } -PyObject * -PyUnicode_EncodeMBCS(const Py_UNICODE *p, - Py_ssize_t size, - const char *errors) +/* + * Encode a Unicode string to a Windows code page into a byte string. + * + * Returns consumed characters if succeed, or raise a WindowsError and returns + * -1 on other error. + */ +static int +encode_code_page_chunk(UINT code_page, PyObject **outbytes, + const Py_UNICODE *p, int size, + const char* errors) +{ + int done; + + if (size == 0) { + if (*outbytes == NULL) { + *outbytes = PyBytes_FromStringAndSize(NULL, 0); + if (*outbytes == NULL) + return -1; + } + return 0; + } + + done = encode_code_page_strict(code_page, outbytes, p, size, errors); + if (done == -2) + done = encode_code_page_errors(code_page, outbytes, p, size, errors); + return done; +} + +static PyObject * +encode_code_page(int code_page, + const Py_UNICODE *p, Py_ssize_t size, + const char *errors) { - PyObject *repr = NULL; + PyObject *outbytes = NULL; int ret; + if (code_page < 0) { + PyErr_SetString(PyExc_ValueError, "invalid code page number"); + return NULL; + } + #ifdef NEED_RETRY retry: if (size > INT_MAX) - ret = encode_mbcs(&repr, p, INT_MAX, errors); + ret = encode_code_page_chunk(code_page, &outbytes, p, INT_MAX, errors); else #endif - ret = encode_mbcs(&repr, p, (int)size, errors); + ret = encode_code_page_chunk(code_page, &outbytes, p, (int)size, errors); if (ret < 0) { - Py_XDECREF(repr); + Py_XDECREF(outbytes); return NULL; } @@ -7164,7 +7602,28 @@ PyUnicode_EncodeMBCS(const Py_UNICODE *p, } #endif - return repr; + return outbytes; +} + +PyObject * +PyUnicode_EncodeMBCS(const Py_UNICODE *p, + Py_ssize_t size, + const char *errors) +{ + return encode_code_page(CP_ACP, p, size, errors); +} + +PyObject * +PyUnicode_EncodeCodePage(int code_page, + PyObject *unicode, + const char *errors) +{ + const Py_UNICODE *p; + Py_ssize_t size; + p = PyUnicode_AsUnicodeAndSize(unicode, &size); + if (p == NULL) + return NULL; + return encode_code_page(code_page, p, size, errors); } PyObject * @@ -13434,7 +13893,7 @@ PyTypeObject PyUnicode_Type = { /* Initialize the Unicode implementation */ -void _PyUnicode_Init(void) +int _PyUnicode_Init(void) { int i; @@ -13467,6 +13926,15 @@ void _PyUnicode_Init(void) Py_ARRAY_LENGTH(linebreak)); PyType_Ready(&EncodingMapType); + +#ifdef HAVE_MBCS + winver.dwOSVersionInfoSize = sizeof(winver); + if (!GetVersionEx((OSVERSIONINFO*)&winver)) { + PyErr_SetFromWindowsErr(0); + return -1; + } +#endif + return 0; } /* Finalize the Unicode implementation */ |