From c72e4e6dccce99bcdcb45959767436d7e5cfda8c Mon Sep 17 00:00:00 2001 From: Nick Coghlan Date: Fri, 22 Nov 2013 22:39:36 +1000 Subject: Issue #19619: Blacklist non-text codecs in method API str.encode, bytes.decode and bytearray.decode now use an internal API to throw LookupError for known non-text encodings, rather than attempting the encoding or decoding operation and then throwing a TypeError for an unexpected output type. The latter mechanism remains in place for third party non-text encodings. --- Python/codecs.c | 138 +++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 122 insertions(+), 16 deletions(-) (limited to 'Python/codecs.c') diff --git a/Python/codecs.c b/Python/codecs.c index 8fe0af7bf0..5ff41b57df 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -353,18 +353,15 @@ wrap_codec_error(const char *operation, errors is passed to the encoder factory as argument if non-NULL. */ -PyObject *PyCodec_Encode(PyObject *object, - const char *encoding, - const char *errors) +static PyObject * +_PyCodec_EncodeInternal(PyObject *object, + PyObject *encoder, + const char *encoding, + const char *errors) { - PyObject *encoder = NULL; PyObject *args = NULL, *result = NULL; PyObject *v = NULL; - encoder = PyCodec_Encoder(encoding); - if (encoder == NULL) - goto onError; - args = args_tuple(object, errors); if (args == NULL) goto onError; @@ -402,18 +399,15 @@ PyObject *PyCodec_Encode(PyObject *object, errors is passed to the decoder factory as argument if non-NULL. */ -PyObject *PyCodec_Decode(PyObject *object, - const char *encoding, - const char *errors) +static PyObject * +_PyCodec_DecodeInternal(PyObject *object, + PyObject *decoder, + const char *encoding, + const char *errors) { - PyObject *decoder = NULL; PyObject *args = NULL, *result = NULL; PyObject *v; - decoder = PyCodec_Decoder(encoding); - if (decoder == NULL) - goto onError; - args = args_tuple(object, errors); if (args == NULL) goto onError; @@ -445,6 +439,118 @@ PyObject *PyCodec_Decode(PyObject *object, return NULL; } +/* Generic encoding/decoding API */ +PyObject *PyCodec_Encode(PyObject *object, + const char *encoding, + const char *errors) +{ + PyObject *encoder; + + encoder = PyCodec_Encoder(encoding); + if (encoder == NULL) + return NULL; + + return _PyCodec_EncodeInternal(object, encoder, encoding, errors); +} + +PyObject *PyCodec_Decode(PyObject *object, + const char *encoding, + const char *errors) +{ + PyObject *decoder; + + decoder = PyCodec_Decoder(encoding); + if (decoder == NULL) + return NULL; + + return _PyCodec_DecodeInternal(object, decoder, encoding, errors); +} + +/* Text encoding/decoding API */ +static +PyObject *codec_getitem_checked(const char *encoding, + const char *operation_name, + int index) +{ + _Py_IDENTIFIER(_is_text_encoding); + PyObject *codec; + PyObject *attr; + PyObject *v; + int is_text_codec; + + codec = _PyCodec_Lookup(encoding); + if (codec == NULL) + return NULL; + + /* Backwards compatibility: assume any raw tuple describes a text + * encoding, and the same for anything lacking the private + * attribute. + */ + if (!PyTuple_CheckExact(codec)) { + attr = _PyObject_GetAttrId(codec, &PyId__is_text_encoding); + if (attr == NULL) { + if (PyErr_ExceptionMatches(PyExc_AttributeError)) { + PyErr_Clear(); + } else { + Py_DECREF(codec); + return NULL; + } + } else { + is_text_codec = PyObject_IsTrue(attr); + Py_DECREF(attr); + if (!is_text_codec) { + Py_DECREF(codec); + PyErr_Format(PyExc_LookupError, + "'%.400s' is not a text encoding; " + "use codecs.%s() to handle arbitrary codecs", + encoding, operation_name); + return NULL; + } + } + } + + v = PyTuple_GET_ITEM(codec, index); + Py_DECREF(codec); + Py_INCREF(v); + return v; +} + +static PyObject * _PyCodec_TextEncoder(const char *encoding) +{ + return codec_getitem_checked(encoding, "encode", 0); +} + +static PyObject * _PyCodec_TextDecoder(const char *encoding) +{ + return codec_getitem_checked(encoding, "decode", 1); +} + +PyObject *_PyCodec_EncodeText(PyObject *object, + const char *encoding, + const char *errors) +{ + PyObject *encoder; + + encoder = _PyCodec_TextEncoder(encoding); + if (encoder == NULL) + return NULL; + + return _PyCodec_EncodeInternal(object, encoder, encoding, errors); +} + +PyObject *_PyCodec_DecodeText(PyObject *object, + const char *encoding, + const char *errors) +{ + PyObject *decoder; + + decoder = _PyCodec_TextDecoder(encoding); + if (decoder == NULL) + return NULL; + + return _PyCodec_DecodeInternal(object, decoder, encoding, errors); +} + /* Register the error handling callback function error under the name name. This function will be called by the codec when it encounters an unencodable characters/undecodable bytes and doesn't know the -- cgit v1.2.1