diff options
| author | Nick Coghlan <ncoghlan@gmail.com> | 2013-11-22 22:39:36 +1000 | 
|---|---|---|
| committer | Nick Coghlan <ncoghlan@gmail.com> | 2013-11-22 22:39:36 +1000 | 
| commit | c72e4e6dccce99bcdcb45959767436d7e5cfda8c (patch) | |
| tree | 029832d80cc82a039dc1014302c9eb9dd2214543 /Python/codecs.c | |
| parent | 322f5ba0d8d5e8a9cd2a134fa215884b4cbc373d (diff) | |
| download | cpython-git-c72e4e6dccce99bcdcb45959767436d7e5cfda8c.tar.gz | |
Issue #19619: Blacklist non-text codecs in method API
str.encode, bytes.decode and bytearray.decode now use an
internal API to raise LookupError for known non-text encodings,
rather than attempting the encoding or decoding operation and
then raising a TypeError for an unexpected output type.
The latter mechanism remains in place for third party non-text
encodings.
Diffstat (limited to 'Python/codecs.c')
| -rw-r--r-- | Python/codecs.c | 138 | 
1 file changed, 122 insertions, 16 deletions
| diff --git a/Python/codecs.c b/Python/codecs.c index 8fe0af7bf0..5ff41b57df 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -353,18 +353,15 @@ wrap_codec_error(const char *operation,     errors is passed to the encoder factory as argument if non-NULL. */ -PyObject *PyCodec_Encode(PyObject *object, -                         const char *encoding, -                         const char *errors) +static PyObject * +_PyCodec_EncodeInternal(PyObject *object, +                        PyObject *encoder, +                        const char *encoding, +                        const char *errors)  { -    PyObject *encoder = NULL;      PyObject *args = NULL, *result = NULL;      PyObject *v = NULL; -    encoder = PyCodec_Encoder(encoding); -    if (encoder == NULL) -        goto onError; -      args = args_tuple(object, errors);      if (args == NULL)          goto onError; @@ -402,18 +399,15 @@ PyObject *PyCodec_Encode(PyObject *object,     errors is passed to the decoder factory as argument if non-NULL. 
*/ -PyObject *PyCodec_Decode(PyObject *object, -                         const char *encoding, -                         const char *errors) +static PyObject * +_PyCodec_DecodeInternal(PyObject *object, +                        PyObject *decoder, +                        const char *encoding, +                        const char *errors)  { -    PyObject *decoder = NULL;      PyObject *args = NULL, *result = NULL;      PyObject *v; -    decoder = PyCodec_Decoder(encoding); -    if (decoder == NULL) -        goto onError; -      args = args_tuple(object, errors);      if (args == NULL)          goto onError; @@ -445,6 +439,118 @@ PyObject *PyCodec_Decode(PyObject *object,      return NULL;  } +/* Generic encoding/decoding API */ +PyObject *PyCodec_Encode(PyObject *object, +                         const char *encoding, +                         const char *errors) +{ +    PyObject *encoder; + +    encoder = PyCodec_Encoder(encoding); +    if (encoder == NULL) +        return NULL; + +    return _PyCodec_EncodeInternal(object, encoder, encoding, errors); +} + +PyObject *PyCodec_Decode(PyObject *object, +                         const char *encoding, +                         const char *errors) +{ +    PyObject *decoder; + +    decoder = PyCodec_Decoder(encoding); +    if (decoder == NULL) +        return NULL; + +    return _PyCodec_DecodeInternal(object, decoder, encoding, errors); +} + +/* Text encoding/decoding API */ +static +PyObject *codec_getitem_checked(const char *encoding, +                                const char *operation_name, +                                int index) +{ +    _Py_IDENTIFIER(_is_text_encoding); +    PyObject *codec; +    PyObject *attr; +    PyObject *v; +    int is_text_codec; + +    codec = _PyCodec_Lookup(encoding); +    if (codec == NULL) +        return NULL; + +    /* Backwards compatibility: assume any raw tuple describes a text +     * encoding, and the same for anything lacking the private +     * attribute. 
+     */ +    if (!PyTuple_CheckExact(codec)) { +        attr = _PyObject_GetAttrId(codec, &PyId__is_text_encoding); +        if (attr == NULL) { +            if (PyErr_ExceptionMatches(PyExc_AttributeError)) { +                PyErr_Clear(); +            } else { +                Py_DECREF(codec); +                return NULL; +            } +        } else { +            is_text_codec = PyObject_IsTrue(attr); +            Py_DECREF(attr); +            if (!is_text_codec) { +                Py_DECREF(codec); +                PyErr_Format(PyExc_LookupError, +                             "'%.400s' is not a text encoding; " +                             "use codecs.%s() to handle arbitrary codecs", +                             encoding, operation_name); +                return NULL; +            } +        } +    } + +    v = PyTuple_GET_ITEM(codec, index); +    Py_DECREF(codec); +    Py_INCREF(v); +    return v; +} + +static PyObject * _PyCodec_TextEncoder(const char *encoding) +{ +    return codec_getitem_checked(encoding, "encode", 0); +} + +static PyObject * _PyCodec_TextDecoder(const char *encoding) +{ +    return codec_getitem_checked(encoding, "decode", 1); +} + +PyObject *_PyCodec_EncodeText(PyObject *object, +                              const char *encoding, +                              const char *errors) +{ +    PyObject *encoder; + +    encoder = _PyCodec_TextEncoder(encoding); +    if (encoder == NULL) +        return NULL; + +    return _PyCodec_EncodeInternal(object, encoder, encoding, errors); +} + +PyObject *_PyCodec_DecodeText(PyObject *object, +                              const char *encoding, +                              const char *errors) +{ +    PyObject *decoder; + +    decoder = _PyCodec_TextDecoder(encoding); +    if (decoder == NULL) +        return NULL; + +    return _PyCodec_DecodeInternal(object, decoder, encoding, errors); +} +  /* Register the error handling callback function error under the name     name. 
This function will be called by the codec when it encounters     an unencodable characters/undecodable bytes and doesn't know the | 
