summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNick Coghlan <ncoghlan@gmail.com>2013-11-23 11:13:36 +1000
committerNick Coghlan <ncoghlan@gmail.com>2013-11-23 11:13:36 +1000
commit9c1aed8f94a2b7a40c3a4db60cb289c90e001896 (patch)
tree563a50fe1083bc0fdba7ea27e0def43f93c91455
parent12820c0d5d6b3ccbd191703d1003794ddb1bcac9 (diff)
downloadcpython-git-9c1aed8f94a2b7a40c3a4db60cb289c90e001896.tar.gz
Close #7475: Restore binary & text transform codecs
The codecs themselves were restored in Python 3.2; this commit completes the restoration by adding back the convenience aliases. These aliases were originally left out because they produced confusing errors when used with the text-encoding-specific convenience methods. Python 3.4 includes several improvements to those errors, thus permitting the aliases to be restored as well.
-rw-r--r--Doc/library/codecs.rst116
-rw-r--r--Doc/whatsnew/3.4.rst50
-rw-r--r--Lib/encodings/aliases.py36
-rw-r--r--Lib/test/test_codecs.py20
4 files changed, 142 insertions, 80 deletions
diff --git a/Doc/library/codecs.rst b/Doc/library/codecs.rst
index 358fde74b8..ef79918980 100644
--- a/Doc/library/codecs.rst
+++ b/Doc/library/codecs.rst
@@ -1188,6 +1188,9 @@ common use case for codecs, the underlying codec infrastructure supports
arbitrary data transforms rather than just text encodings). For asymmetric
codecs, the stated purpose describes the encoding direction.
+Text Encodings
+^^^^^^^^^^^^^^
+
The following codecs provide :class:`str` to :class:`bytes` encoding and
:term:`bytes-like object` to :class:`str` decoding, similar to the Unicode text
encodings.
@@ -1234,62 +1237,83 @@ encodings.
| | | .. deprecated:: 3.3 |
+--------------------+---------+---------------------------+
-The following codecs provide :term:`bytes-like object` to :class:`bytes`
-mappings.
-
-
-.. tabularcolumns:: |l|L|L|
-
-+----------------------+------------------------------+------------------------------+
-| Codec | Purpose | Encoder / decoder |
-+======================+==============================+==============================+
-| base64_codec [#b64]_ | Convert operand to MIME | :meth:`base64.b64encode` / |
-| | base64 (the result always | :meth:`base64.b64decode` |
-| | includes a trailing | |
-| | ``'\n'``) | |
-| | | |
-| | .. versionchanged:: 3.4 | |
-| | accepts any | |
-| | :term:`bytes-like object` | |
-| | as input for encoding and | |
-| | decoding | |
-+----------------------+------------------------------+------------------------------+
-| bz2_codec | Compress the operand | :meth:`bz2.compress` / |
-| | using bz2 | :meth:`bz2.decompress` |
-+----------------------+------------------------------+------------------------------+
-| hex_codec | Convert operand to | :meth:`base64.b16encode` / |
-| | hexadecimal | :meth:`base64.b16decode` |
-| | representation, with two | |
-| | digits per byte | |
-+----------------------+------------------------------+------------------------------+
-| quopri_codec | Convert operand to MIME | :meth:`quopri.encodestring` /|
-| | quoted printable | :meth:`quopri.decodestring` |
-+----------------------+------------------------------+------------------------------+
-| uu_codec | Convert the operand using | :meth:`uu.encode` / |
-| | uuencode | :meth:`uu.decode` |
-+----------------------+------------------------------+------------------------------+
-| zlib_codec | Compress the operand | :meth:`zlib.compress` / |
-| | using gzip | :meth:`zlib.decompress` |
-+----------------------+------------------------------+------------------------------+
+.. _binary-transforms:
+
+Binary Transforms
+^^^^^^^^^^^^^^^^^
+
+The following codecs provide binary transforms: :term:`bytes-like object`
+to :class:`bytes` mappings.
+
+
+.. tabularcolumns:: |l|L|L|L|
+
++----------------------+------------------+------------------------------+------------------------------+
+| Codec | Aliases | Purpose | Encoder / decoder |
++======================+==================+==============================+==============================+
+| base64_codec [#b64]_ | base64, base_64 | Convert operand to MIME | :meth:`base64.b64encode` / |
+| | | base64 (the result always | :meth:`base64.b64decode` |
+| | | includes a trailing | |
+| | | ``'\n'``) | |
+| | | | |
+| | | .. versionchanged:: 3.4 | |
+| | | accepts any | |
+| | | :term:`bytes-like object` | |
+| | | as input for encoding and | |
+| | | decoding | |
++----------------------+------------------+------------------------------+------------------------------+
+| bz2_codec | bz2 | Compress the operand | :meth:`bz2.compress` / |
+| | | using bz2 | :meth:`bz2.decompress` |
++----------------------+------------------+------------------------------+------------------------------+
+| hex_codec | hex | Convert operand to | :meth:`base64.b16encode` / |
+| | | hexadecimal | :meth:`base64.b16decode` |
+| | | representation, with two | |
+| | | digits per byte | |
++----------------------+------------------+------------------------------+------------------------------+
+| quopri_codec | quopri, | Convert operand to MIME | :meth:`quopri.encodestring` /|
+| | quotedprintable, | quoted printable | :meth:`quopri.decodestring` |
+| | quoted_printable | | |
++----------------------+------------------+------------------------------+------------------------------+
+| uu_codec | uu | Convert the operand using | :meth:`uu.encode` / |
+| | | uuencode | :meth:`uu.decode` |
++----------------------+------------------+------------------------------+------------------------------+
+| zlib_codec | zip, zlib | Compress the operand | :meth:`zlib.compress` / |
+| | | using zlib | :meth:`zlib.decompress` |
++----------------------+------------------+------------------------------+------------------------------+
.. [#b64] In addition to :term:`bytes-like objects <bytes-like object>`,
``'base64_codec'`` also accepts ASCII-only instances of :class:`str` for
decoding
+.. versionadded:: 3.2
+ Restoration of the binary transforms.
-The following codecs provide :class:`str` to :class:`str` mappings.
+.. versionchanged:: 3.4
+ Restoration of the aliases for the binary transforms.
-.. tabularcolumns:: |l|L|
-+--------------------+---------------------------+
-| Codec | Purpose |
-+====================+===========================+
-| rot_13 | Returns the Caesar-cypher |
-| | encryption of the operand |
-+--------------------+---------------------------+
+.. _text-transforms:
+
+Text Transforms
+^^^^^^^^^^^^^^^
+
+The following codec provides a text transform: a :class:`str` to :class:`str`
+mapping.
+
+.. tabularcolumns:: |l|l|L|
+
++--------------------+---------+---------------------------+
+| Codec | Aliases | Purpose |
++====================+=========+===========================+
+| rot_13 | rot13 | Returns the Caesar-cypher |
+| | | encryption of the operand |
++--------------------+---------+---------------------------+
.. versionadded:: 3.2
- bytes-to-bytes and str-to-str codecs.
+ Restoration of the ``rot_13`` text transform.
+
+.. versionchanged:: 3.4
+ Restoration of the ``rot13`` alias.
:mod:`encodings.idna` --- Internationalized Domain Names in Applications
diff --git a/Doc/whatsnew/3.4.rst b/Doc/whatsnew/3.4.rst
index 09d8be87a9..6fc0e482dc 100644
--- a/Doc/whatsnew/3.4.rst
+++ b/Doc/whatsnew/3.4.rst
@@ -103,7 +103,8 @@ New expected features for Python implementations:
* :ref:`PEP 446: Make newly created file descriptors non-inheritable <pep-446>`.
* command line option for :ref:`isolated mode <using-on-misc-options>`,
(:issue:`16499`).
-* improvements to handling of non-Unicode codecs
+* :ref:`improvements <codec-handling-improvements>` in the handling of
+ codecs that are not text encodings
Significantly Improved Library Modules:
@@ -173,8 +174,10 @@ PEP 446: Make newly created file descriptors non-inheritable
PEP written and implemented by Victor Stinner.
-Improvements to handling of non-Unicode codecs
-==============================================
+.. _codec-handling-improvements:
+
+Improvements to codec handling
+==============================
Since it was first introduced, the :mod:`codecs` module has always been
intended to operate as a type-neutral dynamic encoding and decoding
@@ -186,7 +189,7 @@ fact.
As a key step in clarifying the situation, the :meth:`codecs.encode` and
:meth:`codecs.decode` convenience functions are now properly documented in
Python 2.7, 3.3 and 3.4. These functions have existed in the :mod:`codecs`
-module and have been covered by the regression test suite since Python 2.4,
+module (and have been covered by the regression test suite) since Python 2.4,
but were previously only discoverable through runtime introspection.
Unlike the convenience methods on :class:`str`, :class:`bytes` and
@@ -199,43 +202,58 @@ In Python 3.4, the interpreter is able to identify the known non-text
encodings provided in the standard library and direct users towards these
general purpose convenience functions when appropriate::
- >>> import codecs
-
- >>> b"abcdef".decode("hex_codec")
+ >>> b"abcdef".decode("hex")
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
- LookupError: 'hex_codec' is not a text encoding; use codecs.decode() to handle arbitrary codecs
+ LookupError: 'hex' is not a text encoding; use codecs.decode() to handle arbitrary codecs
- >>> "hello".encode("rot_13")
+ >>> "hello".encode("rot13")
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
- LookupError: 'rot_13' is not a text encoding; use codecs.encode() to handle arbitrary codecs
+ LookupError: 'rot13' is not a text encoding; use codecs.encode() to handle arbitrary codecs
In a related change, whenever it is feasible without breaking backwards
compatibility, exceptions raised during encoding and decoding operations
will be wrapped in a chained exception of the same type that mentions the
name of the codec responsible for producing the error::
- >>> codecs.decode(b"abcdefgh", "hex_codec")
+ >>> import codecs
+
+ >>> codecs.decode(b"abcdefgh", "hex")
binascii.Error: Non-hexadecimal digit found
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
- binascii.Error: decoding with 'hex_codec' codec failed (Error: Non-hexadecimal digit found)
+ binascii.Error: decoding with 'hex' codec failed (Error: Non-hexadecimal digit found)
- >>> codecs.encode("hello", "bz2_codec")
+ >>> codecs.encode("hello", "bz2")
TypeError: 'str' does not support the buffer interface
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
- TypeError: encoding with 'bz2_codec' codec failed (TypeError: 'str' does not support the buffer interface)
+ TypeError: encoding with 'bz2' codec failed (TypeError: 'str' does not support the buffer interface)
+
+Finally, as the examples above show, these improvements have permitted
+the restoration of the convenience aliases for the non-Unicode codecs that
+were themselves restored in Python 3.2. This means that encoding binary data
+to and from its hexadecimal representation (for example) can now be written
+as::
+
+ >>> from codecs import encode, decode
+ >>> encode(b"hello", "hex")
+ b'68656c6c6f'
+ >>> decode(b"68656c6c6f", "hex")
+ b'hello'
+
+The binary and text transforms provided in the standard library are detailed
+in :ref:`binary-transforms` and :ref:`text-transforms`.
-(Contributed by Nick Coghlan in :issue:`17827`, :issue:`17828` and
-:issue:`19619`)
+(Contributed by Nick Coghlan in :issue:`7475`, :issue:`17827`,
+:issue:`17828` and :issue:`19619`)
.. _pep-451:
diff --git a/Lib/encodings/aliases.py b/Lib/encodings/aliases.py
index 235deb5c30..331095b1f1 100644
--- a/Lib/encodings/aliases.py
+++ b/Lib/encodings/aliases.py
@@ -33,9 +33,9 @@ aliases = {
'us' : 'ascii',
'us_ascii' : 'ascii',
- ## base64_codec codec
- #'base64' : 'base64_codec',
- #'base_64' : 'base64_codec',
+ # base64_codec codec
+ 'base64' : 'base64_codec',
+ 'base_64' : 'base64_codec',
# big5 codec
'big5_tw' : 'big5',
@@ -45,8 +45,8 @@ aliases = {
'big5_hkscs' : 'big5hkscs',
'hkscs' : 'big5hkscs',
- ## bz2_codec codec
- #'bz2' : 'bz2_codec',
+ # bz2_codec codec
+ 'bz2' : 'bz2_codec',
# cp037 codec
'037' : 'cp037',
@@ -248,8 +248,8 @@ aliases = {
'cp936' : 'gbk',
'ms936' : 'gbk',
- ## hex_codec codec
- #'hex' : 'hex_codec',
+ # hex_codec codec
+ 'hex' : 'hex_codec',
# hp_roman8 codec
'roman8' : 'hp_roman8',
@@ -450,13 +450,13 @@ aliases = {
'cp154' : 'ptcp154',
'cyrillic_asian' : 'ptcp154',
- ## quopri_codec codec
- #'quopri' : 'quopri_codec',
- #'quoted_printable' : 'quopri_codec',
- #'quotedprintable' : 'quopri_codec',
+ # quopri_codec codec
+ 'quopri' : 'quopri_codec',
+ 'quoted_printable' : 'quopri_codec',
+ 'quotedprintable' : 'quopri_codec',
- ## rot_13 codec
- #'rot13' : 'rot_13',
+ # rot_13 codec
+ 'rot13' : 'rot_13',
# shift_jis codec
'csshiftjis' : 'shift_jis',
@@ -518,12 +518,12 @@ aliases = {
'utf8_ucs2' : 'utf_8',
'utf8_ucs4' : 'utf_8',
- ## uu_codec codec
- #'uu' : 'uu_codec',
+ # uu_codec codec
+ 'uu' : 'uu_codec',
- ## zlib_codec codec
- #'zip' : 'zlib_codec',
- #'zlib' : 'zlib_codec',
+ # zlib_codec codec
+ 'zip' : 'zlib_codec',
+ 'zlib' : 'zlib_codec',
# temporary mac CJK aliases, will be replaced by proper codecs in 3.1
'x_mac_japanese' : 'shift_jis',
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index 506ba7dfbf..07a6a5e0be 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -2320,18 +2320,29 @@ bytes_transform_encodings = [
"quopri_codec",
"hex_codec",
]
+
+transform_aliases = {
+ "base64_codec": ["base64", "base_64"],
+ "uu_codec": ["uu"],
+ "quopri_codec": ["quopri", "quoted_printable", "quotedprintable"],
+ "hex_codec": ["hex"],
+ "rot_13": ["rot13"],
+}
+
try:
import zlib
except ImportError:
pass
else:
bytes_transform_encodings.append("zlib_codec")
+ transform_aliases["zlib_codec"] = ["zip", "zlib"]
try:
import bz2
except ImportError:
pass
else:
bytes_transform_encodings.append("bz2_codec")
+ transform_aliases["bz2_codec"] = ["bz2"]
class TransformCodecTest(unittest.TestCase):
@@ -2445,6 +2456,15 @@ class TransformCodecTest(unittest.TestCase):
# Unfortunately, the bz2 module throws OSError, which the codec
# machinery currently can't wrap :(
+ # Ensure codec aliases from http://bugs.python.org/issue7475 work
+ def test_aliases(self):
+ for codec_name, aliases in transform_aliases.items():
+ expected_name = codecs.lookup(codec_name).name
+ for alias in aliases:
+ with self.subTest(alias=alias):
+ info = codecs.lookup(alias)
+ self.assertEqual(info.name, expected_name)
+
# The codec system tries to wrap exceptions in order to ensure the error
# mentions the operation being performed and the codec involved. We