Issue #19619: Blacklist non-text codecs in method API

str.encode, bytes.decode and bytearray.decode now use an internal API to throw LookupError for known non-text encodings, rather than attempting the encoding or decoding operation and then throwing a TypeError for an unexpected output type. The latter mechanism remains in place for third party non-text encodings.
author: Nick Coghlan <ncoghlan@gmail.com> 2013-11-22 22:39:36 +1000
committer: Nick Coghlan <ncoghlan@gmail.com> 2013-11-22 22:39:36 +1000
commit: c72e4e6dccce99bcdcb45959767436d7e5cfda8c (patch)
tree: 029832d80cc82a039dc1014302c9eb9dd2214543 /Lib/codecs.py
parent: 322f5ba0d8d5e8a9cd2a134fa215884b4cbc373d (diff)
download: cpython-git-c72e4e6dccce99bcdcb45959767436d7e5cfda8c.tar.gz
1 files changed, 13 insertions, 1 deletions
diff --git a/Lib/codecs.py b/Lib/codecs.py
index 6a6eb900c7..2e2e7555a4 100644
--- a/Lib/codecs.py
+++ b/Lib/codecs.py
@@ -73,9 +73,19 @@ BOM64_BE = BOM_UTF32_BE
 ### Codec base classes (defining the API)
 
 class CodecInfo(tuple):
+    """Codec details when looking up the codec registry"""
+
+    # Private API to allow Python 3.4 to blacklist the known non-Unicode
+    # codecs in the standard library. A more general mechanism to
+    # reliably distinguish text encodings from other codecs will hopefully
+    # be defined for Python 3.5
+    #
+    # See http://bugs.python.org/issue19619
+    _is_text_encoding = True # Assume codecs are text encodings by default
 
     def __new__(cls, encode, decode, streamreader=None, streamwriter=None,
-        incrementalencoder=None, incrementaldecoder=None, name=None):
+        incrementalencoder=None, incrementaldecoder=None, name=None,
+        *, _is_text_encoding=None):
         self = tuple.__new__(cls, (encode, decode, streamreader, streamwriter))
         self.name = name
         self.encode = encode
@@ -84,6 +94,8 @@ class CodecInfo(tuple):
         self.incrementaldecoder = incrementaldecoder
         self.streamwriter = streamwriter
         self.streamreader = streamreader
+        if _is_text_encoding is not None:
+            self._is_text_encoding = _is_text_encoding
         return self
 
     def __repr__(self):
author	Nick Coghlan <ncoghlan@gmail.com>	2013-11-22 22:39:36 +1000
committer	Nick Coghlan <ncoghlan@gmail.com>	2013-11-22 22:39:36 +1000
commit	c72e4e6dccce99bcdcb45959767436d7e5cfda8c (patch)
tree	029832d80cc82a039dc1014302c9eb9dd2214543 /Lib/codecs.py
parent	322f5ba0d8d5e8a9cd2a134fa215884b4cbc373d (diff)
download	cpython-git-c72e4e6dccce99bcdcb45959767436d7e5cfda8c.tar.gz