summary | refs | log | tree | commit | diff
path: root/Lib/test/test_codecs.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/test/test_codecs.py')
-rw-r--r-- Lib/test/test_codecs.py 86
1 files changed, 64 insertions, 22 deletions
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index 3dd56820cd..328a47b2e3 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -1641,6 +1641,18 @@ class CodecsModuleTest(unittest.TestCase):
self.assertRaises(TypeError, codecs.register)
self.assertRaises(TypeError, codecs.register, 42)
+ def test_unregister(self):
+ name = "nonexistent_codec_name"
+ search_function = mock.Mock()
+ codecs.register(search_function)
+ self.assertRaises(TypeError, codecs.lookup, name)
+ search_function.assert_called_with(name)
+ search_function.reset_mock()
+
+ codecs.unregister(search_function)
+ self.assertRaises(LookupError, codecs.lookup, name)
+ search_function.assert_not_called()
+
def test_lookup(self):
self.assertRaises(TypeError, codecs.lookup)
self.assertRaises(LookupError, codecs.lookup, "__spam__")
@@ -2185,6 +2197,18 @@ class CharmapTest(unittest.TestCase):
("", len(allbytes))
)
+ self.assertRaisesRegex(TypeError,
+ "character mapping must be in range\\(0x110000\\)",
+ codecs.charmap_decode,
+ b"\x00\x01\x02", "strict", {0: "A", 1: 'Bb', 2: -2}
+ )
+
+ self.assertRaisesRegex(TypeError,
+ "character mapping must be in range\\(0x110000\\)",
+ codecs.charmap_decode,
+ b"\x00\x01\x02", "strict", {0: "A", 1: 'Bb', 2: 999999999}
+ )
+
def test_decode_with_int2int_map(self):
a = ord('a')
b = ord('b')
@@ -2742,29 +2766,14 @@ _TEST_CODECS = {}
def _get_test_codec(codec_name):
return _TEST_CODECS.get(codec_name)
-codecs.register(_get_test_codec) # Returns None, not usable as a decorator
-
-try:
- # Issue #22166: Also need to clear the internal cache in CPython
- from _codecs import _forget_codec
-except ImportError:
- def _forget_codec(codec_name):
- pass
class ExceptionChainingTest(unittest.TestCase):
def setUp(self):
- # There's no way to unregister a codec search function, so we just
- # ensure we render this one fairly harmless after the test
- # case finishes by using the test case repr as the codec name
- # The codecs module normalizes codec names, although this doesn't
- # appear to be formally documented...
- # We also make sure we use a truly unique id for the custom codec
- # to avoid issues with the codec cache when running these tests
- # multiple times (e.g. when hunting for refleaks)
- unique_id = repr(self) + str(id(self))
- self.codec_name = encodings.normalize_encoding(unique_id).lower()
+ self.codec_name = 'exception_chaining_test'
+ codecs.register(_get_test_codec)
+ self.addCleanup(codecs.unregister, _get_test_codec)
# We store the object to raise on the instance because of a bad
# interaction between the codec caching (which means we can't
@@ -2779,10 +2788,6 @@ class ExceptionChainingTest(unittest.TestCase):
_TEST_CODECS.pop(self.codec_name, None)
# Issue #22166: Also pop from caches to avoid appearance of ref leaks
encodings._cache.pop(self.codec_name, None)
- try:
- _forget_codec(self.codec_name)
- except KeyError:
- pass
def set_codec(self, encode, decode):
codec_info = codecs.CodecInfo(encode, decode,
@@ -3403,5 +3408,42 @@ class Rot13UtilTest(unittest.TestCase):
'To be, or not to be, that is the question')
+class CodecNameNormalizationTest(unittest.TestCase):
+ """Test codec name normalization"""
+ def test_codecs_lookup(self):
+ FOUND = (1, 2, 3, 4)
+ NOT_FOUND = (None, None, None, None)
+ def search_function(encoding):
+ if encoding == "aaa_8":
+ return FOUND
+ else:
+ return NOT_FOUND
+
+ codecs.register(search_function)
+ self.addCleanup(codecs.unregister, search_function)
+ self.assertEqual(FOUND, codecs.lookup('aaa_8'))
+ self.assertEqual(FOUND, codecs.lookup('AAA-8'))
+ self.assertEqual(FOUND, codecs.lookup('AAA---8'))
+ self.assertEqual(FOUND, codecs.lookup('AAA 8'))
+ self.assertEqual(FOUND, codecs.lookup('aaa\xe9\u20ac-8'))
+ self.assertEqual(NOT_FOUND, codecs.lookup('AAA.8'))
+ self.assertEqual(NOT_FOUND, codecs.lookup('AAA...8'))
+ self.assertEqual(NOT_FOUND, codecs.lookup('BBB-8'))
+ self.assertEqual(NOT_FOUND, codecs.lookup('BBB.8'))
+ self.assertEqual(NOT_FOUND, codecs.lookup('a\xe9\u20ac-8'))
+
+ def test_encodings_normalize_encoding(self):
+ # encodings.normalize_encoding() ignores non-ASCII characters.
+ normalize = encodings.normalize_encoding
+ self.assertEqual(normalize('utf_8'), 'utf_8')
+ self.assertEqual(normalize('utf\xE9\u20AC\U0010ffff-8'), 'utf_8')
+ self.assertEqual(normalize('utf 8'), 'utf_8')
+ # encodings.normalize_encoding() doesn't convert
+ # characters to lower case.
+ self.assertEqual(normalize('UTF 8'), 'UTF_8')
+ self.assertEqual(normalize('utf.8'), 'utf.8')
+ self.assertEqual(normalize('utf...8'), 'utf...8')
+
+
if __name__ == "__main__":
unittest.main()