Revert "bpo-41919, test_codecs: Move codecs.register calls to setUp() (GH-22513)" [branch: revert-22513-update_test_codecs]

This reverts commit c9f696cb96d1c362d5cad871f61da520572d9b08.
author: Pablo Galindo <Pablogsal@gmail.com> 2020-10-25 06:06:13 +0000
committer: GitHub <noreply@github.com> 2020-10-25 06:06:13 +0000
commit: b1c9a20f5d8961a5b1555cf68eb38afafb39e1d7 (patch)
tree: e5c3a1f5cddec8c49c94647d300a7ca2e76f191a
parent: 4a9c6379380defd37b5483607d0d804db18f7812 (diff)
download: cpython-git-revert-22513-update_test_codecs.tar.gz
7 files changed, 112 insertions, 16 deletions
diff --git a/Lib/test/test_charmapcodec.py b/Lib/test/test_charmapcodec.py
index 3f628902a1..0d4594d8c0 100644
--- a/Lib/test/test_charmapcodec.py
+++ b/Lib/test/test_charmapcodec.py
@@ -20,15 +20,12 @@ def codec_search_function(encoding):
         return tuple(testcodec.getregentry())
     return None
 
+codecs.register(codec_search_function)
+
 # test codec's name (see test/testcodec.py)
 codecname = 'testcodec'
 
 class CharmapCodecTest(unittest.TestCase):
-
-    def setUp(self):
-        codecs.register(codec_search_function)
-        self.addCleanup(codecs.unregister, codec_search_function)
-
     def test_constructorx(self):
         self.assertEqual(str(b'abc', codecname), 'abc')
         self.assertEqual(str(b'xdef', codecname), 'abcdef')
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index 328a47b2e3..e99914f8ff 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -2766,14 +2766,29 @@ _TEST_CODECS = {}
 
 def _get_test_codec(codec_name):
     return _TEST_CODECS.get(codec_name)
+codecs.register(_get_test_codec) # Returns None, not usable as a decorator
+
+try:
+    # Issue #22166: Also need to clear the internal cache in CPython
+    from _codecs import _forget_codec
+except ImportError:
+    def _forget_codec(codec_name):
+        pass
 
 
 class ExceptionChainingTest(unittest.TestCase):
 
     def setUp(self):
-        self.codec_name = 'exception_chaining_test'
-        codecs.register(_get_test_codec)
-        self.addCleanup(codecs.unregister, _get_test_codec)
+        # There's no way to unregister a codec search function, so we just
+        # ensure we render this one fairly harmless after the test
+        # case finishes by using the test case repr as the codec name
+        # The codecs module normalizes codec names, although this doesn't
+        # appear to be formally documented...
+        # We also make sure we use a truly unique id for the custom codec
+        # to avoid issues with the codec cache when running these tests
+        # multiple times (e.g. when hunting for refleaks)
+        unique_id = repr(self) + str(id(self))
+        self.codec_name = encodings.normalize_encoding(unique_id).lower()
 
         # We store the object to raise on the instance because of a bad
         # interaction between the codec caching (which means we can't
@@ -2788,6 +2803,10 @@ class ExceptionChainingTest(unittest.TestCase):
         _TEST_CODECS.pop(self.codec_name, None)
         # Issue #22166: Also pop from caches to avoid appearance of ref leaks
         encodings._cache.pop(self.codec_name, None)
+        try:
+            _forget_codec(self.codec_name)
+        except KeyError:
+            pass
 
     def set_codec(self, encode, decode):
         codec_info = codecs.CodecInfo(encode, decode,
diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py
index fbaea3aaec..85fac30e30 100644
--- a/Lib/test/test_io.py
+++ b/Lib/test/test_io.py
@@ -2529,6 +2529,10 @@ class StatefulIncrementalDecoder(codecs.IncrementalDecoder):
                 streamreader=None, streamwriter=None,
                 incrementaldecoder=cls)
 
+# Register the previous decoder for testing.
+# Disabled by default, tests will enable it.
+codecs.register(StatefulIncrementalDecoder.lookupTestDecoder)
+
 
 class StatefulIncrementalDecoderTest(unittest.TestCase):
     """
@@ -2579,9 +2583,6 @@ class TextIOWrapperTest(unittest.TestCase):
         self.testdata = b"AAA\r\nBBB\rCCC\r\nDDD\nEEE\r\n"
         self.normalized = b"AAA\nBBB\nCCC\nDDD\nEEE\n".decode("ascii")
         os_helper.unlink(os_helper.TESTFN)
-        codecs.register(StatefulIncrementalDecoder.lookupTestDecoder)
-        self.addCleanup(codecs.unregister,
-                        StatefulIncrementalDecoder.lookupTestDecoder)
 
     def tearDown(self):
         os_helper.unlink(os_helper.TESTFN)
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index 90b0965582..d485bc7ede 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -36,6 +36,7 @@ def search_function(encoding):
         return (encode2, decode2, None, None)
     else:
         return None
+codecs.register(search_function)
 
 def duplicate_string(text):
     """
@@ -57,10 +58,6 @@ class UnicodeTest(string_tests.CommonTest,
 
     type2test = str
 
-    def setUp(self):
-        codecs.register(search_function)
-        self.addCleanup(codecs.unregister, search_function)
-
     def checkequalnofix(self, result, object, methodname, *args):
         method = getattr(object, methodname)
         realresult = method(*args)
diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c
index 2e8cb97fe7..08a3d4ab02 100644
--- a/Modules/_codecsmodule.c
+++ b/Modules/_codecsmodule.c
@@ -160,6 +160,25 @@ _codecs_decode_impl(PyObject *module, PyObject *obj, const char *encoding,
 
 /* --- Helpers ------------------------------------------------------------ */
 
+/*[clinic input]
+_codecs._forget_codec
+
+    encoding: str
+    /
+
+Purge the named codec from the internal codec lookup cache
+[clinic start generated code]*/
+
+static PyObject *
+_codecs__forget_codec_impl(PyObject *module, const char *encoding)
+/*[clinic end generated code: output=0bde9f0a5b084aa2 input=18d5d92d0e386c38]*/
+{
+    if (_PyCodec_Forget(encoding) < 0) {
+        return NULL;
+    };
+    Py_RETURN_NONE;
+}
+
 static
 PyObject *codec_tuple(PyObject *decoded,
                       Py_ssize_t len)
@@ -1038,6 +1057,7 @@ static PyMethodDef _codecs_functions[] = {
     _CODECS_CODE_PAGE_DECODE_METHODDEF
     _CODECS_REGISTER_ERROR_METHODDEF
     _CODECS_LOOKUP_ERROR_METHODDEF
+    _CODECS__FORGET_CODEC_METHODDEF
     {NULL, NULL}                /* sentinel */
 };
 
diff --git a/Modules/clinic/_codecsmodule.c.h b/Modules/clinic/_codecsmodule.c.h
index 43378f94f9..e2ebb68612 100644
--- a/Modules/clinic/_codecsmodule.c.h
+++ b/Modules/clinic/_codecsmodule.c.h
@@ -217,6 +217,43 @@ exit:
     return return_value;
 }
 
+PyDoc_STRVAR(_codecs__forget_codec__doc__,
+"_forget_codec($module, encoding, /)\n"
+"--\n"
+"\n"
+"Purge the named codec from the internal codec lookup cache");
+
+#define _CODECS__FORGET_CODEC_METHODDEF    \
+    {"_forget_codec", (PyCFunction)_codecs__forget_codec, METH_O, _codecs__forget_codec__doc__},
+
+static PyObject *
+_codecs__forget_codec_impl(PyObject *module, const char *encoding);
+
+static PyObject *
+_codecs__forget_codec(PyObject *module, PyObject *arg)
+{
+    PyObject *return_value = NULL;
+    const char *encoding;
+
+    if (!PyUnicode_Check(arg)) {
+        _PyArg_BadArgument("_forget_codec", "argument", "str", arg);
+        goto exit;
+    }
+    Py_ssize_t encoding_length;
+    encoding = PyUnicode_AsUTF8AndSize(arg, &encoding_length);
+    if (encoding == NULL) {
+        goto exit;
+    }
+    if (strlen(encoding) != (size_t)encoding_length) {
+        PyErr_SetString(PyExc_ValueError, "embedded null character");
+        goto exit;
+    }
+    return_value = _codecs__forget_codec_impl(module, encoding);
+
+exit:
+    return return_value;
+}
+
 PyDoc_STRVAR(_codecs_escape_decode__doc__,
 "escape_decode($module, data, errors=None, /)\n"
 "--\n"
@@ -2801,4 +2838,4 @@ exit:
 #ifndef _CODECS_CODE_PAGE_ENCODE_METHODDEF
     #define _CODECS_CODE_PAGE_ENCODE_METHODDEF
 #endif /* !defined(_CODECS_CODE_PAGE_ENCODE_METHODDEF) */
-/*[clinic end generated code: output=557c3b37e4c492ac input=a9049054013a1b77]*/
+/*[clinic end generated code: output=9a97e2ddf3e69072 input=a9049054013a1b77]*/
diff --git a/Python/codecs.c b/Python/codecs.c
index ade1418720..a8233a73c4 100644
--- a/Python/codecs.c
+++ b/Python/codecs.c
@@ -208,6 +208,31 @@ PyObject *_PyCodec_Lookup(const char *encoding)
     return NULL;
 }
 
+int _PyCodec_Forget(const char *encoding)
+{
+    PyObject *v;
+    int result;
+
+    PyInterpreterState *interp = _PyInterpreterState_GET();
+    if (interp->codec_search_path == NULL) {
+        return -1;
+    }
+
+    /* Convert the encoding to a normalized Python string: all
+       characters are converted to lower case, spaces and hyphens are
+       replaced with underscores. */
+    v = normalizestring(encoding);
+    if (v == NULL) {
+        return -1;
+    }
+
+    /* Drop the named codec from the internal cache */
+    result = PyDict_DelItem(interp->codec_search_cache, v);
+    Py_DECREF(v);
+
+    return result;
+}
+
 /* Codec registry encoding check API. */
 
 int PyCodec_KnownEncoding(const char *encoding)
author	Pablo Galindo <Pablogsal@gmail.com>	2020-10-25 06:06:13 +0000
committer	GitHub <noreply@github.com>	2020-10-25 06:06:13 +0000
commit	b1c9a20f5d8961a5b1555cf68eb38afafb39e1d7 (patch)
tree	e5c3a1f5cddec8c49c94647d300a7ca2e76f191a
parent	4a9c6379380defd37b5483607d0d804db18f7812 (diff)
download	cpython-git-revert-22513-update_test_codecs.tar.gz