diff options
author | Victor Stinner <victor.stinner@haypocalc.com> | 2011-05-24 22:29:13 +0200 |
---|---|---|
committer | Victor Stinner <victor.stinner@haypocalc.com> | 2011-05-24 22:29:13 +0200 |
commit | 3c15827acb180e89136699d5503e72308cfa81f0 (patch) | |
tree | 671bc662ed8ec0c7410dca65cd4d0aa97d185ac5 | |
parent | f8e1b76aad8a8e396b07efccb1a7941abf6dba11 (diff) | |
download | cpython-3c15827acb180e89136699d5503e72308cfa81f0.tar.gz |
Issue #12100: Don't reset incremental encoders of CJK codecs at each call to
their encode() method anymore, but continue to call the reset() method if the
final argument is True.
-rw-r--r-- | Lib/test/test_multibytecodec.py | 30 | ||||
-rw-r--r-- | Misc/NEWS | 4 | ||||
-rw-r--r-- | Modules/cjkcodecs/multibytecodec.c | 8 |
3 files changed, 38 insertions, 4 deletions
diff --git a/Lib/test/test_multibytecodec.py b/Lib/test/test_multibytecodec.py
index 63c1e620b5..5e86ca2aae 100644
--- a/Lib/test/test_multibytecodec.py
+++ b/Lib/test/test_multibytecodec.py
@@ -237,6 +237,36 @@ class Test_ISO2022(unittest.TestCase):
             # Any ISO 2022 codec will cause the segfault
             myunichr(x).encode('iso_2022_jp', 'ignore')
 
+class TestStateful(unittest.TestCase):
+    text = u'\u4E16\u4E16'
+    encoding = 'iso-2022-jp'
+    expected = b'\x1b$B@$@$'
+    expected_reset = b'\x1b$B@$@$\x1b(B'
+
+    def test_encode(self):
+        self.assertEqual(self.text.encode(self.encoding), self.expected_reset)
+
+    def test_incrementalencoder(self):
+        encoder = codecs.getincrementalencoder(self.encoding)()
+        output = b''.join(
+            encoder.encode(char)
+            for char in self.text)
+        self.assertEqual(output, self.expected)
+
+    def test_incrementalencoder_final(self):
+        encoder = codecs.getincrementalencoder(self.encoding)()
+        last_index = len(self.text) - 1
+        output = b''.join(
+            encoder.encode(char, index == last_index)
+            for index, char in enumerate(self.text))
+        self.assertEqual(output, self.expected_reset)
+
+class TestHZStateful(TestStateful):
+    text = u'\u804a\u804a'
+    encoding = 'hz'
+    expected = b'~{ADAD'
+    expected_reset = b'~{ADAD~}'
+
 def test_main():
     test_support.run_unittest(__name__)
 
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -83,6 +83,10 @@ Core and Builtins
 Library
 -------
 
+- Issue #12100: Don't reset incremental encoders of CJK codecs at each call to
+  their encode() method anymore, but continue to call the reset() method if the
+  final argument is True.
+
 - Issue #12124: zipimport doesn't keep a reference to zlib.decompress() anymore
   to be able to unload the module.
 
diff --git a/Modules/cjkcodecs/multibytecodec.c b/Modules/cjkcodecs/multibytecodec.c
index 1f31595d3a..14fed3ef38 100644
--- a/Modules/cjkcodecs/multibytecodec.c
+++ b/Modules/cjkcodecs/multibytecodec.c
@@ -471,7 +471,7 @@ multibytecodec_encode(MultibyteCodec *codec,
     MultibyteEncodeBuffer buf;
     Py_ssize_t finalsize, r = 0;
 
-    if (datalen == 0)
+    if (datalen == 0 && !(flags & MBENC_RESET))
         return PyString_FromString("");
 
     buf.excobj = NULL;
@@ -506,7 +506,7 @@ multibytecodec_encode(MultibyteCodec *codec,
             break;
     }
 
-    if (codec->encreset != NULL)
+    if (codec->encreset != NULL && (flags & MBENC_RESET))
         for (;;) {
             Py_ssize_t outleft;
 
@@ -776,8 +776,8 @@ encoder_encode_stateful(MultibyteStatefulEncoderContext *ctx,
     inbuf_end = inbuf + datalen;
 
     r = multibytecodec_encode(ctx->codec, &ctx->state,
-                    (const Py_UNICODE **)&inbuf,
-                    datalen, ctx->errors, final ? MBENC_FLUSH : 0);
+                    (const Py_UNICODE **)&inbuf, datalen,
+                    ctx->errors, final ? MBENC_FLUSH | MBENC_RESET : 0);
     if (r == NULL) {
         /* recover the original pending buffer */
         if (origpending > 0)