diff options
author | Victor Stinner <victor.stinner@haypocalc.com> | 2011-07-08 01:45:13 +0200 |
---|---|---|
committer | Victor Stinner <victor.stinner@haypocalc.com> | 2011-07-08 01:45:13 +0200 |
commit | 2cded9c3f31d2fea4b033f44eaa828e508f03391 (patch) | |
tree | 1554d9f0baa575b7ae791ff1267c4e493a1b36bf /Modules/cjkcodecs/_codecs_cn.c | |
parent | 081fe46ff96bccb1a256c356443b625b467814c8 (diff) | |
download | cpython-git-2cded9c3f31d2fea4b033f44eaa828e508f03391.tar.gz |
Issue #12016: Multibyte CJK decoders now resynchronize faster
They only ignore the first byte of an invalid byte sequence.
For example, b'\xff\n'.decode('gb2312', 'replace') gives '\ufffd\n' instead of
'\ufffd'.
Diffstat (limited to 'Modules/cjkcodecs/_codecs_cn.c')
-rw-r--r-- | Modules/cjkcodecs/_codecs_cn.c | 14 |
1 files changed, 7 insertions, 7 deletions
diff --git a/Modules/cjkcodecs/_codecs_cn.c b/Modules/cjkcodecs/_codecs_cn.c index ab4e659332..9e9e96c4d1 100644 --- a/Modules/cjkcodecs/_codecs_cn.c +++ b/Modules/cjkcodecs/_codecs_cn.c @@ -85,7 +85,7 @@ DECODER(gb2312) TRYMAP_DEC(gb2312, **outbuf, c ^ 0x80, IN2 ^ 0x80) { NEXT(2, 1) } - else return 2; + else return 1; } return 0; @@ -141,7 +141,7 @@ DECODER(gbk) REQUIRE_INBUF(2) GBK_DECODE(c, IN2, **outbuf) - else return 2; + else return 1; NEXT(2, 1) } @@ -267,7 +267,7 @@ DECODER(gb18030) c3 = IN3; c4 = IN4; if (c < 0x81 || c3 < 0x81 || c4 < 0x30 || c4 > 0x39) - return 4; + return 1; c -= 0x81; c2 -= 0x30; c3 -= 0x81; c4 -= 0x30; @@ -292,12 +292,12 @@ DECODER(gb18030) continue; } } - return 4; + return 1; } GBK_DECODE(c, c2, **outbuf) else TRYMAP_DEC(gb18030ext, **outbuf, c, c2); - else return 2; + else return 1; NEXT(2, 1) } @@ -400,7 +400,7 @@ DECODER(hz) else if (c2 == '\n') ; /* line-continuation */ else - return 2; + return 1; NEXT(2, 0); continue; } @@ -419,7 +419,7 @@ DECODER(hz) NEXT(2, 1) } else - return 2; + return 1; } } |