diff options
-rw-r--r-- | tests/test_unicode.py | 12 | ||||
-rwxr-xr-x | tests/utf8-err.py | 15 |
2 files changed, 27 insertions, 0 deletions
diff --git a/tests/test_unicode.py b/tests/test_unicode.py index 483a956..c4bc394 100644 --- a/tests/test_unicode.py +++ b/tests/test_unicode.py @@ -123,6 +123,18 @@ class UnicodeTests(PexpectTestCase.PexpectTestCase): p.sendeof() p.expect(pexpect.EOF) + def test_spawn_utf8_incomplete(self): + # This test case ensures correct incremental decoding, as the first + # beta release decoded bytes statically: + # return codecs.utf_8_decode(input, errors, True) + # which fails when the stream read does not align exactly at a utf-8 + # multibyte boundary: + # UnicodeDecodeError: 'utf8' codec can't decode byte 0xe2 in + # position 0: unexpected end of data + p = pexpect.spawnu('./utf8-err.py') + p.expect(u'▁▂▃▄▅▆▇█') + + if __name__ == '__main__': unittest.main() diff --git a/tests/utf8-err.py b/tests/utf8-err.py new file mode 100755 index 0000000..7b802c2 --- /dev/null +++ b/tests/utf8-err.py @@ -0,0 +1,15 @@ +#!/usr/bin/env python +# This program is completely unaware of encodings, it just writes raw bytes, +# which happen to be utf-8 for ascending ansi-art-like blocks. However, only +# a single byte of multibyte utf-8 sequences is yielded each fraction of a +# second, testing Issue #9, which will yield a UnicodeDecodeError for +# incompletely decoded bytes, even though it looks fine in say, a Terminal +# (unless you interrupt the output bytes by holding down the spacebar!) +import sys, time +utf8_blurb = ('\xe2\x96\x81\xe2\x96\x82\xe2\x96\x83\xe2\x96\x84' + '\xe2\x96\x85\xe2\x96\x86\xe2\x96\x87\xe2\x96\x88') +for ch in utf8_blurb: + sys.stderr.write(ch) + sys.stderr.flush() + time.sleep(0.1) +sys.stderr.write('\n') |