2 files changed, 27 insertions, 0 deletions
diff --git a/tests/test_unicode.py b/tests/test_unicode.py
index 483a956..c4bc394 100644
--- a/tests/test_unicode.py
+++ b/tests/test_unicode.py
@@ -123,6 +123,18 @@ class UnicodeTests(PexpectTestCase.PexpectTestCase):
         p.sendeof()
         p.expect(pexpect.EOF)
 
+    def test_spawn_utf8_incomplete(self):
+        # This test case ensures correct incremental decoding, as the first
+        # beta release decoded bytes statically:
+        #      return codecs.utf_8_decode(input, errors, True)
+        # which fails when the stream read does not align exactly at a utf-8
+        # multibyte boundary:
+        #    UnicodeDecodeError: 'utf8' codec can't decode byte 0xe2 in
+        #                        position 0: unexpected end of data
+        p = pexpect.spawnu('./utf8-err.py')
+        p.expect(u'▁▂▃▄▅▆▇█')
+
+
 if __name__ == '__main__':
     unittest.main()
 
diff --git a/tests/utf8-err.py b/tests/utf8-err.py
new file mode 100755
index 0000000..7b802c2
--- /dev/null
+++ b/tests/utf8-err.py
@@ -0,0 +1,15 @@
+#!/usr/bin/env python
+# This program is completely unaware of encodings; it just writes raw bytes,
+# which happen to be the utf-8 encoding of ascending ansi-art-like blocks.
+# However, only a single byte of each multibyte utf-8 sequence is written per
+# fraction of a second. This tests Issue #9, in which incompletely decoded
+# bytes yield a UnicodeDecodeError, even though the output looks fine in, say,
+# a terminal (unless you interrupt the output bytes by holding down the spacebar!)
+import sys, time
+utf8_blurb = ('\xe2\x96\x81\xe2\x96\x82\xe2\x96\x83\xe2\x96\x84'
+              '\xe2\x96\x85\xe2\x96\x86\xe2\x96\x87\xe2\x96\x88')
+for ch in utf8_blurb:
+    sys.stderr.write(ch)
+    sys.stderr.flush()
+    time.sleep(0.1)
+sys.stderr.write('\n')