Merge pull request #99 from takluyver/issue-84

Unicode support for screen and ANSI
author: Jeff Quast <contact@jeffquast.com> 2014-08-25 00:21:27 -0700
committer: Jeff Quast <contact@jeffquast.com> 2014-08-25 00:21:27 -0700
commit: 9057873f9b73d216149b78c922be19fac8e3a44c (patch)
tree: 8fa9059cb1530b725726fc109111f9ebf9c77a4c /tests
parent: 8d96042177a6986ae5b117e31916638309b2fd03 (diff)
parent: d42a81179666a924b406c67d73a9472f088f8ada (diff)
download: pexpect-9057873f9b73d216149b78c922be19fac8e3a44c.tar.gz
2 files changed, 169 insertions, 14 deletions
diff --git a/tests/test_ansi.py b/tests/test_ansi.py
index 516509c..a9d445e 100755
--- a/tests/test_ansi.py
+++ b/tests/test_ansi.py
@@ -21,6 +21,9 @@ PEXPECT LICENSE
 from pexpect import ANSI
 import unittest
 from . import PexpectTestCase
+import sys
+
+PY3 = (sys.version_info[0] >= 3)
 
 write_target = 'I\'ve got a ferret sticking up my nose.                           \n' +\
 '(He\'s got a ferret sticking up his nose.)                        \n' +\
@@ -162,6 +165,62 @@ class ansiTestCase (PexpectTestCase.PexpectTestCase):
         assert str(s) == ('test                ')
         assert s.state.memory == [s]
 
+    def test_utf8_bytes(self):
+        """Test that when bytes are passed in containing UTF-8 encoded
+        characters, where the encoding of each character consists of
+        multiple bytes, the characters are correctly decoded.
+        Incremental decoding is also tested."""
+        s = ANSI.ANSI(2, 10, encoding='utf-8')
+        # This is the UTF-8 encoding of the UCS character "HOURGLASS"
+        # followed by the UTF-8 encoding of the UCS character
+        # "KEYBOARD".  These characters can't be encoded in cp437 or
+        # latin-1.  The "KEYBOARD" character is split into two
+        # separate writes.
+        s.write(b'\xe2\x8c\x9b')
+        s.write(b'\xe2\x8c')
+        s.write(b'\xa8')
+        if PY3:
+            assert str(s) == u'\u231b\u2328        \n          '
+        else:
+            assert unicode(s) == u'\u231b\u2328        \n          '
+            assert str(s) == b'\xe2\x8c\x9b\xe2\x8c\xa8        \n          '
+        assert s.dump() == u'\u231b\u2328                  '
+        assert s.pretty() == u'+----------+\n|\u231b\u2328        |\n|          |\n+----------+\n'
+        assert s.get_abs(1, 1) == u'\u231b'
+        assert s.get_region(1, 1, 1, 5) == [u'\u231b\u2328   ']
+
+    def test_unicode(self):
+        """Test passing in of a unicode string."""
+        s = ANSI.ANSI(2, 10, encoding="utf-8")
+        s.write(u'\u231b\u2328')
+        if PY3:
+            assert str(s) == u'\u231b\u2328        \n          '
+        else:
+            assert unicode(s) == u'\u231b\u2328        \n          '
+            assert str(s) == b'\xe2\x8c\x9b\xe2\x8c\xa8        \n          '
+        assert s.dump() == u'\u231b\u2328                  '
+        assert s.pretty() == u'+----------+\n|\u231b\u2328        |\n|          |\n+----------+\n'
+        assert s.get_abs(1, 1) == u'\u231b'
+        assert s.get_region(1, 1, 1, 5) == [u'\u231b\u2328   ']
+
+    def test_decode_error(self):
+        """Test that default handling of decode errors replaces the
+        invalid characters."""
+        s = ANSI.ANSI(2, 10, encoding="ascii")
+        s.write(b'\xff')  # a byte that is not valid ASCII
+        # In the unicode output, the undecodable byte is replaced with
+        # U+FFFD REPLACEMENT CHARACTER.
+        if PY3:
+            assert str(s) == u'\ufffd         \n          '
+        else:
+            assert unicode(s) == u'\ufffd         \n          '
+            assert str(s) == b'?         \n          '
+        assert s.dump() == u'\ufffd                   '
+        assert s.pretty() == u'+----------+\n|\ufffd         |\n|          |\n+----------+\n'
+        assert s.get_abs(1, 1) == u'\ufffd'
+        assert s.get_region(1, 1, 1, 5) == [u'\ufffd    ']
+
+
 if __name__ == '__main__':
     unittest.main()
 
diff --git a/tests/test_screen.py b/tests/test_screen.py
index 3f0736b..2429e57 100755
--- a/tests/test_screen.py
+++ b/tests/test_screen.py
@@ -19,10 +19,14 @@ PEXPECT LICENSE
 
 '''
 
+import sys
+
 from pexpect import screen
 import unittest
 from . import PexpectTestCase
 
+PY3 = (sys.version_info[0] >= 3)
+
 fill1_target='XXXXXXXXXX\n' + \
 'XOOOOOOOOX\n' + \
 'XO::::::OX\n' + \
@@ -76,6 +80,17 @@ insert_target = 'ZXZZZZZZXZ\n' +\
 'ZZ/2.4.6ZZ'
 get_region_target = ['......', '.\\/...', './\\...', '......']
 
+unicode_box_unicode_result = u'\u2554\u2557\n\u255A\u255D'
+unicode_box_pretty_result = u'''\
++--+
+|\u2554\u2557|
+|\u255A\u255D|
++--+
+'''
+unicode_box_ascii_bytes_result = b'??\n??'
+unicode_box_cp437_bytes_result = b'\xc9\xbb\n\xc8\xbc'
+unicode_box_utf8_bytes_result = b'\xe2\x95\x94\xe2\x95\x97\n\xe2\x95\x9a\xe2\x95\x9d'
+
 class screenTestCase (PexpectTestCase.PexpectTestCase):
     def make_screen_with_put (self):
         s = screen.screen(10,10)
@@ -168,20 +183,101 @@ class screenTestCase (PexpectTestCase.PexpectTestCase):
         s.insert_abs (10,9,'Z')
         s.insert_abs (10,9,'Z')
         assert str(s) == insert_target
- #   def test_write (self):
- #       s = screen.screen (6,65)
- #       s.fill('.')
- #       s.cursor_home()
- #       for c in write_text:
- #           s.write (c)
- #       print str(s)
- #       assert str(s) == write_target
- #   def test_tetris (self):
- #       s = screen.screen (24,80)
- #       tetris_text = open ('tetris.data').read()
- #       for c in tetris_text:
- #           s.write (c)
- #       assert str(s) == tetris_target
+
+    def make_screen_with_box_unicode(self, *args, **kwargs):
+        '''Creates a screen containing a box drawn using double-line
+        line drawing characters. The characters are fed in as
+        unicode. '''
+        s = screen.screen (2,2,*args,**kwargs)
+        s.put_abs (1,1,u'\u2554')
+        s.put_abs (1,2,u'\u2557')
+        s.put_abs (2,1,u'\u255A')
+        s.put_abs (2,2,u'\u255D')
+        return s
+
+    def make_screen_with_box_cp437(self, *args, **kwargs):
+        '''Creates a screen containing a box drawn using double-line
+        line drawing characters. The characters are fed in as
+        CP437. '''
+        s = screen.screen (2,2,*args,**kwargs)
+        s.put_abs (1,1,b'\xc9')
+        s.put_abs (1,2,b'\xbb')
+        s.put_abs (2,1,b'\xc8')
+        s.put_abs (2,2,b'\xbc')
+        return s
+
+    def make_screen_with_box_utf8(self, *args, **kwargs):
+        '''Creates a screen containing a box drawn using double-line
+        line drawing characters. The characters are fed in as
+        UTF-8. '''
+        s = screen.screen (2,2,*args,**kwargs)
+        s.put_abs (1,1,b'\xe2\x95\x94')
+        s.put_abs (1,2,b'\xe2\x95\x97')
+        s.put_abs (2,1,b'\xe2\x95\x9a')
+        s.put_abs (2,2,b'\xe2\x95\x9d')
+        return s
+
+    def test_unicode_ascii (self):
+        # With the default encoding set to ASCII, we should still be
+        # able to feed in unicode strings and get them back out:
+        s = self.make_screen_with_box_unicode('ascii')
+        if PY3:
+            assert str(s) == unicode_box_unicode_result
+        else:
+            assert unicode(s) == unicode_box_unicode_result
+            # And we should still get something for Python 2 str(), though
+            # it might not be very useful
+            str(s)
+
+        assert s.pretty() == unicode_box_pretty_result
+
+    def test_decoding_errors(self):
+        # With strict error handling, it should reject bytes it can't decode
+        with self.assertRaises(UnicodeDecodeError):
+            self.make_screen_with_box_cp437('ascii', 'strict')
+
+        # The 'replace' error handler should turn them into unicode replacement characters, U+FFFD
+        s = self.make_screen_with_box_cp437('ascii', 'replace')
+        expected = u'\ufffd\ufffd\n\ufffd\ufffd'
+        if PY3:
+            assert str(s) == expected
+        else:
+            assert unicode(s) == expected
+
+    def test_unicode_cp437 (self):
+        # Verify decoding from and re-encoding to CP437.
+        s = self.make_screen_with_box_cp437('cp437','strict')
+        if PY3:
+            assert str(s) == unicode_box_unicode_result
+        else:
+            assert unicode(s) == unicode_box_unicode_result
+            assert str(s) == unicode_box_cp437_bytes_result
+        assert s.pretty() == unicode_box_pretty_result
+
+    def test_unicode_utf8 (self):
+        # Verify decoding from and re-encoding to UTF-8.
+        s = self.make_screen_with_box_utf8('utf-8','strict')
+        if PY3:
+            assert str(s) == unicode_box_unicode_result
+        else:
+            assert unicode(s) == unicode_box_unicode_result
+            assert str(s) == unicode_box_utf8_bytes_result
+        assert s.pretty() == unicode_box_pretty_result
+
+    def test_no_bytes(self):
+        s = screen.screen(2, 2, encoding=None)
+        s.put_abs(1, 1, u'A')
+        s.put_abs(2, 2, u'D')
+
+        with self.assertRaises(TypeError):
+            s.put_abs(1, 2, b'B')
+
+        if PY3:
+            assert str(s) == u'A \n D'
+        else:
+            assert unicode(s) == u'A \n D'
+            # This will still work if it's limited to ascii
+            assert str(s) == b'A \n D'
 
 if __name__ == '__main__':
     unittest.main()
author	Jeff Quast <contact@jeffquast.com>	2014-08-25 00:21:27 -0700
committer	Jeff Quast <contact@jeffquast.com>	2014-08-25 00:21:27 -0700
commit	9057873f9b73d216149b78c922be19fac8e3a44c (patch)
tree	8fa9059cb1530b725726fc109111f9ebf9c77a4c /tests
parent	8d96042177a6986ae5b117e31916638309b2fd03 (diff)
parent	d42a81179666a924b406c67d73a9472f088f8ada (diff)
download	pexpect-9057873f9b73d216149b78c922be19fac8e3a44c.tar.gz