diff options
author | Jeff Quast <contact@jeffquast.com> | 2014-08-25 00:21:27 -0700 |
---|---|---|
committer | Jeff Quast <contact@jeffquast.com> | 2014-08-25 00:21:27 -0700 |
commit | 9057873f9b73d216149b78c922be19fac8e3a44c (patch) | |
tree | 8fa9059cb1530b725726fc109111f9ebf9c77a4c /tests | |
parent | 8d96042177a6986ae5b117e31916638309b2fd03 (diff) | |
parent | d42a81179666a924b406c67d73a9472f088f8ada (diff) | |
download | pexpect-9057873f9b73d216149b78c922be19fac8e3a44c.tar.gz |
Merge pull request #99 from takluyver/issue-84
Unicode support for screen and ANSI
Diffstat (limited to 'tests')
-rwxr-xr-x | tests/test_ansi.py | 59 |
-rwxr-xr-x | tests/test_screen.py | 124 |
2 files changed, 169 insertions, 14 deletions
diff --git a/tests/test_ansi.py b/tests/test_ansi.py index 516509c..a9d445e 100755 --- a/tests/test_ansi.py +++ b/tests/test_ansi.py @@ -21,6 +21,9 @@ PEXPECT LICENSE from pexpect import ANSI import unittest from . import PexpectTestCase +import sys + +PY3 = (sys.version_info[0] >= 3) write_target = 'I\'ve got a ferret sticking up my nose. \n' +\ '(He\'s got a ferret sticking up his nose.) \n' +\ @@ -162,6 +165,62 @@ class ansiTestCase (PexpectTestCase.PexpectTestCase): assert str(s) == ('test ') assert s.state.memory == [s] + def test_utf8_bytes(self): + """Test that when bytes are passed in containing UTF-8 encoded + characters, where the encoding of each character consists of + multiple bytes, the characters are correctly decoded. + Incremental decoding is also tested.""" + s = ANSI.ANSI(2, 10, encoding='utf-8') + # This is the UTF-8 encoding of the UCS character "HOURGLASS" + # followed by the UTF-8 encoding of the UCS character + # "KEYBOARD". These characters can't be encoded in cp437 or + # latin-1. The "KEYBOARD" character is split into two + # separate writes. 
+ s.write(b'\xe2\x8c\x9b') + s.write(b'\xe2\x8c') + s.write(b'\xa8') + if PY3: + assert str(s) == u'\u231b\u2328 \n ' + else: + assert unicode(s) == u'\u231b\u2328 \n ' + assert str(s) == b'\xe2\x8c\x9b\xe2\x8c\xa8 \n ' + assert s.dump() == u'\u231b\u2328 ' + assert s.pretty() == u'+----------+\n|\u231b\u2328 |\n| |\n+----------+\n' + assert s.get_abs(1, 1) == u'\u231b' + assert s.get_region(1, 1, 1, 5) == [u'\u231b\u2328 '] + + def test_unicode(self): + """Test passing in of a unicode string.""" + s = ANSI.ANSI(2, 10, encoding="utf-8") + s.write(u'\u231b\u2328') + if PY3: + assert str(s) == u'\u231b\u2328 \n ' + else: + assert unicode(s) == u'\u231b\u2328 \n ' + assert str(s) == b'\xe2\x8c\x9b\xe2\x8c\xa8 \n ' + assert s.dump() == u'\u231b\u2328 ' + assert s.pretty() == u'+----------+\n|\u231b\u2328 |\n| |\n+----------+\n' + assert s.get_abs(1, 1) == u'\u231b' + assert s.get_region(1, 1, 1, 5) == [u'\u231b\u2328 '] + + def test_decode_error(self): + """Test that default handling of decode errors replaces the + invalid characters.""" + s = ANSI.ANSI(2, 10, encoding="ascii") + s.write(b'\xff') # a non-ASCII character + # In unicode, the non-ASCII character is replaced with + # REPLACEMENT CHARACTER. + if PY3: + assert str(s) == u'\ufffd \n ' + else: + assert unicode(s) == u'\ufffd \n ' + assert str(s) == b'? \n ' + assert s.dump() == u'\ufffd ' + assert s.pretty() == u'+----------+\n|\ufffd |\n| |\n+----------+\n' + assert s.get_abs(1, 1) == u'\ufffd' + assert s.get_region(1, 1, 1, 5) == [u'\ufffd '] + + if __name__ == '__main__': unittest.main() diff --git a/tests/test_screen.py b/tests/test_screen.py index 3f0736b..2429e57 100755 --- a/tests/test_screen.py +++ b/tests/test_screen.py @@ -19,10 +19,14 @@ PEXPECT LICENSE ''' +import sys + from pexpect import screen import unittest from . 
import PexpectTestCase +PY3 = (sys.version_info[0] >= 3) + fill1_target='XXXXXXXXXX\n' + \ 'XOOOOOOOOX\n' + \ 'XO::::::OX\n' + \ @@ -76,6 +80,17 @@ insert_target = 'ZXZZZZZZXZ\n' +\ 'ZZ/2.4.6ZZ' get_region_target = ['......', '.\\/...', './\\...', '......'] +unicode_box_unicode_result = u'\u2554\u2557\n\u255A\u255D' +unicode_box_pretty_result = u'''\ ++--+ +|\u2554\u2557| +|\u255A\u255D| ++--+ +''' +unicode_box_ascii_bytes_result = b'??\n??' +unicode_box_cp437_bytes_result = b'\xc9\xbb\n\xc8\xbc' +unicode_box_utf8_bytes_result = b'\xe2\x95\x94\xe2\x95\x97\n\xe2\x95\x9a\xe2\x95\x9d' + class screenTestCase (PexpectTestCase.PexpectTestCase): def make_screen_with_put (self): s = screen.screen(10,10) @@ -168,20 +183,101 @@ class screenTestCase (PexpectTestCase.PexpectTestCase): s.insert_abs (10,9,'Z') s.insert_abs (10,9,'Z') assert str(s) == insert_target - # def test_write (self): - # s = screen.screen (6,65) - # s.fill('.') - # s.cursor_home() - # for c in write_text: - # s.write (c) - # print str(s) - # assert str(s) == write_target - # def test_tetris (self): - # s = screen.screen (24,80) - # tetris_text = open ('tetris.data').read() - # for c in tetris_text: - # s.write (c) - # assert str(s) == tetris_target + + def make_screen_with_box_unicode(self, *args, **kwargs): + '''Creates a screen containing a box drawn using double-line + line drawing characters. The characters are fed in as + unicode. ''' + s = screen.screen (2,2,*args,**kwargs) + s.put_abs (1,1,u'\u2554') + s.put_abs (1,2,u'\u2557') + s.put_abs (2,1,u'\u255A') + s.put_abs (2,2,u'\u255D') + return s + + def make_screen_with_box_cp437(self, *args, **kwargs): + '''Creates a screen containing a box drawn using double-line + line drawing characters. The characters are fed in as + CP437. 
''' + s = screen.screen (2,2,*args,**kwargs) + s.put_abs (1,1,b'\xc9') + s.put_abs (1,2,b'\xbb') + s.put_abs (2,1,b'\xc8') + s.put_abs (2,2,b'\xbc') + return s + + def make_screen_with_box_utf8(self, *args, **kwargs): + '''Creates a screen containing a box drawn using double-line + line drawing characters. The characters are fed in as + UTF-8. ''' + s = screen.screen (2,2,*args,**kwargs) + s.put_abs (1,1,b'\xe2\x95\x94') + s.put_abs (1,2,b'\xe2\x95\x97') + s.put_abs (2,1,b'\xe2\x95\x9a') + s.put_abs (2,2,b'\xe2\x95\x9d') + return s + + def test_unicode_ascii (self): + # With the default encoding set to ASCII, we should still be + # able to feed in unicode strings and get them back out: + s = self.make_screen_with_box_unicode('ascii') + if PY3: + assert str(s) == unicode_box_unicode_result + else: + assert unicode(s) == unicode_box_unicode_result + # And we should still get something for Python 2 str(), though + # it might not be very useful + str(s) + + assert s.pretty() == unicode_box_pretty_result + + def test_decoding_errors(self): + # With strict error handling, it should reject bytes it can't decode + with self.assertRaises(UnicodeDecodeError): + self.make_screen_with_box_cp437('ascii', 'strict') + + # replace should turn them into unicode replacement characters, U+FFFD + s = self.make_screen_with_box_cp437('ascii', 'replace') + expected = u'\ufffd\ufffd\n\ufffd\ufffd' + if PY3: + assert str(s) == expected + else: + assert unicode(s) == expected + + def test_unicode_cp437 (self): + # Verify decoding from and re-encoding to CP437. + s = self.make_screen_with_box_cp437('cp437','strict') + if PY3: + assert str(s) == unicode_box_unicode_result + else: + assert unicode(s) == unicode_box_unicode_result + assert str(s) == unicode_box_cp437_bytes_result + assert s.pretty() == unicode_box_pretty_result + + def test_unicode_utf8 (self): + # Verify decoding from and re-encoding to UTF-8. 
+ s = self.make_screen_with_box_utf8('utf-8','strict') + if PY3: + assert str(s) == unicode_box_unicode_result + else: + assert unicode(s) == unicode_box_unicode_result + assert str(s) == unicode_box_utf8_bytes_result + assert s.pretty() == unicode_box_pretty_result + + def test_no_bytes(self): + s = screen.screen(2, 2, encoding=None) + s.put_abs(1, 1, u'A') + s.put_abs(2, 2, u'D') + + with self.assertRaises(TypeError): + s.put_abs(1, 2, b'B') + + if PY3: + assert str(s) == u'A \n D' + else: + assert unicode(s) == u'A \n D' + # This will still work if it's limited to ascii + assert str(s) == b'A \n D' if __name__ == '__main__': unittest.main() |