summaryrefslogtreecommitdiff
path: root/tests/test_ansi.py
diff options
context:
space:
mode:
authorDavid O'Shea <doshea@doshea-centos-x86-64.adl.quantum.com>2014-07-08 10:40:27 +0930
committerDavid O'Shea <doshea@doshea-centos-x86-64.adl.quantum.com>2014-07-24 22:09:23 +0930
commitdb34b8fed2c21b8b790d2710fde1004d49c8f00e (patch)
tree589a5f4a902fc38570136c615686bd5bab8c4a68 /tests/test_ansi.py
parentcfca6f309c1c8109bd3ad023c6382849fb5c6632 (diff)
downloadpexpect-db34b8fed2c21b8b790d2710fde1004d49c8f00e.tar.gz
Issue #84: Unicode support in screen and ANSI.
This commit updates the screen and ANSI modules to support Unicode under Python 2.x. Under Python 3.x, it was already supported because strings are Unicode by default. Now, on both Python versions: - The constructors accept a codec name (defaults to 'latin-1') and a scheme for handling encoding/decoding errors (defaults to 'replace'). The codec may be set to None to inhibit encoding/decoding. - Unicode is now used internally for storing the screen contents. - Methods that accept input characters will, if passed input of type 'bytes' (or, under Python 2.x, 'str'), use the specified codec to decode the input, otherwise treating it as Unicode. - Methods that return screen contents now return Unicode, with the exception of __str__() under Python 2.x, and __bytes__() in all versions of Python, which return the screen contents encoded using the specified codec. These changes are designed to work only with Python 2.6, 2.7, and 3.3 and later, specifically versions that provide both b'' and u'' string literals. The check in ANSI for characters being printable is also removed, as this prevents non-ASCII characters from being accepted, which is not compatible with the goal of adding Unicode support. This addresses issue #83.
Diffstat (limited to 'tests/test_ansi.py')
-rwxr-xr-xtests/test_ansi.py52
1 files changed, 52 insertions, 0 deletions
diff --git a/tests/test_ansi.py b/tests/test_ansi.py
index 3b8d6a9..48d92a8 100755
--- a/tests/test_ansi.py
+++ b/tests/test_ansi.py
@@ -21,6 +21,11 @@ PEXPECT LICENSE
from pexpect import ANSI
import unittest
from . import PexpectTestCase
+import sys
+
+PY3 = (sys.version_info[0] >= 3)
+if PY3:
+ unicode = str
write_target = 'I\'ve got a ferret sticking up my nose. \n' +\
'(He\'s got a ferret sticking up his nose.) \n' +\
@@ -147,6 +152,53 @@ class ansiTestCase (PexpectTestCase.PexpectTestCase):
assert str(s) == ('test ')
assert(s.state.memory == [s, '0', '1', '32', '45'])
+ def test_utf8_bytes(self):
+ """Test that when bytes are passed in containing UTF-8 encoded
+ characters, where the encoding of each character consists of
+ multiple bytes, the characters are correctly decoded.
+ Incremental decoding is also tested."""
+ s = ANSI.ANSI(2, 10, codec='utf-8')
+ # This is the UTF-8 encoding of the UCS character "HOURGLASS"
+ # followed by the UTF-8 encoding of the UCS character
+ # "KEYBOARD". These characters can't be encoded in cp437 or
+ # latin-1. The "KEYBOARD" character is split into two
+ # separate writes.
+ s.write(b'\xe2\x8c\x9b')
+ s.write(b'\xe2\x8c')
+ s.write(b'\xa8')
+ assert unicode(s) == u'\u231b\u2328 \n '
+ assert bytes(s) == b'\xe2\x8c\x9b\xe2\x8c\xa8 \n '
+ assert s.dump() == u'\u231b\u2328 '
+ assert s.pretty() == u'+----------+\n|\u231b\u2328 |\n| |\n+----------+\n'
+ assert s.get_abs(1, 1) == u'\u231b'
+ assert s.get_region(1, 1, 1, 5) == [u'\u231b\u2328 ']
+
+ def test_unicode(self):
+ """Test passing in of a unicode string."""
+ s = ANSI.ANSI(2, 10, codec="utf-8")
+ s.write(u'\u231b\u2328')
+ assert unicode(s) == u'\u231b\u2328 \n '
+ assert bytes(s) == b'\xe2\x8c\x9b\xe2\x8c\xa8 \n '
+ assert s.dump() == u'\u231b\u2328 '
+ assert s.pretty() == u'+----------+\n|\u231b\u2328 |\n| |\n+----------+\n'
+ assert s.get_abs(1, 1) == u'\u231b'
+ assert s.get_region(1, 1, 1, 5) == [u'\u231b\u2328 ']
+
+ def test_decode_error(self):
+ """Test that default handling of decode errors replaces the
+ invalid characters."""
+ s = ANSI.ANSI(2, 10, codec="ascii")
+ s.write(b'\xff') # a non-ASCII character
+ # In unicode, the non-ASCII character is replaced with
+ # REPLACEMENT CHARACTER.
+ assert unicode(s) == u'\ufffd \n '
+ assert bytes(s) == b'? \n '
+ assert s.dump() == u'\ufffd '
+ assert s.pretty() == u'+----------+\n|\ufffd |\n| |\n+----------+\n'
+ assert s.get_abs(1, 1) == u'\ufffd'
+ assert s.get_region(1, 1, 1, 5) == [u'\ufffd ']
+
+
if __name__ == '__main__':
unittest.main()