diff options
author | David O'Shea <doshea@doshea-centos-x86-64.adl.quantum.com> | 2014-07-08 10:40:27 +0930 |
---|---|---|
committer | David O'Shea <doshea@doshea-centos-x86-64.adl.quantum.com> | 2014-07-24 22:09:23 +0930 |
commit | db34b8fed2c21b8b790d2710fde1004d49c8f00e (patch) | |
tree | 589a5f4a902fc38570136c615686bd5bab8c4a68 /tests/test_ansi.py | |
parent | cfca6f309c1c8109bd3ad023c6382849fb5c6632 (diff) | |
download | pexpect-db34b8fed2c21b8b790d2710fde1004d49c8f00e.tar.gz |
Issue #84: Unicode support in screen and ANSI.
This commit updates the screen and ANSI modules to support Unicode
under Python 2.x. Under Python 3.x, it was already supported because
strings are Unicode by default. Now, on both Python versions:
- The constructors accept a codec name (defaults to 'latin-1') and a
scheme for handling encoding/decoding errors (defaults to
'replace'). The codec may be set to None to inhibit
encoding/decoding.
- Unicode is now used internally for storing the screen contents.
- Methods that accept input characters will, if passed input of type
'bytes' (or, under Python 2.x, 'str'), use the specified codec to
decode the input, otherwise treating it as Unicode.
- Methods that return screen contents now return Unicode, with the
exception of __str__() under Python 2.x, and __bytes__() in all
versions of Python, which return the screen contents encoded using
the specified codec.
These changes are designed to work only with Python 2.6, 2.7, and 3.3
and later, specifically versions that provide both b'' and u'' string
literals.
The check in ANSI for characters being printable is also removed,
because it prevented non-ASCII characters from being accepted, which is
incompatible with the goal of adding Unicode support. This addresses
issue #83.
Diffstat (limited to 'tests/test_ansi.py')
-rwxr-xr-x | tests/test_ansi.py | 52 |
1 files changed, 52 insertions, 0 deletions
diff --git a/tests/test_ansi.py b/tests/test_ansi.py index 3b8d6a9..48d92a8 100755 --- a/tests/test_ansi.py +++ b/tests/test_ansi.py @@ -21,6 +21,11 @@ PEXPECT LICENSE from pexpect import ANSI import unittest from . import PexpectTestCase +import sys + +PY3 = (sys.version_info[0] >= 3) +if PY3: + unicode = str write_target = 'I\'ve got a ferret sticking up my nose. \n' +\ '(He\'s got a ferret sticking up his nose.) \n' +\ @@ -147,6 +152,53 @@ class ansiTestCase (PexpectTestCase.PexpectTestCase): assert str(s) == ('test ') assert(s.state.memory == [s, '0', '1', '32', '45']) + def test_utf8_bytes(self): + """Test that when bytes are passed in containing UTF-8 encoded + characters, where the encoding of each character consists of + multiple bytes, the characters are correctly decoded. + Incremental decoding is also tested.""" + s = ANSI.ANSI(2, 10, codec='utf-8') + # This is the UTF-8 encoding of the UCS character "HOURGLASS" + # followed by the UTF-8 encoding of the UCS character + # "KEYBOARD". These characters can't be encoded in cp437 or + # latin-1. The "KEYBOARD" character is split into two + # separate writes. 
+ s.write(b'\xe2\x8c\x9b') + s.write(b'\xe2\x8c') + s.write(b'\xa8') + assert unicode(s) == u'\u231b\u2328 \n ' + assert bytes(s) == b'\xe2\x8c\x9b\xe2\x8c\xa8 \n ' + assert s.dump() == u'\u231b\u2328 ' + assert s.pretty() == u'+----------+\n|\u231b\u2328 |\n| |\n+----------+\n' + assert s.get_abs(1, 1) == u'\u231b' + assert s.get_region(1, 1, 1, 5) == [u'\u231b\u2328 '] + + def test_unicode(self): + """Test passing in of a unicode string.""" + s = ANSI.ANSI(2, 10, codec="utf-8") + s.write(u'\u231b\u2328') + assert unicode(s) == u'\u231b\u2328 \n ' + assert bytes(s) == b'\xe2\x8c\x9b\xe2\x8c\xa8 \n ' + assert s.dump() == u'\u231b\u2328 ' + assert s.pretty() == u'+----------+\n|\u231b\u2328 |\n| |\n+----------+\n' + assert s.get_abs(1, 1) == u'\u231b' + assert s.get_region(1, 1, 1, 5) == [u'\u231b\u2328 '] + + def test_decode_error(self): + """Test that default handling of decode errors replaces the + invalid characters.""" + s = ANSI.ANSI(2, 10, codec="ascii") + s.write(b'\xff') # a non-ASCII character + # In unicode, the non-ASCII character is replaced with + # REPLACEMENT CHARACTER. + assert unicode(s) == u'\ufffd \n ' + assert bytes(s) == b'? \n ' + assert s.dump() == u'\ufffd ' + assert s.pretty() == u'+----------+\n|\ufffd |\n| |\n+----------+\n' + assert s.get_abs(1, 1) == u'\ufffd' + assert s.get_region(1, 1, 1, 5) == [u'\ufffd '] + + if __name__ == '__main__': unittest.main() |