summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorjquast <contact@jeffquast.com>2013-11-04 20:32:20 -0800
committerjquast <contact@jeffquast.com>2013-11-04 20:32:20 -0800
commit691ce85b4acf55e699c2305e6302adc655d6fc70 (patch)
treee9c45016667321f80167045a8a854201c443e584
parent536396a69c7e6c2f36e4961e6ab67d78444e14de (diff)
downloadblessings-691ce85b4acf55e699c2305e6302adc655d6fc70.tar.gz
encode tparm output as latin1
As explained in the comments above the decode().encode() wrapper, certain terminal kinds, such as 'avatar' or 'kermit', emit 8-bit bytes that are not legal UTF-8. This issue is resolved by encoding these values as latin1, which leaves their values unmolested. This is exemplified by a gist: https://gist.github.com/jquast/5649654 If you don't believe me, try kind='kermit' or kind='avatar', along with t.cup(n, n). Unfortunately, the state of the test cases in the master branch (issue #33) does not allow adding a test case in this branch until that one is pulled to master. Hell, I'll go ahead and make a third (and final) pull request for that one.
-rw-r--r--blessings/__init__.py11
-rw-r--r--blessings/tests.py4
2 files changed, 11 insertions, 4 deletions
diff --git a/blessings/__init__.py b/blessings/__init__.py
index b135e01..d968bb5 100644
--- a/blessings/__init__.py
+++ b/blessings/__init__.py
@@ -367,7 +367,7 @@ class Terminal(object):
# We can encode escape sequences as UTF-8 because they never
# contain chars > 127, and UTF-8 never changes anything within that
# range..
- return code.decode('utf-8')
+ return code.decode('latin1')
return u''
def _resolve_color(self, color):
@@ -436,7 +436,14 @@ class ParametrizingString(unicode):
# Re-encode the cap, because tparm() takes a bytestring in Python
# 3. However, appear to be a plain Unicode string otherwise so
# concats work.
- parametrized = tparm(self.encode('utf-8'), *args).decode('utf-8')
+ # We use *latin1* encoding so that bytes emitted by tparam are
+ # encoded to their native value: some terminal kinds, such as
+ # 'avatar' or 'kermit', emit 8-bit bytes in range 0x7f to 0xff.
+ # latin1 leaves these values unmodified in their conversion to
+ # unicode byte values. The terminal emulator will 'catch' and
+ # handle these values, even if emitting utf8 encoded text, where
+ # these bytes would otherwise be illegal utf8 start bytes.
+ parametrized = tparm(self.encode('latin1'), *args).decode('latin1')
return (parametrized if self._normal is None else
FormattingString(parametrized, self._normal))
except curses.error:
diff --git a/blessings/tests.py b/blessings/tests.py
index 7dda746..15141dc 100644
--- a/blessings/tests.py
+++ b/blessings/tests.py
@@ -28,12 +28,12 @@ TestTerminal = partial(Terminal, kind='xterm-256color')
def unicode_cap(cap):
"""Return the result of ``tigetstr`` except as Unicode."""
- return tigetstr(cap).decode('utf-8')
+ return tigetstr(cap).decode('latin1')
def unicode_parm(cap, *parms):
"""Return the result of ``tparm(tigetstr())`` except as Unicode."""
- return tparm(tigetstr(cap), *parms).decode('utf-8')
+ return tparm(tigetstr(cap), *parms).decode('latin1')
def test_capability():