diff options
author | Masen Furer <m_github@0x26.net> | 2023-04-05 06:23:00 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-04-05 06:23:00 -0700 |
commit | 8d8eba619b9e31c94fb8c09d2ab61223e935acd5 (patch) | |
tree | d7d86a8352bf219ff5b7b2a6cfa13ccecd37e47e | |
parent | 8a7327c14b8b5fca98126dd06ba293446c644064 (diff) | |
download | tox-git-8d8eba619b9e31c94fb8c09d2ab61223e935acd5.tar.gz |
Avoid UnicodeDecodeError from command output (#2970)
* test_sync_write_decode_surrogate: utf-8 decode
When SyncWrite decodes bytes as UTF-8, it should replace undecodable
bytes with lone Unicode surrogate code points instead of crashing the
program.
Test case for #2969
* SyncWrite: decode with errors='surrogateescape'
Avoid bubbling UnicodeDecodeError up from stream handling internals.
Tox has no way of knowing that the bytestream emitted by a command will
be valid utf-8, even if utf-8 is ostensibly the "correct" encoding for
the stream. It's always possible for an arbitrary command to return
non-utf-8 bytes, and this situation should not break tox.
Fix #2969
-rw-r--r-- | docs/changelog/2969.bugfix.rst | 3 | ||||
-rw-r--r-- | src/tox/execute/stream.py | 2 | ||||
-rw-r--r-- | tests/execute/test_stream.py | 6 |
3 files changed, 10 insertions, 1 deletion
```diff
diff --git a/docs/changelog/2969.bugfix.rst b/docs/changelog/2969.bugfix.rst
new file mode 100644
index 00000000..29464086
--- /dev/null
+++ b/docs/changelog/2969.bugfix.rst
@@ -0,0 +1,3 @@
+Instead of raising ``UnicodeDecodeError`` when command output includes non-utf-8 bytes,
+``tox`` will now use ``surrogateescape`` error handling to convert the unrecognized bytes
+to escape sequences according to :pep:`383` - by :user:`masenf`.
diff --git a/src/tox/execute/stream.py b/src/tox/execute/stream.py
index 980c97ff..28c66be6 100644
--- a/src/tox/execute/stream.py
+++ b/src/tox/execute/stream.py
@@ -100,7 +100,7 @@ class SyncWrite:
     @property
     def text(self) -> str:
         with self._content_lock:
-            return self._content.decode("utf-8")
+            return self._content.decode("utf-8", errors="surrogateescape")
 
     @property
     def content(self) -> bytearray:
diff --git a/tests/execute/test_stream.py b/tests/execute/test_stream.py
index 6e870826..fe9dd563 100644
--- a/tests/execute/test_stream.py
+++ b/tests/execute/test_stream.py
@@ -8,3 +8,9 @@ from tox.execute.stream import SyncWrite
 def test_sync_write_repr() -> None:
     sync_write = SyncWrite(name="a", target=None, color=Fore.RED)
     assert repr(sync_write) == f"SyncWrite(name='a', target=None, color={Fore.RED!r})"
+
+
+def test_sync_write_decode_surrogate() -> None:
+    sync_write = SyncWrite(name="a", target=None)
+    sync_write.handler(b"\xed\n")
+    assert sync_write.text == "\udced\n"
```