summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Masen Furer <m_github@0x26.net> 2023-04-05 06:23:00 -0700
committer: GitHub <noreply@github.com> 2023-04-05 06:23:00 -0700
commit: 8d8eba619b9e31c94fb8c09d2ab61223e935acd5 (patch)
tree: d7d86a8352bf219ff5b7b2a6cfa13ccecd37e47e
parent: 8a7327c14b8b5fca98126dd06ba293446c644064 (diff)
download: tox-git-8d8eba619b9e31c94fb8c09d2ab61223e935acd5.tar.gz
Avoid UnicodeDecodeError from command output (#2970)
* test_sync_write_decode_surrogate: utf-8 decode

  When SyncWrite decodes bytes as utf-8, it should replace unknown sequences
  with the unicode surrogate codepoint instead of crashing the program.

  Test case for #2969

* SyncWrite: decode with errors='surrogateescape'

  Avoid bubbling UnicodeDecodeError up from stream handling internals.

  Tox has no way of knowing that the bytestream emitted by a command will be
  valid utf-8, even if utf-8 is ostensibly the "correct" encoding for the
  stream. It's always possible for an arbitrary command to return non-utf-8
  bytes, and this situation should not break tox.

  Fix #2969
-rw-r--r-- docs/changelog/2969.bugfix.rst | 3
-rw-r--r-- src/tox/execute/stream.py | 2
-rw-r--r-- tests/execute/test_stream.py | 6
3 files changed, 10 insertions, 1 deletion
diff --git a/docs/changelog/2969.bugfix.rst b/docs/changelog/2969.bugfix.rst
new file mode 100644
index 00000000..29464086
--- /dev/null
+++ b/docs/changelog/2969.bugfix.rst
@@ -0,0 +1,3 @@
+Instead of raising ``UnicodeDecodeError`` when command output includes non-utf-8 bytes,
+``tox`` will now use ``surrogateescape`` error handling to convert the unrecognized bytes
+to escape sequences according to :pep:`383` - by :user:`masenf`.
diff --git a/src/tox/execute/stream.py b/src/tox/execute/stream.py
index 980c97ff..28c66be6 100644
--- a/src/tox/execute/stream.py
+++ b/src/tox/execute/stream.py
@@ -100,7 +100,7 @@ class SyncWrite:
@property
def text(self) -> str:
with self._content_lock:
- return self._content.decode("utf-8")
+ return self._content.decode("utf-8", errors="surrogateescape")
@property
def content(self) -> bytearray:
diff --git a/tests/execute/test_stream.py b/tests/execute/test_stream.py
index 6e870826..fe9dd563 100644
--- a/tests/execute/test_stream.py
+++ b/tests/execute/test_stream.py
@@ -8,3 +8,9 @@ from tox.execute.stream import SyncWrite
def test_sync_write_repr() -> None:
sync_write = SyncWrite(name="a", target=None, color=Fore.RED)
assert repr(sync_write) == f"SyncWrite(name='a', target=None, color={Fore.RED!r})"
+
+
+def test_sync_write_decode_surrogate() -> None:
+ sync_write = SyncWrite(name="a", target=None)
+ sync_write.handler(b"\xed\n")
+ assert sync_write.text == "\udced\n"