Fix multi-frame Zstandard response decoding

author: Rogdham <3994389+Rogdham@users.noreply.github.com> 2023-05-14 17:36:00 +0200
committer: GitHub <noreply@github.com> 2023-05-14 10:36:00 -0500
commit: aca0f01bb6a29eda24799ec31895f45a1bb9e58b (patch)
tree: 229f194df82da9867f34f2edf9bb962582afdea3
parent: be5e03b940c301f057e45b22ce5a7022071a3361 (diff)
download: urllib3-aca0f01bb6a29eda24799ec31895f45a1bb9e58b.tar.gz
3 files changed, 27 insertions, 2 deletions
diff --git a/changelog/3008.bugfix.rst b/changelog/3008.bugfix.rst
new file mode 100644
index 00000000..6d78c94b
--- /dev/null
+++ b/changelog/3008.bugfix.rst
@@ -0,0 +1 @@
+Fixed response decoding with Zstandard when compressed data is made of several frames.
\ No newline at end of file
diff --git a/src/urllib3/response.py b/src/urllib3/response.py
index 1963f853..50e4d88f 100644
--- a/src/urllib3/response.py
+++ b/src/urllib3/response.py
@@ -169,10 +169,15 @@ if zstd is not None:
         def decompress(self, data: bytes) -> bytes:
             if not data:
                 return b""
-            return self._obj.decompress(data)  # type: ignore[no-any-return]
+            data_parts = [self._obj.decompress(data)]
+            while self._obj.eof and self._obj.unused_data:
+                unused_data = self._obj.unused_data
+                self._obj = zstd.ZstdDecompressor().decompressobj()
+                data_parts.append(self._obj.decompress(unused_data))
+            return b"".join(data_parts)
 
         def flush(self) -> bytes:
-            ret = self._obj.flush()
+            ret = self._obj.flush()  # note: this is a no-op
             if not self._obj.eof:
                 raise DecodeError("Zstandard data is incomplete")
             return ret  # type: ignore[no-any-return]
diff --git a/test/test_response.py b/test/test_response.py
index 6b5b1a17..c6d9d152 100644
--- a/test/test_response.py
+++ b/test/test_response.py
@@ -333,6 +333,25 @@ class TestResponse:
         assert r.data == b"foo"
 
     @onlyZstd()
+    def test_decode_multiframe_zstd(self) -> None:
+        data = (
+            # Zstandard frame
+            zstd.compress(b"foo")
+            # skippable frame (must be ignored)
+            + bytes.fromhex(
+                "50 2A 4D 18"  # Magic_Number (little-endian)
+                "07 00 00 00"  # Frame_Size (little-endian)
+                "00 00 00 00 00 00 00"  # User_Data
+            )
+            # Zstandard frame
+            + zstd.compress(b"bar")
+        )
+
+        fp = BytesIO(data)
+        r = HTTPResponse(fp, headers={"content-encoding": "zstd"})
+        assert r.data == b"foobar"
+
+    @onlyZstd()
     def test_chunked_decoding_zstd(self) -> None:
         data = zstd.compress(b"foobarbaz")
author	Rogdham <3994389+Rogdham@users.noreply.github.com>	2023-05-14 17:36:00 +0200
committer	GitHub <noreply@github.com>	2023-05-14 10:36:00 -0500
commit	aca0f01bb6a29eda24799ec31895f45a1bb9e58b (patch)
tree	229f194df82da9867f34f2edf9bb962582afdea3
parent	be5e03b940c301f057e45b22ce5a7022071a3361 (diff)
download	urllib3-aca0f01bb6a29eda24799ec31895f45a1bb9e58b.tar.gz