summary refs log tree commit diff
diff options
context:
space:
mode:
author Seth Michael Larson <sethmichaellarson@gmail.com> 2023-05-03 15:46:21 -0500
committer GitHub <noreply@github.com> 2023-05-03 15:46:21 -0500
commit 4714836a667eb4837d005eb89d34fae60b9dc6cc (patch)
tree 6585eeacf5245d430470227dbb856884cd8ded13
parent 6351614959b6599fe53312223c972daba75a671f (diff)
download urllib3-4714836a667eb4837d005eb89d34fae60b9dc6cc.tar.gz
Continue reading the response stream if there is buffered decompressed data
-rw-r--r-- changelog/3009.bugfix 3
-rw-r--r-- src/urllib3/response.py 2
-rw-r--r-- test/with_dummyserver/test_socketlevel.py 49
3 files changed, 53 insertions, 1 deletion
diff --git a/changelog/3009.bugfix b/changelog/3009.bugfix
new file mode 100644
index 00000000..61f54a49
--- /dev/null
+++ b/changelog/3009.bugfix
@@ -0,0 +1,3 @@
+Fixed ``HTTPResponse.stream()`` to continue yielding bytes if buffered decompressed data
+was still available to be read even if the underlying socket is closed. This prevents
+a compressed response from being truncated.
diff --git a/src/urllib3/response.py b/src/urllib3/response.py
index d8506875..1963f853 100644
--- a/src/urllib3/response.py
+++ b/src/urllib3/response.py
@@ -931,7 +931,7 @@ class HTTPResponse(BaseHTTPResponse):
if self.chunked and self.supports_chunked_reads():
yield from self.read_chunked(amt, decode_content=decode_content)
else:
- while not is_fp_closed(self._fp):
+ while not is_fp_closed(self._fp) or len(self._decoded_buffer) > 0:
data = self.read(amt=amt, decode_content=decode_content)
if data:
diff --git a/test/with_dummyserver/test_socketlevel.py b/test/with_dummyserver/test_socketlevel.py
index f0d5f556..cae6b241 100644
--- a/test/with_dummyserver/test_socketlevel.py
+++ b/test/with_dummyserver/test_socketlevel.py
@@ -15,6 +15,7 @@ import sys
import tempfile
import time
import typing
+import zlib
from collections import OrderedDict
from pathlib import Path
from test import (
@@ -2001,6 +2002,54 @@ class TestStream(SocketDummyServerTestCase):
done_event.set()
+ def test_large_compressed_stream(self) -> None:
+ done_event = Event()
+ expected_total_length = 296085
+
+ def socket_handler(listener: socket.socket) -> None:
+ compress = zlib.compressobj(6, zlib.DEFLATED, 16 + zlib.MAX_WBITS)
+ data = compress.compress(b"x" * expected_total_length)
+ data += compress.flush()
+
+ sock = listener.accept()[0]
+
+ buf = b""
+ while not buf.endswith(b"\r\n\r\n"):
+ buf += sock.recv(65536)
+
+ sock.sendall(
+ b"HTTP/1.1 200 OK\r\n"
+ b"Content-Length: %d\r\n"
+ b"Content-Encoding: gzip\r\n"
+ b"\r\n" % (len(data),) + data
+ )
+
+ done_event.wait(5)
+ sock.close()
+
+ self._start_server(socket_handler)
+
+ with HTTPConnectionPool(self.host, self.port, retries=False) as pool:
+ r = pool.request("GET", "/", timeout=LONG_TIMEOUT, preload_content=False)
+
+ # Chunks must all be equal or less than 10240
+ # and only the last chunk is allowed to be smaller
+ # than 10240.
+ total_length = 0
+ chunks_smaller_than_10240 = 0
+ for chunk in r.stream(10240, decode_content=True):
+ assert 0 < len(chunk) <= 10240
+ if len(chunk) < 10240:
+ chunks_smaller_than_10240 += 1
+ else:
+ assert chunks_smaller_than_10240 == 0
+ total_length += len(chunk)
+
+ assert chunks_smaller_than_10240 == 1
+ assert expected_total_length == total_length
+
+ done_event.set()
+
class TestBadContentLength(SocketDummyServerTestCase):
def test_enforce_content_length_get(self) -> None: