diff options
author | Ian Cordasco <ian.cordasco@rackspace.com> | 2015-04-23 08:57:53 -0500 |
---|---|---|
committer | Ian Cordasco <ian.cordasco@rackspace.com> | 2015-04-23 08:57:54 -0500 |
commit | 2cfc21644ae868e0ac8b5f24a27eabda8a1ebc1e (patch) | |
tree | 77b7e04ec98bc7992c9c127362cf09cd912f0afb | |
parent | 10b7a0fefa6596f47a9a6afc80f1f4d1ae950b66 (diff) | |
download | urllib3-2cfc21644ae868e0ac8b5f24a27eabda8a1ebc1e.tar.gz |
Decode information received from read_chunked in stream
Previously, chunked data was always decoded because it was read through
the read method on HTTPResponse, which handles decoding. Now that chunked
data is read by a separate method (read_chunked), stream needs to decode
it explicitly. This is a quick fix for #593 to address a regression in
behaviour.
Fixes #593
-rw-r--r-- | urllib3/response.py | 52 |
1 files changed, 34 insertions, 18 deletions
diff --git a/urllib3/response.py b/urllib3/response.py index 7e08fffe..f1ea9bb5 100644 --- a/urllib3/response.py +++ b/urllib3/response.py @@ -172,6 +172,36 @@ class HTTPResponse(io.IOBase): """ return self._fp_bytes_read + def _init_decoder(self): + """ + Set-up the _decoder attribute if necessar. + """ + # Note: content-encoding value should be case-insensitive, per RFC 7230 + # Section 3.2 + content_encoding = self.headers.get('content-encoding', '').lower() + if self._decoder is None: + if content_encoding in self.CONTENT_DECODERS: + self._decoder = _get_decoder(content_encoding) + + def _decode(self, data, decode_content, flush_decoder): + """ + Decode the data passed in and potentially flush the decoder. + """ + try: + if decode_content and self._decoder: + data = self._decoder.decompress(data) + except (IOError, zlib.error) as e: + content_encoding = self.headers.get('content-encoding', '').lower() + raise DecodeError( + "Received response with content-encoding: %s, but " + "failed to decode it." % content_encoding, e) + + if flush_decoder and decode_content and self._decoder: + buf = self._decoder.decompress(binary_type()) + data += buf + self._decoder.flush() + + return data + def read(self, amt=None, decode_content=None, cache_content=False): """ Similar to :meth:`httplib.HTTPResponse.read`, but with two additional @@ -193,12 +223,7 @@ class HTTPResponse(io.IOBase): after having ``.read()`` the file object. (Overridden if ``amt`` is set.) 
""" - # Note: content-encoding value should be case-insensitive, per RFC 7230 - # Section 3.2 - content_encoding = self.headers.get('content-encoding', '').lower() - if self._decoder is None: - if content_encoding in self.CONTENT_DECODERS: - self._decoder = _get_decoder(content_encoding) + self._init_decoder() if decode_content is None: decode_content = self.decode_content @@ -247,17 +272,7 @@ class HTTPResponse(io.IOBase): self._fp_bytes_read += len(data) - try: - if decode_content and self._decoder: - data = self._decoder.decompress(data) - except (IOError, zlib.error) as e: - raise DecodeError( - "Received response with content-encoding: %s, but " - "failed to decode it." % content_encoding, e) - - if flush_decoder and decode_content and self._decoder: - buf = self._decoder.decompress(binary_type()) - data += buf + self._decoder.flush() + data = self._decode(data, decode_content, flush_decoder) if cache_content: self._body = data @@ -284,9 +299,10 @@ class HTTPResponse(io.IOBase): If True, will attempt to decode the body based on the 'content-encoding' header. """ + self._init_decoder() if self.chunked: for line in self.read_chunked(amt): - yield line + yield self._decode(line, decode_content, True) else: while not is_fp_closed(self._fp): data = self.read(amt=amt, decode_content=decode_content) |