diff options
Diffstat (limited to 'urllib3/response.py')
-rw-r--r-- | urllib3/response.py | 162 |
1 files changed, 100 insertions, 62 deletions
diff --git a/urllib3/response.py b/urllib3/response.py index 7e08fffe..24140c4c 100644 --- a/urllib3/response.py +++ b/urllib3/response.py @@ -126,14 +126,15 @@ class HTTPResponse(io.IOBase): # Are we using the chunked-style of transfer encoding? self.chunked = False self.chunk_left = None - tr_enc = self.headers.get('transfer-encoding', '') - if tr_enc.lower() == "chunked": + tr_enc = self.headers.get('transfer-encoding', '').lower() + # Don't incur the penalty of creating a list and then discarding it + encodings = (enc.strip() for enc in tr_enc.split(",")) + if "chunked" in encodings: self.chunked = True # We certainly don't want to preload content when the response is chunked. - if not self.chunked: - if preload_content and not self._body: - self._body = self.read(decode_content=decode_content) + if not self.chunked and preload_content and not self._body: + self._body = self.read(decode_content=decode_content) def get_redirect_location(self): """ @@ -172,6 +173,35 @@ class HTTPResponse(io.IOBase): """ return self._fp_bytes_read + def _init_decoder(self): + """ + Set-up the _decoder attribute if necessar. + """ + # Note: content-encoding value should be case-insensitive, per RFC 7230 + # Section 3.2 + content_encoding = self.headers.get('content-encoding', '').lower() + if self._decoder is None and content_encoding in self.CONTENT_DECODERS: + self._decoder = _get_decoder(content_encoding) + + def _decode(self, data, decode_content, flush_decoder): + """ + Decode the data passed in and potentially flush the decoder. + """ + try: + if decode_content and self._decoder: + data = self._decoder.decompress(data) + except (IOError, zlib.error) as e: + content_encoding = self.headers.get('content-encoding', '').lower() + raise DecodeError( + "Received response with content-encoding: %s, but " + "failed to decode it." % content_encoding, e) + + if flush_decoder and decode_content and self._decoder: + buf = self._decoder.decompress(binary_type()) + data += buf + self._decoder.flush() + + return data + def read(self, amt=None, decode_content=None, cache_content=False): """ Similar to :meth:`httplib.HTTPResponse.read`, but with two additional @@ -193,12 +223,7 @@ class HTTPResponse(io.IOBase): after having ``.read()`` the file object. (Overridden if ``amt`` is set.) """ - # Note: content-encoding value should be case-insensitive, per RFC 7230 - # Section 3.2 - content_encoding = self.headers.get('content-encoding', '').lower() - if self._decoder is None: - if content_encoding in self.CONTENT_DECODERS: - self._decoder = _get_decoder(content_encoding) + self._init_decoder() if decode_content is None: decode_content = self.decode_content @@ -247,17 +272,7 @@ class HTTPResponse(io.IOBase): self._fp_bytes_read += len(data) - try: - if decode_content and self._decoder: - data = self._decoder.decompress(data) - except (IOError, zlib.error) as e: - raise DecodeError( - "Received response with content-encoding: %s, but " - "failed to decode it." % content_encoding, e) - - if flush_decoder and decode_content and self._decoder: - buf = self._decoder.decompress(binary_type()) - data += buf + self._decoder.flush() + data = self._decode(data, decode_content, flush_decoder) if cache_content: self._body = data @@ -285,7 +300,7 @@ class HTTPResponse(io.IOBase): 'content-encoding' header. """ if self.chunked: - for line in self.read_chunked(amt): + for line in self.read_chunked(amt, decode_content=decode_content): yield line else: while not is_fp_closed(self._fp): @@ -371,48 +386,70 @@ class HTTPResponse(io.IOBase): b[:len(temp)] = temp return len(temp) - def read_chunked(self, amt=None): - # FIXME: Rewrite this method and make it a class with - # a better structured logic. + def _update_chunk_length(self): + # First, we'll figure out length of a chunk and then + # we'll try to read it from socket. + if self.chunk_left is not None: + return + line = self._fp.fp.readline() + line = line.split(b';', 1)[0] + try: + self.chunk_left = int(line, 16) + except ValueError: + # Invalid chunked protocol response, abort. + self.close() + raise httplib.IncompleteRead(line) + + def _handle_chunk(self, amt): + returned_chunk = None + if amt is None: + chunk = self._fp._safe_read(self.chunk_left) + returned_chunk = chunk + self._fp._safe_read(2) # Toss the CRLF at the end of the chunk. + self.chunk_left = None + elif amt < self.chunk_left: + value = self._fp._safe_read(amt) + self.chunk_left = self.chunk_left - amt + returned_chunk = value + elif amt == self.chunk_left: + value = self._fp._safe_read(amt) + self._fp._safe_read(2) # Toss the CRLF at the end of the chunk. + self.chunk_left = None + returned_chunk = value + else: # amt > self.chunk_left + returned_chunk = self._fp._safe_read(self.chunk_left) + self._fp._safe_read(2) # Toss the CRLF at the end of the chunk. + self.chunk_left = None + return returned_chunk + + def read_chunked(self, amt=None, decode_content=None): + """ + Similar to :meth:`HTTPResponse.read`, but with an additional + parameter: ``decode_content``. + + :param decode_content: + If True, will attempt to decode the body based on the + 'content-encoding' header. + """ + self._init_decoder() + # FIXME: Rewrite this method and make it a class with a better structured logic. if not self.chunked: raise ResponseNotChunked("Response is not chunked. " "Header 'transfer-encoding: chunked' is missing.") + + if self._original_response and self._original_response._method.upper() == 'HEAD': + # Don't bother reading the body of a HEAD request. + # FIXME: Can we do this somehow without accessing private httplib _method? + self._original_response.close() + return + while True: - # First, we'll figure out length of a chunk and then - # we'll try to read it from socket. - if self.chunk_left is None: - line = self._fp.fp.readline() - line = line.decode() - # See RFC 7230: Chunked Transfer Coding. - i = line.find(';') - if i >= 0: - line = line[:i] # Strip chunk-extensions. - try: - self.chunk_left = int(line, 16) - except ValueError: - # Invalid chunked protocol response, abort. - self.close() - raise httplib.IncompleteRead(''.join(line)) - if self.chunk_left == 0: - break - if amt is None: - chunk = self._fp._safe_read(self.chunk_left) - yield chunk - self._fp._safe_read(2) # Toss the CRLF at the end of the chunk. - self.chunk_left = None - elif amt < self.chunk_left: - value = self._fp._safe_read(amt) - self.chunk_left = self.chunk_left - amt - yield value - elif amt == self.chunk_left: - value = self._fp._safe_read(amt) - self._fp._safe_read(2) # Toss the CRLF at the end of the chunk. - self.chunk_left = None - yield value - else: # amt > self.chunk_left - yield self._fp._safe_read(self.chunk_left) - self._fp._safe_read(2) # Toss the CRLF at the end of the chunk. - self.chunk_left = None + self._update_chunk_length() + if self.chunk_left == 0: + break + chunk = self._handle_chunk(amt) + yield self._decode(chunk, decode_content=decode_content, + flush_decoder=True) # Chunk content ends with \r\n: discard it. while True: @@ -424,5 +461,6 @@ class HTTPResponse(io.IOBase): break # We read everything; close the "file". + if self._original_response: + self._original_response.close() self.release_conn() - |