diff options
author | Andrey Petrov <shazow@gmail.com> | 2015-04-25 16:52:49 -0400 |
---|---|---|
committer | Andrey Petrov <shazow@gmail.com> | 2015-04-25 16:52:49 -0400 |
commit | 548b79a1d1e81bfd297d2ebf1c0129859c656406 (patch) | |
tree | ca8142e769c233259a265b951f8b87c9a1479878 | |
parent | 6ecc5f392a0ed66be3bef519d8a50f6395ff5a77 (diff) | |
parent | b06e1b72643255250c752286f502e93efee80e7c (diff) | |
download | urllib3-548b79a1d1e81bfd297d2ebf1c0129859c656406.tar.gz |
Merge pull request #597 from sigmavirus24/refactor/594
Refactor fix for 593
-rw-r--r-- | urllib3/response.py | 110 |
1 files changed, 62 insertions, 48 deletions
diff --git a/urllib3/response.py b/urllib3/response.py index f1ea9bb5..79a5e789 100644 --- a/urllib3/response.py +++ b/urllib3/response.py @@ -126,14 +126,15 @@ class HTTPResponse(io.IOBase): # Are we using the chunked-style of transfer encoding? self.chunked = False self.chunk_left = None - tr_enc = self.headers.get('transfer-encoding', '') - if tr_enc.lower() == "chunked": + tr_enc = self.headers.get('transfer-encoding', '').lower() + # Don't incur the penalty of creating a list and then discarding it + encodings = (enc.strip() for enc in tr_enc.split(",")) + if "chunked" in encodings: self.chunked = True # We certainly don't want to preload content when the response is chunked. - if not self.chunked: - if preload_content and not self._body: - self._body = self.read(decode_content=decode_content) + if not self.chunked and preload_content and not self._body: + self._body = self.read(decode_content=decode_content) def get_redirect_location(self): """ @@ -179,9 +180,8 @@ class HTTPResponse(io.IOBase): # Note: content-encoding value should be case-insensitive, per RFC 7230 # Section 3.2 content_encoding = self.headers.get('content-encoding', '').lower() - if self._decoder is None: - if content_encoding in self.CONTENT_DECODERS: - self._decoder = _get_decoder(content_encoding) + if self._decoder is None and content_encoding in self.CONTENT_DECODERS: + self._decoder = _get_decoder(content_encoding) def _decode(self, data, decode_content, flush_decoder): """ @@ -299,10 +299,9 @@ class HTTPResponse(io.IOBase): If True, will attempt to decode the body based on the 'content-encoding' header. """ - self._init_decoder() if self.chunked: - for line in self.read_chunked(amt): - yield self._decode(line, decode_content, True) + for line in self.read_chunked(amt, decode_content=decode_content): + yield line else: while not is_fp_closed(self._fp): data = self.read(amt=amt, decode_content=decode_content) @@ -387,48 +386,64 @@ class HTTPResponse(io.IOBase): b[:len(temp)] = temp return len(temp) - def read_chunked(self, amt=None): + def _update_chunk_length(self): + # First, we'll figure out length of a chunk and then + # we'll try to read it from socket. + if self.chunk_left is not None: + return + line = self._fp.fp.readline() + line = line.split(b';', 1)[0] + try: + self.chunk_left = int(line, 16) + except ValueError: + # Invalid chunked protocol response, abort. + self.close() + raise httplib.IncompleteRead(line) + + def _handle_chunk(self, amt): + returned_chunk = None + if amt is None: + chunk = self._fp._safe_read(self.chunk_left) + returned_chunk = chunk + self._fp._safe_read(2) # Toss the CRLF at the end of the chunk. + self.chunk_left = None + elif amt < self.chunk_left: + value = self._fp._safe_read(amt) + self.chunk_left = self.chunk_left - amt + returned_chunk = value + elif amt == self.chunk_left: + value = self._fp._safe_read(amt) + self._fp._safe_read(2) # Toss the CRLF at the end of the chunk. + self.chunk_left = None + returned_chunk = value + else: # amt > self.chunk_left + returned_chunk = self._fp._safe_read(self.chunk_left) + self._fp._safe_read(2) # Toss the CRLF at the end of the chunk. + self.chunk_left = None + return returned_chunk + + def read_chunked(self, amt=None, decode_content=None): + """ + Similar to :meth:`HTTPResponse.read`, but with an additional + parameter: ``decode_content``. + + :param decode_content: + If True, will attempt to decode the body based on the + 'content-encoding' header. + """ + self._init_decoder() # FIXME: Rewrite this method and make it a class with # a better structured logic. if not self.chunked: raise ResponseNotChunked("Response is not chunked. " "Header 'transfer-encoding: chunked' is missing.") while True: - # First, we'll figure out length of a chunk and then - # we'll try to read it from socket. - if self.chunk_left is None: - line = self._fp.fp.readline() - line = line.decode() - # See RFC 7230: Chunked Transfer Coding. - i = line.find(';') - if i >= 0: - line = line[:i] # Strip chunk-extensions. - try: - self.chunk_left = int(line, 16) - except ValueError: - # Invalid chunked protocol response, abort. - self.close() - raise httplib.IncompleteRead(''.join(line)) - if self.chunk_left == 0: - break - if amt is None: - chunk = self._fp._safe_read(self.chunk_left) - yield chunk - self._fp._safe_read(2) # Toss the CRLF at the end of the chunk. - self.chunk_left = None - elif amt < self.chunk_left: - value = self._fp._safe_read(amt) - self.chunk_left = self.chunk_left - amt - yield value - elif amt == self.chunk_left: - value = self._fp._safe_read(amt) - self._fp._safe_read(2) # Toss the CRLF at the end of the chunk. - self.chunk_left = None - yield value - else: # amt > self.chunk_left - yield self._fp._safe_read(self.chunk_left) - self._fp._safe_read(2) # Toss the CRLF at the end of the chunk. - self.chunk_left = None + self._update_chunk_length() + if self.chunk_left == 0: + break + chunk = self._handle_chunk(amt) + yield self._decode(chunk, decode_content=decode_content, + flush_decoder=True) # Chunk content ends with \r\n: discard it. while True: @@ -441,4 +456,3 @@ class HTTPResponse(io.IOBase): # We read everything; close the "file". self.release_conn() - |