Merge pull request #597 from sigmavirus24/refactor/594

Refactor fix for 593
author: Andrey Petrov <shazow@gmail.com> 2015-04-25 16:52:49 -0400
committer: Andrey Petrov <shazow@gmail.com> 2015-04-25 16:52:49 -0400
commit: 548b79a1d1e81bfd297d2ebf1c0129859c656406 (patch)
tree: ca8142e769c233259a265b951f8b87c9a1479878
parent: 6ecc5f392a0ed66be3bef519d8a50f6395ff5a77 (diff)
parent: b06e1b72643255250c752286f502e93efee80e7c (diff)
download: urllib3-548b79a1d1e81bfd297d2ebf1c0129859c656406.tar.gz
1 files changed, 62 insertions, 48 deletions
diff --git a/urllib3/response.py b/urllib3/response.py
index f1ea9bb5..79a5e789 100644
--- a/urllib3/response.py
+++ b/urllib3/response.py
@@ -126,14 +126,15 @@ class HTTPResponse(io.IOBase):
         # Are we using the chunked-style of transfer encoding?
         self.chunked = False
         self.chunk_left = None
-        tr_enc = self.headers.get('transfer-encoding', '')
-        if tr_enc.lower() == "chunked":
+        tr_enc = self.headers.get('transfer-encoding', '').lower()
+        # Don't incur the penalty of creating a list and then discarding it
+        encodings = (enc.strip() for enc in tr_enc.split(","))
+        if "chunked" in encodings:
             self.chunked = True
 
         # We certainly don't want to preload content when the response is chunked.
-        if not self.chunked:
-            if preload_content and not self._body:
-                self._body = self.read(decode_content=decode_content)
+        if not self.chunked and preload_content and not self._body:
+            self._body = self.read(decode_content=decode_content)
 
     def get_redirect_location(self):
         """
@@ -179,9 +180,8 @@ class HTTPResponse(io.IOBase):
         # Note: content-encoding value should be case-insensitive, per RFC 7230
         # Section 3.2
         content_encoding = self.headers.get('content-encoding', '').lower()
-        if self._decoder is None:
-            if content_encoding in self.CONTENT_DECODERS:
-                self._decoder = _get_decoder(content_encoding)
+        if self._decoder is None and content_encoding in self.CONTENT_DECODERS:
+            self._decoder = _get_decoder(content_encoding)
 
     def _decode(self, data, decode_content, flush_decoder):
         """
@@ -299,10 +299,9 @@ class HTTPResponse(io.IOBase):
             If True, will attempt to decode the body based on the
             'content-encoding' header.
         """
-        self._init_decoder()
         if self.chunked:
-            for line in self.read_chunked(amt):
-                yield self._decode(line, decode_content, True)
+            for line in self.read_chunked(amt, decode_content=decode_content):
+                yield line
         else:
             while not is_fp_closed(self._fp):
                 data = self.read(amt=amt, decode_content=decode_content)
@@ -387,48 +386,64 @@ class HTTPResponse(io.IOBase):
             b[:len(temp)] = temp
             return len(temp)
 
-    def read_chunked(self, amt=None):
+    def _update_chunk_length(self):
+        # First, we'll figure out length of a chunk and then
+        # we'll try to read it from socket.
+        if self.chunk_left is not None:
+            return
+        line = self._fp.fp.readline()
+        line = line.split(b';', 1)[0]
+        try:
+            self.chunk_left = int(line, 16)
+        except ValueError:
+            # Invalid chunked protocol response, abort.
+            self.close()
+            raise httplib.IncompleteRead(line)
+
+    def _handle_chunk(self, amt):
+        returned_chunk = None
+        if amt is None:
+            chunk = self._fp._safe_read(self.chunk_left)
+            returned_chunk = chunk
+            self._fp._safe_read(2)  # Toss the CRLF at the end of the chunk.
+            self.chunk_left = None
+        elif amt < self.chunk_left:
+            value = self._fp._safe_read(amt)
+            self.chunk_left = self.chunk_left - amt
+            returned_chunk = value
+        elif amt == self.chunk_left:
+            value = self._fp._safe_read(amt)
+            self._fp._safe_read(2)  # Toss the CRLF at the end of the chunk.
+            self.chunk_left = None
+            returned_chunk = value
+        else:  # amt > self.chunk_left
+            returned_chunk = self._fp._safe_read(self.chunk_left)
+            self._fp._safe_read(2)  # Toss the CRLF at the end of the chunk.
+            self.chunk_left = None
+        return returned_chunk
+
+    def read_chunked(self, amt=None, decode_content=None):
+        """
+        Similar to :meth:`HTTPResponse.read`, but with an additional
+        parameter: ``decode_content``.
+
+        :param decode_content:
+            If True, will attempt to decode the body based on the
+            'content-encoding' header.
+        """
+        self._init_decoder()
         # FIXME: Rewrite this method and make it a class with
         #        a better structured logic.
         if not self.chunked:
             raise ResponseNotChunked("Response is not chunked. "
                 "Header 'transfer-encoding: chunked' is missing.")
         while True:
-            # First, we'll figure out length of a chunk and then
-            # we'll try to read it from socket.
-            if self.chunk_left is None:
-                line = self._fp.fp.readline()
-                line = line.decode()
-                # See RFC 7230: Chunked Transfer Coding.
-                i = line.find(';')
-                if i >= 0:
-                    line = line[:i]  # Strip chunk-extensions.
-                try:
-                    self.chunk_left = int(line, 16)
-                except ValueError:
-                    # Invalid chunked protocol response, abort.
-                    self.close()
-                    raise httplib.IncompleteRead(''.join(line))
-                if self.chunk_left == 0:
-                    break
-            if amt is None:
-                chunk = self._fp._safe_read(self.chunk_left)
-                yield chunk
-                self._fp._safe_read(2)  # Toss the CRLF at the end of the chunk.
-                self.chunk_left = None
-            elif amt < self.chunk_left:
-                value = self._fp._safe_read(amt)
-                self.chunk_left = self.chunk_left - amt
-                yield value
-            elif amt == self.chunk_left:
-                value = self._fp._safe_read(amt)
-                self._fp._safe_read(2)  # Toss the CRLF at the end of the chunk.
-                self.chunk_left = None
-                yield value
-            else:  # amt > self.chunk_left
-                yield self._fp._safe_read(self.chunk_left)
-                self._fp._safe_read(2)  # Toss the CRLF at the end of the chunk.
-                self.chunk_left = None
+            self._update_chunk_length()
+            if self.chunk_left == 0:
+                break
+            chunk = self._handle_chunk(amt)
+            yield self._decode(chunk, decode_content=decode_content,
+                               flush_decoder=True)
 
         # Chunk content ends with \r\n: discard it.
         while True:
@@ -441,4 +456,3 @@ class HTTPResponse(io.IOBase):
 
         # We read everything; close the "file".
         self.release_conn()
-
author	Andrey Petrov <shazow@gmail.com>	2015-04-25 16:52:49 -0400
committer	Andrey Petrov <shazow@gmail.com>	2015-04-25 16:52:49 -0400
commit	548b79a1d1e81bfd297d2ebf1c0129859c656406 (patch)
tree	ca8142e769c233259a265b951f8b87c9a1479878
parent	6ecc5f392a0ed66be3bef519d8a50f6395ff5a77 (diff)
parent	b06e1b72643255250c752286f502e93efee80e7c (diff)
download	urllib3-548b79a1d1e81bfd297d2ebf1c0129859c656406.tar.gz