Merge pull request #674 from jmoldow/issue-673

Capture httplib errors in Response.stream()
author: Andrey Petrov <shazow@gmail.com> 2015-07-15 22:13:27 +0200
committer: Andrey Petrov <shazow@gmail.com> 2015-07-15 22:13:27 +0200
commit: 9f6ec3acebbaab7b432dc9cd92e208d42236eb01 (patch)
tree: 78e631c9685454f41a5808c556b73220c590d3dd
parent: 21a288be4487040c6e21e27cec025b74d2a83152 (diff)
parent: de2164fc919ac9dabb41afdbc1bbaa9c1f656257 (diff)
download: urllib3-9f6ec3acebbaab7b432dc9cd92e208d42236eb01.tar.gz
2 files changed, 81 insertions, 68 deletions
diff --git a/test/test_response.py b/test/test_response.py
index 2e2be0ec..47d05213 100644
--- a/test/test_response.py
+++ b/test/test_response.py
@@ -7,7 +7,7 @@ try:
 except ImportError:
     import httplib
 from urllib3.response import HTTPResponse
-from urllib3.exceptions import DecodeError, ResponseNotChunked
+from urllib3.exceptions import DecodeError, ResponseNotChunked, ProtocolError
 
 
 from base64 import b64decode
@@ -487,7 +487,7 @@ class TestResponse(unittest.TestCase):
         r.chunked = True
         r.chunk_left = None
         resp = HTTPResponse(r, preload_content=False, headers={'transfer-encoding': 'chunked'})
-        self.assertRaises(httplib.IncompleteRead, next, resp.read_chunked())
+        self.assertRaises(ProtocolError, next, resp.read_chunked())
 
     def test_chunked_response_without_crlf_on_end(self):
         stream = [b"foo", b"bar", b"baz"]
diff --git a/urllib3/response.py b/urllib3/response.py
index 2072cc12..15d4aaca 100644
--- a/urllib3/response.py
+++ b/urllib3/response.py
@@ -2,6 +2,7 @@ try:
     import http.client as httplib
 except ImportError:
     import httplib
+from contextlib import contextmanager
 import zlib
 import io
 from socket import timeout as SocketTimeout
@@ -202,56 +203,18 @@ class HTTPResponse(io.IOBase):
 
         return data
 
-    def read(self, amt=None, decode_content=None, cache_content=False):
+    @contextmanager
+    def _error_catcher(self):
         """
-        Similar to :meth:`httplib.HTTPResponse.read`, but with two additional
-        parameters: ``decode_content`` and ``cache_content``.
-
-        :param amt:
-            How much of the content to read. If specified, caching is skipped
-            because it doesn't make sense to cache partial content as the full
-            response.
+        Catch low-level python exceptions, instead re-raising urllib3
+        variants, so that low-level exceptions are not leaked in the
+        high-level api.
 
-        :param decode_content:
-            If True, will attempt to decode the body based on the
-            'content-encoding' header.
-
-        :param cache_content:
-            If True, will save the returned data such that the same result is
-            returned despite of the state of the underlying file object. This
-            is useful if you want the ``.data`` property to continue working
-            after having ``.read()`` the file object. (Overridden if ``amt`` is
-            set.)
+        On exit, release the connection back to the pool.
         """
-        self._init_decoder()
-        if decode_content is None:
-            decode_content = self.decode_content
-
-        if self._fp is None:
-            return
-
-        flush_decoder = False
-        data = None
-
         try:
             try:
-                if amt is None:
-                    # cStringIO doesn't like amt=None
-                    data = self._fp.read()
-                    flush_decoder = True
-                else:
-                    cache_content = False
-                    data = self._fp.read(amt)
-                    if amt != 0 and not data:  # Platform-specific: Buggy versions of Python.
-                        # Close the connection when no data is returned
-                        #
-                        # This is redundant to what httplib/http.client _should_
-                        # already do.  However, versions of python released before
-                        # December 15, 2012 (http://bugs.python.org/issue16298) do
-                        # not properly close the connection in all cases. There is
-                        # no harm in redundantly calling close.
-                        self._fp.close()
-                        flush_decoder = True
+                yield
 
             except SocketTimeout:
                 # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
@@ -281,6 +244,56 @@ class HTTPResponse(io.IOBase):
             if self._original_response and self._original_response.isclosed():
                 self.release_conn()
 
+    def read(self, amt=None, decode_content=None, cache_content=False):
+        """
+        Similar to :meth:`httplib.HTTPResponse.read`, but with two additional
+        parameters: ``decode_content`` and ``cache_content``.
+
+        :param amt:
+            How much of the content to read. If specified, caching is skipped
+            because it doesn't make sense to cache partial content as the full
+            response.
+
+        :param decode_content:
+            If True, will attempt to decode the body based on the
+            'content-encoding' header.
+
+        :param cache_content:
+            If True, will save the returned data such that the same result is
+            returned despite of the state of the underlying file object. This
+            is useful if you want the ``.data`` property to continue working
+            after having ``.read()`` the file object. (Overridden if ``amt`` is
+            set.)
+        """
+        self._init_decoder()
+        if decode_content is None:
+            decode_content = self.decode_content
+
+        if self._fp is None:
+            return
+
+        flush_decoder = False
+        data = None
+
+        with self._error_catcher():
+            if amt is None:
+                # cStringIO doesn't like amt=None
+                data = self._fp.read()
+                flush_decoder = True
+            else:
+                cache_content = False
+                data = self._fp.read(amt)
+                if amt != 0 and not data:  # Platform-specific: Buggy versions of Python.
+                    # Close the connection when no data is returned
+                    #
+                    # This is redundant to what httplib/http.client _should_
+                    # already do.  However, versions of python released before
+                    # December 15, 2012 (http://bugs.python.org/issue16298) do
+                    # not properly close the connection in all cases. There is
+                    # no harm in redundantly calling close.
+                    self._fp.close()
+                    flush_decoder = True
+
         if data:
             self._fp_bytes_read += len(data)
 
@@ -452,24 +465,24 @@ class HTTPResponse(io.IOBase):
             self._original_response.close()
             return
 
-        while True:
-            self._update_chunk_length()
-            if self.chunk_left == 0:
-                break
-            chunk = self._handle_chunk(amt)
-            yield self._decode(chunk, decode_content=decode_content,
-                               flush_decoder=True)
-
-        # Chunk content ends with \r\n: discard it.
-        while True:
-            line = self._fp.fp.readline()
-            if not line:
-                # Some sites may not end with '\r\n'.
-                break
-            if line == b'\r\n':
-                break
-
-        # We read everything; close the "file".
-        if self._original_response:
-            self._original_response.close()
-        self.release_conn()
+        with self._error_catcher():
+            while True:
+                self._update_chunk_length()
+                if self.chunk_left == 0:
+                    break
+                chunk = self._handle_chunk(amt)
+                yield self._decode(chunk, decode_content=decode_content,
+                                   flush_decoder=True)
+
+            # Chunk content ends with \r\n: discard it.
+            while True:
+                line = self._fp.fp.readline()
+                if not line:
+                    # Some sites may not end with '\r\n'.
+                    break
+                if line == b'\r\n':
+                    break
+
+            # We read everything; close the "file".
+            if self._original_response:
+                self._original_response.close()
author	Andrey Petrov <shazow@gmail.com>	2015-07-15 22:13:27 +0200
committer	Andrey Petrov <shazow@gmail.com>	2015-07-15 22:13:27 +0200
commit	9f6ec3acebbaab7b432dc9cd92e208d42236eb01 (patch)
tree	78e631c9685454f41a5808c556b73220c590d3dd
parent	21a288be4487040c6e21e27cec025b74d2a83152 (diff)
parent	de2164fc919ac9dabb41afdbc1bbaa9c1f656257 (diff)
download	urllib3-9f6ec3acebbaab7b432dc9cd92e208d42236eb01.tar.gz