From 36088ba0346087dab0c60ed5e609e6806676d1a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?The=CC=81ophile=20Studer?= Date: Tue, 19 Jun 2012 15:01:39 +0200 Subject: contributor IPv6 url support and test coverage --- CONTRIBUTORS.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt index 37140ca3..9a04b6ae 100644 --- a/CONTRIBUTORS.txt +++ b/CONTRIBUTORS.txt @@ -39,5 +39,8 @@ In chronological order: * brandon-rhodes * Design review, bugfixes, test coverage. +* studer + * IPv6 url support and test coverage + * [Your name or handle] <[email or website]> * [Brief summary of your changes] -- cgit v1.2.1 From f9ecd6b462cfe2bdfd30d35190d33617eb72b809 Mon Sep 17 00:00:00 2001 From: Andrey Petrov Date: Fri, 22 Jun 2012 21:57:06 -0500 Subject: Added urllib3.add_stderr_logger and minor fixes in logging format and docs. --- CHANGES.rst | 3 +++ urllib3/__init__.py | 16 ++++++++++++++-- urllib3/connectionpool.py | 6 +++--- 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 7d6edbf3..e1ff69b6 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -4,6 +4,9 @@ Changes dev (master branch) +++++++++++++++++++ +* Added ``urllib3.add_stderr_logger()`` for quickly enabling STDERR debug + logging in urllib3. + 1.4 (2012-06-16) ++++++++++++++++ diff --git a/urllib3/__init__.py b/urllib3/__init__.py index 81e76f50..3b68ea51 100644 --- a/urllib3/__init__.py +++ b/urllib3/__init__.py @@ -28,7 +28,7 @@ from .util import make_headers, get_host # Set default logging handler to avoid "No handler found" warnings. import logging -try: +try: # Python 2.7+ from logging import NullHandler except ImportError: class NullHandler(logging.Handler): @@ -37,6 +37,18 @@ except ImportError: logging.getLogger(__name__).addHandler(NullHandler()) +def add_stderr_logger(level=logging.DEBUG): + """ + Helper for quickly adding a StreamHandler to the logger. + Returns the logger after adding it. + """ + logger = logging.getLogger(__name__) + handler = logging.StreamHandler() + handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s')) + logger.addHandler(handler) + logger.setLevel(level) + logger.debug('Added an stderr logging handler.') + return logger + # ... Clean up. -del logging del NullHandler diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py index f2333417..071fd7e0 100644 --- a/urllib3/connectionpool.py +++ b/urllib3/connectionpool.py @@ -264,7 +264,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): httplib_response = conn.getresponse() # AppEngine doesn't have a version attr. - http_version = getattr(conn, '_http_vsn_str', 'HTTP/?'), + http_version = getattr(conn, '_http_vsn_str', 'HTTP/?') log.debug("\"%s %s %s\" %s %s" % (method, url, http_version, httplib_response.status, httplib_response.length)) @@ -324,8 +324,8 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): Number of retries to allow before raising a MaxRetryError exception. :param redirect: - Automatically handle redirects (status codes 301, 302, 303, 307), - each redirect counts as a retry. + If True, automatically handle redirects (status codes 301, 302, + 303, 307). Each redirect counts as a retry. 
:param assert_same_host: If ``True``, will make sure that the host of the pool requests is -- cgit v1.2.1 From 4059daae19a943834a3dd995c277448713483734 Mon Sep 17 00:00:00 2001 From: Andrey Petrov Date: Fri, 22 Jun 2012 22:15:25 -0500 Subject: Unit tests for add_stderr_logger --- test/test_util.py | 10 ++++++++++ urllib3/__init__.py | 12 ++++++++---- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/test/test_util.py b/test/test_util.py index 9d5eb056..6fadd7d6 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -1,5 +1,7 @@ import unittest +import logging +from urllib3 import add_stderr_logger from urllib3.util import get_host, make_headers, split_first from urllib3.exceptions import LocationParseError @@ -100,3 +102,11 @@ class TestUtil(unittest.TestCase): for input, expected in test_cases.iteritems(): output = split_first(*input) self.assertEqual(output, expected) + + def test_add_stderr_logger(self): + handler = add_stderr_logger(level=logging.INFO) # Don't actually print debug + logger = logging.getLogger('urllib3') + self.assertTrue(handler in logger.handlers) + + logger.debug('Testing add_stderr_logger') + logger.removeHandler(handler) diff --git a/urllib3/__init__.py b/urllib3/__init__.py index 3b68ea51..55de87e4 100644 --- a/urllib3/__init__.py +++ b/urllib3/__init__.py @@ -39,16 +39,20 @@ logging.getLogger(__name__).addHandler(NullHandler()) def add_stderr_logger(level=logging.DEBUG): """ - Helper for quickly adding a StreamHandler to the logger. - Returns the logger after adding it. + Helper for quickly adding a StreamHandler to the logger. Useful for + debugging. + + Returns the handler after adding it. """ + # This method needs to be in this __init__.py to get the __name__ correct + # even if urllib3 is vendored within another package. logger = logging.getLogger(__name__) handler = logging.StreamHandler() handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s')) logger.addHandler(handler) logger.setLevel(level) - logger.debug('Added an stderr logging handler.') - return logger + logger.debug('Added an stderr logging handler to logger: %s' % __name__) + return handler # ... Clean up. del NullHandler -- cgit v1.2.1 From 0c973cada5888e65fb2a26693e08de67038f00e5 Mon Sep 17 00:00:00 2001 From: Andrey Petrov Date: Fri, 22 Jun 2012 23:08:46 -0500 Subject: Added urllib3.util.parse_url with passing tests. 
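Usage, roughly (the example url is invented; the component values follow the
parsing rules added below, where a missing scheme comes back as None and a
missing path as '', per the FIXMEs in the patch)::

    from urllib3.util import parse_url

    u = parse_url('http://foo:bar@example.com:8080/mail?q=1#frag')
    # Url(scheme='http', auth='foo:bar', host='example.com', port=8080,
    #     path='/mail', query='q=1', fragment='frag')

    u.hostname  # 'example.com', a urlparse-compatible alias for u.host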
--- test/test_util.py | 24 ++++++++++++--- urllib3/util.py | 89 ++++++++++++++++++++++++++++++++++++++++++++----------- 2 files changed, 91 insertions(+), 22 deletions(-) diff --git a/test/test_util.py b/test/test_util.py index 6fadd7d6..5ba37ddb 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -2,7 +2,7 @@ import unittest import logging from urllib3 import add_stderr_logger -from urllib3.util import get_host, make_headers, split_first +from urllib3.util import get_host, make_headers, split_first, parse_url, Url from urllib3.exceptions import LocationParseError @@ -62,6 +62,20 @@ class TestUtil(unittest.TestCase): for location in invalid_host: self.assertRaises(LocationParseError, get_host, location) + def test_parse_url(self): + url_host_map = { + 'http://google.com/mail': Url('http', None, 'google.com', None, '/mail'), + 'http://google.com/mail/': Url('http', None, 'google.com', None, '/mail/'), + 'google.com/mail': Url(None, None, 'google.com', None, '/mail'), + 'http://google.com/': Url('http', None, 'google.com', None, '/'), + 'http://google.com': Url('http', None, 'google.com', None, ''), + '/foo': Url(path='/foo'), + '/foo?bar=baz': Url(path='/foo', query='bar=baz'), + '/foo?bar=baz#banana?apple/orange': Url(path='/foo', query='bar=baz', fragment='banana?apple/orange'), + } + for url, expected_host in url_host_map.items(): + returned_host = parse_url(url) + self.assertEquals(returned_host, expected_host) def test_make_headers(self): self.assertEqual( @@ -95,9 +109,11 @@ class TestUtil(unittest.TestCase): def test_split_first(self): test_cases = { - ('abcd', 'b'): ('a', 'cd'), - ('abcd', 'cb'): ('a', 'cd'), - ('abcd', ''): ('abcd', ''), + ('abcd', 'b'): ('a', 'cd', 'b'), + ('abcd', 'cb'): ('a', 'cd', 'b'), + ('abcd', ''): ('abcd', '', None), + ('abcd', 'a'): ('', 'bcd', 'a'), + ('abcd', 'ab'): ('', 'bcd', 'a'), } for input, expected in test_cases.iteritems(): output = split_first(*input) diff --git a/urllib3/util.py b/urllib3/util.py index 9669ce97..d21128b5 100644 --- a/urllib3/util.py +++ b/urllib3/util.py @@ -6,6 +6,7 @@ from base64 import b64encode +from collections import namedtuple try: from select import poll, POLLIN @@ -20,6 +21,21 @@ from .packages import six from .exceptions import LocationParseError +class Url(namedtuple('Url', ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'])): + """ + Datastructure for representing an HTTP URL. + """ + slots = () + + def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None, query=None, fragment=None): + return super(Url, cls).__new__(cls, scheme, auth, host, port, path, query, fragment) + + @property + def hostname(self): + """For backwards-compatibility with urlparse. We're nice like that.""" + return self.host + + def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, basic_auth=None): """ @@ -75,39 +91,48 @@ def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, def split_first(s, delims): """ Given a string and an iterable of delimiters, split on the first found - delimiter. Return two split parts. + delimiter. Return two split parts and the matched delimiter. If not found, then the first part is the full input string. + Example: :: + + >>> split_first('foo/bar?baz', '?/=') + ('foo', 'bar?baz', '/') + >>> split_first('foo/bar?baz', '123') + ('foo/bar?baz', '', None) + Scales linearly with number of delims. Not ideal for large number of delims. 
""" min_idx = None + min_delim = None for d in delims: idx = s.find(d) if idx < 0: continue - if not min_idx: + if min_idx is None or idx < min_idx: min_idx = idx - else: - min_idx = min(idx, min_idx) + min_delim = d if min_idx < 0: - return s, '' + return s, '', None - return s[:min_idx], s[min_idx+1:] + return s[:min_idx], s[min_idx+1:], min_delim -def get_host(url): +def parse_url(url): """ - Given a url, return its scheme, host and port (None if it's not there). + Given a url, return a parsed :class:`.Url` namedtuple. - For example: :: + Partly backwards-compatible with :module:`urlparse`. - >>> get_host('http://google.com/mail/') - ('http', 'google.com', None) - >>> get_host('google.com:80') - ('http', 'google.com', 80) + Example: :: + + >>> parse_url('http://google.com/mail/') + Url(scheme='http', host='google.com', port=None, path='/', ...) + >>> prase_url('google.com:80') + Url(scheme='http', host='google.com', port=80, path='', ...) """ # While this code has overlap with stdlib's urlparse, it is much @@ -115,9 +140,13 @@ def get_host(url): # Additionally, this imeplementations does silly things to be optimal # on CPython. - scheme = 'http' + scheme = None # FIXME: Should the scheme default to http? + auth = None host = None port = None + path = '' # FIXME: Should the path default to None? + fragment = None + query = None # Scheme if '://' in url: @@ -125,11 +154,15 @@ def get_host(url): # Find the earliest Authority Terminator # (http://tools.ietf.org/html/rfc3986#section-3.2) - url, _path = split_first(url, ['/', '?', '#']) + url, path_, delim = split_first(url, ['/', '?', '#']) + + if delim: + # Reassemble the path + path = delim + path_ # Auth if '@' in url: - _auth, url = url.split('@', 1) + auth, url = url.split('@', 1) # IPv6 if url and url[0] == '[': @@ -147,10 +180,30 @@ def get_host(url): port = int(port) - elif not host: + elif not host and url: host = url - return scheme, host, port + # Fragment + if '#' in path: + path, fragment = path.split('#', 1) + + # Query + if '?' in path: + path, query = path.split('?', 1) + + # Paths start with '/' + if path and path[0] != '/': + path = '/' + path + + return Url(scheme, auth, host, port, path, query, fragment) + + +def get_host(url): + """ + Deprecated. Use parse_url instead. + """ + p = parse_url(url) + return p.scheme or 'http', p.hostname, p.port def is_connection_dropped(conn): -- cgit v1.2.1 From 209e7889591b5912c7ae6a26f1feb336c176ad96 Mon Sep 17 00:00:00 2001 From: Andrey Petrov Date: Sat, 30 Jun 2012 16:25:36 -0700 Subject: More tests. 
--- test/test_util.py | 7 ++++++- urllib3/util.py | 17 ++++++++++++----- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/test/test_util.py b/test/test_util.py index 5ba37ddb..67879b00 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -68,7 +68,12 @@ class TestUtil(unittest.TestCase): 'http://google.com/mail/': Url('http', None, 'google.com', None, '/mail/'), 'google.com/mail': Url(None, None, 'google.com', None, '/mail'), 'http://google.com/': Url('http', None, 'google.com', None, '/'), - 'http://google.com': Url('http', None, 'google.com', None, ''), + 'http://google.com': Url('http', None, 'google.com', None, None), + '': Url(), + '/': Url(path='/'), + '?': Url(path='', query=''), + '#': Url(path='', fragment=''), + '#?/!google.com/?foo#bar': Url(path='', fragment='?/!google.com/?foo#bar'), '/foo': Url(path='/foo'), '/foo?bar=baz': Url(path='/foo', query='bar=baz'), '/foo?bar=baz#banana?apple/orange': Url(path='/foo', query='bar=baz', fragment='banana?apple/orange'), diff --git a/urllib3/util.py b/urllib3/util.py index d21128b5..8810f1bc 100644 --- a/urllib3/util.py +++ b/urllib3/util.py @@ -23,7 +23,8 @@ from .exceptions import LocationParseError class Url(namedtuple('Url', ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'])): """ - Datastructure for representing an HTTP URL. + Datastructure for representing an HTTP URL. Used as a return value for + ``parse_url``. """ slots = () @@ -123,7 +124,8 @@ def split_first(s, delims): def parse_url(url): """ - Given a url, return a parsed :class:`.Url` namedtuple. + Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is + performed to parse incomplete urls. Fields not provided will be None. Partly backwards-compatible with :module:`urlparse`. @@ -132,7 +134,9 @@ def parse_url(url): >>> parse_url('http://google.com/mail/') Url(scheme='http', host='google.com', port=None, path='/', ...) >>> prase_url('google.com:80') - Url(scheme='http', host='google.com', port=80, path='', ...) + Url(scheme=None, host='google.com', port=80, path=None, ...) + >>> prase_url('/foo?bar') + Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...) """ # While this code has overlap with stdlib's urlparse, it is much @@ -140,11 +144,11 @@ def parse_url(url): # Additionally, this imeplementations does silly things to be optimal # on CPython. - scheme = None # FIXME: Should the scheme default to http? + scheme = None auth = None host = None port = None - path = '' # FIXME: Should the path default to None? + path = None fragment = None query = None @@ -183,6 +187,9 @@ def parse_url(url): elif not host and url: host = url + if not path: + return Url(scheme, auth, host, port, path, query, fragment) + # Fragment if '#' in path: path, fragment = path.split('#', 1) -- cgit v1.2.1 From d1e7327430d2c57a5bbfe0638c4b13e9a07e4d23 Mon Sep 17 00:00:00 2001 From: Andrey Petrov Date: Sat, 30 Jun 2012 16:27:37 -0700 Subject: Docs. --- urllib3/util.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/urllib3/util.py b/urllib3/util.py index 8810f1bc..9943d944 100644 --- a/urllib3/util.py +++ b/urllib3/util.py @@ -24,7 +24,7 @@ from .exceptions import LocationParseError class Url(namedtuple('Url', ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'])): """ Datastructure for representing an HTTP URL. Used as a return value for - ``parse_url``. + :func:`parse_url`. """ slots = () @@ -207,7 +207,7 @@ def parse_url(url): def get_host(url): """ - Deprecated. Use parse_url instead. + Deprecated. 
Use :func:`parse_url` instead. """ p = parse_url(url) return p.scheme or 'http', p.hostname, p.port -- cgit v1.2.1 From aa23d79c49d76824dd1076c21c69f2a5c8956ff9 Mon Sep 17 00:00:00 2001 From: Andrey Petrov Date: Sat, 30 Jun 2012 16:28:17 -0700 Subject: Move stuff around. --- urllib3/util.py | 104 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 52 insertions(+), 52 deletions(-) diff --git a/urllib3/util.py b/urllib3/util.py index 9943d944..7695bc8f 100644 --- a/urllib3/util.py +++ b/urllib3/util.py @@ -37,58 +37,6 @@ class Url(namedtuple('Url', ['scheme', 'auth', 'host', 'port', 'path', 'query', return self.host -def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, - basic_auth=None): - """ - Shortcuts for generating request headers. - - :param keep_alive: - If ``True``, adds 'connection: keep-alive' header. - - :param accept_encoding: - Can be a boolean, list, or string. - ``True`` translates to 'gzip,deflate'. - List will get joined by comma. - String will be used as provided. - - :param user_agent: - String representing the user-agent you want, such as - "python-urllib3/0.6" - - :param basic_auth: - Colon-separated username:password string for 'authorization: basic ...' - auth header. - - Example: :: - - >>> make_headers(keep_alive=True, user_agent="Batman/1.0") - {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'} - >>> make_headers(accept_encoding=True) - {'accept-encoding': 'gzip,deflate'} - """ - headers = {} - if accept_encoding: - if isinstance(accept_encoding, str): - pass - elif isinstance(accept_encoding, list): - accept_encoding = ','.join(accept_encoding) - else: - accept_encoding = 'gzip,deflate' - headers['accept-encoding'] = accept_encoding - - if user_agent: - headers['user-agent'] = user_agent - - if keep_alive: - headers['connection'] = 'keep-alive' - - if basic_auth: - headers['authorization'] = 'Basic ' + \ - b64encode(six.b(basic_auth)).decode('utf-8') - - return headers - - def split_first(s, delims): """ Given a string and an iterable of delimiters, split on the first found @@ -213,6 +161,58 @@ def get_host(url): return p.scheme or 'http', p.hostname, p.port +def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, + basic_auth=None): + """ + Shortcuts for generating request headers. + + :param keep_alive: + If ``True``, adds 'connection: keep-alive' header. + + :param accept_encoding: + Can be a boolean, list, or string. + ``True`` translates to 'gzip,deflate'. + List will get joined by comma. + String will be used as provided. + + :param user_agent: + String representing the user-agent you want, such as + "python-urllib3/0.6" + + :param basic_auth: + Colon-separated username:password string for 'authorization: basic ...' + auth header. 
+ + Example: :: + + >>> make_headers(keep_alive=True, user_agent="Batman/1.0") + {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'} + >>> make_headers(accept_encoding=True) + {'accept-encoding': 'gzip,deflate'} + """ + headers = {} + if accept_encoding: + if isinstance(accept_encoding, str): + pass + elif isinstance(accept_encoding, list): + accept_encoding = ','.join(accept_encoding) + else: + accept_encoding = 'gzip,deflate' + headers['accept-encoding'] = accept_encoding + + if user_agent: + headers['user-agent'] = user_agent + + if keep_alive: + headers['connection'] = 'keep-alive' + + if basic_auth: + headers['authorization'] = 'Basic ' + \ + b64encode(six.b(basic_auth)).decode('utf-8') + + return headers + + def is_connection_dropped(conn): """ Returns True if the connection is dropped and should be closed. -- cgit v1.2.1 From 08aeb36240f4800fb9a36e843acd7d65ed33003b Mon Sep 17 00:00:00 2001 From: Andrey Petrov Date: Sat, 30 Jun 2012 16:50:49 -0700 Subject: Changes update. --- CHANGES.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index e1ff69b6..9afad9ad 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -7,6 +7,9 @@ dev (master branch) * Added ``urllib3.add_stderr_logger()`` for quickly enabling STDERR debug logging in urllib3. +* Native full URL parsing (including auth, path, query, fragment) available in + ``urllib3.util.parse_url(url)``. + 1.4 (2012-06-16) ++++++++++++++++ -- cgit v1.2.1 From 8a6c1e0f3041bf668bd9bf85f5ba6c22c9e2254d Mon Sep 17 00:00:00 2001 From: Andrey Petrov Date: Sat, 30 Jun 2012 16:51:13 -0700 Subject: GET if 303. --- CHANGES.rst | 2 ++ urllib3/connectionpool.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index 9afad9ad..721e73a4 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -10,6 +10,8 @@ dev (master branch) * Native full URL parsing (including auth, path, query, fragment) available in ``urllib3.util.parse_url(url)``. +* Built-in redirect will switch method to 'GET' if status code is 303. + 1.4 (2012-06-16) ++++++++++++++++ diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py index 071fd7e0..3310efc8 100644 --- a/urllib3/connectionpool.py +++ b/urllib3/connectionpool.py @@ -441,6 +441,8 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # Handle redirect? redirect_location = redirect and response.get_redirect_location() if redirect_location: + if response.status == 303: + method = 'GET' log.info("Redirecting %s -> %s" % (url, redirect_location)) return self.urlopen(method, redirect_location, body, headers, retries - 1, redirect, assert_same_host) -- cgit v1.2.1 From 2c2049f6020f1aa7a7a38d35edbef206b2f1c1ca Mon Sep 17 00:00:00 2001 From: Andrey Petrov Date: Sat, 30 Jun 2012 17:31:59 -0700 Subject: Added Url.request_uri property. 
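The new property reassembles path and query, which is exactly the form an
HTTP request line wants. An illustrative doctest-style sketch (urls invented,
results per the tests below)::

    >>> parse_url('http://google.com/mail?q=1').request_uri
    '/mail?q=1'
    >>> parse_url('http://google.com').request_uri
    '/'

Defaulting to '/' means callers never have to special-case an empty path.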
--- test/test_util.py | 22 +++++++++++++++++++--- urllib3/util.py | 10 ++++++++++ 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/test/test_util.py b/test/test_util.py index 67879b00..313c156a 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -78,9 +78,25 @@ class TestUtil(unittest.TestCase): '/foo?bar=baz': Url(path='/foo', query='bar=baz'), '/foo?bar=baz#banana?apple/orange': Url(path='/foo', query='bar=baz', fragment='banana?apple/orange'), } - for url, expected_host in url_host_map.items(): - returned_host = parse_url(url) - self.assertEquals(returned_host, expected_host) + for url, expected_url in url_host_map.items(): + returned_url = parse_url(url) + self.assertEquals(returned_url, expected_url) + + def test_request_uri(self): + url_host_map = { + 'http://google.com/mail': '/mail', + 'http://google.com/mail/': '/mail/', + 'http://google.com/': '/', + 'http://google.com': '/', + '': '/', + '/': '/', + '?': '/?', + '#': '/', + '/foo?bar=baz': '/foo?bar=baz', + } + for url, expected_request_uri in url_host_map.items(): + returned_url = parse_url(url) + self.assertEquals(returned_url.request_uri, expected_request_uri) def test_make_headers(self): self.assertEqual( diff --git a/urllib3/util.py b/urllib3/util.py index 7695bc8f..c3eb01f3 100644 --- a/urllib3/util.py +++ b/urllib3/util.py @@ -36,6 +36,16 @@ class Url(namedtuple('Url', ['scheme', 'auth', 'host', 'port', 'path', 'query', """For backwards-compatibility with urlparse. We're nice like that.""" return self.host + @property + def request_uri(self): + """Absolute path including the query string.""" + uri = self.path or '/' + + if self.query is not None: + uri += '?' + self.query + + return uri + def split_first(s, delims): """ -- cgit v1.2.1 From da66d839a931862d610a0268f4a43909648fb0a2 Mon Sep 17 00:00:00 2001 From: Andrey Petrov Date: Sat, 30 Jun 2012 17:58:43 -0700 Subject: New request and redirect logic for PoolManager. * Built-in redirect will switch method to 'GET' if status code is 303. (Issue #11) * ``urllib3.PoolManager`` strips the scheme and host before sending the request uri. (Issue #8) --- CHANGES.rst | 4 +++ test/with_dummyserver/test_poolmanager.py | 4 +-- urllib3/connectionpool.py | 8 ++--- urllib3/poolmanager.py | 50 ++++++++++++++++++++----------- 4 files changed, 43 insertions(+), 23 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 721e73a4..047abc40 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -11,6 +11,10 @@ dev (master branch) ``urllib3.util.parse_url(url)``. * Built-in redirect will switch method to 'GET' if status code is 303. + (Issue #11) + +* ``urllib3.PoolManager`` strips the scheme and host before sending the request + uri. 
(Issue #8) 1.4 (2012-06-16) diff --git a/test/with_dummyserver/test_poolmanager.py b/test/with_dummyserver/test_poolmanager.py index 06d8c73d..33b0ba87 100644 --- a/test/with_dummyserver/test_poolmanager.py +++ b/test/with_dummyserver/test_poolmanager.py @@ -32,7 +32,7 @@ class TestPoolManager(HTTPDummyServerTestCase): try: http.request('GET', '%s/redirect' % self.base_url, fields={'target': cross_host_location}, - timeout=0.01, retries=1) + timeout=0.01, retries=0) self.fail("Request succeeded instead of raising an exception like it should.") except MaxRetryError: @@ -40,7 +40,7 @@ class TestPoolManager(HTTPDummyServerTestCase): r = http.request('GET', '%s/redirect' % self.base_url, fields={'target': '%s/echo?a=b' % self.base_url_alt}, - timeout=0.01, retries=2) + timeout=0.01, retries=1) self.assertEqual(r._pool.host, self.host_alt) diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py index 3310efc8..26ef1504 100644 --- a/urllib3/connectionpool.py +++ b/urllib3/connectionpool.py @@ -268,15 +268,16 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): log.debug("\"%s %s %s\" %s %s" % (method, url, http_version, httplib_response.status, httplib_response.length)) - return httplib_response - def is_same_host(self, url): """ Check if the given ``url`` is a member of the same host as this connection pool. """ + if url.startswith('/'): + return True + # TODO: Add optional support for socket.gethostbyname checking. scheme, host, port = get_host(url) @@ -284,8 +285,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # Use explicit default port for comparison when none is given. port = port_by_scheme.get(scheme) - return (url.startswith('/') or - (scheme, host, port) == (self.scheme, self.host, self.port)) + return (scheme, host, port) == (self.scheme, self.host, self.port) def urlopen(self, method, url, body=None, headers=None, retries=3, redirect=True, assert_same_host=True, timeout=_Default, diff --git a/urllib3/poolmanager.py b/urllib3/poolmanager.py index 310ea21d..6379c333 100644 --- a/urllib3/poolmanager.py +++ b/urllib3/poolmanager.py @@ -8,9 +8,10 @@ import logging from ._collections import RecentlyUsedContainer from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool -from .connectionpool import get_host, connection_from_url, port_by_scheme +from .connectionpool import connection_from_url, port_by_scheme from .exceptions import HostChangedError from .request import RequestMethods +from .util import parse_url __all__ = ['PoolManager', 'ProxyManager', 'proxy_from_url'] @@ -54,13 +55,15 @@ class PoolManager(RequestMethods): self.connection_pool_kw = connection_pool_kw self.pools = RecentlyUsedContainer(num_pools) - def connection_from_host(self, host, port=80, scheme='http'): + def connection_from_host(self, host, port=None, scheme='http'): """ Get a :class:`ConnectionPool` based on the host, port, and scheme. - Note that an appropriate ``port`` value is required here to normalize - connection pools in our container most effectively. + If ``port`` isn't given, it will be derived from the ``scheme`` using + ``urllib3.connectionpool.port_by_scheme``. """ + port = port or port_by_scheme.get(scheme, 80) + pool_key = (scheme, host, port) # If the scheme, host, or port doesn't match existing open connections, @@ -86,26 +89,39 @@ class PoolManager(RequestMethods): Additional parameters are taken from the :class:`.PoolManager` constructor. 
""" - scheme, host, port = get_host(url) + u = parse_url(url) + return self.connection_from_host(u.host, port=u.port, scheme=u.scheme) - port = port or port_by_scheme.get(scheme, 80) + def handle_redirect(response): + pass - return self.connection_from_host(host, port=port, scheme=scheme) - - def urlopen(self, method, url, **kw): + def urlopen(self, method, url, redirect=True, **kw): """ - Same as :meth:`urllib3.connectionpool.HTTPConnectionPool.urlopen`. + Same as :meth:`urllib3.connectionpool.HTTPConnectionPool.urlopen` + with custom cross-host redirect logic and only sends the request-uri + portion of the ``url``. - ``url`` must be absolute, such that an appropriate + The given ``url`` parameter must be absolute, such that an appropriate :class:`urllib3.connectionpool.ConnectionPool` can be chosen for it. """ - conn = self.connection_from_url(url) - try: - return conn.urlopen(method, url, **kw) + u = parse_url(url) + conn = self.connection_from_host(u.host, port=u.port, scheme=u.scheme) + + kw['assert_same_host'] = False + kw['redirect'] = False + + response = conn.urlopen(method, u.request_uri, **kw) + + redirect_location = redirect and response.get_redirect_location() + if not redirect_location: + return response + + if response.status == 303: + method = 'GET' - except HostChangedError as e: - kw['retries'] = e.retries # Persist retries countdown - return self.urlopen(method, e.url, **kw) + log.info("Redirecting %s -> %s" % (url, redirect_location)) + kw['retries'] = kw.get('retries', 3) - 1 # Persist retries countdown + return self.urlopen(method, redirect_location, **kw) class ProxyManager(RequestMethods): -- cgit v1.2.1 From 6305ab170e99d0e4cf0bde6dd3724d683e925b6d Mon Sep 17 00:00:00 2001 From: Andrey Petrov Date: Sun, 1 Jul 2012 10:35:44 -0700 Subject: py3 tests pass. --- test/test_util.py | 2 +- urllib3/util.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_util.py b/test/test_util.py index 313c156a..b0055bf7 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -136,7 +136,7 @@ class TestUtil(unittest.TestCase): ('abcd', 'a'): ('', 'bcd', 'a'), ('abcd', 'ab'): ('', 'bcd', 'a'), } - for input, expected in test_cases.iteritems(): + for input, expected in test_cases.items(): output = split_first(*input) self.assertEqual(output, expected) diff --git a/urllib3/util.py b/urllib3/util.py index c3eb01f3..0dad02d3 100644 --- a/urllib3/util.py +++ b/urllib3/util.py @@ -74,7 +74,7 @@ def split_first(s, delims): min_idx = idx min_delim = d - if min_idx < 0: + if min_idx is None or min_idx < 0: return s, '', None return s[:min_idx], s[min_idx+1:], min_delim -- cgit v1.2.1 From 60c8506946ab8c894222130ef2fa2634a4659db0 Mon Sep 17 00:00:00 2001 From: Andrey Petrov Date: Sun, 1 Jul 2012 10:36:31 -0700 Subject: Py3 tests pass. 
--- test/test_util.py | 2 +- urllib3/util.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_util.py b/test/test_util.py index 313c156a..b0055bf7 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -136,7 +136,7 @@ class TestUtil(unittest.TestCase): ('abcd', 'a'): ('', 'bcd', 'a'), ('abcd', 'ab'): ('', 'bcd', 'a'), } - for input, expected in test_cases.iteritems(): + for input, expected in test_cases.items(): output = split_first(*input) self.assertEqual(output, expected) diff --git a/urllib3/util.py b/urllib3/util.py index c3eb01f3..0dad02d3 100644 --- a/urllib3/util.py +++ b/urllib3/util.py @@ -74,7 +74,7 @@ def split_first(s, delims): min_idx = idx min_delim = d - if min_idx < 0: + if min_idx is None or min_idx < 0: return s, '', None return s[:min_idx], s[min_idx+1:], min_delim -- cgit v1.2.1 From 7c899375cbce1817cdc8a3f10781dff6427d07a5 Mon Sep 17 00:00:00 2001 From: Andrey Petrov Date: Sun, 1 Jul 2012 10:40:51 -0700 Subject: Removed unused codepath. --- test/test_util.py | 11 ++++++----- urllib3/util.py | 4 ---- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/test/test_util.py b/test/test_util.py index b0055bf7..a989da65 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -64,11 +64,12 @@ class TestUtil(unittest.TestCase): def test_parse_url(self): url_host_map = { - 'http://google.com/mail': Url('http', None, 'google.com', None, '/mail'), - 'http://google.com/mail/': Url('http', None, 'google.com', None, '/mail/'), - 'google.com/mail': Url(None, None, 'google.com', None, '/mail'), - 'http://google.com/': Url('http', None, 'google.com', None, '/'), - 'http://google.com': Url('http', None, 'google.com', None, None), + 'http://google.com/mail': Url('http', host='google.com', path='/mail'), + 'http://google.com/mail/': Url('http', host='google.com', path='/mail/'), + 'google.com/mail': Url(host='google.com', path='/mail'), + 'http://google.com/': Url('http', host='google.com', path='/'), + 'http://google.com': Url('http', host='google.com'), + 'http://google.com?foo': Url('http', host='google.com', path='', query='foo'), '': Url(), '/': Url(path='/'), '?': Url(path='', query=''), diff --git a/urllib3/util.py b/urllib3/util.py index 0dad02d3..a0fe4d1a 100644 --- a/urllib3/util.py +++ b/urllib3/util.py @@ -156,10 +156,6 @@ def parse_url(url): if '?' in path: path, query = path.split('?', 1) - # Paths start with '/' - if path and path[0] != '/': - path = '/' + path - return Url(scheme, auth, host, port, path, query, fragment) -- cgit v1.2.1 From 92de58417e7a11edc0dcee0002539e9128567de6 Mon Sep 17 00:00:00 2001 From: Andrey Petrov Date: Sun, 1 Jul 2012 10:47:49 -0700 Subject: Docs. --- urllib3/util.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/urllib3/util.py b/urllib3/util.py index a0fe4d1a..befba6d7 100644 --- a/urllib3/util.py +++ b/urllib3/util.py @@ -161,7 +161,7 @@ def parse_url(url): def get_host(url): """ - Deprecated. Use :func:`parse_url` instead. + Deprecated. Use :func:`.parse_url` instead. """ p = parse_url(url) return p.scheme or 'http', p.hostname, p.port @@ -224,7 +224,7 @@ def is_connection_dropped(conn): Returns True if the connection is dropped and should be closed. :param conn: - ``HTTPConnection`` object. + :class:`urllib3.connectionpool.HTTPConnectionPool` object. Note: For platforms like AppEngine, this will always return ``False`` to let the platform handle connection recycling transparently for us. 
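For context, the check documented in the hunk above reduces to a zero-timeout
readability test: a healthy idle keep-alive socket has nothing to read, while
a peer-closed one selects readable with zero bytes pending (recv() would
return b'' immediately). A simplified sketch of the select() fallback in
urllib3/util.py (the helper name is invented)::

    from select import select

    def looks_dropped(sock):
        # A dropped connection reports readable despite having no data.
        return bool(select([sock], [], [], 0.0)[0])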
-- cgit v1.2.1 From 9cd973b41ba1c02dda9dc3058c04cfe115e9c994 Mon Sep 17 00:00:00 2001 From: Shivaram Lingamneni Date: Mon, 16 Jul 2012 15:32:58 -0700 Subject: Refactor to support eager release of pooled connections --- test/test_collections.py | 46 ++++--- urllib3/_collections.py | 171 ++++++++++--------------- urllib3/connectionpool.py | 51 ++++++-- urllib3/packages/ordered_dict.py | 260 +++++++++++++++++++++++++++++++++++++++ urllib3/poolmanager.py | 10 +- 5 files changed, 406 insertions(+), 132 deletions(-) create mode 100644 urllib3/packages/ordered_dict.py diff --git a/test/test_collections.py b/test/test_collections.py index 6cb5aca2..2bff45e2 100644 --- a/test/test_collections.py +++ b/test/test_collections.py @@ -36,19 +36,7 @@ class TestLRUContainer(unittest.TestCase): d[5] = '5' # Check state - self.assertEqual(list(d.keys()), [0, 2, 3, 4, 5]) - - def test_pruning(self): - d = Container(5) - - for i in xrange(5): - d[i] = str(i) - - # Contend 2 entries for the most-used slot to balloon the heap - for i in xrange(100): - d.get(i % 2) - - self.assertTrue(len(d.access_log) <= d.CLEANUP_FACTOR * d._maxsize) + self.assertEqual(list(d.keys()), [2, 3, 4, 0, 5]) def test_same_key(self): d = Container(5) @@ -57,10 +45,7 @@ class TestLRUContainer(unittest.TestCase): d['foo'] = i self.assertEqual(list(d.keys()), ['foo']) - - d._prune_invalidated_entries() - - self.assertEqual(len(d.access_log), 1) + self.assertEqual(len(d), 1) def test_access_ordering(self): d = Container(5) @@ -68,13 +53,14 @@ class TestLRUContainer(unittest.TestCase): for i in xrange(10): d[i] = True - self.assertEqual(d._get_ordered_access_keys(), [9,8,7,6,5]) + # keys should be ordered by access time + self.assertEqual(list(d.keys()), [5, 6, 7, 8, 9]) new_order = [7,8,6,9,5] - for k in reversed(new_order): + for k in new_order: d[k] - self.assertEqual(d._get_ordered_access_keys(), new_order) + self.assertEqual(list(d.keys()), new_order) def test_delete(self): d = Container(5) @@ -107,6 +93,26 @@ class TestLRUContainer(unittest.TestCase): self.assertRaises(KeyError, lambda: d[5]) + def test_disposal(self): + evicted_items = [] + def dispose_func(arg): + # save the evicted datum for inspection + evicted_items.append(arg) + + d = Container(5, dispose_func=dispose_func) + for i in xrange(5): + d[i] = i + self.assertEqual(list(d.keys()), list(xrange(5))) + # nothing should have been disposed + self.assertEqual(evicted_items, []) + + d[5] = 5 + self.assertEqual(list(d.keys()), list(xrange(1, 6))) + # dispose_func should have been called on the LRU item, 0 + self.assertEqual(evicted_items, [0]) + + d.clear() + self.assertEqual(evicted_items, list(xrange(0, 6))) if __name__ == '__main__': unittest.main() diff --git a/urllib3/_collections.py b/urllib3/_collections.py index 3cef081e..e027b906 100644 --- a/urllib3/_collections.py +++ b/urllib3/_collections.py @@ -3,129 +3,92 @@ # # This module is part of urllib3 and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php +from __future__ import with_statement -from collections import deque +import threading +from collections import MutableMapping -from threading import RLock +try: + # available on 2.7 and up + from collections import OrderedDict + # hush pyflakes + OrderedDict +except ImportError: + from .packages.ordered_dict import OrderedDict __all__ = ['RecentlyUsedContainer'] - -class AccessEntry(object): - __slots__ = ('key', 'is_valid') - - def __init__(self, key, is_valid=True): - self.key = key - self.is_valid = is_valid - - -class 
RecentlyUsedContainer(dict): +class RecentlyUsedContainer(MutableMapping): """ Provides a dict-like that maintains up to ``maxsize`` keys while throwing away the least-recently-used keys beyond ``maxsize``. """ - # If len(self.access_log) exceeds self._maxsize * CLEANUP_FACTOR, then we - # will attempt to cleanup the invalidated entries in the access_log - # datastructure during the next 'get' operation. - CLEANUP_FACTOR = 10 - - def __init__(self, maxsize=10): - self._maxsize = maxsize - - self._container = {} - - # We use a deque to to store our keys ordered by the last access. - self.access_log = deque() - self.access_log_lock = RLock() - - # We look up the access log entry by the key to invalidate it so we can - # insert a new authorative entry at the head without having to dig and - # find the old entry for removal immediately. - self.access_lookup = {} - - # Trigger a heap cleanup when we get past this size - self.access_log_limit = maxsize * self.CLEANUP_FACTOR - - def _invalidate_entry(self, key): - "If exists: Invalidate old entry and return it." - old_entry = self.access_lookup.get(key) - if old_entry: - old_entry.is_valid = False - - return old_entry + # this is an object no one else knows about, sort of a hyper-None + # cf. the implementation of OrderedDict + __marker = object() - def _push_entry(self, key): - "Push entry onto our access log, invalidate the old entry if exists." - self._invalidate_entry(key) + def __init__(self, maxsize=10, dispose_func=None): + """Constructor. - new_entry = AccessEntry(key) - self.access_lookup[key] = new_entry - - self.access_log_lock.acquire() - self.access_log.appendleft(new_entry) - self.access_log_lock.release() - - def _prune_entries(self, num): - "Pop entries from our access log until we popped ``num`` valid ones." - while num > 0: - self.access_log_lock.acquire() - p = self.access_log.pop() - self.access_log_lock.release() - - if not p.is_valid: - continue # Invalidated entry, skip - - dict.pop(self, p.key, None) - self.access_lookup.pop(p.key, None) - num -= 1 + Args: + maxsize - int, maximum number of elements to retain + dispose_func - callback taking a single argument, called to destroy + elements that are being evicted or released + """ + self._maxsize = maxsize + self.dispose_func = dispose_func - def _prune_invalidated_entries(self): - "Rebuild our access_log without the invalidated entries." - self.access_log_lock.acquire() - self.access_log = deque(e for e in self.access_log if e.is_valid) - self.access_log_lock.release() + # OrderedDict is not inherently threadsafe, so protect it with a lock + self._mapping = OrderedDict() + self._lock = threading.Lock() - def _get_ordered_access_keys(self): - "Return ordered access keys for inspection. Used for testing." 
- self.access_log_lock.acquire() - r = [e.key for e in self.access_log if e.is_valid] - self.access_log_lock.release() + def clear(self): + with self._lock: + # copy pointers to all values, then wipe the mapping + # under Python 2, this copies the list of values twice :-| + values = list(self._mapping.values()) + self._mapping.clear() - return r + if self.dispose_func: + for value in values: + self.dispose_func(value) def __getitem__(self, key): - item = dict.get(self, key) - - if not item: - raise KeyError(key) + with self._lock: + # remove and re-add the item, moving it to the end of the eviction line + # throw the KeyError back to calling code if it's not present: + item = self._mapping.pop(key) + self._mapping[key] = item + return item - # Insert new entry with new high priority, also implicitly invalidates - # the old entry. - self._push_entry(key) + def __setitem__(self, key, item): + evicted_entry = self.__marker + with self._lock: + # possibly evict the existing value of 'key' + evicted_entry = self._mapping.get(key, self.__marker) + self._mapping[key] = item + # if we didn't evict an existing value, we might have to evict the LRU value + if len(self._mapping) > self._maxsize: + # pop from the beginning of the dict + _key, evicted_entry = self._mapping.popitem(last=False) + + if self.dispose_func and evicted_entry is not self.__marker: + self.dispose_func(evicted_entry) - if len(self.access_log) > self.access_log_limit: - # Heap is getting too big, try to clean up any tailing invalidated - # entries. - self._prune_invalidated_entries() + def __delitem__(self, key): + with self._lock: + entry = self._mapping.pop(key) + if self.dispose_func: + self.dispose_func(entry) - return item + def __len__(self): + with self._lock: + return len(self._mapping) - def __setitem__(self, key, item): - # Add item to our container and access log - dict.__setitem__(self, key, item) - self._push_entry(key) + def __iter__(self): + raise NotImplementedError('Iteration over this class is unlikely to be threadsafe.') - # Discard invalid and excess entries - self._prune_entries(len(self) - self._maxsize) - - def __delitem__(self, key): - self._invalidate_entry(key) - self.access_lookup.pop(key, None) - dict.__delitem__(self, key) - - def get(self, key, default=None): - try: - return self[key] - except KeyError: - return default + def keys(self): + with self._lock: + return self._mapping.keys() diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py index 3310efc8..c166cc44 100644 --- a/urllib3/connectionpool.py +++ b/urllib3/connectionpool.py @@ -6,6 +6,7 @@ import logging import socket +import threading from socket import error as SocketError, timeout as SocketTimeout @@ -177,6 +178,11 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): for _ in xrange(maxsize): self.pool.put(None) + # this is annoying: wrap the Queue with an additional lock, + # so there's no race between closing the pool and trying to return + # a connection to the already-closed pool (which would leak the connection) + self.closure_lock = threading.Lock() + # These are mostly for testing and debugging purposes. self.num_connections = 0 self.num_requests = 0 @@ -190,6 +196,25 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): (self.num_connections, self.host)) return HTTPConnection(host=self.host, port=self.port) + def close(self): + """Close all pooled connections; refuse further requests.""" + conns = [] + with self.closure_lock: + # acquire all available connections from the pool, so we can close them. 
+ # connections can't be checked back in to the pool while we're working; + # after we're done working, instead of getting checked back in, + # they'll be closed. + while True: + try: + conns.append(self.pool.get(block=False)) + except Empty: + break + self.pool = None + + for conn in conns: + if conn is not None: + conn.close() + def _get_conn(self, timeout=None): """ Get a connection. Will return a pooled connection if one is available. @@ -204,7 +229,10 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): """ conn = None try: - conn = self.pool.get(block=self.block, timeout=timeout) + with self.closure_lock: + if self.pool is None: + raise Exception("Pool has been closed.") + conn = self.pool.get(block=self.block, timeout=timeout) # If this is a persistent connection, check if it got disconnected if conn and is_connection_dropped(conn): @@ -232,12 +260,21 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): exceeded maxsize. If connections are discarded frequently, then maxsize should be increased. """ - try: - self.pool.put(conn, block=False) - except Full: - # This should never happen if self.block == True - log.warning("HttpConnectionPool is full, discarding connection: %s" - % self.host) + returned = False + + with self.closure_lock: + if self.pool is not None: + try: + self.pool.put(conn, block=False) + returned = True + except Full: + # This should never happen if self.block == True + log.warning("HttpConnectionPool is full, discarding connection: %s" + % self.host) + + # if the pool is closed or full, close the connection + if not returned: + conn.close() def _make_request(self, conn, method, url, timeout=_Default, **httplib_request_kw): diff --git a/urllib3/packages/ordered_dict.py b/urllib3/packages/ordered_dict.py new file mode 100644 index 00000000..7f8ee154 --- /dev/null +++ b/urllib3/packages/ordered_dict.py @@ -0,0 +1,260 @@ +# Backport of OrderedDict() class that runs on Python 2.4, 2.5, 2.6, 2.7 and pypy. +# Passes Python2.7's test suite and incorporates all the latest updates. +# Copyright 2009 Raymond Hettinger, released under the MIT License. +# http://code.activestate.com/recipes/576693/ + +try: + from thread import get_ident as _get_ident +except ImportError: + from dummy_thread import get_ident as _get_ident + +try: + from _abcoll import KeysView, ValuesView, ItemsView +except ImportError: + pass + + +class OrderedDict(dict): + 'Dictionary that remembers insertion order' + # An inherited dict maps keys to values. + # The inherited dict provides __getitem__, __len__, __contains__, and get. + # The remaining methods are order-aware. + # Big-O running times for all methods are the same as for regular dictionaries. + + # The internal self.__map dictionary maps keys to links in a doubly linked list. + # The circular doubly linked list starts and ends with a sentinel element. + # The sentinel element never gets deleted (this simplifies the algorithm). + # Each link is stored as a list of length three: [PREV, NEXT, KEY]. + + def __init__(self, *args, **kwds): + '''Initialize an ordered dictionary. Signature is the same as for + regular dictionaries, but keyword arguments are not recommended + because their insertion order is arbitrary. 
+ + ''' + if len(args) > 1: + raise TypeError('expected at most 1 arguments, got %d' % len(args)) + try: + self.__root + except AttributeError: + self.__root = root = [] # sentinel node + root[:] = [root, root, None] + self.__map = {} + self.__update(*args, **kwds) + + def __setitem__(self, key, value, dict_setitem=dict.__setitem__): + 'od.__setitem__(i, y) <==> od[i]=y' + # Setting a new item creates a new link which goes at the end of the linked + # list, and the inherited dictionary is updated with the new key/value pair. + if key not in self: + root = self.__root + last = root[0] + last[1] = root[0] = self.__map[key] = [last, root, key] + dict_setitem(self, key, value) + + def __delitem__(self, key, dict_delitem=dict.__delitem__): + 'od.__delitem__(y) <==> del od[y]' + # Deleting an existing item uses self.__map to find the link which is + # then removed by updating the links in the predecessor and successor nodes. + dict_delitem(self, key) + link_prev, link_next, key = self.__map.pop(key) + link_prev[1] = link_next + link_next[0] = link_prev + + def __iter__(self): + 'od.__iter__() <==> iter(od)' + root = self.__root + curr = root[1] + while curr is not root: + yield curr[2] + curr = curr[1] + + def __reversed__(self): + 'od.__reversed__() <==> reversed(od)' + root = self.__root + curr = root[0] + while curr is not root: + yield curr[2] + curr = curr[0] + + def clear(self): + 'od.clear() -> None. Remove all items from od.' + try: + for node in self.__map.itervalues(): + del node[:] + root = self.__root + root[:] = [root, root, None] + self.__map.clear() + except AttributeError: + pass + dict.clear(self) + + def popitem(self, last=True): + '''od.popitem() -> (k, v), return and remove a (key, value) pair. + Pairs are returned in LIFO order if last is true or FIFO order if false. + + ''' + if not self: + raise KeyError('dictionary is empty') + root = self.__root + if last: + link = root[0] + link_prev = link[0] + link_prev[1] = root + root[0] = link_prev + else: + link = root[1] + link_next = link[1] + root[1] = link_next + link_next[0] = root + key = link[2] + del self.__map[key] + value = dict.pop(self, key) + return key, value + + # -- the following methods do not depend on the internal structure -- + + def keys(self): + 'od.keys() -> list of keys in od' + return list(self) + + def values(self): + 'od.values() -> list of values in od' + return [self[key] for key in self] + + def items(self): + 'od.items() -> list of (key, value) pairs in od' + return [(key, self[key]) for key in self] + + def iterkeys(self): + 'od.iterkeys() -> an iterator over the keys in od' + return iter(self) + + def itervalues(self): + 'od.itervalues -> an iterator over the values in od' + for k in self: + yield self[k] + + def iteritems(self): + 'od.iteritems -> an iterator over the (key, value) items in od' + for k in self: + yield (k, self[k]) + + def update(*args, **kwds): + '''od.update(E, **F) -> None. Update od from dict/iterable E and F. 
+ + If E is a dict instance, does: for k in E: od[k] = E[k] + If E has a .keys() method, does: for k in E.keys(): od[k] = E[k] + Or if E is an iterable of items, does: for k, v in E: od[k] = v + In either case, this is followed by: for k, v in F.items(): od[k] = v + + ''' + if len(args) > 2: + raise TypeError('update() takes at most 2 positional ' + 'arguments (%d given)' % (len(args),)) + elif not args: + raise TypeError('update() takes at least 1 argument (0 given)') + self = args[0] + # Make progressively weaker assumptions about "other" + other = () + if len(args) == 2: + other = args[1] + if isinstance(other, dict): + for key in other: + self[key] = other[key] + elif hasattr(other, 'keys'): + for key in other.keys(): + self[key] = other[key] + else: + for key, value in other: + self[key] = value + for key, value in kwds.items(): + self[key] = value + + __update = update # let subclasses override update without breaking __init__ + + __marker = object() + + def pop(self, key, default=__marker): + '''od.pop(k[,d]) -> v, remove specified key and return the corresponding value. + If key is not found, d is returned if given, otherwise KeyError is raised. + + ''' + if key in self: + result = self[key] + del self[key] + return result + if default is self.__marker: + raise KeyError(key) + return default + + def setdefault(self, key, default=None): + 'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od' + if key in self: + return self[key] + self[key] = default + return default + + def __repr__(self, _repr_running={}): + 'od.__repr__() <==> repr(od)' + call_key = id(self), _get_ident() + if call_key in _repr_running: + return '...' + _repr_running[call_key] = 1 + try: + if not self: + return '%s()' % (self.__class__.__name__,) + return '%s(%r)' % (self.__class__.__name__, self.items()) + finally: + del _repr_running[call_key] + + def __reduce__(self): + 'Return state information for pickling' + items = [[k, self[k]] for k in self] + inst_dict = vars(self).copy() + for k in vars(OrderedDict()): + inst_dict.pop(k, None) + if inst_dict: + return (self.__class__, (items,), inst_dict) + return self.__class__, (items,) + + def copy(self): + 'od.copy() -> a shallow copy of od' + return self.__class__(self) + + @classmethod + def fromkeys(cls, iterable, value=None): + '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S + and values equal to v (which defaults to None). + + ''' + d = cls() + for key in iterable: + d[key] = value + return d + + def __eq__(self, other): + '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive + while comparison to a regular mapping is order-insensitive. 
+ + ''' + if isinstance(other, OrderedDict): + return len(self)==len(other) and self.items() == other.items() + return dict.__eq__(self, other) + + def __ne__(self, other): + return not self == other + + # -- the following methods are only used in Python 2.7 -- + + def viewkeys(self): + "od.viewkeys() -> a set-like object providing a view on od's keys" + return KeysView(self) + + def viewvalues(self): + "od.viewvalues() -> an object providing a view on od's values" + return ValuesView(self) + + def viewitems(self): + "od.viewitems() -> a set-like object providing a view on od's items" + return ItemsView(self) diff --git a/urllib3/poolmanager.py b/urllib3/poolmanager.py index 310ea21d..70c4e45c 100644 --- a/urllib3/poolmanager.py +++ b/urllib3/poolmanager.py @@ -52,7 +52,15 @@ class PoolManager(RequestMethods): def __init__(self, num_pools=10, **connection_pool_kw): self.connection_pool_kw = connection_pool_kw - self.pools = RecentlyUsedContainer(num_pools) + self.pools = RecentlyUsedContainer(num_pools, dispose_func=lambda p: p.close()) + + def close(self): + """Empty our store of pools and direct them all to close. + + This won't break in-flight connections, because the message is forwarded to + HTTPConnectionPool.close(), which is thread-safe. + """ + self.pools.clear() def connection_from_host(self, host, port=80, scheme='http'): """ -- cgit v1.2.1 From 19125c85bda0699cba6a47c00744c6a688b0b2da Mon Sep 17 00:00:00 2001 From: Andrey Petrov Date: Sat, 21 Jul 2012 17:47:16 -0700 Subject: Manual merge of #86, fixes #76: Pool Depletion/Leaking Connections. --- test/test_connectionpool.py | 47 ++++++++++++++++++++++++++++++++++++++++++++- urllib3/connectionpool.py | 20 ++++++++++++------- urllib3/util.py | 6 +++++- 3 files changed, 64 insertions(+), 9 deletions(-) diff --git a/test/test_connectionpool.py b/test/test_connectionpool.py index 3ba236c7..5462462f 100644 --- a/test/test_connectionpool.py +++ b/test/test_connectionpool.py @@ -1,7 +1,23 @@ import unittest from urllib3.connectionpool import connection_from_url, HTTPConnectionPool -from urllib3.exceptions import EmptyPoolError +from urllib3.packages.ssl_match_hostname import CertificateError +from urllib3.exceptions import ( + EmptyPoolError, + MaxRetryError, + SSLError, + TimeoutError, +) + +from socket import timeout as SocketTimeout +from ssl import SSLError as BaseSSLError + +try: # Python 3 + from queue import Empty + from http.client import HTTPException +except ImportError: + from Queue import Empty + from httplib import HTTPException class TestConnectionPool(unittest.TestCase): @@ -68,6 +84,35 @@ class TestConnectionPool(unittest.TestCase): str(EmptyPoolError(HTTPConnectionPool(host='localhost'), "Test.")), "HTTPConnectionPool(host='localhost', port=None): Test.") + def test_pool_size(self): + POOL_SIZE = 1 + pool = HTTPConnectionPool(host='localhost', maxsize=POOL_SIZE, block=True) + + def _raise(ex): + raise ex() + + def _test(exception, expect): + pool._make_request = lambda *args, **kwargs: _raise(exception) + with self.assertRaises(expect): + pool.request('GET', '/') + + self.assertEqual(pool.pool.qsize(), POOL_SIZE) + + #make sure that all of the exceptions return the connection to the pool + _test(Empty, TimeoutError) + _test(SocketTimeout, TimeoutError) + _test(BaseSSLError, SSLError) + _test(CertificateError, SSLError) + + # The pool should never be empty, and with these two exceptions being raised, + # a retry will be triggered, but that retry will fail, eventually raising + # MaxRetryError, not EmptyPoolError + # See: 
https://github.com/shazow/urllib3/issues/76 + pool._make_request = lambda *args, **kwargs: _raise(HTTPException) + with self.assertRaises(MaxRetryError): + pool.request('GET', '/', retries=1, pool_timeout=0.01) + self.assertEqual(pool.pool.qsize(), POOL_SIZE) + if __name__ == '__main__': unittest.main() diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py index 26ef1504..cfadb26c 100644 --- a/urllib3/connectionpool.py +++ b/urllib3/connectionpool.py @@ -7,7 +7,7 @@ import logging import socket -from socket import error as SocketError, timeout as SocketTimeout +from socket import timeout as SocketTimeout try: # Python 3 from http.client import HTTPConnection, HTTPException @@ -378,7 +378,6 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): try: # Request a connection from the queue - # (Could raise SocketError: Bad file descriptor) conn = self._get_conn(timeout=pool_timeout) # Make the request on the httplib connection object @@ -421,22 +420,27 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # Name mismatch raise SSLError(e) - except (HTTPException, SocketError) as e: + except HTTPException as e: # Connection broken, discard. It will be replaced next _get_conn(). conn = None # This is necessary so we can access e below err = e finally: - if conn and release_conn: - # Put the connection back to be reused + if release_conn: + # Put the connection back to be reused. If the connection is + # expired then it will be None, which will get replaced with a + # fresh connection during _get_conn. self._put_conn(conn) if not conn: + # Try again log.warn("Retrying (%d attempts remain) after connection " "broken by '%r': %s" % (retries, err, url)) return self.urlopen(method, url, body, headers, retries - 1, - redirect, assert_same_host) # Try again + redirect, assert_same_host, + timeout=timeout, pool_timeout=pool_timeout, + release_conn=release_conn, **response_kw) # Handle redirect? redirect_location = redirect and response.get_redirect_location() @@ -445,7 +449,9 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): method = 'GET' log.info("Redirecting %s -> %s" % (url, redirect_location)) return self.urlopen(method, redirect_location, body, headers, - retries - 1, redirect, assert_same_host) + retries - 1, redirect, assert_same_host, + timeout=timeout, pool_timeout=pool_timeout, + release_conn=release_conn, **response_kw) return response diff --git a/urllib3/util.py b/urllib3/util.py index befba6d7..f05b1a6d 100644 --- a/urllib3/util.py +++ b/urllib3/util.py @@ -7,6 +7,7 @@ from base64 import b64encode from collections import namedtuple +from socket import error as SocketError try: from select import poll, POLLIN @@ -237,7 +238,10 @@ def is_connection_dropped(conn): if not select: # Platform-specific: AppEngine return False - return select([sock], [], [], 0.0)[0] + try: + return select([sock], [], [], 0.0)[0] + except SocketError: + return True # This version is better on platforms that support it. p = poll() -- cgit v1.2.1 From 112d5d3c46f108af05b21e2a462f0b64cd1cf06c Mon Sep 17 00:00:00 2001 From: Andrey Petrov Date: Sat, 21 Jul 2012 18:31:34 -0700 Subject: Tests for decode error, new DecodeError exception. 
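This lets callers tell a transport failure apart from a body that advertised
gzip/deflate but failed to decompress. A sketch of the intended usage (the
host and path are invented; a server that mislabels its Content-Encoding is
assumed)::

    from urllib3 import HTTPConnectionPool
    from urllib3.exceptions import DecodeError

    pool = HTTPConnectionPool('localhost')
    try:
        r = pool.request('GET', '/compressed-page')
    except DecodeError:
        pass  # Content-Encoding promised gzip/deflate; the body didn't decode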
---
 dummyserver/handlers.py                      | 10 ++++++++--
 test/test_connectionpool.py                  |  7 +++++++
 test/test_response.py                        |  4 ++--
 test/with_dummyserver/test_connectionpool.py | 16 +++++++++++++++-
 urllib3/exceptions.py                        |  4 ++++
 urllib3/response.py                          |  8 ++++----
 6 files changed, 40 insertions(+), 9 deletions(-)

diff --git a/dummyserver/handlers.py b/dummyserver/handlers.py
index 3e328817..ca809ad9 100644
--- a/dummyserver/handlers.py
+++ b/dummyserver/handlers.py
@@ -145,14 +145,20 @@ class TestingApp(WSGIHandler):
         data = b"hello, world!"
         encoding = request.headers.get('Accept-Encoding', '')
         headers = None
-        if 'gzip' in encoding:
+        if encoding == 'gzip':
             headers = [('Content-Encoding', 'gzip')]
             file_ = BytesIO()
             gzip.GzipFile('', mode='w', fileobj=file_).write(data)
             data = file_.getvalue()
-        elif 'deflate' in encoding:
+        elif encoding == 'deflate':
             headers = [('Content-Encoding', 'deflate')]
             data = zlib.compress(data)
+        elif encoding == 'garbage-gzip':
+            headers = [('Content-Encoding', 'gzip')]
+            data = b'garbage'
+        elif encoding == 'garbage-deflate':
+            headers = [('Content-Encoding', 'deflate')]
+            data = b'garbage'
         return Response(data, headers=headers)
 
     def shutdown(self, request):
diff --git a/test/test_connectionpool.py b/test/test_connectionpool.py
index 5462462f..56426431 100644
--- a/test/test_connectionpool.py
+++ b/test/test_connectionpool.py
@@ -4,6 +4,7 @@ from urllib3.connectionpool import connection_from_url, HTTPConnectionPool
 from urllib3.packages.ssl_match_hostname import CertificateError
 from urllib3.exceptions import (
     EmptyPoolError,
+    HostChangedError,
     MaxRetryError,
     SSLError,
     TimeoutError,
@@ -113,6 +114,12 @@ class TestConnectionPool(unittest.TestCase):
             pool.request('GET', '/', retries=1, pool_timeout=0.01)
         self.assertEqual(pool.pool.qsize(), POOL_SIZE)
 
+    def test_assert_same_host(self):
+        c = connection_from_url('http://google.com:80')
+
+        with self.assertRaises(HostChangedError):
+            c.request('GET', 'http://yahoo.com:80', assert_same_host=True)
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/test/test_response.py b/test/test_response.py
index 0ef379ce..964f6772 100644
--- a/test/test_response.py
+++ b/test/test_response.py
@@ -1,9 +1,9 @@
 import unittest
-import zlib
 
 from io import BytesIO
 
 from urllib3.response import HTTPResponse
+from urllib3.exceptions import DecodeError
 
 
 class TestLegacyResponse(unittest.TestCase):
     def test_getheaders(self):
@@ -50,7 +50,7 @@ class TestResponse(unittest.TestCase):
 
     def test_decode_bad_data(self):
         fp = BytesIO(b'\x00' * 10)
-        self.assertRaises(zlib.error, HTTPResponse, fp, headers={
+        self.assertRaises(DecodeError, HTTPResponse, fp, headers={
             'content-encoding': 'deflate'
         })
diff --git a/test/with_dummyserver/test_connectionpool.py b/test/with_dummyserver/test_connectionpool.py
index 22bf93de..8003323e 100644
--- a/test/with_dummyserver/test_connectionpool.py
+++ b/test/with_dummyserver/test_connectionpool.py
@@ -8,7 +8,12 @@ except:
     from urllib import urlencode
 
 from urllib3 import encode_multipart_formdata, HTTPConnectionPool
-from urllib3.exceptions import TimeoutError, EmptyPoolError, MaxRetryError
+from urllib3.exceptions import (
+    EmptyPoolError,
+    DecodeError,
+    MaxRetryError,
+    TimeoutError,
+)
 from urllib3.packages.six import u
 
 from dummyserver.testcase import HTTPDummyServerTestCase
@@ -221,6 +226,15 @@ class TestConnectionPool(HTTPDummyServerTestCase):
         self.assertEqual(r.headers.get('content-encoding'), 'deflate')
         self.assertEqual(r.data, b'hello, world!')
 
+    def test_bad_decode(self):
+        with self.assertRaises(DecodeError):
+            self.pool.request('GET', '/encodingrequest',
+                              headers={'accept-encoding': 'garbage-deflate'})
+
+        with self.assertRaises(DecodeError):
+            self.pool.request('GET', '/encodingrequest',
+                              headers={'accept-encoding': 'garbage-gzip'})
+
     def test_connection_count(self):
         pool = HTTPConnectionPool(self.host, self.port, maxsize=1)
 
diff --git a/urllib3/exceptions.py b/urllib3/exceptions.py
index 15c9699e..4ce514b2 100644
--- a/urllib3/exceptions.py
+++ b/urllib3/exceptions.py
@@ -23,6 +23,10 @@ class SSLError(HTTPError):
     "Raised when SSL certificate fails in an HTTPS connection."
     pass
 
+class DecodeError(HTTPError):
+    "Raised when automatic decoding based on Content-Type fails."
+    pass
+
 
 ## Leaf Exceptions
 
diff --git a/urllib3/response.py b/urllib3/response.py
index 5fab8243..28537d3b 100644
--- a/urllib3/response.py
+++ b/urllib3/response.py
@@ -10,7 +10,7 @@ import zlib
 
 from io import BytesIO
 
-from .exceptions import HTTPError
+from .exceptions import DecodeError
 from .packages.six import string_types as basestring
 
 
@@ -148,9 +148,9 @@ class HTTPResponse(object):
         try:
             if decode_content and decoder:
                 data = decoder(data)
-        except IOError:
-            raise HTTPError("Received response with content-encoding: %s, but "
-                            "failed to decode it." % content_encoding)
+        except (IOError, zlib.error):
+            raise DecodeError("Received response with content-encoding: %s, but "
+                              "failed to decode it." % content_encoding)
 
         if cache_content:
             self._body = data
-- cgit v1.2.1


From b2eea8ef266f013bbc63239c07a4617b3fcf726f Mon Sep 17 00:00:00 2001
From: Andrey Petrov
Date: Sat, 21 Jul 2012 18:33:34 -0700
Subject: Updated CHANGES.rst

---
 CHANGES.rst | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/CHANGES.rst b/CHANGES.rst
index 047abc40..fb0d0c0f 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -16,6 +16,11 @@ dev (master branch)
 * ``urllib3.PoolManager`` strips the scheme and host before sending the request
   uri. (Issue #8)
 
+* New ``urllib3.exceptions.DecodeError`` exception for when automatic decoding,
+  based on the Content-Type header, fails.
+
+* Fixed bug with pool depletion and leaking connections (Issue #76).
+
 
 1.4 (2012-06-16)
 ++++++++++++++++
-- cgit v1.2.1


From 8fc94d7a0a7a98062f5204743ce293005512ddb0 Mon Sep 17 00:00:00 2001
From: Andrey Petrov
Date: Sun, 22 Jul 2012 11:45:04 -0700
Subject: Cleanup new RecentlyUsedContainer.

---
 urllib3/_collections.py   | 74 +++++++++++++++++++++++------------------------
 urllib3/connectionpool.py |  8 ++---
 2 files changed, 41 insertions(+), 41 deletions(-)

diff --git a/urllib3/_collections.py b/urllib3/_collections.py
index e027b906..a2512edf 100644
--- a/urllib3/_collections.py
+++ b/urllib3/_collections.py
@@ -3,92 +3,92 @@
 #
 # This module is part of urllib3 and is released under
 # the MIT License: http://www.opensource.org/licenses/mit-license.php
-from __future__ import with_statement
 
-import threading
 from collections import MutableMapping
+from threading import Lock
 
-try:
-    # available on 2.7 and up
+try: # Python 2.7+
     from collections import OrderedDict
-    # hush pyflakes
-    OrderedDict
 except ImportError:
     from .packages.ordered_dict import OrderedDict
 
+
 __all__ = ['RecentlyUsedContainer']
 
+
+_Empty = object()
+
+
 class RecentlyUsedContainer(MutableMapping):
     """
     Provides a dict-like that maintains up to ``maxsize`` keys while throwing
     away the least-recently-used keys beyond ``maxsize``.
+
+    :param maxsize:
+        Maximum number of recent elements to retain.
+
+    :param dispose_func:
+        Callback which will get called whenever an element is evicted from
+        the container.
""" - # this is an object no one else knows about, sort of a hyper-None - # cf. the implementation of OrderedDict - __marker = object() + ContainerType = OrderedDict def __init__(self, maxsize=10, dispose_func=None): - """Constructor. - - Args: - maxsize - int, maximum number of elements to retain - dispose_func - callback taking a single argument, called to destroy - elements that are being evicted or released - """ self._maxsize = maxsize self.dispose_func = dispose_func # OrderedDict is not inherently threadsafe, so protect it with a lock - self._mapping = OrderedDict() - self._lock = threading.Lock() + self._container = self.ContainerType() + self._lock = Lock() def clear(self): with self._lock: # copy pointers to all values, then wipe the mapping # under Python 2, this copies the list of values twice :-| - values = list(self._mapping.values()) - self._mapping.clear() + values = list(self._container.values()) + self._container.clear() if self.dispose_func: for value in values: self.dispose_func(value) def __getitem__(self, key): + # Re-insert the item, moving it to the end of the eviction line. with self._lock: - # remove and re-add the item, moving it to the end of the eviction line - # throw the KeyError back to calling code if it's not present: - item = self._mapping.pop(key) - self._mapping[key] = item + item = self._container.pop(key) + self._container[key] = item return item def __setitem__(self, key, item): - evicted_entry = self.__marker + evicted_entry = _Empty with self._lock: - # possibly evict the existing value of 'key' - evicted_entry = self._mapping.get(key, self.__marker) - self._mapping[key] = item - # if we didn't evict an existing value, we might have to evict the LRU value - if len(self._mapping) > self._maxsize: - # pop from the beginning of the dict - _key, evicted_entry = self._mapping.popitem(last=False) - - if self.dispose_func and evicted_entry is not self.__marker: + # Possibly evict the existing value of 'key' + evicted_entry = self._container.get(key, _Empty) + self._container[key] = item + + # If we didn't evict an existing value, we might have to evict the + # least recently used item from the beginning of the container. + if len(self._container) > self._maxsize: + _key, evicted_entry = self._container.popitem(last=False) + + if self.dispose_func and evicted_entry is not _Empty: self.dispose_func(evicted_entry) def __delitem__(self, key): with self._lock: - entry = self._mapping.pop(key) + entry = self._container.pop(key) + if self.dispose_func: self.dispose_func(entry) def __len__(self): with self._lock: - return len(self._mapping) + return len(self._container) def __iter__(self): raise NotImplementedError('Iteration over this class is unlikely to be threadsafe.') def keys(self): with self._lock: - return self._mapping.keys() + return self._container.keys() diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py index 851d960a..05947aed 100644 --- a/urllib3/connectionpool.py +++ b/urllib3/connectionpool.py @@ -10,25 +10,25 @@ import threading from socket import timeout as SocketTimeout -try: # Python 3 +try: # Python 3 from http.client import HTTPConnection, HTTPException from http.client import HTTP_PORT, HTTPS_PORT except ImportError: from httplib import HTTPConnection, HTTPException from httplib import HTTP_PORT, HTTPS_PORT -try: # Python 3 +try: # Python 3 from queue import LifoQueue, Empty, Full except ImportError: from Queue import LifoQueue, Empty, Full -try: # Compiled with SSL? +try: # Compiled with SSL? 
     HTTPSConnection = object
     BaseSSLError = None
     ssl = None
 
-    try: # Python 3
+    try:  # Python 3
         from http.client import HTTPSConnection
     except ImportError:
         from httplib import HTTPSConnection
-- cgit v1.2.1


From 4b9694e29ee7ea3dab1eedb8ba1c4344c0c9935a Mon Sep 17 00:00:00 2001
From: Andrey Petrov
Date: Sun, 22 Jul 2012 11:52:04 -0700
Subject: More tests for the new collection.

---
 test/test_collections.py | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/test/test_collections.py b/test/test_collections.py
index 2bff45e2..098b31ad 100644
--- a/test/test_collections.py
+++ b/test/test_collections.py
@@ -53,7 +53,7 @@ class TestLRUContainer(unittest.TestCase):
         for i in xrange(10):
             d[i] = True
 
-        # keys should be ordered by access time
+        # Keys should be ordered by access time
         self.assertEqual(list(d.keys()), [5, 6, 7, 8, 9])
 
         new_order = [7,8,6,9,5]
@@ -95,24 +95,33 @@ class TestLRUContainer(unittest.TestCase):
 
     def test_disposal(self):
         evicted_items = []
+
        def dispose_func(arg):
-            # save the evicted datum for inspection
+            # Save the evicted datum for inspection
            evicted_items.append(arg)
 
        d = Container(5, dispose_func=dispose_func)
        for i in xrange(5):
            d[i] = i
        self.assertEqual(list(d.keys()), list(xrange(5)))
-        # nothing should have been disposed
-        self.assertEqual(evicted_items, [])
+        self.assertEqual(evicted_items, []) # Nothing disposed
 
        d[5] = 5
        self.assertEqual(list(d.keys()), list(xrange(1, 6)))
-        # dispose_func should have been called on the LRU item, 0
        self.assertEqual(evicted_items, [0])
 
+        del d[1]
+        self.assertEqual(evicted_items, [0, 1])
+
        d.clear()
-        self.assertEqual(evicted_items, list(xrange(0, 6)))
+        self.assertEqual(evicted_items, [0, 1, 2, 3, 4, 5])
+
+    def test_iter(self):
+        d = Container()
+
+        with self.assertRaises(NotImplementedError):
+            for i in d:
+                self.fail("Iteration shouldn't be implemented.")
 
 if __name__ == '__main__':
     unittest.main()
-- cgit v1.2.1


From a046bae9b61e5f086d94e61e9d542497a375de67 Mon Sep 17 00:00:00 2001
From: Andrey Petrov
Date: Sun, 22 Jul 2012 12:19:11 -0700
Subject: Cleaning up code.

---
 urllib3/connectionpool.py | 76 ++++++++++++++++++++---------------------------
 urllib3/exceptions.py     |  6 ++++
 urllib3/poolmanager.py    | 16 ++++------
 3 files changed, 44 insertions(+), 54 deletions(-)

diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py
index 05947aed..cdcae1dd 100644
--- a/urllib3/connectionpool.py
+++ b/urllib3/connectionpool.py
@@ -6,7 +6,6 @@
 
 import logging
 import socket
-import threading
 
 from socket import timeout as SocketTimeout
 
@@ -36,7 +35,7 @@ try:  # Compiled with SSL?
     import ssl
     BaseSSLError = ssl.SSLError
 
-except (ImportError, AttributeError):
+except (ImportError, AttributeError): # Platform-specific: No SSL.
     pass
 
 
@@ -44,6 +43,7 @@
 from .request import RequestMethods
 from .response import HTTPResponse
 from .util import get_host, is_connection_dropped
 from .exceptions import (
+    ClosedPoolError,
     EmptyPoolError,
     HostChangedError,
     MaxRetryError,
@@ -178,11 +178,6 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
         for _ in xrange(maxsize):
             self.pool.put(None)
 
-        # this is annoying: wrap the Queue with an additional lock,
-        # so there's no race between closing the pool and trying to return
-        # a connection to the already-closed pool (which would leak the connection)
-        self.closure_lock = threading.Lock()
-
         # These are mostly for testing and debugging purposes.
         self.num_connections = 0
         self.num_requests = 0
 
@@ -196,25 +191,6 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
                  (self.num_connections, self.host))
         return HTTPConnection(host=self.host, port=self.port)
 
-    def close(self):
-        """Close all pooled connections; refuse further requests."""
-        conns = []
-        with self.closure_lock:
-            # acquire all available connections from the pool, so we can close them.
-            # connections can't be checked back in to the pool while we're working;
-            # after we're done working, instead of getting checked back in,
-            # they'll be closed.
-            while True:
-                try:
-                    conns.append(self.pool.get(block=False))
-                except Empty:
-                    break
-            self.pool = None
-
-        for conn in conns:
-            if conn is not None:
-                conn.close()
-
     def _get_conn(self, timeout=None):
         """
         Get a connection. Will return a pooled connection if one is available.
@@ -227,12 +203,12 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
             :class:`urllib3.exceptions.EmptyPoolError` if the pool is empty and
             :prop:`.block` is ``True``.
         """
+        if self.pool is None:
+            raise ClosedPoolError("Pool is closed.")
+
         conn = None
         try:
-            with self.closure_lock:
-                if self.pool is None:
-                    raise Exception("Pool has been closed.")
-                conn = self.pool.get(block=self.block, timeout=timeout)
+            conn = self.pool.get(block=self.block, timeout=timeout)
 
             # If this is a persistent connection, check if it got disconnected
             if conn and is_connection_dropped(conn):
@@ -259,22 +235,19 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
         If the pool is already full, the connection is discarded because we
         exceeded maxsize. If connections are discarded frequently, then maxsize
         should be increased.
+
+        If the pool is closed, then the connection will be closed and discarded.
         """
-        returned = False
-
-        with self.closure_lock:
-            if self.pool is not None:
-                try:
-                    self.pool.put(conn, block=False)
-                    returned = True
-                except Full:
-                    # This should never happen if self.block == True
-                    log.warning("HttpConnectionPool is full, discarding connection: %s"
-                                % self.host)
-
-            # if the pool is closed or full, close the connection
-            if not returned:
+        if self.pool is None:
             conn.close()
+            return
+
+        try:
+            self.pool.put(conn, block=False)
+        except Full:
+            # This should never happen if self.block == True
+            log.warning("HTTPConnectionPool is full, discarding connection: %s"
+                        % self.host)
 
     def _make_request(self, conn, method, url, timeout=_Default,
                       **httplib_request_kw):
         """
@@ -307,6 +280,21 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
                                   httplib_response.length))
         return httplib_response
 
+    def close(self):
+        """
+        Close all pooled connections and disable the pool.
+        """
+        # Disable access to the pool
+        old_pool, self.pool = self.pool, None
+
+        try:
+            while True:
+                conn = old_pool.get(block=False)
+                conn.close()
+
+        except Empty:
+            pass # Done.
+
     def is_same_host(self, url):
         """
         Check if the given ``url`` is a member of the same host as this
diff --git a/urllib3/exceptions.py b/urllib3/exceptions.py
index 4ce514b2..99ebb67e 100644
--- a/urllib3/exceptions.py
+++ b/urllib3/exceptions.py
@@ -23,6 +23,7 @@ class SSLError(HTTPError):
     "Raised when SSL certificate fails in an HTTPS connection."
     pass
 
+
 class DecodeError(HTTPError):
     "Raised when automatic decoding based on Content-Type fails."
     pass
 
 
 ## Leaf Exceptions
 
@@ -61,6 +62,11 @@ class EmptyPoolError(PoolError):
     pass
 
 
+class ClosedPoolError(PoolError):
+    "Raised when a request enters a pool after the pool has been closed."
+    pass
+
+
 class LocationParseError(ValueError, HTTPError):
     "Raised when get_host or similar fails to parse the URL input."
 
diff --git a/urllib3/poolmanager.py b/urllib3/poolmanager.py
index 0f765424..aa85a2ab 100644
--- a/urllib3/poolmanager.py
+++ b/urllib3/poolmanager.py
@@ -9,7 +9,6 @@
 import logging
 
 from ._collections import RecentlyUsedContainer
 from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool
 from .connectionpool import connection_from_url, port_by_scheme
-from .exceptions import HostChangedError
 from .request import RequestMethods
 from .util import parse_url
 
@@ -49,17 +48,17 @@ class PoolManager(RequestMethods):
 
     """
 
-    # TODO: Make sure there are no memory leaks here.
-
     def __init__(self, num_pools=10, **connection_pool_kw):
         self.connection_pool_kw = connection_pool_kw
-        self.pools = RecentlyUsedContainer(num_pools, dispose_func=lambda p: p.close())
+        self.pools = RecentlyUsedContainer(num_pools,
+                                           dispose_func=lambda p: p.close())
 
     def close(self):
-        """Empty our store of pools and direct them all to close.
+        """
+        Empty our store of pools and direct them all to close.
 
-        This won't break in-flight connections, because the message is forwarded to
-        HTTPConnectionPool.close(), which is thread-safe.
+        This will not affect in-flight connections, but they will not be
+        re-used after completion.
         """
         self.pools.clear()
 
@@ -100,9 +99,6 @@ class PoolManager(RequestMethods):
         u = parse_url(url)
         return self.connection_from_host(u.host, port=u.port, scheme=u.scheme)
 
-    def handle_redirect(response):
-        pass
-
     def urlopen(self, method, url, redirect=True, **kw):
         """
         Same as :meth:`urllib3.connectionpool.HTTPConnectionPool.urlopen`
-- cgit v1.2.1


From 31f2711272b9858ac3a598ddc880cc66978a3f44 Mon Sep 17 00:00:00 2001
From: Andrey Petrov
Date: Sun, 22 Jul 2012 12:35:25 -0700
Subject: Clarification and documentation for our new RecentlyUsedContainer.

---
 urllib3/_collections.py | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/urllib3/_collections.py b/urllib3/_collections.py
index a2512edf..84b3d9ad 100644
--- a/urllib3/_collections.py
+++ b/urllib3/_collections.py
@@ -16,20 +16,20 @@ except ImportError:
     from .packages.ordered_dict import OrderedDict
 
 
 __all__ = ['RecentlyUsedContainer']
 
 
-_Empty = object()
+_Null = object()
 
 
 class RecentlyUsedContainer(MutableMapping):
     """
-    Provides a dict-like that maintains up to ``maxsize`` keys while throwing
-    away the least-recently-used keys beyond ``maxsize``.
+    Provides a thread-safe dict-like container which maintains up to
+    ``maxsize`` keys while throwing away the least-recently-used keys beyond
+    ``maxsize``.
 
     :param maxsize:
         Maximum number of recent elements to retain.
 
     :param dispose_func:
-        Callback which will get called whenever an element is evicted from
-        the container.
+        Every time an item is evicted from the container,
+        ``dispose_func(value)`` is called.
     """
 
     ContainerType = OrderedDict
 
@@ -38,13 +38,12 @@ class RecentlyUsedContainer(MutableMapping):
         self._maxsize = maxsize
         self.dispose_func = dispose_func
 
-        # OrderedDict is not inherently threadsafe, so protect it with a lock
         self._container = self.ContainerType()
         self._lock = Lock()
 
     def clear(self):
         with self._lock:
-            # copy pointers to all values, then wipe the mapping
+            # Copy pointers to all values, then wipe the mapping
             # under Python 2, this copies the list of values twice :-|
             values = list(self._container.values())
             self._container.clear()
@@ -60,27 +59,27 @@ class RecentlyUsedContainer(MutableMapping):
             item = self._container.pop(key)
             self._container[key] = item
             return item
 
-    def __setitem__(self, key, item):
-        evicted_entry = _Empty
+    def __setitem__(self, key, value):
+        evicted_value = _Null
         with self._lock:
             # Possibly evict the existing value of 'key'
-            evicted_entry = self._container.get(key, _Empty)
-            self._container[key] = item
+            evicted_value = self._container.get(key, _Null)
+            self._container[key] = value
 
             # If we didn't evict an existing value, we might have to evict the
             # least recently used item from the beginning of the container.
             if len(self._container) > self._maxsize:
-                _key, evicted_entry = self._container.popitem(last=False)
+                _key, evicted_value = self._container.popitem(last=False)
 
-            if self.dispose_func and evicted_entry is not _Empty:
-                self.dispose_func(evicted_entry)
+            if self.dispose_func and evicted_value is not _Null:
+                self.dispose_func(evicted_value)
 
     def __delitem__(self, key):
         with self._lock:
-            entry = self._container.pop(key)
+            value = self._container.pop(key)
 
             if self.dispose_func:
-                self.dispose_func(entry)
+                self.dispose_func(value)
 
     def __len__(self):
         with self._lock:
-- cgit v1.2.1


From ba162b7a3953b5d6d3259860ce83f61107f036c5 Mon Sep 17 00:00:00 2001
From: Andrey Petrov
Date: Sun, 22 Jul 2012 12:49:03 -0700
Subject: More tests, PoolManager.close is now PoolManager.clear.
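
A rough sketch of the renamed API (host names are placeholders):

    from urllib3 import PoolManager

    manager = PoolManager(num_pools=2)
    manager.request('GET', 'http://example.com/')
    manager.request('GET', 'http://example.org/')

    # Evict all pools and close their idle connections. The manager stays
    # usable; later requests simply create fresh pools on demand.
    manager.clear()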
---
 test/test_poolmanager.py  | 24 ++++++++++++++++++++++++
 urllib3/_collections.py   | 22 +++++++++++-----------
 urllib3/connectionpool.py |  5 +++--
 urllib3/poolmanager.py    |  2 +-
 4 files changed, 39 insertions(+), 14 deletions(-)

diff --git a/test/test_poolmanager.py b/test/test_poolmanager.py
index 12722f76..b15e498f 100644
--- a/test/test_poolmanager.py
+++ b/test/test_poolmanager.py
@@ -2,6 +2,7 @@ import unittest
 
 from urllib3.poolmanager import PoolManager
 from urllib3 import connection_from_url
+from urllib3.exceptions import ClosedPoolError
 
 
 class TestPoolManager(unittest.TestCase):
@@ -42,6 +43,29 @@ class TestPoolManager(unittest.TestCase):
 
         self.assertEqual(len(connections), 5)
 
+    def test_pool_close(self):
+        p = PoolManager(5)
+
+        conn_pool = p.connection_from_url('http://google.com')
+        self.assertEqual(len(p.pools), 1)
+
+        conn = conn_pool._get_conn()
+
+        p.clear()
+        self.assertEqual(len(p.pools), 0)
+
+        with self.assertRaises(ClosedPoolError):
+            conn_pool._get_conn()
+
+        conn_pool._put_conn(conn)
+
+        with self.assertRaises(ClosedPoolError):
+            conn_pool._get_conn()
+
+        self.assertEqual(len(p.pools), 0)
+
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/urllib3/_collections.py b/urllib3/_collections.py
index 84b3d9ad..ae9eb5b6 100644
--- a/urllib3/_collections.py
+++ b/urllib3/_collections.py
@@ -42,17 +42,6 @@ class RecentlyUsedContainer(MutableMapping):
         self._container = self.ContainerType()
         self._lock = Lock()
 
-    def clear(self):
-        with self._lock:
-            # Copy pointers to all values, then wipe the mapping
-            # under Python 2, this copies the list of values twice :-|
-            values = list(self._container.values())
-            self._container.clear()
-
-            if self.dispose_func:
-                for value in values:
-                    self.dispose_func(value)
-
     def __getitem__(self, key):
         # Re-insert the item, moving it to the end of the eviction line.
         with self._lock:
@@ -89,6 +78,17 @@ class RecentlyUsedContainer(MutableMapping):
     def __iter__(self):
         raise NotImplementedError('Iteration over this class is unlikely to be threadsafe.')
 
+    def clear(self):
+        with self._lock:
+            # Copy pointers to all values, then wipe the mapping
+            # under Python 2, this copies the list of values twice :-|
+            values = list(self._container.values())
+            self._container.clear()
+
+            if self.dispose_func:
+                for value in values:
+                    self.dispose_func(value)
+
     def keys(self):
         with self._lock:
             return self._container.keys()
diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py
index cdcae1dd..bda0a4cf 100644
--- a/urllib3/connectionpool.py
+++ b/urllib3/connectionpool.py
@@ -204,7 +204,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
         """
         if self.pool is None:
-            raise ClosedPoolError("Pool is closed.")
+            raise ClosedPoolError(self, "Pool is closed.")
 
         conn = None
         try:
@@ -290,7 +290,8 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
         try:
             while True:
                 conn = old_pool.get(block=False)
-                conn.close()
+                if conn:
+                    conn.close()
 
         except Empty:
             pass # Done.
diff --git a/urllib3/poolmanager.py b/urllib3/poolmanager.py
index aa85a2ab..8f5b54c1 100644
--- a/urllib3/poolmanager.py
+++ b/urllib3/poolmanager.py
@@ -53,7 +53,7 @@ class PoolManager(RequestMethods):
         self.pools = RecentlyUsedContainer(num_pools,
                                            dispose_func=lambda p: p.close())
 
-    def close(self):
+    def clear(self):
         """
         Empty our store of pools and direct them all to close.
-- cgit v1.2.1


From 9fb930fc3de782624cabfc526822ca14dd64d004 Mon Sep 17 00:00:00 2001
From: Andrey Petrov
Date: Sun, 22 Jul 2012 12:56:54 -0700
Subject: More pool-closing tests, 99% coverage.

---
 test/test_connectionpool.py | 27 +++++++++++++++++++++++++++
 test/test_poolmanager.py    |  2 +-
 urllib3/_collections.py     |  4 ++--
 3 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/test/test_connectionpool.py b/test/test_connectionpool.py
index 56426431..afc30986 100644
--- a/test/test_connectionpool.py
+++ b/test/test_connectionpool.py
@@ -3,6 +3,7 @@ import unittest
 from urllib3.connectionpool import connection_from_url, HTTPConnectionPool
 from urllib3.packages.ssl_match_hostname import CertificateError
 from urllib3.exceptions import (
+    ClosedPoolError,
     EmptyPoolError,
     HostChangedError,
     MaxRetryError,
@@ -120,6 +121,32 @@ class TestConnectionPool(unittest.TestCase):
         with self.assertRaises(HostChangedError):
             c.request('GET', 'http://yahoo.com:80', assert_same_host=True)
 
+    def test_pool_close(self):
+        pool = connection_from_url('http://google.com:80')
+
+        # Populate with some connections
+        conn1 = pool._get_conn()
+        conn2 = pool._get_conn()
+        conn3 = pool._get_conn()
+        pool._put_conn(conn1)
+        pool._put_conn(conn2)
+
+        old_pool_queue = pool.pool
+
+        pool.close()
+        self.assertEqual(pool.pool, None)
+
+        with self.assertRaises(ClosedPoolError):
+            pool._get_conn()
+
+        pool._put_conn(conn3)
+
+        with self.assertRaises(ClosedPoolError):
+            pool._get_conn()
+
+        with self.assertRaises(Empty):
+            old_pool_queue.get(block=False)
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/test/test_poolmanager.py b/test/test_poolmanager.py
index b15e498f..273abf98 100644
--- a/test/test_poolmanager.py
+++ b/test/test_poolmanager.py
@@ -43,7 +43,7 @@ class TestPoolManager(unittest.TestCase):
 
         self.assertEqual(len(connections), 5)
 
-    def test_pool_close(self):
+    def test_manager_clear(self):
         p = PoolManager(5)
 
         conn_pool = p.connection_from_url('http://google.com')
diff --git a/urllib3/_collections.py b/urllib3/_collections.py
index ae9eb5b6..a052b1da 100644
--- a/urllib3/_collections.py
+++ b/urllib3/_collections.py
@@ -32,12 +32,12 @@ class RecentlyUsedContainer(MutableMapping):
         ``dispose_func(value)`` is called.
     """
 
-    ContainerType = OrderedDict
+    ContainerCls = OrderedDict
 
     def __init__(self, maxsize=10, dispose_func=None):
         self._maxsize = maxsize
         self.dispose_func = dispose_func
 
-        self._container = self.ContainerType()
+        self._container = self.ContainerCls()
         self._lock = Lock()
 
     def __getitem__(self, key):
-- cgit v1.2.1


From 526ec6762f0729c676bc1aeb39beedbfed920575 Mon Sep 17 00:00:00 2001
From: Shivaram Lingamneni
Date: Sun, 22 Jul 2012 13:39:24 -0700
Subject: adding myself to contributors

---
 CONTRIBUTORS.txt | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt
index 9a04b6ae..7dfbcafc 100644
--- a/CONTRIBUTORS.txt
+++ b/CONTRIBUTORS.txt
@@ -42,5 +42,8 @@ In chronological order:
 * studer
   * IPv6 url support and test coverage
 
+* Shivaram Lingamneni
+  * Support for explicitly closing pooled connections
+
 * [Your name or handle] <[email or website]>
   * [Brief summary of your changes]
-- cgit v1.2.1


From 96bf85e1e893fe41f41457e8ca284659f88d5145 Mon Sep 17 00:00:00 2001
From: Andrey Petrov
Date: Sun, 22 Jul 2012 14:15:03 -0700
Subject: raise ClosedPoolError if self.pool is None.
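
With this change the closed-pool check becomes implicit: close() sets
self.pool to None, so the Queue access itself raises AttributeError, which
gets translated into ClosedPoolError. From the caller's side, roughly:

    from urllib3 import HTTPConnectionPool
    from urllib3.exceptions import ClosedPoolError

    pool = HTTPConnectionPool('localhost')
    pool.close()
    try:
        pool._get_conn()
    except ClosedPoolError:
        pass  # Expected: a closed pool refuses to hand out connections.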
---
 urllib3/connectionpool.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py
index bda0a4cf..ba22de11 100644
--- a/urllib3/connectionpool.py
+++ b/urllib3/connectionpool.py
@@ -203,17 +203,12 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
             :class:`urllib3.exceptions.EmptyPoolError` if the pool is empty and
             :prop:`.block` is ``True``.
         """
-        if self.pool is None:
-            raise ClosedPoolError(self, "Pool is closed.")
-
         conn = None
         try:
             conn = self.pool.get(block=self.block, timeout=timeout)
 
-            # If this is a persistent connection, check if it got disconnected
-            if conn and is_connection_dropped(conn):
-                log.info("Resetting dropped connection: %s" % self.host)
-                conn.close()
+        except AttributeError: # self.pool is None
+            raise ClosedPoolError(self, "Pool is closed.")
 
         except Empty:
             if self.block:
@@ -222,6 +217,11 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
                                      "connections are allowed.")
             pass # Oh well, we'll create a new connection then
 
+        # If this is a persistent connection, check if it got disconnected
+        if conn and is_connection_dropped(conn):
+            log.info("Resetting dropped connection: %s" % self.host)
+            conn.close()
+
         return conn or self._new_conn()
 
     def _put_conn(self, conn):
-- cgit v1.2.1


From 0b3bd3b6089742e20b615d37f0c5beb87fd8ebea Mon Sep 17 00:00:00 2001
From: Andrey Petrov
Date: Sun, 22 Jul 2012 14:23:56 -0700
Subject: CHANGES update.

---
 CHANGES.rst | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index fb0d0c0f..6faeb698 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -19,7 +19,9 @@ dev (master branch)
 * New ``urllib3.exceptions.DecodeError`` exception for when automatic decoding,
   based on the Content-Type header, fails.
 
-* Fixed bug with pool depletion and leaking connections (Issue #76).
+* Fixed bug with pool depletion and leaking connections (Issue #76). Added
+  explicit connection closing on pool eviction. Added
+  ``urllib3.PoolManager.clear()``.
 
 
 1.4 (2012-06-16)
-- cgit v1.2.1


From 3e6ff21a98c613772bb347e84f543c8771c58d95 Mon Sep 17 00:00:00 2001
From: Shivaram Lingamneni
Date: Sun, 22 Jul 2012 14:29:47 -0700
Subject: apply the try-except fix to _put_conn as well

---
 urllib3/connectionpool.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py
index ba22de11..ffb9c340 100644
--- a/urllib3/connectionpool.py
+++ b/urllib3/connectionpool.py
@@ -238,17 +238,22 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
 
         If the pool is closed, then the connection will be closed and discarded.
         """
-        if self.pool is None:
-            conn.close()
-            return
+        conn_returned = False
 
         try:
             self.pool.put(conn, block=False)
+            conn_returned = True
+        except AttributeError:
+            # self.pool is None.
+            pass
         except Full:
             # This should never happen if self.block == True
             log.warning("HTTPConnectionPool is full, discarding connection: %s"
                         % self.host)
 
+        if not conn_returned:
+            conn.close()
+
     def _make_request(self, conn, method, url, timeout=_Default,
                       **httplib_request_kw):
-- cgit v1.2.1


From 13d0d6e09404d567a8883b5eec34e7d7095698f0 Mon Sep 17 00:00:00 2001
From: Andrey Petrov
Date: Sun, 22 Jul 2012 15:11:28 -0700
Subject: Refactor _put_conn with implicit state tracking.
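
Note the contract this refactor preserves: returning a connection to a
closed pool is not an error, the connection is simply closed and discarded.
A quick sketch of that behaviour:

    pool = HTTPConnectionPool('localhost')
    conn = pool._get_conn()
    pool.close()
    pool._put_conn(conn)  # No exception; conn is closed and discarded.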
---
 urllib3/connectionpool.py | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py
index ffb9c340..97da5446 100644
--- a/urllib3/connectionpool.py
+++ b/urllib3/connectionpool.py
@@ -232,17 +232,15 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
             Connection object for the current host and port as returned by
             :meth:`._new_conn` or :meth:`._get_conn`.
 
-        If the pool is already full, the connection is discarded because we
-        exceeded maxsize. If connections are discarded frequently, then maxsize
-        should be increased.
+        If the pool is already full, the connection is closed and discarded
+        because we exceeded maxsize. If connections are discarded frequently,
+        then maxsize should be increased.
 
         If the pool is closed, then the connection will be closed and discarded.
         """
-        conn_returned = False
-
         try:
             self.pool.put(conn, block=False)
-            conn_returned = True
+            return # Everything is dandy, done.
         except AttributeError:
             # self.pool is None.
             pass
@@ -251,8 +249,8 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
             log.warning("HTTPConnectionPool is full, discarding connection: %s"
                         % self.host)
 
-        if not conn_returned:
-            conn.close()
+        # Connection never got put back into the pool, close it.
+        conn.close()
 
     def _make_request(self, conn, method, url, timeout=_Default,
                       **httplib_request_kw):
-- cgit v1.2.1


From e7fb5e81e9f75fdce7513bc7510a518d09148a51 Mon Sep 17 00:00:00 2001
From: Andrey Petrov
Date: Sun, 22 Jul 2012 15:36:25 -0700
Subject: More timeout-related tests: 100% coverage.

---
 test/with_dummyserver/test_connectionpool.py | 32 ++++++++++++++++++++++------
 1 file changed, 25 insertions(+), 7 deletions(-)

diff --git a/test/with_dummyserver/test_connectionpool.py b/test/with_dummyserver/test_connectionpool.py
index 8003323e..3d069b80 100644
--- a/test/with_dummyserver/test_connectionpool.py
+++ b/test/with_dummyserver/test_connectionpool.py
@@ -15,6 +15,7 @@ from urllib3.exceptions import (
     TimeoutError,
 )
 from urllib3.packages.six import u
+from socket import timeout as SocketTimeout
 
 from dummyserver.testcase import HTTPDummyServerTestCase
 
@@ -97,13 +98,30 @@ class TestConnectionPool(HTTPDummyServerTestCase):
         self.assertEqual(r.status, 200, r.data)
 
     def test_timeout(self):
-        pool = HTTPConnectionPool(self.host, self.port, timeout=0.01)
-        try:
-            pool.request('GET', '/sleep',
-                         fields={'seconds': '0.02'})
-            self.fail("Failed to raise TimeoutError exception")
-        except TimeoutError:
-            pass
+        url = '/sleep?seconds=0.005'
+        timeout = 0.001
+
+        # Pool-global timeout
+        pool = HTTPConnectionPool(self.host, self.port, timeout=timeout)
+
+        conn = pool._get_conn()
+        with self.assertRaises(SocketTimeout):
+            pool._make_request(conn, 'GET', url)
+        pool._put_conn(conn)
+
+        with self.assertRaises(TimeoutError):
+            pool.request('GET', url)
+
+        # Request-specific timeout
+        pool = HTTPConnectionPool(self.host, self.port, timeout=0.5)
+
+        conn = pool._get_conn()
+        with self.assertRaises(SocketTimeout):
+            pool._make_request(conn, 'GET', url, timeout=timeout)
+        pool._put_conn(conn)
+
+        with self.assertRaises(TimeoutError):
+            pool.request('GET', url, timeout=timeout)
 
     def test_redirect(self):
         r = self.pool.request('GET', '/redirect', fields={'target': '/'}, redirect=False)
-- cgit v1.2.1


From 14b98fe8378b57a16df60231351e3415425b3dee Mon Sep 17 00:00:00 2001
From: Andrey Petrov
Date: Sun, 22 Jul 2012 15:37:49 -0700
Subject: Updated README and CHANGES with 100% unit test coverage.
---
 CHANGES.rst | 2 ++
 README.rst  | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index 6faeb698..feb27888 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -23,6 +23,8 @@ dev (master branch)
   explicit connection closing on pool eviction. Added
   ``urllib3.PoolManager.clear()``.
 
+* 100% unit test coverage.
+
 
 1.4 (2012-06-16)
 ++++++++++++++++
diff --git a/README.rst b/README.rst
index f177852b..144df0eb 100644
--- a/README.rst
+++ b/README.rst
@@ -9,7 +9,7 @@ Highlights
 - Supports gzip and deflate decoding.
 - Thread-safe and sanity-safe.
 - Works with AppEngine, gevent, and eventlib.
-- Tested on Python 2.6+ and Python 3.2+, 99% unit test coverage.
+- Tested on Python 2.6+ and Python 3.2+, 100% unit test coverage.
 - Small and easy to understand codebase perfect for extending and building upon.
   For a more comprehensive solution, have a look at
   `Requests `_ which is also powered by urllib3.
-- cgit v1.2.1


From 2be25d6b39ef05b8056e5f0b5c4254127aa21ddd Mon Sep 17 00:00:00 2001
From: Andrey Petrov
Date: Sun, 22 Jul 2012 15:39:52 -0700
Subject: Update CHANGES.rst

---
 CHANGES.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index feb27888..38874e22 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -23,7 +23,7 @@ dev (master branch)
   explicit connection closing on pool eviction. Added
   ``urllib3.PoolManager.clear()``.
 
-* 100% unit test coverage.
+* 99% -> 100% unit test coverage.
 
 
 1.4 (2012-06-16)
-- cgit v1.2.1


From 52de865aa71a953d1093dee6399bfc732bebb986 Mon Sep 17 00:00:00 2001
From: Andrey Petrov
Date: Sun, 22 Jul 2012 15:46:02 -0700
Subject: docs.

---
 docs/helpers.rst | 7 +++++++
 docs/index.rst   | 2 +-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/docs/helpers.rst b/docs/helpers.rst
index 6a759116..d03d6c06 100644
--- a/docs/helpers.rst
+++ b/docs/helpers.rst
@@ -4,6 +4,13 @@ Helpers
 Useful methods for working with :mod:`httplib`, completely decoupled from
 code specific to **urllib3**.
 
+
+Utilities
+---------
+
+.. automodule:: urllib3.util
+    :members:
+
 Filepost
 --------
 
diff --git a/docs/index.rst b/docs/index.rst
index 7423e358..7ed25dbf 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -30,7 +30,7 @@ Highlights
 
 - Thread-safe and sanity-safe.
 
-- Tested on Python 2.6+ and Python 3.2+, 99% unit test coverage.
+- Tested on Python 2.6+ and Python 3.2+, 100% unit test coverage.
 
 - Works with AppEngine, gevent, and eventlib.
 
-- cgit v1.2.1


From e657a152d1bb581070ee3fbd3d4718fa0fef684a Mon Sep 17 00:00:00 2001
From: Andrey Petrov
Date: Sun, 22 Jul 2012 15:52:58 -0700
Subject: docs.

---
 urllib3/util.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/urllib3/util.py b/urllib3/util.py
index f05b1a6d..b08dd30f 100644
--- a/urllib3/util.py
+++ b/urllib3/util.py
@@ -225,7 +225,7 @@ def is_connection_dropped(conn):
     Returns True if the connection is dropped and should be closed.
 
     :param conn:
-        :class:`urllib3.connectionpool.HTTPConnectionPool` object.
+        :class:`httplib.HTTPConnection` object.
 
     Note: For platforms like AppEngine, this will always return ``False`` to
     let the platform handle connection recycling transparently for us.
-- cgit v1.2.1


From d8149baef5a200c1fe05c337e82c8ff846905ad4 Mon Sep 17 00:00:00 2001
From: Andrey Petrov
Date: Sun, 22 Jul 2012 15:55:05 -0700
Subject: sphinx syntax.
---
 urllib3/util.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/urllib3/util.py b/urllib3/util.py
index b08dd30f..8ec990bc 100644
--- a/urllib3/util.py
+++ b/urllib3/util.py
@@ -86,7 +86,7 @@ def parse_url(url):
     Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is
     performed to parse incomplete urls. Fields not provided will be None.
 
-    Partly backwards-compatible with :module:`urlparse`.
+    Partly backwards-compatible with :mod:`urlparse`.
 
     Example: ::
 
-- cgit v1.2.1
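
As a standalone footnote to the series above, a minimal sketch of the
dispose hook on the rewritten container (illustrative only, not part of
any patch):

    from urllib3._collections import RecentlyUsedContainer

    closed = []
    d = RecentlyUsedContainer(maxsize=2, dispose_func=closed.append)

    d['a'] = 1
    d['b'] = 2
    d['c'] = 3   # Evicts the least-recently-used entry, 'a'.
    assert closed == [1]

    d.clear()    # Disposes of everything that remains.
    assert sorted(closed) == [1, 2, 3]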