diff options
author | Cory Benfield <lukasaoz@gmail.com> | 2016-09-05 13:17:53 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2016-09-05 13:17:53 +0100 |
commit | cc9de256b8f3fba20bde0fe4d39d5554f1bb30b7 (patch) | |
tree | d7cf20b9a26ae4c7d32c33b0b170d396aab32a40 | |
parent | 65b8c52c16dee5c3a523de2c1c21853ba0e581f2 (diff) | |
parent | 37565e8ed412da0dcc400e3f1ab624a34cc9f5c4 (diff) | |
download | urllib3-sessionmanager.tar.gz |
Merge pull request #904 from haikuginger/approved-session-managersessionmanager
Approved SessionManager changes
-rw-r--r-- | CONTRIBUTORS.txt | 2 | ||||
-rw-r--r-- | docs/advanced-usage.rst | 29 | ||||
-rw-r--r-- | dummyserver/handlers.py | 33 | ||||
-rw-r--r-- | test/test_collections.py | 6 | ||||
-rw-r--r-- | test/test_retry.py | 22 | ||||
-rw-r--r-- | test/test_sessionmanager.py | 177 | ||||
-rw-r--r-- | test/with_dummyserver/test_sessionmanager.py | 95 | ||||
-rw-r--r-- | urllib3/__init__.py | 5 | ||||
-rw-r--r-- | urllib3/_collections.py | 3 | ||||
-rw-r--r-- | urllib3/connectionpool.py | 32 | ||||
-rw-r--r-- | urllib3/poolmanager.py | 46 | ||||
-rw-r--r-- | urllib3/request.py | 63 | ||||
-rw-r--r-- | urllib3/response.py | 7 | ||||
-rw-r--r-- | urllib3/sessionmanager.py | 86 | ||||
-rw-r--r-- | urllib3/util/retry.py | 18 | ||||
-rw-r--r-- | urllib3/util/sessioncontext.py | 74 |
16 files changed, 639 insertions, 59 deletions
diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt index 9ef5c69f..922a079c 100644 --- a/CONTRIBUTORS.txt +++ b/CONTRIBUTORS.txt @@ -193,7 +193,7 @@ In chronological order: * Started Recipes documentation and added a recipe about handling concatenated gzip data in HTTP response * Jesse Shapiro <jesse@jesseshapiro.net> - * Various character-encoding fixes/tweaks + * Created SessionManager * Disabling IPv6 DNS when IPv6 connections not supported * David Foster <http://dafoster.net/> diff --git a/docs/advanced-usage.rst b/docs/advanced-usage.rst index f28c83c5..cb42ea0c 100644 --- a/docs/advanced-usage.rst +++ b/docs/advanced-usage.rst @@ -3,6 +3,35 @@ Advanced Usage .. currentmodule:: urllib3 +SessionManager +-------------- + +:doc:`SessionManager(...) <manager>` is currently urllib3's most abstract manager +level. Currently, it's in charge of creating and managing a :class:`~urllib3.util.context.SessionContext` +object, which in turn will extract and apply cookies to and from each HTTP request +and response that you make with that :class:`SessionManager`. + +By default, to create a `SessionManager`, you'll just create it; :class:`SessionManager` will handle +creating a :class:`PoolManager` to actually make HTTP requests; meanwhile, :class:`SessionManager` +will handle extracting and applying context to each request. + +You can also pass in another manager explicitly if you'd like different behavior: + +:: + + >>> proxy = urllib3.ProxyManager('http://localhost:3128/') + >>> session = urllib3.SessionManager(manager=proxy) + +If you want to customize the :class:`SessionContext` that your :class:`SessionManager` +is using, just pass your own when initializing it: + +:: + + >>> liberal = urllib3.util.sessioncontext.DefaultCookiePolicy.DomainLiberal + >>> liberal_policy = urllib3.util.sessioncontext.DefaultCookiePolicy(strict_ns_domain=liberal) + >>> cj = urllib3.util.sessioncontext.CookieJar(policy=liberal_policy) + >>> context = urllib3.SessionContext(cookie_jar=cj) + >>> session = urllib3.SessionManager(manager=urllib3.PoolManager(), context=context) Customizing pool behavior ------------------------- diff --git a/dummyserver/handlers.py b/dummyserver/handlers.py index d72a6bdd..5542fc93 100644 --- a/dummyserver/handlers.py +++ b/dummyserver/handlers.py @@ -260,6 +260,39 @@ class TestingApp(RequestHandler): return Response(status=status) + def set_cookie_on_client(self, request): + """ + Set a standard cookie on the response + """ + self.set_cookie('testing_cookie', 'test_cookie_value') + return Response('Attached a cookie!') + + def set_undomained_cookie_on_client(self, request): + """ + Sets a cookie on the response without a specific domain + """ + headers = [('Set-Cookie', 'testing_cookie=test_cookie_value')] + return Response(headers=headers) + + def verify_cookie(self, request): + """ + Verifies that we receive a cookie back + """ + cookie = self.get_cookie('testing_cookie') + if cookie == 'test_cookie_value': + return Response('Received cookie') + else: + return Response(str(cookie), status='400 Bad Request') + + def set_cookie_and_redirect(self, request): + """ + Sets a cookie on the response and redirects to a page to + check that the cookie was set + """ + self.set_cookie('testing_cookie', 'test_cookie_value') + headers = [('Location', '/verify_cookie')] + return Response(status='303 See Other', headers=headers) + def shutdown(self, request): sys.exit() diff --git a/test/test_collections.py b/test/test_collections.py index 9d72939c..d7882d95 100644 --- a/test/test_collections.py +++ b/test/test_collections.py @@ -144,6 +144,12 @@ class TestHTTPHeaderDict(unittest.TestCase): self.d = HTTPHeaderDict(Cookie='foo') self.d.add('cookie', 'bar') + def test_get_all(self): + self.assertEqual(self.d.get_all('cookie', None), ['foo', 'bar']) + + def test_get_all_with_default(self): + self.assertEqual(self.d.get_all('NOT THERE', {}), {}) + def test_create_from_kwargs(self): h = HTTPHeaderDict(ab=1, cd=2, ef=3, gh=4) self.assertEqual(len(h), 4) diff --git a/test/test_retry.py b/test/test_retry.py index b6015c13..86cbb348 100644 --- a/test/test_retry.py +++ b/test/test_retry.py @@ -239,3 +239,25 @@ class RetryTest(unittest.TestCase): self.assertEqual(retry.history, (RequestHistory('GET', '/test1', connection_error, None, None), RequestHistory('POST', '/test2', read_error, None, None), RequestHistory('GET', '/test3', None, 500, None))) + + +class RedirectTest(unittest.TestCase): + + def setUp(self): + self.retries = Retry() + + def test_redirect_method(self): + tests = [ + ('GET', 303, 'GET'), + ('HEAD', 303, 'HEAD'), + ('PUT', 303, 'GET'), + ('DELETE', 303, 'GET'), + ('POST', 303, 'GET'), + ('OPTIONS', 303, 'GET'), + ('POST', 301, 'GET'), + ('POST', 302, 'GET'), + ('OPTIONS', 301, 'OPTIONS'), + ('DELETE', 302, 'DELETE') + ] + for test in tests: + self.assertEqual(test[2], self.retries.redirect_method(*test[:2])) diff --git a/test/test_sessionmanager.py b/test/test_sessionmanager.py new file mode 100644 index 00000000..ae6893c1 --- /dev/null +++ b/test/test_sessionmanager.py @@ -0,0 +1,177 @@ +import unittest + +from urllib3.sessionmanager import SessionManager, ProxySessionManager +from urllib3.poolmanager import ProxyManager, PoolManager +from urllib3.util.sessioncontext import DefaultCookiePolicy, CookieJar, SessionContext +from urllib3.packages import six +from urllib3.response import HTTPResponse +from urllib3.request import Request + +cookielib = six.moves.http_cookiejar + + +class TestSessionManager(unittest.TestCase): + + def setUp(self): + self.manager = SessionManager() + + def test_set_cookie_policy(self): + """ + Test that we're able to set the policy on the SessionManager's CookieJar + """ + policy = cookielib.DefaultCookiePolicy() + self.assertNotEqual(policy, self.manager.context.cookie_jar._policy) + self.manager.context.cookie_jar.set_policy(policy) + self.assertTrue(policy is self.manager.context.cookie_jar._policy) + + def test_create_proxy_manager(self): + """ + Make sure that the ProxySessionManager has its `manager` attribute + set correctly + """ + manager = ProxySessionManager(proxy_url='http://none') + self.assertTrue(isinstance(manager.manager, ProxyManager)) + + def test_creates_pool_manager(self): + """ + Make sure that when we just create a SessionManager, the embedded manager + is of the PoolManager type. + """ + manager = SessionManager() + self.assertTrue(isinstance(manager.manager, PoolManager)) + + def test_with_external_jar(self): + """ + Make sure that when we pass a CookieJar in, we use it. + """ + this_policy = cookielib.DefaultCookiePolicy() + jar = CookieJar(policy=this_policy) + context = SessionContext(cookie_jar=jar) + manager = SessionManager(manager=PoolManager(), context=context) + self.assertTrue(manager.context.cookie_jar is jar) + + def test_with_own_manager(self): + """ + Make sure that when we pass our own manager instance in, + it gets used. + """ + manager = 18743 + sm = SessionManager(manager=manager) + self.assertEqual(manager, sm.manager) + + def test_with_own_manager_class(self): + """ + If we subclass SessionManager and give it a new manager class, + check that we instantiate that class on creation of a new instance. + """ + class PointlessSessionManager(SessionManager): + + manager_class = int + + psm = PointlessSessionManager() + self.assertEqual(psm.manager, 0) + +class TestSessionContext(unittest.TestCase): + + def setUp(self): + self.context = SessionContext() + + def test_extract_cookie(self): + + """ + Check to be sure that we're properly extracting cookies into the + SessionManager's jar, and that the cookie has the expected value. + """ + + expected_cookie = cookielib.Cookie( + version=0, name='cookiename', value='cookieval', port=None, + port_specified=False, domain='google.com', domain_specified=False, + domain_initial_dot=False, path='/', path_specified=False, + secure=False, expires=None, discard=True, comment=None, + comment_url=None, rest={}, rfc2109=False + ) + + self.assertFalse(self.context.cookie_jar) + req = Request(method='GET', url='http://google.com') + headers = {'Set-Cookie': 'cookiename=cookieval'} + resp = HTTPResponse(headers=headers) + self.context.extract_from(resp, req) + self.assertTrue(self.context.cookie_jar) + for each in self.context.cookie_jar: + self.assertEqual(each.__dict__, expected_cookie.__dict__) + + def test_apply_cookie(self): + """ + Ensure that we're setting relevant cookies on outgoing requests + """ + req = Request(method='GET', url='http://google.com') + headers = {'Set-Cookie': 'cookiename=cookieval'} + resp = HTTPResponse(headers=headers) + self.context.extract_from(resp, req) + req = Request(method='GET', url='http://google.com') + self.context.apply_to(req) + self.assertEqual(req.headers.get('Cookie'), 'cookiename=cookieval') + + def test_no_apply_cookie(self): + """ + Ensure that we don't set cookies on requests to another domain + """ + req = Request(method='GET', url='http://google.com') + headers = {'Set-Cookie': 'cookiename=cookieval'} + resp = HTTPResponse(headers=headers) + self.context.extract_from(resp, req) + self.assertTrue(self.context.cookie_jar) + req = Request(method='GET', url='http://evil.com') + self.context.apply_to(req) + self.assertTrue(req.headers.get('Cookie', None) is None) + + def test_unacceptable_cookie(self): + """ + Ensure that we don't accept cookies for domains other than the one + the request was sent to + """ + self.assertFalse(self.context.cookie_jar) + req = Request(method='GET', url='http://evil.com') + headers = {'Set-Cookie': 'cookiename=cookieval; domain=.google.com'} + resp = HTTPResponse(headers=headers) + self.context.extract_from(resp, req) + self.assertFalse(self.context.cookie_jar) + + def test_parent_domain(self): + """ + Ensure that cookies set by child domains are sent to their parent domains + """ + req = Request(method='GET', url='http://www.google.com') + headers = {'Set-Cookie': 'cookiename=cookieval; domain=.google.com'} + resp = HTTPResponse(headers=headers) + self.context.extract_from(resp, req) + req = Request(method='GET', url='http://google.com') + self.context.apply_to(req) + self.assertEqual(req.headers.get('Cookie'), 'cookiename=cookieval') + + def test_sibling_domain(self): + """ + Ensure that cookies set by sibling subdomains are not sent to + other siblings under the same domain automatically. + """ + req = Request(method='GET', url='http://ww1.google.com') + headers = {'Set-Cookie': 'cookiename=cookieval'} + resp = HTTPResponse(headers=headers) + self.context.extract_from(resp, req) + req = Request(method='GET', url='http://ww2.google.com') + self.context.apply_to(req) + self.assertTrue(req.headers.get('Cookie') is None) + + def test_sibling_domain_with_wildcard(self): + """ + Ensure that when sibling domains specify a parent domain to send a cookie + to, that cookie is also sent to other sibling domains under the + same parent domain. + """ + req = Request(method='GET', url='http://ww1.google.com') + headers = {'Set-Cookie': 'cookiename=cookieval; domain=.google.com'} + resp = HTTPResponse(headers=headers) + self.context.extract_from(resp, req) + req = Request(method='GET', url='http://ww2.google.com') + self.context.apply_to(req) + self.assertEqual(req.headers.get('Cookie'), 'cookiename=cookieval') diff --git a/test/with_dummyserver/test_sessionmanager.py b/test/with_dummyserver/test_sessionmanager.py new file mode 100644 index 00000000..165953da --- /dev/null +++ b/test/with_dummyserver/test_sessionmanager.py @@ -0,0 +1,95 @@ +from urllib3.sessionmanager import SessionManager +from urllib3.exceptions import MaxRetryError +from urllib3.poolmanager import PoolManager + +from urllib3.packages.six import b, u +from urllib3.packages import six + +from dummyserver.testcase import HTTPDummyServerTestCase + + +class TestSessionManager(HTTPDummyServerTestCase): + + def create_url(self, route): + return 'http://' + self.host + ':' + str(self.port) + route + + def create_alternate_url(self, route): + return 'http://' + self.host_alt + ':' + str(self.port) + route + + def setUp(self): + self.manager = SessionManager() + + def test_cookie_handler(self): + """ + Test that the client gets, sets, and returns the appropriate cookie + """ + route = self.create_url('/set_cookie_on_client') + r = self.manager.request('GET', route) + self.assertEqual(r.status, 200) + self.assertTrue(self.manager.context.cookie_jar) + route = self.create_url('/verify_cookie') + r = self.manager.request('GET', route) + self.assertEqual(r.data, b'Received cookie') + + def test_restrict_undomained_cookie_by_host(self): + """ + Test that undomained cookies are only sent to the domain that they + were originally received from + """ + route = self.create_url('/set_undomained_cookie_on_client') + r = self.manager.request('GET', route) + self.assertEqual(r.status, 200) + self.assertTrue(self.manager.context.cookie_jar) + route = self.create_alternate_url('/verify_cookie') + r = self.manager.request('GET', route) + self.assertEqual(r.status, 400) + + def test_merge_cookie_header(self): + """ + Test that cookies passed in the headers argument are successfully + merged, rather than skipped + """ + self.assertFalse(self.manager.context.cookie_jar) + headers = {'Cookie': 'testing_cookie=test_cookie_value'} + route = self.create_url('/verify_cookie') + r = self.manager.request('GET', route, headers=headers) + self.assertEqual(r.data, b'Received cookie') + + def test_instance_headers(self): + """ + Test that cookies set on the SessionManager instance are + successfully merged, rather than skipped + """ + headers = {'Cookie': 'testing_cookie=test_cookie_value'} + manager = SessionManager(headers=headers) + route = self.create_url('/verify_cookie') + r = manager.request('GET', route) + self.assertEqual(r.data, b'Received cookie') + + def test_collect_cookie_on_redirect(self): + """ + Test that when we're redirected to a different URL, we + still save the cookie and transmit it when relevant. + """ + route = self.create_url('/set_cookie_and_redirect') + r = self.manager.request('GET', route) + self.assertEqual(r.data, b'Received cookie') + + def test_no_retry(self): + """ + Test that the SessionManager follows the pattern of + raising MaxRetryError when we don't have any retries left + """ + def execute_query(): + route = self.create_url('/set_cookie_and_redirect') + r = self.manager.request('GET', route, retries=0) + self.assertRaises(MaxRetryError, execute_query) + + def test_no_redirect(self): + """ + Test that SessionManager follows the pattern of returning + the redirect status code when redirect is set to False. + """ + route = self.create_url('/set_cookie_and_redirect') + r = self.manager.request('GET', route, redirect=False) + self.assertEqual(r.status, 303) diff --git a/urllib3/__init__.py b/urllib3/__init__.py index 49b9dc66..b0a4da46 100644 --- a/urllib3/__init__.py +++ b/urllib3/__init__.py @@ -18,7 +18,8 @@ from .util.request import make_headers from .util.url import get_host from .util.timeout import Timeout from .util.retry import Retry - +from .util.sessioncontext import SessionContext +from .sessionmanager import SessionManager # Set default logging handler to avoid "No handler found" warnings. import logging @@ -38,6 +39,7 @@ __all__ = ( 'HTTPSConnectionPool', 'PoolManager', 'ProxyManager', + 'SessionManager', 'HTTPResponse', 'Retry', 'Timeout', @@ -48,6 +50,7 @@ __all__ = ( 'get_host', 'make_headers', 'proxy_from_url', + 'SessionContext' ) logging.getLogger(__name__).addHandler(NullHandler()) diff --git a/urllib3/_collections.py b/urllib3/_collections.py index 77cee017..9edce3c9 100644 --- a/urllib3/_collections.py +++ b/urllib3/_collections.py @@ -272,6 +272,9 @@ class HTTPHeaderDict(MutableMapping): getallmatchingheaders = getlist iget = getlist + def get_all(self, key, failobj=None): + return self.getlist(key) or failobj + def __repr__(self): return "%s(%s)" % (type(self).__name__, dict(self.itermerged())) diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py index d7362d5d..e8c02c89 100644 --- a/urllib3/connectionpool.py +++ b/urllib3/connectionpool.py @@ -453,7 +453,8 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): """ Get a connection from the pool and perform an HTTP request. This is the lowest level call for making a request, so you'll need to specify all - the raw details. + the raw details. To make HTTP calls, use :func:`urllib3.request.RequestMethods.request` + instead. .. note:: @@ -671,28 +672,13 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): release_conn=release_conn, **response_kw) # Handle redirect? - redirect_location = redirect and response.get_redirect_location() - if redirect_location: - if response.status == 303: - method = 'GET' - - try: - retries = retries.increment(method, url, response=response, _pool=self) - except MaxRetryError: - if retries.raise_on_redirect: - # Release the connection for this response, since we're not - # returning it to be released manually. - response.release_conn() - raise - return response - - log.info("Redirecting %s -> %s", url, redirect_location) - return self.urlopen( - method, redirect_location, body, headers, - retries=retries, redirect=redirect, - assert_same_host=assert_same_host, - timeout=timeout, pool_timeout=pool_timeout, - release_conn=release_conn, **response_kw) + if redirect and response.get_redirect_location(): + return self.redirect( + response=response, method=method, retries=retries, + url=url, headers=headers, body=body, + assert_same_host=assert_same_host, timeout=timeout, + pool_timeout=pool_timeout, release_conn=release_conn, + redirect=redirect, **response_kw) # Check if we should retry the HTTP response. if retries.is_forced_retry(method, status_code=response.status): diff --git a/urllib3/poolmanager.py b/urllib3/poolmanager.py index 276b54dd..992a281f 100644 --- a/urllib3/poolmanager.py +++ b/urllib3/poolmanager.py @@ -6,8 +6,7 @@ import logging from ._collections import RecentlyUsedContainer from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool from .connectionpool import port_by_scheme -from .exceptions import LocationValueError, MaxRetryError, ProxySchemeUnknown -from .packages.six.moves.urllib.parse import urljoin +from .exceptions import LocationValueError, ProxySchemeUnknown from .request import RequestMethods from .util.url import parse_url from .util.retry import Retry @@ -221,7 +220,7 @@ class PoolManager(RequestMethods): u = parse_url(url) return self.connection_from_host(u.host, port=u.port, scheme=u.scheme) - def urlopen(self, method, url, redirect=True, **kw): + def urlopen(self, method, url, redirect=True, retries=None, **kw): """ Same as :meth:`urllib3.connectionpool.HTTPConnectionPool.urlopen` with custom cross-host redirect logic and only sends the request-uri @@ -229,47 +228,30 @@ class PoolManager(RequestMethods): The given ``url`` parameter must be absolute, such that an appropriate :class:`urllib3.connectionpool.ConnectionPool` can be chosen for it. + + This is a low-level method; use :func:`urllib3.request.RequestMethods.request` + instead. """ u = parse_url(url) conn = self.connection_from_host(u.host, port=u.port, scheme=u.scheme) - kw['assert_same_host'] = False kw['redirect'] = False if 'headers' not in kw: kw['headers'] = self.headers - if self.proxy is not None and u.scheme == "http": - response = conn.urlopen(method, url, **kw) - else: - response = conn.urlopen(method, u.request_uri, **kw) - - redirect_location = redirect and response.get_redirect_location() - if not redirect_location: - return response - - # Support relative URLs for redirecting. - redirect_location = urljoin(url, redirect_location) - - # RFC 7231, Section 6.4.4 - if response.status == 303: - method = 'GET' - - retries = kw.get('retries') if not isinstance(retries, Retry): retries = Retry.from_int(retries, redirect=redirect) - try: - retries = retries.increment(method, url, response=response, _pool=conn) - except MaxRetryError: - if retries.raise_on_redirect: - raise - return response - - kw['retries'] = retries - kw['redirect'] = redirect + if self.proxy is not None and u.scheme == "http": + response = conn.urlopen(method, url, retries=retries, **kw) + else: + response = conn.urlopen(method, u.request_uri, retries=retries, **kw) - log.info("Redirecting %s -> %s", url, redirect_location) - return self.urlopen(method, redirect_location, **kw) + if redirect and response.get_redirect_location(): + kw['redirect'] = redirect + return self.redirect(response=response, method=method, retries=retries, + url=url, pool=conn, **kw) + return response class ProxyManager(PoolManager): diff --git a/urllib3/request.py b/urllib3/request.py index c0fddff0..f1171498 100644 --- a/urllib3/request.py +++ b/urllib3/request.py @@ -1,10 +1,50 @@ from __future__ import absolute_import +import logging from .filepost import encode_multipart_formdata +from .exceptions import MaxRetryError + +from .packages.six.moves.urllib.request import Request as _Request from .packages.six.moves.urllib.parse import urlencode +from .packages.six.moves.urllib.parse import urljoin +__all__ = ['RequestMethods', 'Request'] -__all__ = ['RequestMethods'] +log = logging.getLogger(__name__) + + +class Request(_Request): + """ + Currently used as a shim to allow us to work with the stdlib cookie + handling, which expects a `urllib.request.Request`-like object. + """ + def __init__(self, *args, **kwargs): + del kwargs['method'] + # Request is an old-style class in Python 2 + _Request.__init__(self, *args, **kwargs) + self._cookies = [] + # If there's an existing Cookie header, let's split it up + # so we can handle it similarly to those we get from a jar. + if self.has_header('Cookie'): + self._cookies = self.get_header('Cookie').split('; ') + + def add_cookies(self, *cookies): + """ + We keep track of individual cookies so that we can keep them from + duplicating, and re-render the Cookie header when we get new ones. + """ + for each in cookies: + if each not in self._cookies: + self._cookies.append(each) + self.add_header('Cookie', '; '.join(self._cookies)) + + def get_all_headers(self): + """ + Returns a complete set of all headers + """ + headers = self.unredirected_hdrs.copy() + headers.update(self.headers) + return headers class RequestMethods(object): @@ -146,3 +186,24 @@ class RequestMethods(object): extra_kw.update(urlopen_kw) return self.urlopen(method, url, **extra_kw) + + def redirect(self, response, method, retries, **kwargs): + """ + Abstracts the redirect process to be used from any :class:`RequestMethods` object + """ + url = kwargs.pop('url', '') + redirect_location = urljoin(url, response.get_redirect_location()) + method = retries.redirect_method(method, response.status) + pool = kwargs.pop('pool', self) + try: + retries = retries.increment(method, url, response=response, _pool=pool) + except MaxRetryError: + if retries.raise_on_redirect: + # Release the connection for this response, since we're not + # returning it to be released manually. + response.release_conn() + raise + return response + + log.info("Redirecting %s -> %s", url, redirect_location) + return self.urlopen(method=method, url=redirect_location, retries=retries, **kwargs) diff --git a/urllib3/response.py b/urllib3/response.py index be2accda..5b8d28f6 100644 --- a/urllib3/response.py +++ b/urllib3/response.py @@ -187,6 +187,13 @@ class HTTPResponse(io.IOBase): def connection(self): return self._connection + def info(self): + """ + This is a compatibility method that's only used by urllib3's cookie + handlers; don't use it in your own code. + """ + return self.headers + def tell(self): """ Obtain the number of bytes pulled over the wire so far. May differ from diff --git a/urllib3/sessionmanager.py b/urllib3/sessionmanager.py new file mode 100644 index 00000000..3df71e6d --- /dev/null +++ b/urllib3/sessionmanager.py @@ -0,0 +1,86 @@ +from .util.retry import Retry +from .poolmanager import PoolManager, ProxyManager +from .request import RequestMethods, Request +from .util.sessioncontext import SessionContext + + +class SessionManager(RequestMethods): + """ + Allows arbitrary requests while maintaining session context across + those requests. Currently, that context consists of automatic + cookie storage and retrieval. + + :param manager: + An appropriate :class:`urllib3.poolmanager.PoolManager` or + :class:`urllib3.poolmanager.ProxyManager` object + to handle HTTP requests for the SessionManager + + :param context: + A predefined :class:`urllib3.util.context.SessionContext` object to use in the session; + if not provided, a new one will be created. + + :param headers: + Headers to include with all requests, unless other + headers are given explicitly. + + Example:: + + >>> manager = SessionManager(PoolManager()) + >>> manager.context.cookie_jar + <CookieJar[]> + >>> len(manager.context.cookie_jar) + 0 + >>> manager.request('GET', 'http://google.com') + >>> manager.request('GET', 'http://yahoo.com') + >>> len(manager.context.cookie_jar) + 2 + + """ + + manager_class = PoolManager + + def __init__(self, context=None, headers=None, manager=None, **manager_kw): + super(SessionManager, self).__init__(headers=headers) + self.manager = self.manager_class(**manager_kw) if manager is None else manager + self.context = context or SessionContext() + + def urlopen(self, method, url, redirect=True, retries=None, **kw): + """ + Same as :meth:`urllib3.poolmanager.PoolManager.urlopen` with added + request-context-managing special sauce. The received ``url`` param + must be an absolute path. + + This is a low-level method; use :func:`urllib3.request.RequestMethods.request` + instead. + """ + headers = kw.pop('headers', self.headers) + + if not isinstance(retries, Retry): + retries = Retry.from_int(retries, redirect=redirect) + + # Build a mock Request object to work with + request_object = Request(url=url, method=method, headers=headers) + self.context.apply_to(request_object) + modified_headers = request_object.get_all_headers() + + # Ensure that redirects happen at this level only + kw['redirect'] = False + kw['headers'] = modified_headers + + response = self.manager.urlopen(method, url, retries=retries, **kw) + + # Retrieve any context from the response + self.context.extract_from(response, request_object) + + # Redirect as necessary, and return. + if redirect and response.get_redirect_location(): + kw['redirect'] = redirect + kw['headers'] = headers + return self.redirect(response=response, method=method, + retries=retries, url=url, **kw) + return response + + +class ProxySessionManager(SessionManager): + + manager_class = ProxyManager diff --git a/urllib3/util/retry.py b/urllib3/util/retry.py index f8f21810..edbfc873 100644 --- a/urllib3/util/retry.py +++ b/urllib3/util/retry.py @@ -13,7 +13,6 @@ from ..exceptions import ( ) from ..packages import six - log = logging.getLogger(__name__) # Data structure for representing the metadata of requests that result in a retry. @@ -21,6 +20,9 @@ RequestHistory = namedtuple('RequestHistory', ["method", "url", "error", "status", "redirect_location"]) +_POST_REDIRECT_DOWNGRADE_STATUSES = set([301, 302, 303]) + + class Retry(object): """ Retry configuration. @@ -311,6 +313,20 @@ class Retry(object): 'read={self.read}, redirect={self.redirect})').format( cls=type(self), self=self) + def redirect_method(self, method, status): + """ + Assuming we're doing a redirect, should we change HTTP methods? + """ + if method == 'GET': + return method + if method == 'HEAD': + return method + if status == 303: + return 'GET' + if method == 'POST' and status in _POST_REDIRECT_DOWNGRADE_STATUSES: + return 'GET' + return method + # For backwards compatibility (equivalent to pre-v1.9): Retry.DEFAULT = Retry(3) diff --git a/urllib3/util/sessioncontext.py b/urllib3/util/sessioncontext.py new file mode 100644 index 00000000..3b323dca --- /dev/null +++ b/urllib3/util/sessioncontext.py @@ -0,0 +1,74 @@ +import time + +from ..packages.six.moves.http_cookiejar import ( + DefaultCookiePolicy as PythonCookiePolicy, + CookieJar as PythonCookieJar +) + + +class DefaultCookiePolicy(PythonCookiePolicy): + """ + The default urllib3 cookie policy - similar to the Python default, + but :param:`strict_ns_domain` is set to `DomainStrict` for security. + """ + def __init__(self, *args, **kwargs): + policy = PythonCookiePolicy.DomainStrict + kwargs.setdefault('strict_ns_domain', policy) + # Old-style class on Python 2 + PythonCookiePolicy.__init__(self, *args, **kwargs) + + +class CookieJar(PythonCookieJar): + + def __init__(self, policy=None): + if policy is None: + policy = DefaultCookiePolicy() + # Old-style class on Python 2 + PythonCookieJar.__init__(self, policy=policy) + + def add_cookie_header(self, request): + """ + Add correct Cookie: header to Request object. + This is copied from and slightly modified from the stdlib version. + """ + with self._cookies_lock: + self._policy._now = self._now = int(time.time()) + cookies = self._cookies_for_request(request) + attrs = self._cookie_attrs(cookies) + # This is a modification; stdlib sets the entire cookie header + # and only if it's not there already. We're less picky. + if attrs: + request.add_cookies(*attrs) + + self.clear_expired_cookies() + + +class SessionContext(object): + """ + Extensible class encapsulated by :class:`.SessionManager`; currently + used to manage cookies. + + :param cookie_jar: + Used to pass a prebuilt :class:`CookieJar` into the + context to be used instead of an empty jar. + """ + + def __init__(self, cookie_jar=None): + # We unfortunately have to do it this way; empty cookie jars + # evaluate as falsey. + if cookie_jar is not None: + self.cookie_jar = cookie_jar + else: + self.cookie_jar = CookieJar() + + def apply_to(self, request): + """ + Applies changes from the context to the supplied :class:`.request.Request`. + """ + self.cookie_jar.add_cookie_header(request) + + def extract_from(self, response, request): + """ + Extracts context modifications (new cookies, etc) from the response and stores them. + """ + self.cookie_jar.extract_cookies(response, request) |