From 9e03ca2bf52c2093560905bbe2c4dfcdede29297 Mon Sep 17 00:00:00 2001 From: Jesse Shapiro Date: Sat, 11 Jun 2016 17:24:14 -0400 Subject: Implementing SessionManager; documenting; testing; moving redirect logic to RequestMethods --- CONTRIBUTORS.txt | 2 +- docs/advanced-usage.rst | 29 ++++++++ dummyserver/handlers.py | 33 +++++++++ test/test_collections.py | 6 ++ test/test_retry.py | 22 ++++++ test/test_sessionmanager.py | 105 +++++++++++++++++++++++++++ test/with_dummyserver/test_sessionmanager.py | 68 +++++++++++++++++ urllib3/__init__.py | 5 +- urllib3/_collections.py | 3 + urllib3/connectionpool.py | 32 +++----- urllib3/poolmanager.py | 43 ++++------- urllib3/request.py | 63 +++++++++++++++- urllib3/response.py | 4 + urllib3/sessionmanager.py | 77 ++++++++++++++++++++ urllib3/util/retry.py | 15 +++- urllib3/util/sessioncontext.py | 75 +++++++++++++++++++ 16 files changed, 525 insertions(+), 57 deletions(-) create mode 100644 test/test_sessionmanager.py create mode 100644 test/with_dummyserver/test_sessionmanager.py create mode 100644 urllib3/sessionmanager.py create mode 100644 urllib3/util/sessioncontext.py diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt index 9ef5c69f..922a079c 100644 --- a/CONTRIBUTORS.txt +++ b/CONTRIBUTORS.txt @@ -193,7 +193,7 @@ In chronological order: * Started Recipes documentation and added a recipe about handling concatenated gzip data in HTTP response * Jesse Shapiro - * Various character-encoding fixes/tweaks + * Created SessionManager * Disabling IPv6 DNS when IPv6 connections not supported * David Foster diff --git a/docs/advanced-usage.rst b/docs/advanced-usage.rst index f28c83c5..cb42ea0c 100644 --- a/docs/advanced-usage.rst +++ b/docs/advanced-usage.rst @@ -3,6 +3,35 @@ Advanced Usage .. currentmodule:: urllib3 +SessionManager +-------------- + +:doc:`SessionManager(...) ` is currently urllib3's most abstract manager +level. Currently, it's in charge of creating and managing a :class:`~urllib3.util.context.SessionContext` +object, which in turn will extract and apply cookies to and from each HTTP request +and response that you make with that :class:`SessionManager`. + +By default, to create a `SessionManager`, you'll just create it; :class:`SessionManager` will handle +creating a :class:`PoolManager` to actually make HTTP requests; meanwhile, :class:`SessionManager` +will handle extracting and applying context to each request. + +You can also pass in another manager explicitly if you'd like different behavior: + +:: + + >>> proxy = urllib3.ProxyManager('http://localhost:3128/') + >>> session = urllib3.SessionManager(manager=proxy) + +If you want to customize the :class:`SessionContext` that your :class:`SessionManager` +is using, just pass your own when initializing it: + +:: + + >>> liberal = urllib3.util.sessioncontext.DefaultCookiePolicy.DomainLiberal + >>> liberal_policy = urllib3.util.sessioncontext.DefaultCookiePolicy(strict_ns_domain=liberal) + >>> cj = urllib3.util.sessioncontext.CookieJar(policy=liberal_policy) + >>> context = urllib3.SessionContext(cookie_jar=cj) + >>> session = urllib3.SessionManager(manager=urllib3.PoolManager(), context=context) Customizing pool behavior ------------------------- diff --git a/dummyserver/handlers.py b/dummyserver/handlers.py index d72a6bdd..5542fc93 100644 --- a/dummyserver/handlers.py +++ b/dummyserver/handlers.py @@ -260,6 +260,39 @@ class TestingApp(RequestHandler): return Response(status=status) + def set_cookie_on_client(self, request): + """ + Set a standard cookie on the response + """ + self.set_cookie('testing_cookie', 'test_cookie_value') + return Response('Attached a cookie!') + + def set_undomained_cookie_on_client(self, request): + """ + Sets a cookie on the response without a specific domain + """ + headers = [('Set-Cookie', 'testing_cookie=test_cookie_value')] + return Response(headers=headers) + + def verify_cookie(self, request): + """ + Verifies that we receive a cookie back + """ + cookie = self.get_cookie('testing_cookie') + if cookie == 'test_cookie_value': + return Response('Received cookie') + else: + return Response(str(cookie), status='400 Bad Request') + + def set_cookie_and_redirect(self, request): + """ + Sets a cookie on the response and redirects to a page to + check that the cookie was set + """ + self.set_cookie('testing_cookie', 'test_cookie_value') + headers = [('Location', '/verify_cookie')] + return Response(status='303 See Other', headers=headers) + def shutdown(self, request): sys.exit() diff --git a/test/test_collections.py b/test/test_collections.py index 9d72939c..d7882d95 100644 --- a/test/test_collections.py +++ b/test/test_collections.py @@ -144,6 +144,12 @@ class TestHTTPHeaderDict(unittest.TestCase): self.d = HTTPHeaderDict(Cookie='foo') self.d.add('cookie', 'bar') + def test_get_all(self): + self.assertEqual(self.d.get_all('cookie', None), ['foo', 'bar']) + + def test_get_all_with_default(self): + self.assertEqual(self.d.get_all('NOT THERE', {}), {}) + def test_create_from_kwargs(self): h = HTTPHeaderDict(ab=1, cd=2, ef=3, gh=4) self.assertEqual(len(h), 4) diff --git a/test/test_retry.py b/test/test_retry.py index b6015c13..86cbb348 100644 --- a/test/test_retry.py +++ b/test/test_retry.py @@ -239,3 +239,25 @@ class RetryTest(unittest.TestCase): self.assertEqual(retry.history, (RequestHistory('GET', '/test1', connection_error, None, None), RequestHistory('POST', '/test2', read_error, None, None), RequestHistory('GET', '/test3', None, 500, None))) + + +class RedirectTest(unittest.TestCase): + + def setUp(self): + self.retries = Retry() + + def test_redirect_method(self): + tests = [ + ('GET', 303, 'GET'), + ('HEAD', 303, 'HEAD'), + ('PUT', 303, 'GET'), + ('DELETE', 303, 'GET'), + ('POST', 303, 'GET'), + ('OPTIONS', 303, 'GET'), + ('POST', 301, 'GET'), + ('POST', 302, 'GET'), + ('OPTIONS', 301, 'OPTIONS'), + ('DELETE', 302, 'DELETE') + ] + for test in tests: + self.assertEqual(test[2], self.retries.redirect_method(*test[:2])) diff --git a/test/test_sessionmanager.py b/test/test_sessionmanager.py new file mode 100644 index 00000000..d0a9e32b --- /dev/null +++ b/test/test_sessionmanager.py @@ -0,0 +1,105 @@ +import unittest + +from urllib3.sessionmanager import SessionManager +from urllib3.poolmanager import ProxyManager, PoolManager +from urllib3.util.sessioncontext import DefaultCookiePolicy, CookieJar, SessionContext +from urllib3.packages import six +from urllib3.response import HTTPResponse +from urllib3.request import Request + +cookielib = six.moves.http_cookiejar + + +class TestSessionManager(unittest.TestCase): + + def setUp(self): + self.manager = SessionManager(PoolManager()) + + def test_set_cookie_policy(self): + policy = cookielib.DefaultCookiePolicy() + self.assertNotEqual(policy, self.manager.context.cookie_jar._policy) + self.manager.context.cookie_jar.set_policy(policy) + self.assertTrue(policy is self.manager.context.cookie_jar._policy) + + def test_create_proxy_manager(self): + manager = SessionManager(ProxyManager(proxy_url='http://none')) + self.assertTrue(isinstance(manager.manager, ProxyManager)) + + def test_creates_pool_manager(self): + self.assertTrue(isinstance(self.manager.manager, PoolManager)) + + def test_with_external_jar(self): + this_policy = cookielib.DefaultCookiePolicy() + jar = CookieJar(policy=this_policy) + context = SessionContext(cookie_jar=jar) + manager = SessionManager(PoolManager(), context=context) + print(dir(manager.context)) + self.assertTrue(manager.context.cookie_jar is jar) + + +class TestSessionContext(unittest.TestCase): + + def setUp(self): + self.context = SessionContext() + + def test_extract_cookie(self): + self.assertFalse(self.context.cookie_jar) + req = Request(method='GET', url='http://google.com') + headers = {'Set-Cookie': 'cookiename=cookieval'} + resp = HTTPResponse(headers=headers) + self.context.extract_from(resp, req) + self.assertTrue(self.context.cookie_jar) + + def test_apply_cookie(self): + req = Request(method='GET', url='http://google.com') + headers = {'Set-Cookie': 'cookiename=cookieval'} + resp = HTTPResponse(headers=headers) + self.context.extract_from(resp, req) + req = Request(method='GET', url='http://google.com') + self.context.apply_to(req) + self.assertEqual(req.headers.get('Cookie'), 'cookiename=cookieval') + + def test_no_apply_cookie(self): + req = Request(method='GET', url='http://google.com') + headers = {'Set-Cookie': 'cookiename=cookieval'} + resp = HTTPResponse(headers=headers) + self.context.extract_from(resp, req) + self.assertTrue(self.context.cookie_jar) + req = Request(method='GET', url='http://evil.com') + self.context.apply_to(req) + self.assertTrue(req.headers.get('Cookie', None) is None) + + def test_unacceptable_cookie(self): + self.assertFalse(self.context.cookie_jar) + req = Request(method='GET', url='http://evil.com') + headers = {'Set-Cookie': 'cookiename=cookieval; domain=.google.com'} + resp = HTTPResponse(headers=headers) + self.context.extract_from(resp, req) + self.assertFalse(self.context.cookie_jar) + + def test_parent_domain(self): + req = Request(method='GET', url='http://www.google.com') + headers = {'Set-Cookie': 'cookiename=cookieval; domain=.google.com'} + resp = HTTPResponse(headers=headers) + self.context.extract_from(resp, req) + req = Request(method='GET', url='http://google.com') + self.context.apply_to(req) + self.assertEqual(req.headers.get('Cookie'), 'cookiename=cookieval') + + def test_sibling_domain(self): + req = Request(method='GET', url='http://ww1.google.com') + headers = {'Set-Cookie': 'cookiename=cookieval'} + resp = HTTPResponse(headers=headers) + self.context.extract_from(resp, req) + req = Request(method='GET', url='http://ww2.google.com') + self.context.apply_to(req) + self.assertTrue(req.headers.get('Cookie') is None) + + def test_sibling_domain_with_wildcard(self): + req = Request(method='GET', url='http://ww1.google.com') + headers = {'Set-Cookie': 'cookiename=cookieval; domain=.google.com'} + resp = HTTPResponse(headers=headers) + self.context.extract_from(resp, req) + req = Request(method='GET', url='http://ww2.google.com') + self.context.apply_to(req) + self.assertEqual(req.headers.get('Cookie'), 'cookiename=cookieval') diff --git a/test/with_dummyserver/test_sessionmanager.py b/test/with_dummyserver/test_sessionmanager.py new file mode 100644 index 00000000..d1fb325e --- /dev/null +++ b/test/with_dummyserver/test_sessionmanager.py @@ -0,0 +1,68 @@ +from urllib3.sessionmanager import SessionManager +from urllib3.exceptions import MaxRetryError +from urllib3.poolmanager import PoolManager + +from urllib3.packages.six import b, u +from urllib3.packages import six + +from dummyserver.testcase import HTTPDummyServerTestCase + + +class TestSessionManager(HTTPDummyServerTestCase): + + def create_url(self, route): + return 'http://' + self.host + ':' + str(self.port) + route + + def create_alternate_url(self, route): + return 'http://' + self.host_alt + ':' + str(self.port) + route + + def setUp(self): + self.manager = SessionManager(PoolManager()) + + def test_cookie_handler(self): + route = self.create_url('/set_cookie_on_client') + r = self.manager.request('GET', route) + self.assertEqual(r.status, 200) + self.assertTrue(self.manager.context.cookie_jar) + route = self.create_url('/verify_cookie') + r = self.manager.request('GET', route) + self.assertEqual(r.data, b'Received cookie') + + def test_restrict_undomained_cookie_by_host(self): + route = self.create_url('/set_undomained_cookie_on_client') + r = self.manager.request('GET', route) + self.assertEqual(r.status, 200) + self.assertTrue(self.manager.context.cookie_jar) + route = self.create_alternate_url('/verify_cookie') + r = self.manager.request('GET', route) + self.assertEqual(r.status, 400) + + def test_merge_cookie_header(self): + self.assertFalse(self.manager.context.cookie_jar) + headers = {'Cookie': 'testing_cookie=test_cookie_value'} + route = self.create_url('/verify_cookie') + r = self.manager.request('GET', route, headers=headers) + self.assertEqual(r.data, b'Received cookie') + + def test_instance_headers(self): + headers = {'Cookie': 'testing_cookie=test_cookie_value'} + manager = SessionManager(PoolManager(), headers=headers) + route = self.create_url('/verify_cookie') + r = manager.request('GET', route) + self.assertEqual(r.data, b'Received cookie') + + def test_collect_cookie_on_redirect(self): + route = self.create_url('/set_cookie_and_redirect') + r = self.manager.request('GET', route) + self.assertEqual(r.data, b'Received cookie') + + def test_no_retry(self): + def execute_query(): + route = self.create_url('/set_cookie_and_redirect') + r = self.manager.request('GET', route, retries=0) + self.assertRaises(MaxRetryError, execute_query) + + def test_no_redirect(self): + route = self.create_url('/set_cookie_and_redirect') + r = self.manager.request('GET', route, redirect=False) + self.assertEqual(r.status, 303) diff --git a/urllib3/__init__.py b/urllib3/__init__.py index 49b9dc66..b0a4da46 100644 --- a/urllib3/__init__.py +++ b/urllib3/__init__.py @@ -18,7 +18,8 @@ from .util.request import make_headers from .util.url import get_host from .util.timeout import Timeout from .util.retry import Retry - +from .util.sessioncontext import SessionContext +from .sessionmanager import SessionManager # Set default logging handler to avoid "No handler found" warnings. import logging @@ -38,6 +39,7 @@ __all__ = ( 'HTTPSConnectionPool', 'PoolManager', 'ProxyManager', + 'SessionManager', 'HTTPResponse', 'Retry', 'Timeout', @@ -48,6 +50,7 @@ __all__ = ( 'get_host', 'make_headers', 'proxy_from_url', + 'SessionContext' ) logging.getLogger(__name__).addHandler(NullHandler()) diff --git a/urllib3/_collections.py b/urllib3/_collections.py index 77cee017..9edce3c9 100644 --- a/urllib3/_collections.py +++ b/urllib3/_collections.py @@ -272,6 +272,9 @@ class HTTPHeaderDict(MutableMapping): getallmatchingheaders = getlist iget = getlist + def get_all(self, key, failobj=None): + return self.getlist(key) or failobj + def __repr__(self): return "%s(%s)" % (type(self).__name__, dict(self.itermerged())) diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py index d7362d5d..e8c02c89 100644 --- a/urllib3/connectionpool.py +++ b/urllib3/connectionpool.py @@ -453,7 +453,8 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): """ Get a connection from the pool and perform an HTTP request. This is the lowest level call for making a request, so you'll need to specify all - the raw details. + the raw details. To make HTTP calls, use :func:`urllib3.request.RequestMethods.request` + instead. .. note:: @@ -671,28 +672,13 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): release_conn=release_conn, **response_kw) # Handle redirect? - redirect_location = redirect and response.get_redirect_location() - if redirect_location: - if response.status == 303: - method = 'GET' - - try: - retries = retries.increment(method, url, response=response, _pool=self) - except MaxRetryError: - if retries.raise_on_redirect: - # Release the connection for this response, since we're not - # returning it to be released manually. - response.release_conn() - raise - return response - - log.info("Redirecting %s -> %s", url, redirect_location) - return self.urlopen( - method, redirect_location, body, headers, - retries=retries, redirect=redirect, - assert_same_host=assert_same_host, - timeout=timeout, pool_timeout=pool_timeout, - release_conn=release_conn, **response_kw) + if redirect and response.get_redirect_location(): + return self.redirect( + response=response, method=method, retries=retries, + url=url, headers=headers, body=body, + assert_same_host=assert_same_host, timeout=timeout, + pool_timeout=pool_timeout, release_conn=release_conn, + redirect=redirect, **response_kw) # Check if we should retry the HTTP response. if retries.is_forced_retry(method, status_code=response.status): diff --git a/urllib3/poolmanager.py b/urllib3/poolmanager.py index 276b54dd..df661770 100644 --- a/urllib3/poolmanager.py +++ b/urllib3/poolmanager.py @@ -221,7 +221,7 @@ class PoolManager(RequestMethods): u = parse_url(url) return self.connection_from_host(u.host, port=u.port, scheme=u.scheme) - def urlopen(self, method, url, redirect=True, **kw): + def urlopen(self, method, url, redirect=True, retries=None, **kw): """ Same as :meth:`urllib3.connectionpool.HTTPConnectionPool.urlopen` with custom cross-host redirect logic and only sends the request-uri @@ -229,47 +229,30 @@ class PoolManager(RequestMethods): The given ``url`` parameter must be absolute, such that an appropriate :class:`urllib3.connectionpool.ConnectionPool` can be chosen for it. + + This is a low-level method; use :func:`urllib3.request.RequestMethods.request` + instead. """ u = parse_url(url) conn = self.connection_from_host(u.host, port=u.port, scheme=u.scheme) - kw['assert_same_host'] = False kw['redirect'] = False if 'headers' not in kw: kw['headers'] = self.headers - if self.proxy is not None and u.scheme == "http": - response = conn.urlopen(method, url, **kw) - else: - response = conn.urlopen(method, u.request_uri, **kw) - - redirect_location = redirect and response.get_redirect_location() - if not redirect_location: - return response - - # Support relative URLs for redirecting. - redirect_location = urljoin(url, redirect_location) - - # RFC 7231, Section 6.4.4 - if response.status == 303: - method = 'GET' - - retries = kw.get('retries') if not isinstance(retries, Retry): retries = Retry.from_int(retries, redirect=redirect) - try: - retries = retries.increment(method, url, response=response, _pool=conn) - except MaxRetryError: - if retries.raise_on_redirect: - raise - return response - - kw['retries'] = retries - kw['redirect'] = redirect + if self.proxy is not None and u.scheme == "http": + response = conn.urlopen(method, url, retries=retries, **kw) + else: + response = conn.urlopen(method, u.request_uri, retries=retries, **kw) - log.info("Redirecting %s -> %s", url, redirect_location) - return self.urlopen(method, redirect_location, **kw) + if redirect and response.get_redirect_location(): + kw['redirect'] = redirect + return self.redirect(response=response, method=method, retries=retries, + url=url, pool=conn, **kw) + return response class ProxyManager(PoolManager): diff --git a/urllib3/request.py b/urllib3/request.py index c0fddff0..4708c059 100644 --- a/urllib3/request.py +++ b/urllib3/request.py @@ -1,10 +1,50 @@ from __future__ import absolute_import +import logging from .filepost import encode_multipart_formdata +from .exceptions import MaxRetryError + +from .packages.six.moves.urllib.request import Request as _Request from .packages.six.moves.urllib.parse import urlencode +from .packages.six.moves.urllib.parse import urljoin +__all__ = ['RequestMethods', 'Request'] -__all__ = ['RequestMethods'] +log = logging.getLogger(__name__) + + +class Request(_Request): + """ + Currently used as a shim to allow us to work with the stdlib cookie + handling, which expects a `urllib.request.Request`-like object. + """ + def __init__(self, *args, **kwargs): + kwargs.pop('method', None) + # Request is an old-style class in Python 2 + _Request.__init__(self, *args, **kwargs) + self._cookies = [] + # If there's an existing Cookie header, let's split it up + # so we can handle it similarly to those we get from a jar. + if self.has_header('Cookie'): + self._cookies = self.get_header('Cookie').split('; ') + + def add_cookies(self, *cookies): + """ + We keep track of individual cookies so that we can keep them from + duplicating, and re-render the Cookie header when we get new ones. + """ + for each in cookies: + if each not in self._cookies: + self._cookies.append(each) + self.add_header('Cookie', '; '.join(self._cookies)) + + def get_all_headers(self): + """ + Returns a complete set of all headers + """ + headers = self.unredirected_hdrs.copy() + headers.update(self.headers) + return headers class RequestMethods(object): @@ -146,3 +186,24 @@ class RequestMethods(object): extra_kw.update(urlopen_kw) return self.urlopen(method, url, **extra_kw) + + def redirect(self, response, method, retries, **kwargs): + """ + Abstracts the redirect process to be used from any :class:`RequestMethods` object + """ + url = kwargs.pop('url', '') + redirect_location = urljoin(url, response.get_redirect_location()) + method = retries.redirect_method(method, response.status) + try: + pool = kwargs.pop('pool', self) + retries = retries.increment(method, url, response=response, _pool=pool) + except MaxRetryError: + if retries.raise_on_redirect: + # Release the connection for this response, since we're not + # returning it to be released manually. + response.release_conn() + raise + return response + + log.info("Redirecting %s -> %s", url, redirect_location) + return self.urlopen(method=method, url=redirect_location, retries=retries, **kwargs) diff --git a/urllib3/response.py b/urllib3/response.py index be2accda..1e8e3de8 100644 --- a/urllib3/response.py +++ b/urllib3/response.py @@ -187,6 +187,10 @@ class HTTPResponse(io.IOBase): def connection(self): return self._connection + def info(self): + # Compatibility method for cookie handler + return self.headers + def tell(self): """ Obtain the number of bytes pulled over the wire so far. May differ from diff --git a/urllib3/sessionmanager.py b/urllib3/sessionmanager.py new file mode 100644 index 00000000..381ad78b --- /dev/null +++ b/urllib3/sessionmanager.py @@ -0,0 +1,77 @@ +from .util.retry import Retry +from .request import RequestMethods, Request +from .util.sessioncontext import SessionContext + + +class SessionManager(RequestMethods): + """ + Allows arbitrary requests while maintaining session context across + those requests. Currently, that context consists of automatic + cookie storage and retrieval. + + :param manager: + An appropriate :class:`urllib3.poolmanager.PoolManager` or + :class:`urllib3.poolmanager.ProxyManager` object + to handle HTTP requests for the SessionManager + + :param context: + A predefined :class:`urllib3.util.context.SessionContext` object to use in the session; + if not provided, a new one will be created. + + :param headers: + A set of headers to include with all requests, unless other + headers are given explicitly. + + Example:: + + >>> manager = SessionManager(PoolManager()) + >>> manager.context.cookie_jar + + >>> len(manager.context.cookie_jar) + 0 + >>> manager.request('GET', 'http://google.com') + >>> manager.request('GET', 'http://yahoo.com') + >>> len(manager.context.cookie_jar) + 2 + + """ + def __init__(self, manager, context=None, headers=None): + super(SessionManager, self).__init__(headers=headers) + self.manager = manager + self.context = context or SessionContext() + + def urlopen(self, method, url, redirect=True, retries=None, **kw): + """ + Same as :meth:`urllib2.poolmanager.PoolManager.urlopen` with added + request-context-managing special sauce. The received ``url`` param + must be an absolute path. + + This is a low-level method; use :func:`urllib3.request.RequestMethods.request` + instead. + """ + headers = kw.pop('headers', self.headers) + + if not isinstance(retries, Retry): + retries = Retry.from_int(retries, redirect=redirect) + + # Build a mock Request object to work with + request_object = Request(url=url, method=method, headers=headers) + self.context.apply_to(request_object) + modified_headers = request_object.get_all_headers() + + # Ensure that redirects happen at this level only + kw['redirect'] = False + kw['headers'] = modified_headers + + response = self.manager.urlopen(method, url, retries=retries, **kw) + + # Retrieve any context from the response + self.context.extract_from(response, request_object) + + # Redirect as necessary, and return. + if redirect and response.get_redirect_location(): + kw['redirect'] = redirect + kw['headers'] = headers + return self.redirect(response=response, method=method, + retries=retries, url=url, **kw) + return response diff --git a/urllib3/util/retry.py b/urllib3/util/retry.py index f8f21810..2144a0e1 100644 --- a/urllib3/util/retry.py +++ b/urllib3/util/retry.py @@ -13,7 +13,6 @@ from ..exceptions import ( ) from ..packages import six - log = logging.getLogger(__name__) # Data structure for representing the metadata of requests that result in a retry. @@ -311,6 +310,20 @@ class Retry(object): 'read={self.read}, redirect={self.redirect})').format( cls=type(self), self=self) + def redirect_method(self, method, status): + """ + Assuming we're doing a redirect, should we change HTTP methods? + """ + if method == 'GET': + return method + if method == 'HEAD': + return method + if status == 303: + return 'GET' + if method == 'POST' and status in set([301, 302, 303]): + return 'GET' + return method + # For backwards compatibility (equivalent to pre-v1.9): Retry.DEFAULT = Retry(3) diff --git a/urllib3/util/sessioncontext.py b/urllib3/util/sessioncontext.py new file mode 100644 index 00000000..4e657ce5 --- /dev/null +++ b/urllib3/util/sessioncontext.py @@ -0,0 +1,75 @@ +import time + +from ..packages.six.moves.http_cookiejar import( + DefaultCookiePolicy as PythonCookiePolicy, + CookieJar as PythonCookieJar +) + + +class DefaultCookiePolicy(PythonCookiePolicy): + """ + The default urllib3 cookie policy - similar to the Python default, + but :param:`strict_ns_domain` is set to `DomainStrict` for security. + """ + def __init__(self, *args, **kwargs): + policy = PythonCookiePolicy.DomainStrict + kwargs.setdefault('strict_ns_domain', policy) + # Old-style class on Python 2 + PythonCookiePolicy.__init__(self, *args, **kwargs) + + +class CookieJar(PythonCookieJar): + + def __init__(self, policy=None): + policy = policy or DefaultCookiePolicy() + # Old-style class on Python 2 + PythonCookieJar.__init__(self, policy=policy) + + def add_cookie_header(self, request): + """ + Add correct Cookie: header to Request object. + This is copied from and slightly modified from the stdlib version. + """ + self._cookies_lock.acquire() + try: + self._policy._now = self._now = int(time.time()) + cookies = self._cookies_for_request(request) + attrs = self._cookie_attrs(cookies) + # This is a modification; stdlib sets the entire cookie header + # and only if it's not there already. We're less picky. + if attrs: + request.add_cookies(*attrs) + finally: + self._cookies_lock.release() + self.clear_expired_cookies() + + +class SessionContext(object): + """ + Extensible class encapsulated by :class:`.SessionManager`; currently + used to manage cookies. + + :param cookie_jar: + Used to pass a prebuilt :class:`CookieJar` into the + context to be used instead of an empty jar. + """ + + def __init__(self, cookie_jar=None): + # We unfortunately have to do it this way; empty cookie jars + # evaluate as falsey. + if cookie_jar is not None: + self.cookie_jar = cookie_jar + else: + self.cookie_jar = CookieJar() + + def apply_to(self, request): + """ + Applies changes from the context to the supplied :class:`.request.Request`. + """ + self.cookie_jar.add_cookie_header(request) + + def extract_from(self, response, request): + """ + Extracts context modifications (new cookies, etc) from the response and stores them. + """ + self.cookie_jar.extract_cookies(response, request) -- cgit v1.2.1 From e306fe5e056a8ebfc0470b7efdb230311451f01d Mon Sep 17 00:00:00 2001 From: Jesse Shapiro Date: Wed, 22 Jun 2016 12:35:37 -0400 Subject: Flake8 nit --- urllib3/util/sessioncontext.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/urllib3/util/sessioncontext.py b/urllib3/util/sessioncontext.py index 4e657ce5..1a29978a 100644 --- a/urllib3/util/sessioncontext.py +++ b/urllib3/util/sessioncontext.py @@ -1,6 +1,6 @@ import time -from ..packages.six.moves.http_cookiejar import( +from ..packages.six.moves.http_cookiejar import ( DefaultCookiePolicy as PythonCookiePolicy, CookieJar as PythonCookieJar ) -- cgit v1.2.1 From 3d01b133d82fc8b5cd4c995e1bb4cbfc40334de3 Mon Sep 17 00:00:00 2001 From: Jesse Shapiro Date: Mon, 11 Jul 2016 10:11:49 -0400 Subject: Addressing review comments --- test/test_sessionmanager.py | 60 ++++++++++++++++++++++++++-- test/with_dummyserver/test_sessionmanager.py | 27 +++++++++++++ urllib3/request.py | 4 +- urllib3/response.py | 5 ++- urllib3/sessionmanager.py | 2 +- urllib3/util/retry.py | 4 +- urllib3/util/sessioncontext.py | 9 ++--- 7 files changed, 97 insertions(+), 14 deletions(-) diff --git a/test/test_sessionmanager.py b/test/test_sessionmanager.py index d0a9e32b..dd214bc8 100644 --- a/test/test_sessionmanager.py +++ b/test/test_sessionmanager.py @@ -16,24 +16,38 @@ class TestSessionManager(unittest.TestCase): self.manager = SessionManager(PoolManager()) def test_set_cookie_policy(self): + """ + Test that we're able to set the policy on the SessionManager's CookieJar + """ policy = cookielib.DefaultCookiePolicy() self.assertNotEqual(policy, self.manager.context.cookie_jar._policy) self.manager.context.cookie_jar.set_policy(policy) self.assertTrue(policy is self.manager.context.cookie_jar._policy) def test_create_proxy_manager(self): - manager = SessionManager(ProxyManager(proxy_url='http://none')) - self.assertTrue(isinstance(manager.manager, ProxyManager)) + """ + Make sure that when we pass a ProxyManager in, we use it. + """ + pm = ProxyManager(proxy_url='http://none') + manager = SessionManager(pm) + self.assertTrue(manager.manager is pm) def test_creates_pool_manager(self): - self.assertTrue(isinstance(self.manager.manager, PoolManager)) + """ + Make sure that when we pass a PoolManager in, we use it. + """ + pm = PoolManager() + manager = SessionManager(pm) + self.assertTrue(manager.manager is pm) def test_with_external_jar(self): + """ + Make sure that when we pass a CookieJar in, we use it. + """ this_policy = cookielib.DefaultCookiePolicy() jar = CookieJar(policy=this_policy) context = SessionContext(cookie_jar=jar) manager = SessionManager(PoolManager(), context=context) - print(dir(manager.context)) self.assertTrue(manager.context.cookie_jar is jar) @@ -43,14 +57,33 @@ class TestSessionContext(unittest.TestCase): self.context = SessionContext() def test_extract_cookie(self): + + """ + Check to be sure that we're properly extracting cookies into the + SessionManager's jar, and that the cookie has the expected value. + """ + + expected_cookie = cookielib.Cookie( + version=0, name='cookiename', value='cookieval', port=None, + port_specified=False, domain='google.com', domain_specified=False, + domain_initial_dot=False, path='/', path_specified=False, + secure=False, expires=None, discard=True, comment=None, + comment_url=None, rest={}, rfc2109=False + ) + self.assertFalse(self.context.cookie_jar) req = Request(method='GET', url='http://google.com') headers = {'Set-Cookie': 'cookiename=cookieval'} resp = HTTPResponse(headers=headers) self.context.extract_from(resp, req) self.assertTrue(self.context.cookie_jar) + for each in self.context.cookie_jar: + self.assertEqual(repr(each), repr(expected_cookie)) def test_apply_cookie(self): + """ + Ensure that we're setting relevant cookies on outgoing requests + """ req = Request(method='GET', url='http://google.com') headers = {'Set-Cookie': 'cookiename=cookieval'} resp = HTTPResponse(headers=headers) @@ -60,6 +93,9 @@ class TestSessionContext(unittest.TestCase): self.assertEqual(req.headers.get('Cookie'), 'cookiename=cookieval') def test_no_apply_cookie(self): + """ + Ensure that we don't set cookies on requests to another domain + """ req = Request(method='GET', url='http://google.com') headers = {'Set-Cookie': 'cookiename=cookieval'} resp = HTTPResponse(headers=headers) @@ -70,6 +106,10 @@ class TestSessionContext(unittest.TestCase): self.assertTrue(req.headers.get('Cookie', None) is None) def test_unacceptable_cookie(self): + """ + Ensure that we don't accept cookies for domains other than the one + the request was sent to + """ self.assertFalse(self.context.cookie_jar) req = Request(method='GET', url='http://evil.com') headers = {'Set-Cookie': 'cookiename=cookieval; domain=.google.com'} @@ -78,6 +118,9 @@ class TestSessionContext(unittest.TestCase): self.assertFalse(self.context.cookie_jar) def test_parent_domain(self): + """ + Ensure that cookies set by child domains are sent to their parent domains + """ req = Request(method='GET', url='http://www.google.com') headers = {'Set-Cookie': 'cookiename=cookieval; domain=.google.com'} resp = HTTPResponse(headers=headers) @@ -87,6 +130,10 @@ class TestSessionContext(unittest.TestCase): self.assertEqual(req.headers.get('Cookie'), 'cookiename=cookieval') def test_sibling_domain(self): + """ + Ensure that cookies set by sibling subdomains are not sent to + other siblings under the same domain automatically. + """ req = Request(method='GET', url='http://ww1.google.com') headers = {'Set-Cookie': 'cookiename=cookieval'} resp = HTTPResponse(headers=headers) @@ -96,6 +143,11 @@ class TestSessionContext(unittest.TestCase): self.assertTrue(req.headers.get('Cookie') is None) def test_sibling_domain_with_wildcard(self): + """ + Ensure that when sibling domains specify a parent domain to send a cookie + to, that cookie is also sent to other sibling domains under the + same parent domain. + """ req = Request(method='GET', url='http://ww1.google.com') headers = {'Set-Cookie': 'cookiename=cookieval; domain=.google.com'} resp = HTTPResponse(headers=headers) diff --git a/test/with_dummyserver/test_sessionmanager.py b/test/with_dummyserver/test_sessionmanager.py index d1fb325e..8550c2db 100644 --- a/test/with_dummyserver/test_sessionmanager.py +++ b/test/with_dummyserver/test_sessionmanager.py @@ -20,6 +20,9 @@ class TestSessionManager(HTTPDummyServerTestCase): self.manager = SessionManager(PoolManager()) def test_cookie_handler(self): + """ + Test that the client gets, sets, and returns the appropriate cookie + """ route = self.create_url('/set_cookie_on_client') r = self.manager.request('GET', route) self.assertEqual(r.status, 200) @@ -29,6 +32,10 @@ class TestSessionManager(HTTPDummyServerTestCase): self.assertEqual(r.data, b'Received cookie') def test_restrict_undomained_cookie_by_host(self): + """ + Test that undomained cookies are only sent to the domain that they + were originally received from + """ route = self.create_url('/set_undomained_cookie_on_client') r = self.manager.request('GET', route) self.assertEqual(r.status, 200) @@ -38,6 +45,10 @@ class TestSessionManager(HTTPDummyServerTestCase): self.assertEqual(r.status, 400) def test_merge_cookie_header(self): + """ + Test that cookies passed in the headers argument are successfully + merged, rather than skipped + """ self.assertFalse(self.manager.context.cookie_jar) headers = {'Cookie': 'testing_cookie=test_cookie_value'} route = self.create_url('/verify_cookie') @@ -45,6 +56,10 @@ class TestSessionManager(HTTPDummyServerTestCase): self.assertEqual(r.data, b'Received cookie') def test_instance_headers(self): + """ + Test that cookies set on the SessionManager instance are + successfully merged, rather than skipped + """ headers = {'Cookie': 'testing_cookie=test_cookie_value'} manager = SessionManager(PoolManager(), headers=headers) route = self.create_url('/verify_cookie') @@ -52,17 +67,29 @@ class TestSessionManager(HTTPDummyServerTestCase): self.assertEqual(r.data, b'Received cookie') def test_collect_cookie_on_redirect(self): + """ + Test that when we're redirected to a different URL, we + still save the cookie and transmit it when relevant. + """ route = self.create_url('/set_cookie_and_redirect') r = self.manager.request('GET', route) self.assertEqual(r.data, b'Received cookie') def test_no_retry(self): + """ + Test that the SessionManager follows the pattern of + raising MaxRetryError when we don't have any retries left + """ def execute_query(): route = self.create_url('/set_cookie_and_redirect') r = self.manager.request('GET', route, retries=0) self.assertRaises(MaxRetryError, execute_query) def test_no_redirect(self): + """ + Test that SessionManager follows the pattern of returning + the redirect status code when redirect is set to False. + """ route = self.create_url('/set_cookie_and_redirect') r = self.manager.request('GET', route, redirect=False) self.assertEqual(r.status, 303) diff --git a/urllib3/request.py b/urllib3/request.py index 4708c059..f1171498 100644 --- a/urllib3/request.py +++ b/urllib3/request.py @@ -19,7 +19,7 @@ class Request(_Request): handling, which expects a `urllib.request.Request`-like object. """ def __init__(self, *args, **kwargs): - kwargs.pop('method', None) + del kwargs['method'] # Request is an old-style class in Python 2 _Request.__init__(self, *args, **kwargs) self._cookies = [] @@ -194,8 +194,8 @@ class RequestMethods(object): url = kwargs.pop('url', '') redirect_location = urljoin(url, response.get_redirect_location()) method = retries.redirect_method(method, response.status) + pool = kwargs.pop('pool', self) try: - pool = kwargs.pop('pool', self) retries = retries.increment(method, url, response=response, _pool=pool) except MaxRetryError: if retries.raise_on_redirect: diff --git a/urllib3/response.py b/urllib3/response.py index 1e8e3de8..5b8d28f6 100644 --- a/urllib3/response.py +++ b/urllib3/response.py @@ -188,7 +188,10 @@ class HTTPResponse(io.IOBase): return self._connection def info(self): - # Compatibility method for cookie handler + """ + This is a compatibility method that's only used by urllib3's cookie + handlers; don't use it in your own code. + """ return self.headers def tell(self): diff --git a/urllib3/sessionmanager.py b/urllib3/sessionmanager.py index 381ad78b..97899a49 100644 --- a/urllib3/sessionmanager.py +++ b/urllib3/sessionmanager.py @@ -19,7 +19,7 @@ class SessionManager(RequestMethods): if not provided, a new one will be created. :param headers: - A set of headers to include with all requests, unless other + Headers to include with all requests, unless other headers are given explicitly. Example:: diff --git a/urllib3/util/retry.py b/urllib3/util/retry.py index 2144a0e1..8e5be27f 100644 --- a/urllib3/util/retry.py +++ b/urllib3/util/retry.py @@ -20,6 +20,8 @@ RequestHistory = namedtuple('RequestHistory', ["method", "url", "error", "status", "redirect_location"]) +_POST_REDIRECT_DOWNGRADE_STATUSES = set([301, 302, 303]) + class Retry(object): """ Retry configuration. @@ -320,7 +322,7 @@ class Retry(object): return method if status == 303: return 'GET' - if method == 'POST' and status in set([301, 302, 303]): + if method == 'POST' and status in _POST_REDIRECT_DOWNGRADE_STATUSES: return 'GET' return method diff --git a/urllib3/util/sessioncontext.py b/urllib3/util/sessioncontext.py index 1a29978a..3b323dca 100644 --- a/urllib3/util/sessioncontext.py +++ b/urllib3/util/sessioncontext.py @@ -21,7 +21,8 @@ class DefaultCookiePolicy(PythonCookiePolicy): class CookieJar(PythonCookieJar): def __init__(self, policy=None): - policy = policy or DefaultCookiePolicy() + if policy is None: + policy = DefaultCookiePolicy() # Old-style class on Python 2 PythonCookieJar.__init__(self, policy=policy) @@ -30,8 +31,7 @@ class CookieJar(PythonCookieJar): Add correct Cookie: header to Request object. This is copied from and slightly modified from the stdlib version. """ - self._cookies_lock.acquire() - try: + with self._cookies_lock: self._policy._now = self._now = int(time.time()) cookies = self._cookies_for_request(request) attrs = self._cookie_attrs(cookies) @@ -39,8 +39,7 @@ class CookieJar(PythonCookieJar): # and only if it's not there already. We're less picky. if attrs: request.add_cookies(*attrs) - finally: - self._cookies_lock.release() + self.clear_expired_cookies() -- cgit v1.2.1 From b7a8a7504dfddf8e1388a9ffe46e1026e911c39a Mon Sep 17 00:00:00 2001 From: Jesse Shapiro Date: Mon, 11 Jul 2016 10:20:30 -0400 Subject: Continue fixing review items --- test/test_sessionmanager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_sessionmanager.py b/test/test_sessionmanager.py index dd214bc8..e76f5400 100644 --- a/test/test_sessionmanager.py +++ b/test/test_sessionmanager.py @@ -78,7 +78,7 @@ class TestSessionContext(unittest.TestCase): self.context.extract_from(resp, req) self.assertTrue(self.context.cookie_jar) for each in self.context.cookie_jar: - self.assertEqual(repr(each), repr(expected_cookie)) + self.assertEqual(each.__dict__, expected_cookie.__dict__) def test_apply_cookie(self): """ -- cgit v1.2.1 From e9dd46876ae2e1933b185a9446e5cdec43a585b0 Mon Sep 17 00:00:00 2001 From: Jesse Shapiro Date: Mon, 11 Jul 2016 10:36:10 -0400 Subject: Fixing flake8 nit --- urllib3/util/retry.py | 1 + 1 file changed, 1 insertion(+) diff --git a/urllib3/util/retry.py b/urllib3/util/retry.py index 8e5be27f..edbfc873 100644 --- a/urllib3/util/retry.py +++ b/urllib3/util/retry.py @@ -22,6 +22,7 @@ RequestHistory = namedtuple('RequestHistory', ["method", "url", "error", _POST_REDIRECT_DOWNGRADE_STATUSES = set([301, 302, 303]) + class Retry(object): """ Retry configuration. -- cgit v1.2.1 From 5361875dd4caea1f1e31e4e2d48ffa5e81c9ed89 Mon Sep 17 00:00:00 2001 From: Jesse Shapiro Date: Fri, 2 Sep 2016 17:29:01 -0400 Subject: Switching to easier method of creating ProxySessionManager --- test/test_sessionmanager.py | 42 ++++++++++++++++++++-------- test/with_dummyserver/test_sessionmanager.py | 4 +-- urllib3/sessionmanager.py | 15 ++++++++-- 3 files changed, 45 insertions(+), 16 deletions(-) diff --git a/test/test_sessionmanager.py b/test/test_sessionmanager.py index e76f5400..ae6893c1 100644 --- a/test/test_sessionmanager.py +++ b/test/test_sessionmanager.py @@ -1,6 +1,6 @@ import unittest -from urllib3.sessionmanager import SessionManager +from urllib3.sessionmanager import SessionManager, ProxySessionManager from urllib3.poolmanager import ProxyManager, PoolManager from urllib3.util.sessioncontext import DefaultCookiePolicy, CookieJar, SessionContext from urllib3.packages import six @@ -13,7 +13,7 @@ cookielib = six.moves.http_cookiejar class TestSessionManager(unittest.TestCase): def setUp(self): - self.manager = SessionManager(PoolManager()) + self.manager = SessionManager() def test_set_cookie_policy(self): """ @@ -26,19 +26,19 @@ class TestSessionManager(unittest.TestCase): def test_create_proxy_manager(self): """ - Make sure that when we pass a ProxyManager in, we use it. + Make sure that the ProxySessionManager has its `manager` attribute + set correctly """ - pm = ProxyManager(proxy_url='http://none') - manager = SessionManager(pm) - self.assertTrue(manager.manager is pm) + manager = ProxySessionManager(proxy_url='http://none') + self.assertTrue(isinstance(manager.manager, ProxyManager)) def test_creates_pool_manager(self): """ - Make sure that when we pass a PoolManager in, we use it. + Make sure that when we just create a SessionManager, the embedded manager + is of the PoolManager type. """ - pm = PoolManager() - manager = SessionManager(pm) - self.assertTrue(manager.manager is pm) + manager = SessionManager() + self.assertTrue(isinstance(manager.manager, PoolManager)) def test_with_external_jar(self): """ @@ -47,9 +47,29 @@ class TestSessionManager(unittest.TestCase): this_policy = cookielib.DefaultCookiePolicy() jar = CookieJar(policy=this_policy) context = SessionContext(cookie_jar=jar) - manager = SessionManager(PoolManager(), context=context) + manager = SessionManager(manager=PoolManager(), context=context) self.assertTrue(manager.context.cookie_jar is jar) + def test_with_own_manager(self): + """ + Make sure that when we pass our own manager instance in, + it gets used. + """ + manager = 18743 + sm = SessionManager(manager=manager) + self.assertEqual(manager, sm.manager) + + def test_with_own_manager_class(self): + """ + If we subclass SessionManager and give it a new manager class, + check that we instantiate that class on creation of a new instance. + """ + class PointlessSessionManager(SessionManager): + + manager_class = int + + psm = PointlessSessionManager() + self.assertEqual(psm.manager, 0) class TestSessionContext(unittest.TestCase): diff --git a/test/with_dummyserver/test_sessionmanager.py b/test/with_dummyserver/test_sessionmanager.py index 8550c2db..165953da 100644 --- a/test/with_dummyserver/test_sessionmanager.py +++ b/test/with_dummyserver/test_sessionmanager.py @@ -17,7 +17,7 @@ class TestSessionManager(HTTPDummyServerTestCase): return 'http://' + self.host_alt + ':' + str(self.port) + route def setUp(self): - self.manager = SessionManager(PoolManager()) + self.manager = SessionManager() def test_cookie_handler(self): """ @@ -61,7 +61,7 @@ class TestSessionManager(HTTPDummyServerTestCase): successfully merged, rather than skipped """ headers = {'Cookie': 'testing_cookie=test_cookie_value'} - manager = SessionManager(PoolManager(), headers=headers) + manager = SessionManager(headers=headers) route = self.create_url('/verify_cookie') r = manager.request('GET', route) self.assertEqual(r.data, b'Received cookie') diff --git a/urllib3/sessionmanager.py b/urllib3/sessionmanager.py index 97899a49..3df71e6d 100644 --- a/urllib3/sessionmanager.py +++ b/urllib3/sessionmanager.py @@ -1,4 +1,5 @@ from .util.retry import Retry +from .poolmanager import PoolManager, ProxyManager from .request import RequestMethods, Request from .util.sessioncontext import SessionContext @@ -35,14 +36,17 @@ class SessionManager(RequestMethods): 2 """ - def __init__(self, manager, context=None, headers=None): + + manager_class = PoolManager + + def __init__(self, context=None, headers=None, manager=None, **manager_kw): super(SessionManager, self).__init__(headers=headers) - self.manager = manager + self.manager = self.manager_class(**manager_kw) if manager is None else manager self.context = context or SessionContext() def urlopen(self, method, url, redirect=True, retries=None, **kw): """ - Same as :meth:`urllib2.poolmanager.PoolManager.urlopen` with added + Same as :meth:`urllib3.poolmanager.PoolManager.urlopen` with added request-context-managing special sauce. The received ``url`` param must be an absolute path. @@ -75,3 +79,8 @@ class SessionManager(RequestMethods): return self.redirect(response=response, method=method, retries=retries, url=url, **kw) return response + + +class ProxySessionManager(SessionManager): + + manager_class = ProxyManager -- cgit v1.2.1 From 37565e8ed412da0dcc400e3f1ab624a34cc9f5c4 Mon Sep 17 00:00:00 2001 From: Jesse Shapiro Date: Fri, 2 Sep 2016 17:58:37 -0400 Subject: Removing merge cruft --- urllib3/poolmanager.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/urllib3/poolmanager.py b/urllib3/poolmanager.py index df661770..992a281f 100644 --- a/urllib3/poolmanager.py +++ b/urllib3/poolmanager.py @@ -6,8 +6,7 @@ import logging from ._collections import RecentlyUsedContainer from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool from .connectionpool import port_by_scheme -from .exceptions import LocationValueError, MaxRetryError, ProxySchemeUnknown -from .packages.six.moves.urllib.parse import urljoin +from .exceptions import LocationValueError, ProxySchemeUnknown from .request import RequestMethods from .util.url import parse_url from .util.retry import Retry -- cgit v1.2.1