diff options
| author | Kenneth Reitz <me@kennethreitz.com> | 2013-08-14 13:41:08 -0700 |
|---|---|---|
| committer | Kenneth Reitz <me@kennethreitz.com> | 2013-08-14 13:41:08 -0700 |
| commit | d8268fb7b44da7b8aa225eb1ca6fbdb4f9dc2457 (patch) | |
| tree | ca8063ab7db7d2004a33c58b884f47d9937ce12a | |
| parent | d3b786399e1a1041426cd6ab6a619b58a86d6300 (diff) | |
| parent | 2e5a6746019140bf2ef3a497c30c17992de36426 (diff) | |
| download | python-requests-d8268fb7b44da7b8aa225eb1ca6fbdb4f9dc2457.tar.gz | |
Merge pull request #1515 from schlamar/https-proxy-2.0
[2.0] Https proxy support
| -rw-r--r-- | AUTHORS.rst | 1 | ||||
| -rw-r--r-- | README.rst | 1 | ||||
| -rw-r--r-- | requests/adapters.py | 43 | ||||
| -rw-r--r-- | requests/packages/urllib3/connectionpool.py | 76 | ||||
| -rw-r--r-- | requests/packages/urllib3/poolmanager.py | 107 | ||||
| -rw-r--r-- | requests/packages/urllib3/response.py | 2 |
6 files changed, 177 insertions, 53 deletions
diff --git a/AUTHORS.rst b/AUTHORS.rst index fcab91a3..4f77687d 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -133,3 +133,4 @@ Patches and Suggestions - Kevin Burke <kev@inburke.com> - Flavio Curella - David Pursehouse <david.pursehouse@gmail.com> @dpursehouse +- Marc Schlaich @schlamar @@ -54,6 +54,7 @@ Features - Multipart File Uploads - Connection Timeouts - Thread-safety +- HTTP(S) proxy support Installation diff --git a/requests/adapters.py b/requests/adapters.py index e6cb50ed..750afece 100644 --- a/requests/adapters.py +++ b/requests/adapters.py @@ -11,7 +11,7 @@ and maintain connections. import socket from .models import Response -from .packages.urllib3.poolmanager import PoolManager, ProxyManager +from .packages.urllib3.poolmanager import PoolManager, proxy_from_url from .packages.urllib3.response import HTTPResponse from .compat import urlparse, basestring, urldefrag, unquote from .utils import (DEFAULT_CA_BUNDLE_PATH, get_encoding_from_headers, @@ -71,6 +71,7 @@ class HTTPAdapter(BaseAdapter): pool_block=DEFAULT_POOLBLOCK): self.max_retries = max_retries self.config = {} + self.proxy_manager = {} super(HTTPAdapter, self).__init__() @@ -194,7 +195,14 @@ class HTTPAdapter(BaseAdapter): if proxy: except_on_missing_scheme(proxy) - conn = ProxyManager(self.poolmanager.connection_from_url(proxy)) + proxy_headers = self.proxy_headers(proxy) + + if not proxy in self.proxy_manager: + self.proxy_manager[proxy] = proxy_from_url( + proxy, + proxy_headers=proxy_headers) + + conn = self.proxy_manager[proxy].connection_from_url(url) else: conn = self.poolmanager.connection_from_url(url.lower()) @@ -232,8 +240,9 @@ class HTTPAdapter(BaseAdapter): return url def add_headers(self, request, **kwargs): - """Add any headers needed by the connection. Currently this adds a - Proxy-Authorization header. + """Add any headers needed by the connection. As of v2.0 this does + nothing by default, but is left for overriding by users that subclass + the :class:`HTTPAdapter <requests.adapters.HTTPAdapter>`. This should not be called from user code, and is only exposed for use when subclassing the @@ -242,12 +251,22 @@ class HTTPAdapter(BaseAdapter): :param request: The :class:`PreparedRequest <PreparedRequest>` to add headers to. :param kwargs: The keyword arguments from the call to send(). """ - proxies = kwargs.get('proxies', {}) + pass - if proxies is None: - proxies = {} + def proxy_headers(self, proxy): + """Returns a dictionary of the headers to add to any request sent + through a proxy. This works with urllib3 magic to ensure that they are + correctly sent to the proxy, rather than in a tunnelled request if + CONNECT is being used. - proxy = proxies.get(urlparse(request.url).scheme) + This should not be called from user code, and is only exposed for use + when subclassing the + :class:`HTTPAdapter <requests.adapters.HTTPAdapter>`. + + :param proxies: The url of the proxy being used for this request. + :param kwargs: Optional additional keyword arguments. + """ + headers = {} username, password = get_auth_from_url(proxy) if username and password: @@ -255,8 +274,10 @@ class HTTPAdapter(BaseAdapter): # to decode them. username = unquote(username) password = unquote(password) - request.headers['Proxy-Authorization'] = _basic_auth_str(username, - password) + headers['Proxy-Authorization'] = _basic_auth_str(username, + password) + + return headers def send(self, request, stream=False, timeout=None, verify=True, cert=None, proxies=None): """Sends PreparedRequest object. Returns Response object. @@ -273,7 +294,7 @@ class HTTPAdapter(BaseAdapter): self.cert_verify(conn, request.url, verify, cert) url = self.request_url(request, proxies) - self.add_headers(request, proxies=proxies) + self.add_headers(request) chunked = not (request.body is None or 'Content-Length' in request.headers) diff --git a/requests/packages/urllib3/connectionpool.py b/requests/packages/urllib3/connectionpool.py index 030eae89..93c0b4b1 100644 --- a/requests/packages/urllib3/connectionpool.py +++ b/requests/packages/urllib3/connectionpool.py @@ -101,6 +101,12 @@ class VerifiedHTTPSConnection(HTTPSConnection): resolved_cert_reqs = resolve_cert_reqs(self.cert_reqs) resolved_ssl_version = resolve_ssl_version(self.ssl_version) + if self._tunnel_host: + self.sock = sock + # Calls self._set_hostport(), so self.host is + # self._tunnel_host below. + self._tunnel() + # Wrap socket using verification with the root certs in # trusted_root_certs self.sock = ssl_wrap_socket(sock, self.key_file, self.cert_file, @@ -174,12 +180,20 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): :param headers: Headers to include with all requests, unless other headers are given explicitly. + + :param _proxy: + Parsed proxy URL, should not be used directly, instead, see + :class:`urllib3.connectionpool.ProxyManager`" + + :param _proxy_headers: + A dictionary with proxy headers, should not be used directly, + instead, see :class:`urllib3.connectionpool.ProxyManager`" """ scheme = 'http' def __init__(self, host, port=None, strict=False, timeout=None, maxsize=1, - block=False, headers=None): + block=False, headers=None, _proxy=None, _proxy_headers=None): ConnectionPool.__init__(self, host, port) RequestMethods.__init__(self, headers) @@ -188,6 +202,9 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): self.pool = self.QueueCls(maxsize) self.block = block + self.proxy = _proxy + self.proxy_headers = _proxy_headers or {} + # Fill the queue up so that doing get() on it will block properly for _ in xrange(maxsize): self.pool.put(None) @@ -526,13 +543,14 @@ class HTTPSConnectionPool(HTTPConnectionPool): def __init__(self, host, port=None, strict=False, timeout=None, maxsize=1, block=False, headers=None, + _proxy=None, _proxy_headers=None, key_file=None, cert_file=None, cert_reqs=None, ca_certs=None, ssl_version=None, assert_hostname=None, assert_fingerprint=None): HTTPConnectionPool.__init__(self, host, port, strict, timeout, maxsize, - block, headers) + block, headers, _proxy, _proxy_headers) self.key_file = key_file self.cert_file = cert_file self.cert_reqs = cert_reqs @@ -541,6 +559,34 @@ class HTTPSConnectionPool(HTTPConnectionPool): self.assert_hostname = assert_hostname self.assert_fingerprint = assert_fingerprint + def _prepare_conn(self, connection): + """ + Prepare the ``connection`` for :meth:`urllib3.util.ssl_wrap_socket` + and establish the tunnel if proxy is used. + """ + + if isinstance(connection, VerifiedHTTPSConnection): + connection.set_cert(key_file=self.key_file, + cert_file=self.cert_file, + cert_reqs=self.cert_reqs, + ca_certs=self.ca_certs, + assert_hostname=self.assert_hostname, + assert_fingerprint=self.assert_fingerprint) + connection.ssl_version = self.ssl_version + + if self.proxy is not None: + # Python 2.7+ + try: + set_tunnel = connection.set_tunnel + except AttributeError: # Platform-specific: Python 2.6 + set_tunnel = connection._set_tunnel + set_tunnel(self.host, self.port, self.proxy_headers) + # Establish tunnel connection early, because otherwise httplib + # would improperly set Host: header to proxy's IP:port. + connection.connect() + + return connection + def _new_conn(self): """ Return a fresh :class:`httplib.HTTPSConnection`. @@ -549,26 +595,24 @@ class HTTPSConnectionPool(HTTPConnectionPool): log.info("Starting new HTTPS connection (%d): %s" % (self.num_connections, self.host)) + actual_host = self.host + actual_port = self.port + if self.proxy is not None: + actual_host = self.proxy.host + actual_port = self.proxy.port + if not ssl: # Platform-specific: Python compiled without +ssl if not HTTPSConnection or HTTPSConnection is object: raise SSLError("Can't connect to HTTPS URL because the SSL " "module is not available.") + connection_class = HTTPSConnection + else: + connection_class = VerifiedHTTPSConnection - return HTTPSConnection(host=self.host, - port=self.port, - strict=self.strict) + connection = connection_class(host=actual_host, port=actual_port, + strict=self.strict) - connection = VerifiedHTTPSConnection(host=self.host, - port=self.port, - strict=self.strict) - connection.set_cert(key_file=self.key_file, cert_file=self.cert_file, - cert_reqs=self.cert_reqs, ca_certs=self.ca_certs, - assert_hostname=self.assert_hostname, - assert_fingerprint=self.assert_fingerprint) - - connection.ssl_version = self.ssl_version - - return connection + return self._prepare_conn(connection) def connection_from_url(url, **kw): diff --git a/requests/packages/urllib3/poolmanager.py b/requests/packages/urllib3/poolmanager.py index 66ceb1b4..e7f8667e 100644 --- a/requests/packages/urllib3/poolmanager.py +++ b/requests/packages/urllib3/poolmanager.py @@ -13,7 +13,7 @@ except ImportError: from ._collections import RecentlyUsedContainer from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool -from .connectionpool import connection_from_url, port_by_scheme +from .connectionpool import port_by_scheme from .request import RequestMethods from .util import parse_url @@ -60,6 +60,8 @@ class PoolManager(RequestMethods): """ + proxy = None + def __init__(self, num_pools=10, headers=None, **connection_pool_kw): RequestMethods.__init__(self, headers) self.connection_pool_kw = connection_pool_kw @@ -99,21 +101,23 @@ class PoolManager(RequestMethods): If ``port`` isn't given, it will be derived from the ``scheme`` using ``urllib3.connectionpool.port_by_scheme``. """ + scheme = scheme or 'http' + port = port or port_by_scheme.get(scheme, 80) pool_key = (scheme, host, port) with self.pools.lock: - # If the scheme, host, or port doesn't match existing open connections, - # open a new ConnectionPool. - pool = self.pools.get(pool_key) - if pool: - return pool - - # Make a fresh ConnectionPool of the desired type - pool = self._new_pool(scheme, host, port) - self.pools[pool_key] = pool + # If the scheme, host, or port doesn't match existing open + # connections, open a new ConnectionPool. + pool = self.pools.get(pool_key) + if pool: + return pool + + # Make a fresh ConnectionPool of the desired type + pool = self._new_pool(scheme, host, port) + self.pools[pool_key] = pool return pool def connection_from_url(self, url): @@ -145,7 +149,10 @@ class PoolManager(RequestMethods): if 'headers' not in kw: kw['headers'] = self.headers - response = conn.urlopen(method, u.request_uri, **kw) + if self.proxy is not None and u.scheme == "http": + response = conn.urlopen(method, url, **kw) + else: + response = conn.urlopen(method, u.request_uri, **kw) redirect_location = redirect and response.get_redirect_location() if not redirect_location: @@ -164,15 +171,59 @@ class PoolManager(RequestMethods): return self.urlopen(method, redirect_location, **kw) -class ProxyManager(RequestMethods): +class ProxyManager(PoolManager): """ - Given a ConnectionPool to a proxy, the ProxyManager's ``urlopen`` method - will make requests to any url through the defined proxy. The ProxyManager - class will automatically set the 'Host' header if it is not provided. + Behaves just like :class:`PoolManager`, but sends all requests through + the defined proxy, using the CONNECT method for HTTPS URLs. + + :param poxy_url: + The URL of the proxy to be used. + + :param proxy_headers: + A dictionary contaning headers that will be sent to the proxy. In case + of HTTP they are being sent with each request, while in the + HTTPS/CONNECT case they are sent only once. Could be used for proxy + authentication. + + Example: + >>> proxy = urllib3.ProxyManager('http://localhost:3128/') + >>> r1 = proxy.request('GET', 'http://google.com/') + >>> r2 = proxy.request('GET', 'http://httpbin.org/') + >>> len(proxy.pools) + 1 + >>> r3 = proxy.request('GET', 'https://httpbin.org/') + >>> r4 = proxy.request('GET', 'https://twitter.com/') + >>> len(proxy.pools) + 3 + """ - def __init__(self, proxy_pool): - self.proxy_pool = proxy_pool + def __init__(self, proxy_url, num_pools=10, headers=None, + proxy_headers=None, **connection_pool_kw): + + if isinstance(proxy_url, HTTPConnectionPool): + proxy_url = '%s://%s:%i' % (proxy_url.scheme, proxy_url.host, + proxy_url.port) + proxy = parse_url(proxy_url) + if not proxy.port: + port = port_by_scheme.get(proxy.scheme, 80) + proxy = proxy._replace(port=port) + self.proxy = proxy + self.proxy_headers = proxy_headers or {} + assert self.proxy.scheme in ("http", "https"), \ + 'Not supported proxy scheme %s' % self.proxy.scheme + connection_pool_kw['_proxy'] = self.proxy + connection_pool_kw['_proxy_headers'] = self.proxy_headers + super(ProxyManager, self).__init__( + num_pools, headers, **connection_pool_kw) + + def connection_from_host(self, host, port=None, scheme='http'): + if scheme == "https": + return super(ProxyManager, self).connection_from_host( + host, port, scheme) + + return super(ProxyManager, self).connection_from_host( + self.proxy.host, self.proxy.port, self.proxy.scheme) def _set_proxy_headers(self, url, headers=None): """ @@ -187,16 +238,22 @@ class ProxyManager(RequestMethods): if headers: headers_.update(headers) - return headers_ - def urlopen(self, method, url, **kw): + def urlopen(self, method, url, redirect=True, **kw): "Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute." - kw['assert_same_host'] = False - kw['headers'] = self._set_proxy_headers(url, headers=kw.get('headers')) - return self.proxy_pool.urlopen(method, url, **kw) + u = parse_url(url) + + if u.scheme == "http": + # It's too late to set proxy headers on per-request basis for + # tunnelled HTTPS connections, should use + # constructor's proxy_headers instead. + kw['headers'] = self._set_proxy_headers(url, kw.get('headers', + self.headers)) + kw['headers'].update(self.proxy_headers) + + return super(ProxyManager, self).urlopen(method, url, redirect, **kw) -def proxy_from_url(url, **pool_kw): - proxy_pool = connection_from_url(url, **pool_kw) - return ProxyManager(proxy_pool) +def proxy_from_url(url, **kw): + return ProxyManager(proxy_url=url, **kw) diff --git a/requests/packages/urllib3/response.py b/requests/packages/urllib3/response.py index 007a5ab9..c7f93b82 100644 --- a/requests/packages/urllib3/response.py +++ b/requests/packages/urllib3/response.py @@ -191,7 +191,7 @@ class HTTPResponse(io.IOBase): "failed to decode it." % content_encoding, e) - if flush_decoder and self._decoder: + if flush_decoder and decode_content and self._decoder: buf = self._decoder.decompress(binary_type()) data += buf + self._decoder.flush() |
