summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: Jorge <JALopezSilva@gmail.com> 2020-03-12 01:39:39 -0700
committer: GitHub <noreply@github.com> 2020-03-12 12:39:39 +0400
commit: 8c7a43b4a4ca0c8d36d55f132daa2a43d06fe3c4 (patch)
tree: 1dd43f9f6c688fbcb774abd323a86c57ef784f3f /src
parent: 33a29c5e34ee3375cde07addeb979aba56f2ca5a (diff)
download: urllib3-8c7a43b4a4ca0c8d36d55f132daa2a43d06fe3c4.tar.gz
Add support for HTTPS connections to proxies. (#1679)
* Add support to talk HTTPS to proxies. Currently there's no way to validate the identity of the proxy you might be connecting to. Proxies supporting HTTPS endpoints are becoming more common and we need to extend the support for them. When an HTTPS proxy is provided, instead of doing the HTTP CONNECT, we'll forward any requests directly to the proxy and ultimately to the destination. * Fix proxy_headers missing on HTTPS proxy connections. * blackfmt missing files. * Prevent usage of HTTPS proxies when fetching HTTPS resources. - Will be supported by default when we can do TLS within TLS. * Update proxy documentation with more information. * Renamed flag for HTTPS websites through HTTPS proxies. * Added myself to contributors. * Documentation and contributors fixes. * Removed mention that TLS in TLS is being developed as requested. * Space in between my name and the GitHub page. * Add flag to enable HTTPS proxy support. Now that we're adding support for HTTPS proxies we want to avoid a breaking change with clients that had an improper proxy configuration. For now, we're adding a warning and defaulting to the previous behavior. In the future we'll change the behavior to enable HTTPS proxies by default. * Remove guard flag, error out on HTTPS/HTTPS. As requested in the last revision for the PR: - Removed the _enable_https_proxies flag. Instead the feature will be enabled and will error out on invalid configurations. (HTTPS + HTTPS) - Other comments: rename a method, parentheses to clarify order of operations.
Diffstat (limited to 'src')
-rw-r--r-- src/urllib3/connection.py 12
-rw-r--r-- src/urllib3/connectionpool.py 19
-rw-r--r-- src/urllib3/exceptions.py 5
-rw-r--r-- src/urllib3/poolmanager.py 74
4 files changed, 89 insertions, 21 deletions
diff --git a/src/urllib3/connection.py b/src/urllib3/connection.py
index 6da1cf4b..ce94b256 100644
--- a/src/urllib3/connection.py
+++ b/src/urllib3/connection.py
@@ -111,7 +111,6 @@ class HTTPConnection(_HTTPConnection, object):
#: The socket options provided by the user. If no options are
#: provided, we use the default options.
self.socket_options = kw.pop("socket_options", self.default_socket_options)
-
_HTTPConnection.__init__(self, *args, **kw)
@property
@@ -174,10 +173,13 @@ class HTTPConnection(_HTTPConnection, object):
return conn
+ def _is_using_tunnel(self):
+ # Google App Engine's httplib does not define _tunnel_host
+ return getattr(self, "_tunnel_host", None)
+
def _prepare_conn(self, conn):
self.sock = conn
- # Google App Engine's httplib does not define _tunnel_host
- if getattr(self, "_tunnel_host", None):
+ if self._is_using_tunnel():
# TODO: Fix tunnel so it doesn't depend on self.sock state.
self._tunnel()
# Mark this connection as not reusable
@@ -308,9 +310,9 @@ class HTTPSConnection(HTTPConnection):
conn = self._new_conn()
hostname = self.host
- # Google App Engine's httplib does not define _tunnel_host
- if getattr(self, "_tunnel_host", None):
+ if self._is_using_tunnel():
self.sock = conn
+
# Calls self._set_hostport(), so self.host is
# self._tunnel_host below.
self._tunnel()
diff --git a/src/urllib3/connectionpool.py b/src/urllib3/connectionpool.py
index d42eb7be..a09c78f1 100644
--- a/src/urllib3/connectionpool.py
+++ b/src/urllib3/connectionpool.py
@@ -629,10 +629,10 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
# [1] <https://github.com/urllib3/urllib3/issues/651>
release_this_conn = release_conn
- # Merge the proxy headers. Only do this in HTTP. We have to copy the
- # headers dict so we can safely change it without those changes being
- # reflected in anyone else's copy.
- if self.scheme == "http":
+ # Merge the proxy headers. Only done when not using HTTP CONNECT. We
+ # have to copy the headers dict so we can safely change it without those
+ # changes being reflected in anyone else's copy.
+ if self.scheme == "http" or (self.proxy and self.proxy.scheme == "https"):
headers = headers.copy()
headers.update(self.proxy_headers)
@@ -941,10 +941,15 @@ class HTTPSConnectionPool(HTTPConnectionPool):
def _prepare_proxy(self, conn):
"""
- Establish tunnel connection early, because otherwise httplib
- would improperly set Host: header to proxy's IP:port.
+ Establishes a tunnel connection through HTTP CONNECT.
+
+ Tunnel connection is established early because otherwise httplib would
+ improperly set Host: header to proxy's IP:port.
"""
- conn.set_tunnel(self._proxy_host, self.port, self.proxy_headers)
+
+ if self.proxy.scheme != "https":
+ conn.set_tunnel(self._proxy_host, self.port, self.proxy_headers)
+
conn.connect()
def _new_conn(self):
diff --git a/src/urllib3/exceptions.py b/src/urllib3/exceptions.py
index 0a74c79b..202ba58a 100644
--- a/src/urllib3/exceptions.py
+++ b/src/urllib3/exceptions.py
@@ -242,6 +242,11 @@ class ProxySchemeUnknown(AssertionError, ValueError):
super(ProxySchemeUnknown, self).__init__(message)
+class ProxySchemeUnsupported(ValueError):
+ "Fetching HTTPS resources through HTTPS proxies is unsupported"
+ pass
+
+
class HeaderParsingError(HTTPError):
"Raised by assert_header_parsing, but we convert it to a log.warning statement."
diff --git a/src/urllib3/poolmanager.py b/src/urllib3/poolmanager.py
index 242a2f82..d081d753 100644
--- a/src/urllib3/poolmanager.py
+++ b/src/urllib3/poolmanager.py
@@ -2,11 +2,18 @@ from __future__ import absolute_import
import collections
import functools
import logging
+import warnings
from ._collections import RecentlyUsedContainer
from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool
from .connectionpool import port_by_scheme
-from .exceptions import LocationValueError, MaxRetryError, ProxySchemeUnknown
+from .exceptions import (
+ HTTPWarning,
+ LocationValueError,
+ MaxRetryError,
+ ProxySchemeUnknown,
+ ProxySchemeUnsupported,
+)
from .packages import six
from .packages.six.moves.urllib.parse import urljoin
from .request import RequestMethods
@@ -17,6 +24,12 @@ from .util.retry import Retry
__all__ = ["PoolManager", "ProxyManager", "proxy_from_url"]
+class InvalidProxyConfigurationWarning(HTTPWarning):
+ """Raised when a user has an HTTPS proxy without enabling HTTPS proxies."""
+
+ pass
+
+
log = logging.getLogger(__name__)
SSL_KEYWORDS = (
@@ -306,6 +319,18 @@ class PoolManager(RequestMethods):
base_pool_kwargs[key] = value
return base_pool_kwargs
+ def _proxy_requires_url_absolute_form(self, parsed_url):
+ """
+ Indicates if the proxy requires the complete destination URL in the
+ request.
+
+ Normally this is only needed when not using an HTTP CONNECT tunnel.
+ """
+ if self.proxy is None:
+ return False
+
+ return parsed_url.scheme == "http" or self.proxy.scheme == "https"
+
def urlopen(self, method, url, redirect=True, **kw):
"""
Same as :meth:`urllib3.connectionpool.HTTPConnectionPool.urlopen`
@@ -324,7 +349,7 @@ class PoolManager(RequestMethods):
if "headers" not in kw:
kw["headers"] = self.headers.copy()
- if self.proxy is not None and u.scheme == "http":
+ if self._proxy_requires_url_absolute_form(u):
response = conn.urlopen(method, url, **kw)
else:
response = conn.urlopen(method, u.request_uri, **kw)
@@ -383,6 +408,12 @@ class ProxyManager(PoolManager):
HTTPS/CONNECT case they are sent only once. Could be used for proxy
authentication.
+ :param _allow_https_proxy_to_see_traffic:
+ Allows forwarding of HTTPS requests to HTTPS proxies. The proxy will
+ have visibility of all the traffic sent. ONLY USE IF YOU KNOW WHAT
+ YOU'RE DOING. This flag might be removed at any time in any future
+ update.
+
Example:
>>> proxy = urllib3.ProxyManager('http://localhost:3128/')
>>> r1 = proxy.request('GET', 'http://google.com/')
@@ -402,6 +433,7 @@ class ProxyManager(PoolManager):
num_pools=10,
headers=None,
proxy_headers=None,
+ _allow_https_proxy_to_see_traffic=False,
**connection_pool_kw
):
@@ -412,19 +444,22 @@ class ProxyManager(PoolManager):
proxy_url.port,
)
proxy = parse_url(proxy_url)
- if not proxy.port:
- port = port_by_scheme.get(proxy.scheme, 80)
- proxy = proxy._replace(port=port)
if proxy.scheme not in ("http", "https"):
raise ProxySchemeUnknown(proxy.scheme)
+ if not proxy.port:
+ port = port_by_scheme.get(proxy.scheme, 80)
+ proxy = proxy._replace(port=port)
+
self.proxy = proxy
self.proxy_headers = proxy_headers or {}
connection_pool_kw["_proxy"] = self.proxy
connection_pool_kw["_proxy_headers"] = self.proxy_headers
+ self.allow_insecure_proxy = _allow_https_proxy_to_see_traffic
+
super(ProxyManager, self).__init__(num_pools, headers, **connection_pool_kw)
def connection_from_host(self, host, port=None, scheme="http", pool_kwargs=None):
@@ -452,14 +487,35 @@ class ProxyManager(PoolManager):
headers_.update(headers)
return headers_
+ def _validate_proxy_scheme_url_selection(self, url_scheme):
+ if (
+ url_scheme == "https"
+ and self.proxy.scheme == "https"
+ and not self.allow_insecure_proxy
+ ):
+ warnings.warn(
+ "Your proxy configuration specified an HTTPS scheme for the proxy. "
+ "Are you sure you want to use HTTPS to contact the proxy? "
+ "This most likely indicates an error in your configuration."
+ "If you are sure you want use HTTPS to contact the proxy, enable "
+ "the _allow_https_proxy_to_see_traffic.",
+ InvalidProxyConfigurationWarning,
+ )
+
+ raise ProxySchemeUnsupported(
+ "Contacting HTTPS destinations through HTTPS proxies is not supported."
+ )
+
def urlopen(self, method, url, redirect=True, **kw):
"Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute."
u = parse_url(url)
+ self._validate_proxy_scheme_url_selection(u.scheme)
- if u.scheme == "http":
- # For proxied HTTPS requests, httplib sets the necessary headers
- # on the CONNECT to the proxy. For HTTP, we'll definitely
- # need to set 'Host' at the very least.
+ if u.scheme == "http" or self.proxy.scheme == "https":
+ # For connections using HTTP CONNECT, httplib sets the necessary
+ # headers on the CONNECT to the proxy. For HTTP or when talking
+ # HTTPS to the proxy, we'll definitely need to set 'Host' at the
+ # very least.
headers = kw.get("headers", self.headers)
kw["headers"] = self._set_proxy_headers(url, headers)