diff options
author | Seth Michael Larson <sethmichaellarson@gmail.com> | 2022-11-10 15:02:02 -0600 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-11-10 15:02:02 -0600 |
commit | e0243ada1afb46f46ac9c5c25b8d638327a6e883 (patch) | |
tree | e439ab0df1776e4f548e7d160bff0bb40ea24e5a | |
parent | c2d37391b96ef398df662086422ac960beced071 (diff) | |
download | urllib3-e0243ada1afb46f46ac9c5c25b8d638327a6e883.tar.gz |
Create BaseHTTPConnection APIs
-rw-r--r-- | .coveragerc | 2 | ||||
-rw-r--r-- | changelog/1985.removal.rst | 22 | ||||
-rw-r--r-- | ci/0005-botocore-fakesocket-settimeout.patch | 14 | ||||
-rw-r--r-- | docs/conf.py | 1 | ||||
-rw-r--r-- | notes/connection-lifecycle.md | 130 | ||||
-rw-r--r-- | notes/public-and-private-apis.md | 28 | ||||
-rw-r--r-- | noxfile.py | 1 | ||||
-rw-r--r-- | src/urllib3/__init__.py | 2 | ||||
-rw-r--r-- | src/urllib3/_base_connection.py | 171 | ||||
-rw-r--r-- | src/urllib3/_request_methods.py | 2 | ||||
-rw-r--r-- | src/urllib3/connection.py | 216 | ||||
-rw-r--r-- | src/urllib3/connectionpool.py | 172 | ||||
-rw-r--r-- | src/urllib3/response.py | 3 | ||||
-rw-r--r-- | src/urllib3/util/connection.py | 18 | ||||
-rw-r--r-- | src/urllib3/util/retry.py | 12 | ||||
-rw-r--r-- | src/urllib3/util/timeout.py | 4 | ||||
-rw-r--r-- | src/urllib3/util/url.py | 10 | ||||
-rw-r--r-- | test/test_connectionpool.py | 4 | ||||
-rw-r--r-- | test/with_dummyserver/test_connection.py | 76 | ||||
-rw-r--r-- | test/with_dummyserver/test_connectionpool.py | 41 | ||||
-rw-r--r-- | test/with_dummyserver/test_https.py | 41 | ||||
-rw-r--r-- | test/with_dummyserver/test_proxy_poolmanager.py | 2 |
22 files changed, 756 insertions, 216 deletions
diff --git a/.coveragerc b/.coveragerc index 50e2acde..4e955040 100644 --- a/.coveragerc +++ b/.coveragerc @@ -25,5 +25,5 @@ exclude_lines = .*:.* # Python \d.* .* # Abstract .* # Defensive: - if TYPE_CHECKING: + if (?:typing.)?TYPE_CHECKING: ^\s*?\.\.\.\s*$ diff --git a/changelog/1985.removal.rst b/changelog/1985.removal.rst new file mode 100644 index 00000000..7f7b51b5 --- /dev/null +++ b/changelog/1985.removal.rst @@ -0,0 +1,22 @@ +All changes below are to distance the ``urllib3.connection.HTTPConnection`` API from the standard library ``http.client.HTTPConnection`` API enough to allow future HTTP implementations. + +Added the ``scheme`` parameter to ``HTTPConnection.set_tunnel`` to configure the scheme of the origin being tunnelled to. + +Added the ``is_closed``, ``is_connected`` and ``has_connected_to_proxy`` properties to ``HTTPConnection``. + +Changed all parameters in the ``HTTPConnection`` and ``HTTPSConnection`` constructors to be keyword-only except ``host`` and ``port``. + +Changed ``urllib3.util.is_connection_dropped()`` to use ``HTTPConnection.is_connected``. + +Changed ``HTTPConnection.getresponse()`` to set the socket timeout from ``HTTPConnection.timeout`` value before reading +data from the socket. This previously was done manually by the ``HTTPConnectionPool`` calling ``HTTPConnection.sock.settimeout(...)``. + +Changed the ``_proxy_host`` property to ``_tunnel_host`` in ``HTTPConnectionPool`` to more closely match how the property is used (value in ``HTTPConnection.set_tunnel()``). + +Removed the ``_prepare_conn`` method from ``HTTPConnectionPool``. Previously this was only used to call ``HTTPSConnection.set_cert()`` by ``HTTPSConnectionPool``. + +Removed ``tls_in_tls_required`` property from ``HTTPSConnection``. This is now calculated from the ``scheme`` parameter in ``HTTPConnection.set_tunnel()``. + +Deprecated ``HTTPSConnection.set_cert()`` method. Instead pass parameters to the ``HTTPSConnection`` constructor. + +Deprecated ``HTTPConnection.request_chunked()`` method. Instead pass ``chunked=True`` to ``HTTPConnection.request()``. diff --git a/ci/0005-botocore-fakesocket-settimeout.patch b/ci/0005-botocore-fakesocket-settimeout.patch new file mode 100644 index 00000000..2a1fd310 --- /dev/null +++ b/ci/0005-botocore-fakesocket-settimeout.patch @@ -0,0 +1,14 @@ +diff --git a/tests/unit/test_awsrequest.py b/tests/unit/test_awsrequest.py +index 22bd9a7..c9d4060 100644 +--- a/tests/unit/test_awsrequest.py ++++ b/tests/unit/test_awsrequest.py +@@ -58,6 +59,9 @@ class FakeSocket: + def close(self): + pass + ++ def settimeout(self, value): ++ pass ++ + + class BytesIOWithLen(six.BytesIO): + def __len__(self): diff --git a/docs/conf.py b/docs/conf.py index a648f307..ffcd6d9c 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -102,4 +102,5 @@ nitpick_ignore = [ ("py:class", "urllib3._request_methods.RequestMethods"), ("py:class", "urllib3.contrib.socks._TYPE_SOCKS_OPTIONS"), ("py:class", "urllib3.util.timeout._TYPE_DEFAULT"), + ("py:class", "BaseHTTPConnection"), ] diff --git a/notes/connection-lifecycle.md b/notes/connection-lifecycle.md new file mode 100644 index 00000000..e7c007fc --- /dev/null +++ b/notes/connection-lifecycle.md @@ -0,0 +1,130 @@ +# Connection lifecycle + +## Current implementation + +`HTTPConnection` should be instantiated with `host` and `port` of the +**first origin being connected to** to reach the target origin. This either means +the target origin itself or the proxy origin if one is desired. + +```python +import urllib3.connection + +# Initialize the HTTPSConnection ('https://...') +conn = urllib3.connection.HTTPSConnection( + host="example.com", + # Here you can configure other options like + # 'ssl_minimum_version', 'ca_certs', etc. +) + +# Set the connect timeout either in the +# constructor above or via the property. +conn.timeout = 3.0 # (connect timeout) +``` + +If using CONNECT tunneling with the proxy, call `HTTPConnection.set_tunnel()` +with the tunneled host, port, and headers. This should be called before calling +`HTTPConnection.connect()` or sending a request. + +```python +conn = urllib3.connection.HTTPConnection( + # Remember that the *first* origin we want to connect to should + # be configured as 'host' and 'port', *not* the target origin. + host="myproxy.net", + port=8080, + proxy="http://myproxy.net:8080" +) + +conn.set_tunnel("example.com", scheme="http", headers={"Proxy-Header": "value"}) +``` + +Connect to the first origin by calling the `HTTPConnection.connect()` method. +If an error occurs here you can check whether the error occurred during the +connection to the proxy if `HTTPConnection.has_connected_to_proxy` is false. +If the value is true then the error didn't occur while connecting to a proxy. + +```python +# Explicitly connect to the origin. This isn't +# required as sending the first request will +# automatically connect if not done explicitly. +conn.connect() +``` + +After connecting to the origin, the connection can be checked to see if `is_verified` is set to true. If not the `HTTPConnectionPool` would emit a warning. The warning only matters for when verification is disabled, because otherwise an error is raised on unverified TLS handshake. + +```python +if not conn.is_verified: + # There isn't a verified TLS connection to target origin. +if not conn.is_proxy_verified: + # There isn't a verified TLS connection to proxy origin. +``` + +If the read timeout is different from the connect timeout then the +`HTTPConnection.timeout` property can be changed at this point. + +```python +conn.timeout = 5.0 # (read timeout) +``` + +Then the HTTP request can be sent with `HTTPConnection.request()`. If a `BrokenPipeError` is raised while sending the request body it can be swallowed as a response can still be received from the origin even when the request isn't completely sent. + +```python +try: + conn.request("GET", "/") +except BrokenPipeError: + # We can still try to get a response! + +resp = conn.getresponse() +``` + +Then response headers (and other info) are read from the connection via `HTTPConnection.getresponse()` and returned as a `urllib3.HTTPResponse`. The `HTTPResponse` instance carries a reference to the `HTTPConnection` instance so the connection can be closed if the connection gets into an undefined protocol state. + +```python +assert resp.connection is conn +``` + +If pooling is in use the `HTTPConnectionPool` will set `_pool` on the `HTTPResponse` instance. This will return the connection to the pool once the response is exhausted. If retries are in use set `retries` on the `HTTPResponse` instance. + +```python +# Set by the HTTPConnectionPool before returning to the caller. +resp = conn.getresponse() +resp._pool = pool + +# This will call resp._pool._put_conn(resp.connection) +# Connection can get auto-released by exhausting. +resp.release_conn() +``` + +If any error is received from connecting to the origin, sending the request, or receiving the response, the caller will call `HTTPConnection.close()` and discard the connection. Connections can be re-used after being closed, a new TCP connection to proxies and origins will be established. + +If instead of a tunneling proxy we were using a forwarding proxy then we configure the `HTTPConnection` similarly, except instead of `set_tunnel()` we send absolute URLs to `HTTPConnection.request()`: + +```python +import urllib3.connection + +# Initialize the HTTPConnection. +conn = urllib3.connection.HTTPConnection( + host="myproxy.net", + port=8080, + proxy="http://myproxy.net:8080" +) + +# You can request HTTP or HTTPS resources over the proxy +# using the absolute URL. +conn.request("GET", "http://example.com") +resp = conn.getresponse() + +conn.request("GET", "https://example.com") +resp = conn.getresponse() +``` + +### HTTP/HTTPS/proxies + +This is how `HTTPConnection` instances will be configured and used when a `PoolManager` or `ProxyManager` receives a given config: + +- No proxy, HTTP origin -> `HTTPConnection` +- No proxy, HTTPS origin -> `HTTPSConnection` +- HTTP proxy, HTTP origin -> `HTTPConnection` in forwarding mode +- HTTP proxy, HTTPS origin -> `HTTPSConnection` in tunnel mode +- HTTPS proxy, HTTP origin -> `HTTPSConnection` in forwarding mode +- HTTPS proxy, HTTPS origin -> `HTTPSConnection` in tunnel mode +- HTTPS proxy, HTTPS origin, `ProxyConfig.use_forwarding_for_https=True` -> `HTTPSConnection` in forwarding mode diff --git a/notes/public-and-private-apis.md b/notes/public-and-private-apis.md new file mode 100644 index 00000000..6321696d --- /dev/null +++ b/notes/public-and-private-apis.md @@ -0,0 +1,28 @@ +# Public and private APIs + +## Public APIs + +- `urllib3.request()` +- `urllib3.PoolManager` +- `urllib3.ProxyManager` +- `urllib3.HTTPConnectionPool` +- `urllib3.HTTPSConnectionPool` +- `urllib3.BaseHTTPResponse` +- `urllib3.HTTPResponse` +- `urllib3.HTTPHeaderDict` +- `urllib3.filepost` +- `urllib3.fields` +- `urllib3.exceptions` +- `urllib3.contrib.*` +- `urllib3.util` + +Only public way to configure proxies is through `ProxyManager`? + +## Private APIs + +- `urllib3.connection` +- `urllib3.connection.BaseHTTPConnection` +- `urllib3.connection.BaseHTTPSConnection` +- `urllib3.connection.HTTPConnection` +- `urllib3.connection.HTTPSConnection` +- `urllib3.util.*` (submodules) @@ -95,6 +95,7 @@ def downstream_botocore(session: nox.Session) -> None: for patch in [ "0001-Mark-100-Continue-tests-as-failing.patch", "0002-Stop-relying-on-removed-DEFAULT_CIPHERS.patch", + "0005-botocore-fakesocket-settimeout.patch", ]: session.run("git", "apply", f"{root}/ci/{patch}", external=True) session.run("git", "rev-parse", "HEAD", external=True) diff --git a/src/urllib3/__init__.py b/src/urllib3/__init__.py index ec99d060..66f81506 100644 --- a/src/urllib3/__init__.py +++ b/src/urllib3/__init__.py @@ -9,9 +9,9 @@ from logging import NullHandler from typing import Any, Mapping, Optional, TextIO, Type, Union from . import exceptions +from ._base_connection import _TYPE_BODY from ._collections import HTTPHeaderDict from ._version import __version__ -from .connection import _TYPE_BODY from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool, connection_from_url from .filepost import _TYPE_FIELDS, encode_multipart_formdata from .poolmanager import PoolManager, ProxyManager, proxy_from_url diff --git a/src/urllib3/_base_connection.py b/src/urllib3/_base_connection.py new file mode 100644 index 00000000..7083298a --- /dev/null +++ b/src/urllib3/_base_connection.py @@ -0,0 +1,171 @@ +import typing + +from .util.connection import _TYPE_SOCKET_OPTIONS +from .util.timeout import _DEFAULT_TIMEOUT, _TYPE_TIMEOUT +from .util.url import Url + +_TYPE_BODY = typing.Union[bytes, typing.IO[typing.Any], typing.Iterable[bytes], str] + + +class ProxyConfig(typing.NamedTuple): + ssl_context: typing.Optional["ssl.SSLContext"] + use_forwarding_for_https: bool + assert_hostname: typing.Union[None, str, "Literal[False]"] + assert_fingerprint: typing.Optional[str] + + +class _ResponseOptions(typing.NamedTuple): + # TODO: Remove this in favor of a better + # HTTP request/response lifecycle tracking. + request_method: str + request_url: str + preload_content: bool + decode_content: bool + enforce_content_length: bool + + +if typing.TYPE_CHECKING: + import ssl + + from typing_extensions import Literal, Protocol + + from .response import BaseHTTPResponse + + class BaseHTTPConnection(Protocol): + default_port: typing.ClassVar[int] + default_socket_options: typing.ClassVar[_TYPE_SOCKET_OPTIONS] + + host: str + port: int + timeout: typing.Optional[ + float + ] # Instance doesn't store _DEFAULT_TIMEOUT, must be resolved. + blocksize: int + source_address: typing.Optional[typing.Tuple[str, int]] + socket_options: typing.Optional[_TYPE_SOCKET_OPTIONS] + + proxy: typing.Optional[Url] + proxy_config: typing.Optional[ProxyConfig] + + is_verified: bool + proxy_is_verified: typing.Optional[bool] + + def __init__( + self, + host: str, + port: typing.Optional[int] = None, + *, + timeout: _TYPE_TIMEOUT = _DEFAULT_TIMEOUT, + source_address: typing.Optional[typing.Tuple[str, int]] = None, + blocksize: int = 8192, + socket_options: typing.Optional[_TYPE_SOCKET_OPTIONS] = ..., + proxy: typing.Optional[Url] = None, + proxy_config: typing.Optional[ProxyConfig] = None, + ) -> None: + ... + + def set_tunnel( + self, + host: str, + port: typing.Optional[int] = None, + headers: typing.Optional[typing.Mapping[str, str]] = None, + scheme: str = "http", + ) -> None: + ... + + def connect(self) -> None: + ... + + def request( + self, + method: str, + url: str, + body: typing.Optional[_TYPE_BODY] = None, + headers: typing.Optional[typing.Mapping[str, str]] = None, + # We know *at least* botocore is depending on the order of the + # first 3 parameters so to be safe we only mark the later ones + # as keyword-only to ensure we have space to extend. + *, + chunked: bool = False, + preload_content: bool = True, + decode_content: bool = True, + enforce_content_length: bool = True, + ) -> None: + ... + + def getresponse(self) -> "BaseHTTPResponse": + ... + + def close(self) -> None: + ... + + @property + def is_closed(self) -> bool: + """Whether the connection either is brand new or has been previously closed. + If this property is True then both ``is_connected`` and ``has_connected_to_proxy`` + properties must be False. + """ + + @property + def is_connected(self) -> bool: + """Whether the connection is actively connected to any origin (proxy or target)""" + + @property + def has_connected_to_proxy(self) -> bool: + """Whether the connection has successfully connected to its proxy. + This returns False if no proxy is in use. Used to determine whether + errors are coming from the proxy layer or from tunnelling to the target origin. + """ + + class BaseHTTPSConnection(BaseHTTPConnection, Protocol): + default_port: typing.ClassVar[int] + default_socket_options: typing.ClassVar[_TYPE_SOCKET_OPTIONS] + + # Certificate verification methods + cert_reqs: typing.Optional[typing.Union[int, str]] + assert_hostname: typing.Union[None, str, "Literal[False]"] + assert_fingerprint: typing.Optional[str] + ssl_context: typing.Optional[ssl.SSLContext] + + # Trusted CAs + ca_certs: typing.Optional[str] + ca_cert_dir: typing.Optional[str] + ca_cert_data: typing.Union[None, str, bytes] + + # TLS version + ssl_minimum_version: typing.Optional[int] + ssl_maximum_version: typing.Optional[int] + ssl_version: typing.Optional[typing.Union[int, str]] # Deprecated + + # Client certificates + cert_file: typing.Optional[str] + key_file: typing.Optional[str] + key_password: typing.Optional[str] + + def __init__( + self, + host: str, + port: typing.Optional[int] = None, + *, + timeout: _TYPE_TIMEOUT = _DEFAULT_TIMEOUT, + source_address: typing.Optional[typing.Tuple[str, int]] = None, + blocksize: int = 8192, + socket_options: typing.Optional[_TYPE_SOCKET_OPTIONS] = ..., + proxy: typing.Optional[Url] = None, + proxy_config: typing.Optional[ProxyConfig] = None, + cert_reqs: typing.Optional[typing.Union[int, str]] = None, + assert_hostname: typing.Union[None, str, "Literal[False]"] = None, + assert_fingerprint: typing.Optional[str] = None, + server_hostname: typing.Optional[str] = None, + ssl_context: typing.Optional["ssl.SSLContext"] = None, + ca_certs: typing.Optional[str] = None, + ca_cert_dir: typing.Optional[str] = None, + ca_cert_data: typing.Union[None, str, bytes] = None, + ssl_minimum_version: typing.Optional[int] = None, + ssl_maximum_version: typing.Optional[int] = None, + ssl_version: typing.Optional[typing.Union[int, str]] = None, # Deprecated + cert_file: typing.Optional[str] = None, + key_file: typing.Optional[str] = None, + key_password: typing.Optional[str] = None, + ) -> None: + ... diff --git a/src/urllib3/_request_methods.py b/src/urllib3/_request_methods.py index d4bada0d..df65b152 100644 --- a/src/urllib3/_request_methods.py +++ b/src/urllib3/_request_methods.py @@ -2,8 +2,8 @@ import json as _json from typing import Any, Dict, Mapping, Optional, Sequence, Tuple, Union from urllib.parse import urlencode +from ._base_connection import _TYPE_BODY from ._collections import HTTPHeaderDict -from .connection import _TYPE_BODY from .filepost import _TYPE_FIELDS, encode_multipart_formdata from .response import BaseHTTPResponse diff --git a/src/urllib3/connection.py b/src/urllib3/connection.py index 679c4514..1eb9b54c 100644 --- a/src/urllib3/connection.py +++ b/src/urllib3/connection.py @@ -9,12 +9,8 @@ from http.client import HTTPException as HTTPException # noqa: F401 from http.client import ResponseNotReady from socket import timeout as SocketTimeout from typing import ( - IO, TYPE_CHECKING, - Any, - Callable, ClassVar, - Iterable, Mapping, NamedTuple, Optional, @@ -34,6 +30,7 @@ from ._collections import HTTPHeaderDict from .util.response import assert_header_parsing from .util.timeout import _DEFAULT_TIMEOUT, _TYPE_TIMEOUT, Timeout from .util.util import to_str +from .util.wait import wait_for_read try: # Compiled with SSL? import ssl @@ -46,6 +43,9 @@ except (ImportError, AttributeError): pass +from ._base_connection import _TYPE_BODY +from ._base_connection import ProxyConfig as ProxyConfig +from ._base_connection import _ResponseOptions as _ResponseOptions from ._version import __version__ from .exceptions import ( ConnectTimeoutError, @@ -83,25 +83,6 @@ RECENT_DATE = datetime.date(2022, 1, 1) _CONTAINS_CONTROL_CHAR_RE = re.compile(r"[^-!#$%&'*+.^_`|~0-9a-zA-Z]") -_TYPE_BODY = Union[bytes, IO[Any], Iterable[bytes], str] - - -class ProxyConfig(NamedTuple): - ssl_context: Optional["ssl.SSLContext"] - use_forwarding_for_https: bool - assert_hostname: Union[None, str, "Literal[False]"] - assert_fingerprint: Optional[str] - - -class _ResponseOptions(NamedTuple): - # TODO: Remove this in favor of a better - # HTTP request/response lifecycle tracking. - request_method: str - request_url: str - preload_content: bool - decode_content: bool - enforce_content_length: bool - class HTTPConnection(_HTTPConnection): """ @@ -128,33 +109,36 @@ class HTTPConnection(_HTTPConnection): Or you may want to disable the defaults by passing an empty list (e.g., ``[]``). """ - default_port: int = port_by_scheme["http"] + default_port: ClassVar[int] = port_by_scheme["http"] # type: ignore[misc] #: Disable Nagle's algorithm by default. #: ``[(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)]`` - default_socket_options: connection._TYPE_SOCKET_OPTIONS = [ + default_socket_options: ClassVar[connection._TYPE_SOCKET_OPTIONS] = [ (socket.IPPROTO_TCP, socket.TCP_NODELAY, 1) ] #: Whether this connection verifies the host's certificate. is_verified: bool = False - #: Whether this proxy connection (if used) verifies the proxy host's - #: certificate. If no HTTPS proxy is being used will be ``None``. + #: Whether this proxy connection verified the proxy host's certificate. + # If no proxy is currently connected to the value will be ``None``. proxy_is_verified: Optional[bool] = None blocksize: int source_address: Optional[Tuple[str, int]] socket_options: Optional[connection._TYPE_SOCKET_OPTIONS] - _tunnel_host: Optional[str] - _tunnel: ClassVar[Callable[["HTTPConnection"], None]] - _connecting_to_proxy: bool + + _has_connected_to_proxy: bool _response_options: Optional[_ResponseOptions] + _tunnel_host: Optional[str] + _tunnel_port: Optional[int] + _tunnel_scheme: Optional[str] def __init__( self, host: str, port: Optional[int] = None, + *, timeout: _TYPE_TIMEOUT = _DEFAULT_TIMEOUT, source_address: Optional[Tuple[str, int]] = None, blocksize: int = 8192, @@ -164,15 +148,6 @@ class HTTPConnection(_HTTPConnection): proxy: Optional[Url] = None, proxy_config: Optional[ProxyConfig] = None, ) -> None: - # Pre-set source_address. - self.source_address = source_address - - self.socket_options = socket_options - - # Proxy options provided by the user. - self.proxy = proxy - self.proxy_config = proxy_config - super().__init__( host=host, port=port, @@ -180,9 +155,15 @@ class HTTPConnection(_HTTPConnection): source_address=source_address, blocksize=blocksize, ) + self.socket_options = socket_options + self.proxy = proxy + self.proxy_config = proxy_config - self._connecting_to_proxy = False + self._has_connected_to_proxy = False self._response_options = None + self._tunnel_host: Optional[str] = None + self._tunnel_port: Optional[int] = None + self._tunnel_scheme: Optional[str] = None # https://github.com/python/mypy/issues/4125 # Mypy treats this as LSP violation, which is considered a bug. @@ -223,7 +204,6 @@ class HTTPConnection(_HTTPConnection): :return: New socket connection. """ - try: sock = connection.create_connection( (self._dns_host, self.port), @@ -246,25 +226,62 @@ class HTTPConnection(_HTTPConnection): return sock - def _is_using_tunnel(self) -> Optional[str]: - return self._tunnel_host + def set_tunnel( + self, + host: str, + port: Optional[int] = None, + headers: Optional[Mapping[str, str]] = None, + scheme: str = "http", + ) -> None: + if scheme not in ("http", "https"): + raise ValueError( + f"Invalid proxy scheme for tunneling: {scheme!r}, must be either 'http' or 'https'" + ) + super().set_tunnel(host, port=port, headers=headers) + self._tunnel_scheme = scheme + + def connect(self) -> None: + self.sock = self._new_conn() + if self._tunnel_host: + # If we're tunneling it means we're connected to our proxy. + self._has_connected_to_proxy = True - def _prepare_conn(self, conn: socket.socket) -> None: - self.sock = conn - if self._is_using_tunnel(): # TODO: Fix tunnel so it doesn't depend on self.sock state. - self._tunnel() - self._connecting_to_proxy = False + self._tunnel() # type: ignore[attr-defined] - def connect(self) -> None: - self._connecting_to_proxy = bool(self.proxy) - conn = self._new_conn() - self._prepare_conn(conn) - self._connecting_to_proxy = False + # If there's a proxy to be connected to we are fully connected. + # This is set twice (once above and here) due to forwarding proxies + # not using tunnelling. + self._has_connected_to_proxy = bool(self.proxy) + + @property + def is_closed(self) -> bool: + return self.sock is None + + @property + def is_connected(self) -> bool: + if self.sock is None: + return False + return not wait_for_read(self.sock, timeout=0.0) + + @property + def has_connected_to_proxy(self) -> bool: + return self._has_connected_to_proxy def close(self) -> None: - self._connecting_to_proxy = False - super().close() + try: + super().close() + finally: + # Reset all stateful properties so connection + # can be re-used without leaking prior configs. + self.sock = None + self.is_verified = False + self.proxy_is_verified = None + self._has_connected_to_proxy = False + self._response_options = None + self._tunnel_host = None + self._tunnel_port = None + self._tunnel_scheme = None def putrequest( self, @@ -306,6 +323,7 @@ class HTTPConnection(_HTTPConnection): url: str, body: Optional[_TYPE_BODY] = None, headers: Optional[Mapping[str, str]] = None, + *, chunked: bool = False, preload_content: bool = True, decode_content: bool = True, @@ -403,6 +421,12 @@ class HTTPConnection(_HTTPConnection): Alternative to the common request method, which sends the body with chunked encoding and not as one block """ + warnings.warn( + "HTTPConnection.request_chunked() is deprecated and will be removed in a " + "future version. Instead use HTTPConnection.request(..., chunked=True).", + category=DeprecationWarning, + stacklevel=2, + ) self.request(method, url, body=body, headers=headers, chunked=True) def getresponse( # type: ignore[override] @@ -423,6 +447,10 @@ class HTTPConnection(_HTTPConnection): resp_options = self._response_options self._response_options = None + # Since the connection's timeout value may have been updated + # we need to set the timeout on the socket. + self.sock.settimeout(self.timeout) + # This is needed here to avoid circular import errors from .response import HTTPResponse @@ -463,7 +491,7 @@ class HTTPSConnection(HTTPConnection): socket by means of :py:func:`urllib3.util.ssl_wrap_socket`. """ - default_port = port_by_scheme["https"] + default_port = port_by_scheme["https"] # type: ignore[misc] cert_reqs: Optional[Union[int, str]] = None ca_certs: Optional[str] = None @@ -473,18 +501,13 @@ class HTTPSConnection(HTTPConnection): ssl_minimum_version: Optional[int] = None ssl_maximum_version: Optional[int] = None assert_fingerprint: Optional[str] = None - tls_in_tls_required: bool = False def __init__( self, host: str, port: Optional[int] = None, - key_file: Optional[str] = None, - cert_file: Optional[str] = None, - key_password: Optional[str] = None, + *, timeout: _TYPE_TIMEOUT = _DEFAULT_TIMEOUT, - ssl_context: Optional["ssl.SSLContext"] = None, - server_hostname: Optional[str] = None, source_address: Optional[Tuple[str, int]] = None, blocksize: int = 8192, socket_options: Optional[ @@ -492,6 +515,20 @@ class HTTPSConnection(HTTPConnection): ] = HTTPConnection.default_socket_options, proxy: Optional[Url] = None, proxy_config: Optional[ProxyConfig] = None, + cert_reqs: Optional[Union[int, str]] = None, + assert_hostname: Union[None, str, "Literal[False]"] = None, + assert_fingerprint: Optional[str] = None, + server_hostname: Optional[str] = None, + ssl_context: Optional["ssl.SSLContext"] = None, + ca_certs: Optional[str] = None, + ca_cert_dir: Optional[str] = None, + ca_cert_data: Union[None, str, bytes] = None, + ssl_minimum_version: Optional[int] = None, + ssl_maximum_version: Optional[int] = None, + ssl_version: Optional[Union[int, str]] = None, # Deprecated + cert_file: Optional[str] = None, + key_file: Optional[str] = None, + key_password: Optional[str] = None, ) -> None: super().__init__( @@ -510,9 +547,22 @@ class HTTPSConnection(HTTPConnection): self.key_password = key_password self.ssl_context = ssl_context self.server_hostname = server_hostname - self.ssl_version = None - self.ssl_minimum_version = None - self.ssl_maximum_version = None + self.assert_hostname = assert_hostname + self.assert_fingerprint = assert_fingerprint + self.ssl_version = ssl_version + self.ssl_minimum_version = ssl_minimum_version + self.ssl_maximum_version = ssl_maximum_version + self.ca_certs = ca_certs and os.path.expanduser(ca_certs) + self.ca_cert_dir = ca_cert_dir and os.path.expanduser(ca_cert_dir) + self.ca_cert_data = ca_cert_data + + # cert_reqs depends on ssl_context so calculate last. + if cert_reqs is None: + if self.ssl_context is not None: + cert_reqs = self.ssl_context.verify_mode + else: + cert_reqs = resolve_cert_reqs(None) + self.cert_reqs = cert_reqs def set_cert( self, @@ -529,6 +579,14 @@ class HTTPSConnection(HTTPConnection): """ This method should only be called once, before the connection is used. """ + warnings.warn( + "HTTPSConnection.set_cert() is deprecated and will be removed in a " + "future version. Instead provide the parameters to the HTTPSConnection " + "constructor.", + category=DeprecationWarning, + stacklevel=2, + ) + # If cert_reqs is not provided we'll assume CERT_REQUIRED unless we also # have an SSLContext object in which case we'll use its verify_mode. if cert_reqs is None: @@ -548,25 +606,24 @@ class HTTPSConnection(HTTPConnection): self.ca_cert_data = ca_cert_data def connect(self) -> None: - self._connecting_to_proxy = bool(self.proxy) - sock: Union[socket.socket, "ssl.SSLSocket"] self.sock = sock = self._new_conn() server_hostname: str = self.host tls_in_tls = False - if self._is_using_tunnel(): - if self.tls_in_tls_required: + # Do we need to establish a tunnel? + if self._tunnel_host is not None: + # We're tunneling to an HTTPS origin so need to do TLS-in-TLS. + if self._tunnel_scheme == "https": self.sock = sock = self._connect_tls_proxy(self.host, sock) tls_in_tls = True - self._connecting_to_proxy = False + # If we're tunneling it means we're connected to our proxy. + self._has_connected_to_proxy = True - self._tunnel() + self._tunnel() # type: ignore[attr-defined] # Override the host with the one we're requesting data from. - server_hostname = cast( - str, self._tunnel_host - ) # self._tunnel_host is not None, because self._is_using_tunnel() returned a truthy value. + server_hostname = self._tunnel_host if self.server_hostname is not None: server_hostname = self.server_hostname @@ -601,15 +658,18 @@ class HTTPSConnection(HTTPConnection): ) self.sock = sock_and_verified.socket self.is_verified = sock_and_verified.is_verified - self._connecting_to_proxy = False + + # If there's a proxy to be connected to we are fully connected. + # This is set twice (once above and here) due to forwarding proxies + # not using tunnelling. + self._has_connected_to_proxy = bool(self.proxy) def _connect_tls_proxy(self, hostname: str, sock: socket.socket) -> "ssl.SSLSocket": """ Establish a TLS connection to the proxy using the provided SSL context. """ - proxy_config = cast( - ProxyConfig, self.proxy_config - ) # `_connect_tls_proxy` is called when self._is_using_tunnel() is truthy. + # `_connect_tls_proxy` is called when self._tunnel_host is truthy. + proxy_config = cast(ProxyConfig, self.proxy_config) ssl_context = proxy_config.ssl_context sock_and_verified = _ssl_wrap_socket_and_match_hostname( sock, diff --git a/src/urllib3/connectionpool.py b/src/urllib3/connectionpool.py index 35c5c23b..9d520661 100644 --- a/src/urllib3/connectionpool.py +++ b/src/urllib3/connectionpool.py @@ -8,9 +8,9 @@ from socket import timeout as SocketTimeout from types import TracebackType from typing import TYPE_CHECKING, Any, Mapping, Optional, Type, TypeVar, Union, overload +from ._base_connection import _TYPE_BODY from ._request_methods import RequestMethods from .connection import ( - _TYPE_BODY, BaseSSLError, BrokenPipeError, DummyConnection, @@ -18,7 +18,6 @@ from .connection import ( HTTPException, HTTPSConnection, ProxyConfig, - VerifiedHTTPSConnection, _wrap_proxy_error, ) from .connection import port_by_scheme as port_by_scheme @@ -37,7 +36,7 @@ from .exceptions import ( SSLError, TimeoutError, ) -from .response import HTTPResponse +from .response import BaseHTTPResponse from .util.connection import is_connection_dropped from .util.proxy import connection_requires_http_tunnel from .util.request import _TYPE_BODY_POSITION, set_file_position @@ -54,6 +53,8 @@ if TYPE_CHECKING: from typing_extensions import Literal + from ._base_connection import BaseHTTPConnection, BaseHTTPSConnection + log = logging.getLogger(__name__) _TYPE_TIMEOUT = Union[Timeout, float, _TYPE_DEFAULT] @@ -81,9 +82,14 @@ class ConnectionPool: raise LocationValueError("No host specified.") self.host = _normalize_host(host, scheme=self.scheme) - self._proxy_host = host.lower() self.port = port + # This property uses 'normalize_host()' (not '_normalize_host()') + # to avoid removing square braces around IPv6 addresses. + # This value is sent to `HTTPConnection.set_tunnel()` if called + # because square braces are required for HTTP CONNECT tunneling. + self._tunnel_host = normalize_host(host, scheme=self.scheme).lower() + def __str__(self) -> str: return f"{type(self).__name__}(host={self.host!r}, port={self.port!r})" @@ -164,8 +170,9 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): """ scheme = "http" - ConnectionCls: Type[Union[HTTPConnection, HTTPSConnection]] = HTTPConnection - ResponseCls = HTTPResponse + ConnectionCls: Union[ + Type["BaseHTTPConnection"], Type["BaseHTTPSConnection"] + ] = HTTPConnection def __init__( self, @@ -228,7 +235,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # HTTPConnectionPool object is garbage collected. weakref.finalize(self, _close_pool_connections, pool) - def _new_conn(self) -> HTTPConnection: + def _new_conn(self) -> "BaseHTTPConnection": """ Return a fresh :class:`HTTPConnection`. """ @@ -248,7 +255,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): ) return conn - def _get_conn(self, timeout: Optional[float] = None) -> HTTPConnection: + def _get_conn(self, timeout: Optional[float] = None) -> "BaseHTTPConnection": """ Get a connection. Will return a pooled connection if one is available. @@ -286,7 +293,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): return conn or self._new_conn() - def _put_conn(self, conn: Optional[HTTPConnection]) -> None: + def _put_conn(self, conn: Optional["BaseHTTPConnection"]) -> None: """ Put a connection back into the pool. @@ -330,13 +337,13 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): if conn: conn.close() - def _validate_conn(self, conn: HTTPConnection) -> None: + def _validate_conn(self, conn: "BaseHTTPConnection") -> None: """ Called right before a request is made, after the socket is created. """ pass - def _prepare_proxy(self, conn: HTTPConnection) -> None: + def _prepare_proxy(self, conn: "BaseHTTPConnection") -> None: # Nothing to do for HTTP connections. pass @@ -373,7 +380,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): def _make_request( self, - conn: HTTPConnection, + conn: "BaseHTTPConnection", method: str, url: str, body: Optional[_TYPE_BODY] = None, @@ -381,11 +388,11 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): retries: Optional[Retry] = None, timeout: _TYPE_TIMEOUT = _DEFAULT_TIMEOUT, chunked: bool = False, - response_conn: Optional[HTTPConnection] = None, + response_conn: Optional["BaseHTTPConnection"] = None, preload_content: bool = True, decode_content: bool = True, enforce_content_length: bool = True, - ) -> HTTPResponse: + ) -> BaseHTTPResponse: """ Perform a request on a given urllib connection object taken from our pool. @@ -476,9 +483,11 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): new_e: Exception = e if isinstance(e, (BaseSSLError, CertificateError)): new_e = SSLError(e) + # If the connection didn't successfully connect to it's proxy + # then there if isinstance( new_e, (OSError, NewConnectionError, TimeoutError, SSLError) - ) and (conn and conn._connecting_to_proxy and conn.proxy): + ) and (conn and conn.proxy and not conn.has_connected_to_proxy): new_e = _wrap_proxy_error(new_e, conn.proxy.scheme) raise new_e @@ -511,7 +520,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # Reset the timeout for the recv() on the socket read_timeout = timeout_obj.read_timeout - if conn.sock: + if not conn.is_closed: # In Python 3 socket.py will catch EAGAIN and return None when you # try and read into the file pointer created by http.client, which # instead raises a BadStatusLine exception. Instead of catching @@ -521,7 +530,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): raise ReadTimeoutError( self, url, f"Read timed out. (read timeout={read_timeout})" ) - conn.sock.settimeout(read_timeout) + conn.timeout = read_timeout # Receive the response from the server try: @@ -532,11 +541,11 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # Set properties that are used by the pooling layer. response.retries = retries - response._connection = response_conn - response._pool = self + response._connection = response_conn # type: ignore[attr-defined] + response._pool = self # type: ignore[attr-defined] log.debug( - '%s://%s:%s "%s %s %s" %s %s', + '%s://%s:%s "%s %s %s" %s', self.scheme, self.host, self.port, @@ -545,7 +554,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # HTTP version conn._http_vsn_str, # type: ignore[attr-defined] response.status, - response.length_remaining, + response.length_remaining, # type: ignore[attr-defined] ) return response @@ -598,8 +607,10 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): release_conn: Optional[bool] = None, chunked: bool = False, body_pos: Optional[_TYPE_BODY_POSITION] = None, + preload_content: bool = True, + decode_content: bool = True, **response_kw: Any, - ) -> HTTPResponse: + ) -> BaseHTTPResponse: """ Get a connection from the pool and perform an HTTP request. This is the lowest level call for making a request, so you'll need to specify all @@ -668,6 +679,13 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): block for ``pool_timeout`` seconds and raise EmptyPoolError if no connection is available within the time period. + :param bool preload_content: + If True, the response's body will be preloaded into memory. + + :param bool decode_content: + If True, will attempt to decode the body based on the + 'content-encoding' header. + :param release_conn: If False, then the urlopen call will not release the connection back into the pool once a response is received (but will release if @@ -675,10 +693,10 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): `preload_content=True`). This is useful if you're not preloading the response's content immediately. You will need to call ``r.release_conn()`` on the response ``r`` to return the connection - back into the pool. If None, it takes the value of - ``response_kw.get('preload_content', True)``. + back into the pool. If None, it takes the value of ``preload_content`` + which defaults to ``True``. - :param chunked: + :param bool chunked: If True, urllib3 will send the body using chunked transfer encoding. Otherwise, urllib3 will send the body using the standard content-length form. Defaults to False. @@ -687,12 +705,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): Position to seek to in file-like body in the event of a retry or redirect. Typically this won't need to be set because urllib3 will auto-populate the value when needed. - - :param \\**response_kw: - Additional parameters are passed to - :meth:`urllib3.connection.HTTPConnection.getresponse` """ - parsed_url = parse_url(url) destination_scheme = parsed_url.scheme @@ -703,7 +716,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): retries = Retry.from_int(retries, redirect=redirect, default=self.retries) if release_conn is None: - release_conn = response_kw.get("preload_content", True) + release_conn = preload_content # Check host if assert_same_host and not self.is_same_host(url): @@ -758,20 +771,15 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): conn.timeout = timeout_obj.connect_timeout # type: ignore[assignment] - is_new_proxy_conn = self.proxy is not None and not getattr( - conn, "sock", None - ) - if is_new_proxy_conn: - assert isinstance(self.proxy, Url) - conn._connecting_to_proxy = True - if http_tunnel_required: - try: - self._prepare_proxy(conn) - except (BaseSSLError, OSError, SocketTimeout) as e: - self._raise_timeout( - err=e, url=self.proxy.url, timeout_value=conn.timeout - ) - raise + # Is this a closed/new connection that requires CONNECT tunnelling? + if self.proxy is not None and http_tunnel_required and conn.is_closed: + try: + self._prepare_proxy(conn) + except (BaseSSLError, OSError, SocketTimeout) as e: + self._raise_timeout( + err=e, url=self.proxy.url, timeout_value=conn.timeout + ) + raise # If we're going to release the connection in ``finally:``, then # the response doesn't need to know about the connection. Otherwise @@ -790,6 +798,8 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): chunked=chunked, retries=retries, response_conn=response_conn, + preload_content=preload_content, + decode_content=decode_content, **response_kw, ) @@ -827,7 +837,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): SSLError, HTTPException, ), - ) and (conn and conn._connecting_to_proxy and conn.proxy): + ) and (conn and conn.proxy and not conn.has_connected_to_proxy): new_e = _wrap_proxy_error(new_e, conn.proxy.scheme) elif isinstance(new_e, (OSError, HTTPException)): new_e = ProtocolError("Connection aborted.", new_e) @@ -875,6 +885,8 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): release_conn=release_conn, chunked=chunked, body_pos=body_pos, + preload_content=preload_content, + decode_content=decode_content, **response_kw, ) @@ -908,6 +920,8 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): release_conn=release_conn, chunked=chunked, body_pos=body_pos, + preload_content=preload_content, + decode_content=decode_content, **response_kw, ) @@ -938,6 +952,8 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): release_conn=release_conn, chunked=chunked, body_pos=body_pos, + preload_content=preload_content, + decode_content=decode_content, **response_kw, ) @@ -959,7 +975,7 @@ class HTTPSConnectionPool(HTTPConnectionPool): """ scheme = "https" - ConnectionCls = HTTPSConnection + ConnectionCls: Type["BaseHTTPSConnection"] = HTTPSConnection def __init__( self, @@ -1011,44 +1027,22 @@ class HTTPSConnectionPool(HTTPConnectionPool): self.assert_hostname = assert_hostname self.assert_fingerprint = assert_fingerprint - def _prepare_conn(self, conn: HTTPSConnection) -> HTTPConnection: - """ - Prepare the ``connection`` for :meth:`urllib3.util.ssl_wrap_socket` - and establish the tunnel if proxy is used. - """ - - if isinstance(conn, VerifiedHTTPSConnection): - conn.set_cert( - key_file=self.key_file, - key_password=self.key_password, - cert_file=self.cert_file, - cert_reqs=self.cert_reqs, - ca_certs=self.ca_certs, - ca_cert_dir=self.ca_cert_dir, - assert_hostname=self.assert_hostname, - assert_fingerprint=self.assert_fingerprint, - ) - conn.ssl_version = self.ssl_version - conn.ssl_minimum_version = self.ssl_minimum_version - conn.ssl_maximum_version = self.ssl_maximum_version - - return conn - def _prepare_proxy(self, conn: HTTPSConnection) -> None: # type: ignore[override] - """ - Establishes a tunnel connection through HTTP CONNECT. - - Tunnel connection is established early because otherwise httplib would - improperly set Host: header to proxy's IP:port. - """ - conn.set_tunnel(self._proxy_host, self.port, self.proxy_headers) - + """Establishes a tunnel connection through HTTP CONNECT.""" if self.proxy and self.proxy.scheme == "https": - conn.tls_in_tls_required = True + tunnel_scheme = "https" + else: + tunnel_scheme = "http" + conn.set_tunnel( + scheme=tunnel_scheme, + host=self._tunnel_host, + port=self.port, + headers=self.proxy_headers, + ) conn.connect() - def _new_conn(self) -> HTTPConnection: + def _new_conn(self) -> "BaseHTTPSConnection": """ Return a fresh :class:`urllib3.connection.HTTPConnection`. """ @@ -1060,7 +1054,7 @@ class HTTPSConnectionPool(HTTPConnectionPool): self.port or "443", ) - if not self.ConnectionCls or self.ConnectionCls is DummyConnection: # type: ignore[comparison-overlap, truthy-function] + if not self.ConnectionCls or self.ConnectionCls is DummyConnection: # type: ignore[comparison-overlap] raise ImportError( "Can't connect to HTTPS URL because the SSL module is not available." ) @@ -1071,26 +1065,32 @@ class HTTPSConnectionPool(HTTPConnectionPool): actual_host = self.proxy.host actual_port = self.proxy.port - conn = self.ConnectionCls( + return self.ConnectionCls( host=actual_host, port=actual_port, timeout=self.timeout.connect_timeout, cert_file=self.cert_file, key_file=self.key_file, key_password=self.key_password, + cert_reqs=self.cert_reqs, + ca_certs=self.ca_certs, + ca_cert_dir=self.ca_cert_dir, + assert_hostname=self.assert_hostname, + assert_fingerprint=self.assert_fingerprint, + ssl_version=self.ssl_version, + ssl_minimum_version=self.ssl_minimum_version, + ssl_maximum_version=self.ssl_maximum_version, **self.conn_kw, ) - return self._prepare_conn(conn) - - def _validate_conn(self, conn: HTTPConnection) -> None: + def _validate_conn(self, conn: "BaseHTTPConnection") -> None: """ Called right before a request is made, after the socket is created. """ super()._validate_conn(conn) # Force connect early to allow us to validate the connection. - if not conn.sock: + if conn.is_closed: conn.connect() if not conn.is_verified: diff --git a/src/urllib3/response.py b/src/urllib3/response.py index 501eaad0..a9951c38 100644 --- a/src/urllib3/response.py +++ b/src/urllib3/response.py @@ -46,8 +46,9 @@ except (AttributeError, ImportError, ValueError): # Defensive: zstd = None from . import util +from ._base_connection import _TYPE_BODY from ._collections import HTTPHeaderDict -from .connection import _TYPE_BODY, BaseSSLError, HTTPConnection, HTTPException +from .connection import BaseSSLError, HTTPConnection, HTTPException from .exceptions import ( BodyNotHttplibCompatible, DecodeError, diff --git a/src/urllib3/util/connection.py b/src/urllib3/util/connection.py index 2c5bce75..cc02af32 100644 --- a/src/urllib3/util/connection.py +++ b/src/urllib3/util/connection.py @@ -1,25 +1,21 @@ import socket -from typing import Optional, Sequence, Tuple, Union +from typing import TYPE_CHECKING, Optional, Sequence, Tuple, Union from ..exceptions import LocationParseError from .timeout import _DEFAULT_TIMEOUT, _TYPE_TIMEOUT -from .wait import wait_for_read _TYPE_SOCKET_OPTIONS = Sequence[Tuple[int, int, Union[int, bytes]]] +if TYPE_CHECKING: + from .._base_connection import BaseHTTPConnection -def is_connection_dropped(conn: socket.socket) -> bool: # Platform-specific + +def is_connection_dropped(conn: "BaseHTTPConnection") -> bool: # Platform-specific """ Returns True if the connection is dropped and should be closed. - - :param conn: - :class:`http.client.HTTPConnection` object. + :param conn: :class:`urllib3.connection.HTTPConnection` object. """ - sock = getattr(conn, "sock", None) - if sock is None: # Connection already closed (such as by httplib). - return True - # Returns True if readable, which here means it's been dropped - return wait_for_read(sock, timeout=0.0) + return not conn.is_connected # This function is copied from socket.py in the Python 2.7 standard diff --git a/src/urllib3/util/retry.py b/src/urllib3/util/retry.py index cc5587a7..12b1c7fb 100644 --- a/src/urllib3/util/retry.py +++ b/src/urllib3/util/retry.py @@ -28,7 +28,7 @@ from .util import reraise if TYPE_CHECKING: from ..connectionpool import ConnectionPool - from ..response import HTTPResponse + from ..response import BaseHTTPResponse log = logging.getLogger(__name__) @@ -315,7 +315,7 @@ class Retry: return seconds - def get_retry_after(self, response: "HTTPResponse") -> Optional[float]: + def get_retry_after(self, response: "BaseHTTPResponse") -> Optional[float]: """Get the value of Retry-After in seconds.""" retry_after = response.getheader("Retry-After") @@ -325,7 +325,7 @@ class Retry: return self.parse_retry_after(retry_after) - def sleep_for_retry(self, response: "HTTPResponse") -> bool: + def sleep_for_retry(self, response: "BaseHTTPResponse") -> bool: retry_after = self.get_retry_after(response) if retry_after: time.sleep(retry_after) @@ -339,7 +339,7 @@ class Retry: return time.sleep(backoff) - def sleep(self, response: Optional["HTTPResponse"] = None) -> None: + def sleep(self, response: Optional["BaseHTTPResponse"] = None) -> None: """Sleep between retry attempts. This method will respect a server's ``Retry-After`` response header @@ -422,7 +422,7 @@ class Retry: self, method: Optional[str] = None, url: Optional[str] = None, - response: Optional["HTTPResponse"] = None, + response: Optional["BaseHTTPResponse"] = None, error: Optional[Exception] = None, _pool: Optional["ConnectionPool"] = None, _stacktrace: Optional[TracebackType] = None, @@ -431,7 +431,7 @@ class Retry: :param response: A response object, or None, if the server did not return a response. - :type response: :class:`~urllib3.response.HTTPResponse` + :type response: :class:`~urllib3.response.BaseHTTPResponse` :param Exception error: An error encountered during the request, or None if the response was received successfully. diff --git a/src/urllib3/util/timeout.py b/src/urllib3/util/timeout.py index bad91cc9..1798fa9e 100644 --- a/src/urllib3/util/timeout.py +++ b/src/urllib3/util/timeout.py @@ -10,7 +10,9 @@ if TYPE_CHECKING: class _TYPE_DEFAULT(Enum): - token = 0 + # This value should never be passed to socket.settimeout() so for safety we use a -1. + # socket.settimout() raises a ValueError for negative values. + token = -1 _DEFAULT_TIMEOUT: "Final[_TYPE_DEFAULT]" = _TYPE_DEFAULT.token diff --git a/src/urllib3/util/url.py b/src/urllib3/util/url.py index 71ff6582..c7645f78 100644 --- a/src/urllib3/util/url.py +++ b/src/urllib3/util/url.py @@ -290,6 +290,16 @@ def _remove_path_dot_segments(path: str) -> str: return "/".join(output) +@overload +def _normalize_host(host: None, scheme: Optional[str]) -> None: + ... + + +@overload +def _normalize_host(host: str, scheme: Optional[str]) -> str: + ... + + def _normalize_host(host: Optional[str], scheme: Optional[str]) -> Optional[str]: if host: if scheme in _NORMALIZABLE_SCHEMES: diff --git a/test/test_connectionpool.py b/test/test_connectionpool.py index 4a480711..7586585b 100644 --- a/test/test_connectionpool.py +++ b/test/test_connectionpool.py @@ -583,7 +583,9 @@ class TestConnectionPool: def test_read_timeout_0_does_not_raise_bad_status_line_error(self) -> None: with HTTPConnectionPool(host="localhost", maxsize=1) as pool: - conn = Mock() + conn = Mock(spec=HTTPConnection) + # Needed to tell the pool that the connection is alive. + conn.is_closed = False with patch.object(Timeout, "read_timeout", 0): timeout = Timeout(1, 1, 1) with pytest.raises(ReadTimeoutError): diff --git a/test/with_dummyserver/test_connection.py b/test/with_dummyserver/test_connection.py index 76fe2c13..3c65e79f 100644 --- a/test/with_dummyserver/test_connection.py +++ b/test/with_dummyserver/test_connection.py @@ -26,7 +26,7 @@ def test_returns_urllib3_HTTPResponse(pool: HTTPConnectionPool) -> None: conn.request(method, path) - response: HTTPResponse = conn.getresponse() + response = conn.getresponse() assert isinstance(response, HTTPResponse) @@ -39,7 +39,7 @@ def test_does_not_release_conn(pool: HTTPConnectionPool) -> None: conn.request(method, path) - response: HTTPResponse = conn.getresponse() + response = conn.getresponse() response.release_conn() assert pool.pool.qsize() == 0 # type: ignore[union-attr] @@ -47,18 +47,19 @@ def test_does_not_release_conn(pool: HTTPConnectionPool) -> None: def test_releases_conn(pool: HTTPConnectionPool) -> None: conn = pool._get_conn() + assert conn is not None method = "GET" path = "/" conn.request(method, path) - response: HTTPResponse = conn.getresponse() + response = conn.getresponse() # If these variables are set by the pool # then the response can release the connection # back into the pool. - response._pool = pool - response._connection = conn + response._pool = pool # type: ignore[attr-defined] + response._connection = conn # type: ignore[attr-defined] response.release_conn() assert pool.pool.qsize() == 1 # type: ignore[union-attr] @@ -72,8 +73,71 @@ def test_double_getresponse(pool: HTTPConnectionPool) -> None: conn.request(method, path) - _: HTTPResponse = conn.getresponse() + _ = conn.getresponse() # Calling getrepsonse() twice should cause an error with pytest.raises(ResponseNotReady): conn.getresponse() + + +def test_connection_state_properties(pool: HTTPConnectionPool) -> None: + conn = pool._get_conn() + + assert conn.is_closed is True + assert conn.is_connected is False + assert conn.has_connected_to_proxy is False + assert conn.is_verified is False + assert conn.proxy_is_verified is None + + conn.connect() + + assert conn.is_closed is False + assert conn.is_connected is True + assert conn.has_connected_to_proxy is False + assert conn.is_verified is False + assert conn.proxy_is_verified is None + + conn.request("GET", "/") + resp = conn.getresponse() + assert resp.status == 200 + + conn.close() + + assert conn.is_closed is True + assert conn.is_connected is False + assert conn.has_connected_to_proxy is False + assert conn.is_verified is False + assert conn.proxy_is_verified is None + + +def test_set_tunnel_is_reset(pool: HTTPConnectionPool) -> None: + conn = pool._get_conn() + + assert conn.is_closed is True + assert conn.is_connected is False + assert conn.has_connected_to_proxy is False + assert conn.is_verified is False + assert conn.proxy_is_verified is None + + conn.set_tunnel(host="host", port=8080, scheme="http") + + assert conn._tunnel_host == "host" # type: ignore[attr-defined] + assert conn._tunnel_port == 8080 # type: ignore[attr-defined] + assert conn._tunnel_scheme == "http" # type: ignore[attr-defined] + + conn.close() + + assert conn._tunnel_host is None # type: ignore[attr-defined] + assert conn._tunnel_port is None # type: ignore[attr-defined] + assert conn._tunnel_scheme is None # type: ignore[attr-defined] + + +def test_invalid_tunnel_scheme(pool: HTTPConnectionPool) -> None: + conn = pool._get_conn() + + with pytest.raises(ValueError) as e: + conn.set_tunnel(host="host", port=8080, scheme="socks") + assert ( + str(e.value) + == "Invalid proxy scheme for tunneling: 'socks', must be either 'http' or 'https'" + ) diff --git a/test/with_dummyserver/test_connectionpool.py b/test/with_dummyserver/test_connectionpool.py index 6e38fbb3..c20ae396 100644 --- a/test/with_dummyserver/test_connectionpool.py +++ b/test/with_dummyserver/test_connectionpool.py @@ -75,9 +75,9 @@ class TestConnectionPoolTimeouts(SocketDummyServerTestCase): try: with pytest.raises(ReadTimeoutError): pool.urlopen("GET", "/") - if conn.sock: + if not conn.is_closed: with pytest.raises(socket.error): - conn.sock.recv(1024) + conn.sock.recv(1024) # type: ignore[attr-defined] finally: pool._put_conn(conn) @@ -289,7 +289,7 @@ class TestConnectionPool(HTTPDummyServerTestCase): conn = pool._get_conn() try: pool._make_request(conn, "GET", "/") - tcp_nodelay_setting = conn.sock.getsockopt( + tcp_nodelay_setting = conn.sock.getsockopt( # type: ignore[attr-defined] socket.IPPROTO_TCP, socket.TCP_NODELAY ) assert tcp_nodelay_setting @@ -312,7 +312,8 @@ class TestConnectionPool(HTTPDummyServerTestCase): self.port, socket_options=socket_options, ) as pool: - s = pool._new_conn()._new_conn() # Get the socket + # Get the socket of a new connection. + s = pool._new_conn()._new_conn() # type: ignore[attr-defined] try: using_keepalive = ( s.getsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE) > 0 @@ -331,7 +332,7 @@ class TestConnectionPool(HTTPDummyServerTestCase): with HTTPConnectionPool( self.host, self.port, socket_options=socket_options ) as pool: - s = pool._new_conn()._new_conn() + s = pool._new_conn()._new_conn() # type: ignore[attr-defined] try: using_nagle = s.getsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY) == 0 assert using_nagle @@ -349,7 +350,7 @@ class TestConnectionPool(HTTPDummyServerTestCase): # Update the default socket options assert conn.socket_options is not None conn.socket_options += [(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)] # type: ignore[operator] - s = conn._new_conn() + s = conn._new_conn() # type: ignore[attr-defined] nagle_disabled = ( s.getsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY) > 0 ) @@ -991,10 +992,7 @@ class TestConnectionPool(HTTPDummyServerTestCase): pool.request("GET", "/headers", chunked=chunked) else: conn = pool._get_conn() - if chunked: - conn.request_chunked("GET", "/headers") - else: - conn.request("GET", "/headers") + conn.request("GET", "/headers", chunked=chunked) assert pool.headers == {"key": "val"} assert isinstance(pool.headers, header_type) @@ -1004,13 +1002,28 @@ class TestConnectionPool(HTTPDummyServerTestCase): pool.request("GET", "/headers", headers=headers, chunked=chunked) else: conn = pool._get_conn() - if chunked: - conn.request_chunked("GET", "/headers", headers=headers) - else: - conn.request("GET", "/headers", headers=headers) + conn.request("GET", "/headers", headers=headers, chunked=chunked) assert headers == {"key": "val"} + def test_request_chunked_is_deprecated( + self, + ) -> None: + + with HTTPConnectionPool(self.host, self.port) as pool: + conn = pool._get_conn() + + with pytest.warns(DeprecationWarning) as w: + conn.request_chunked("GET", "/headers") # type: ignore[attr-defined] + assert len(w) == 1 and str(w[0].message) == ( + "HTTPConnection.request_chunked() is deprecated and will be removed in a future version. " + "Instead use HTTPConnection.request(..., chunked=True)." + ) + + resp = conn.getresponse() + assert resp.status == 200 + assert resp.json()["Transfer-Encoding"] == "chunked" + def test_bytes_header(self) -> None: with HTTPConnectionPool(self.host, self.port) as pool: headers = {"User-Agent": "test header"} diff --git a/test/with_dummyserver/test_https.py b/test/with_dummyserver/test_https.py index a2c2067a..52b6fbff 100644 --- a/test/with_dummyserver/test_https.py +++ b/test/with_dummyserver/test_https.py @@ -33,7 +33,7 @@ from dummyserver.server import ( ) from dummyserver.testcase import HTTPSDummyServerTestCase from urllib3 import HTTPSConnectionPool -from urllib3.connection import RECENT_DATE, VerifiedHTTPSConnection +from urllib3.connection import RECENT_DATE, HTTPSConnection, VerifiedHTTPSConnection from urllib3.exceptions import ( ConnectTimeoutError, InsecureRequestWarning, @@ -342,7 +342,7 @@ class TestHTTPS(HTTPSDummyServerTestCase): with pytest.raises(ssl.SSLError): conn.connect() - assert conn.sock + assert conn.sock is not None # type: ignore[attr-defined] finally: conn.close() @@ -459,8 +459,8 @@ class TestHTTPS(HTTPSDummyServerTestCase): # pyopenssl doesn't let you pull the server_hostname back off the # socket, so only add this assertion if the attribute is there (i.e. # the python ssl module). - if hasattr(conn.sock, "server_hostname"): - assert conn.sock.server_hostname == "localhost" + if hasattr(conn.sock, "server_hostname"): # type: ignore[attr-defined] + assert conn.sock.server_hostname == "localhost" # type: ignore[attr-defined] def test_assert_fingerprint_md5(self) -> None: with HTTPSConnectionPool( @@ -763,8 +763,33 @@ class TestHTTPS(HTTPSDummyServerTestCase): def test_set_cert_default_cert_required(self) -> None: conn = VerifiedHTTPSConnection(self.host, self.port) - conn.set_cert() + with pytest.warns(DeprecationWarning) as w: + conn.set_cert() assert conn.cert_reqs == ssl.CERT_REQUIRED + assert len(w) == 1 and str(w[0].message) == ( + "HTTPSConnection.set_cert() is deprecated and will be removed in a future version. " + "Instead provide the parameters to the HTTPSConnection constructor." + ) + + @pytest.mark.parametrize("verify_mode", [ssl.CERT_NONE, ssl.CERT_REQUIRED]) + def test_set_cert_inherits_cert_reqs_from_ssl_context( + self, verify_mode: int + ) -> None: + ssl_context = urllib3.util.ssl_.create_urllib3_context(cert_reqs=verify_mode) + assert ssl_context.verify_mode == verify_mode + + conn = HTTPSConnection(self.host, self.port, ssl_context=ssl_context) + with pytest.warns(DeprecationWarning) as w: + conn.set_cert() + + assert conn.cert_reqs == verify_mode + assert ( + conn.ssl_context is not None and conn.ssl_context.verify_mode == verify_mode + ) + assert len(w) == 1 and str(w[0].message) == ( + "HTTPSConnection.set_cert() is deprecated and will be removed in a future version. " + "Instead provide the parameters to the HTTPSConnection constructor." + ) def test_tls_protocol_name_of_socket(self) -> None: if self.tls_protocol_name is None: @@ -779,9 +804,9 @@ class TestHTTPS(HTTPSDummyServerTestCase): conn = https_pool._get_conn() try: conn.connect() - if not hasattr(conn.sock, "version"): + if not hasattr(conn.sock, "version"): # type: ignore[attr-defined] pytest.skip("SSLSocket.version() not available") - assert conn.sock.version() == self.tls_protocol_name + assert conn.sock.version() == self.tls_protocol_name # type: ignore[attr-defined] finally: conn.close() @@ -879,7 +904,7 @@ class TestHTTPS(HTTPSDummyServerTestCase): conn = https_pool._get_conn() try: conn.connect() - assert conn.sock.version() == self.tls_protocol_name # type: ignore[union-attr] + assert conn.sock.version() == self.tls_protocol_name # type: ignore[attr-defined] finally: conn.close() diff --git a/test/with_dummyserver/test_proxy_poolmanager.py b/test/with_dummyserver/test_proxy_poolmanager.py index 3f069e26..dd17a339 100644 --- a/test/with_dummyserver/test_proxy_poolmanager.py +++ b/test/with_dummyserver/test_proxy_poolmanager.py @@ -134,7 +134,7 @@ class TestHTTPProxyManager(HTTPDummyProxyTestCase): conn = hc2._get_conn() try: hc2._make_request(conn, "GET", "/") - tcp_nodelay_setting = conn.sock.getsockopt( + tcp_nodelay_setting = conn.sock.getsockopt( # type: ignore[attr-defined] socket.IPPROTO_TCP, socket.TCP_NODELAY ) assert tcp_nodelay_setting == 0, ( |