| author | Kenneth Reitz <me@kennethreitz.org> | 2018-03-16 19:06:14 -0400 |
|---|---|---|
| committer | Kenneth Reitz <me@kennethreitz.org> | 2018-03-16 19:06:14 -0400 |
| commit | 32540f9bceb3781753de0c449cf4b815944ea8a6 (patch) | |
| tree | 3e0d52ff92aee8de6fb40370a0ebd6456c7db088 /requests3/core | |
| parent | c6464c17700e204df7b2b318a7129c53c20a3e62 (diff) | |
| download | python-requests-32540f9bceb3781753de0c449cf4b815944ea8a6.tar.gz | |
requests3
Diffstat (limited to 'requests3/core')
53 files changed, 13130 insertions, 0 deletions
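For orientation before the full diff: the new `requests3/core/api.py` added below exposes two entry points, an awaitable `request()` built on `AsyncPoolManager` with a Trio backend, and a synchronous `blocking_request()` built on `PoolManager`. The following is a minimal, untested usage sketch based only on the signatures in this commit (httpbin.org is a placeholder host; `timeout` is a required positional argument in both signatures, and the commit itself labels this version a proof of concept):

```python
# Usage sketch for the API added in requests3/core/api.py in this commit.
# Assumes the package layout from the diff below; not an official example.
import trio

from requests3.core import request, blocking_request

# Synchronous path: a PoolManager is created, used, and closed internally.
r = blocking_request('GET', 'http://httpbin.org/get', timeout=10)
print(r.status)  # the urlopen logic in this commit exposes `.status`

async def main():
    # Async path: AsyncPoolManager with the TrioBackend, awaited.
    r = await request('GET', 'http://httpbin.org/get', timeout=10)
    print(r.status)

trio.run(main)
```

Note that, as written in this commit, both wrappers accept `timeout` and `body` but forward neither to `urlopen`; the sketch passes `timeout` only because the signatures require it.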
diff --git a/requests3/core/__init__.py b/requests3/core/__init__.py new file mode 100644 index 00000000..23889b5c --- /dev/null +++ b/requests3/core/__init__.py @@ -0,0 +1,3 @@ +from .api import AsyncPoolManager +from .api import request, blocking_request +from .import http_manager diff --git a/requests3/core/api.py b/requests3/core/api.py new file mode 100644 index 00000000..3950df04 --- /dev/null +++ b/requests3/core/api.py @@ -0,0 +1,51 @@ +import trio + +from .http_manager import AsyncPoolManager, PoolManager +from .http_manager._backends import TrioBackend +from . import http_manager + + +async def request( + method, + url, + timeout, + body=None, + headers=None, + preload_content=False, + pool=None, + **kwargs +): + """Returns a Response object, to be awaited.""" + if not pool: + pool = AsyncPoolManager(backend=TrioBackend()) + return await pool.urlopen( + method=method, + url=url, + headers=headers, + preload_content=preload_content, + **kwargs + ) + + +def blocking_request( + method, + url, + timeout, + body=None, + headers=None, + preload_content=False, + pool=None, + **kwargs +): + """Returns a Response object.""" + if not pool: + pool = PoolManager() + with pool as http: + r = http.urlopen( + method=method, + url=url, + headers=headers, + preload_content=preload_content, + **kwargs + ) + return r diff --git a/requests3/core/http_manager/__init__.py b/requests3/core/http_manager/__init__.py new file mode 100644 index 00000000..362725be --- /dev/null +++ b/requests3/core/http_manager/__init__.py @@ -0,0 +1,111 @@ +""" +urllib3 - Thread-safe connection pooling and re-using. +""" +from __future__ import absolute_import +import warnings + +from .connectionpool import ( + HTTPConnectionPool, + HTTPSConnectionPool, + connection_from_url +) + +from . import exceptions +from .filepost import encode_multipart_formdata +from .poolmanager import PoolManager, ProxyManager, proxy_from_url +from .response import HTTPResponse +from .util.request import make_headers +from .util.url import get_host +from .util.timeout import Timeout +from .util.retry import Retry + + +# Set default logging handler to avoid "No handler found" warnings. +import logging +try: # Python 2.7+ + from logging import NullHandler +except ImportError: + class NullHandler(logging.Handler): + def emit(self, record): + pass + +__author__ = 'Andrey Petrov (andrey.petrov@shazow.net)' +__license__ = 'MIT' +__version__ = '2.0.dev0+bleach.spike.proof.of.concept.dont.use' + +__all__ = [ + 'HTTPConnectionPool', + 'HTTPSConnectionPool', + 'PoolManager', + 'ProxyManager', + 'HTTPResponse', + 'Retry', + 'Timeout', + 'add_stderr_logger', + 'connection_from_url', + 'disable_warnings', + 'encode_multipart_formdata', + 'get_host', + 'make_headers', + 'proxy_from_url', +] + +# For now we only support async on 3.6, because we use async generators +import sys +if sys.version_info >= (3, 6): + from ._async.connectionpool import ( + HTTPConnectionPool as AsyncHTTPConnectionPool, + HTTPSConnectionPool as AsyncHTTPSConnectionPool) + from ._async.poolmanager import ( + PoolManager as AsyncPoolManager, + ProxyManager as AsyncProxyManager) + from ._async.response import HTTPResponse as AsyncHTTPResponse + __all__.extend( + ('AsyncHTTPConnectionPool', 'AsyncHTTPSConnectionPool', + 'AsyncPoolManager', 'AsyncProxyManager', 'AsyncHTTPResponse')) + + +logging.getLogger(__name__).addHandler(NullHandler()) + + +def add_stderr_logger(level=logging.DEBUG): + """ + Helper for quickly adding a StreamHandler to the logger. Useful for + debugging. 
+ + Returns the handler after adding it. + """ + # This method needs to be in this __init__.py to get the __name__ correct + # even if urllib3 is vendored within another package. + logger = logging.getLogger(__name__) + handler = logging.StreamHandler() + handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s')) + logger.addHandler(handler) + logger.setLevel(level) + logger.debug('Added a stderr logging handler to logger: %s', __name__) + return handler + + +# ... Clean up. +del NullHandler + + +# All warning filters *must* be appended unless you're really certain that they +# shouldn't be: otherwise, it's very hard for users to use most Python +# mechanisms to silence them. +# SecurityWarning's always go off by default. +warnings.simplefilter('always', exceptions.SecurityWarning, append=True) +# SubjectAltNameWarning's should go off once per host +warnings.simplefilter('default', exceptions.SubjectAltNameWarning, append=True) +# InsecurePlatformWarning's don't vary between requests, so we keep it default. +warnings.simplefilter('default', exceptions.InsecurePlatformWarning, + append=True) +# SNIMissingWarnings should go off only once. +warnings.simplefilter('default', exceptions.SNIMissingWarning, append=True) + + +def disable_warnings(category=exceptions.HTTPWarning): + """ + Helper for quickly disabling all urllib3 warnings. + """ + warnings.simplefilter('ignore', category) diff --git a/requests3/core/http_manager/_async/__init__.py b/requests3/core/http_manager/_async/__init__.py new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/requests3/core/http_manager/_async/__init__.py diff --git a/requests3/core/http_manager/_async/connection.py b/requests3/core/http_manager/_async/connection.py new file mode 100644 index 00000000..934bb516 --- /dev/null +++ b/requests3/core/http_manager/_async/connection.py @@ -0,0 +1,526 @@ +# -*- coding: utf-8 -*- +""" +This module implements the connection management logic. + +Unlike in http.client, the connection here is an object that is responsible +for a very small number of tasks: + + 1. Serializing/deserializing data to/from the network. + 2. Being able to do basic parsing of HTTP and maintaining the framing. + 3. Understanding connection state. + +This object knows very little about the semantics of HTTP in terms of how to +construct HTTP requests and responses. It mostly manages the socket itself. +""" +from __future__ import absolute_import + +import collections +import datetime +import socket +import warnings + +import h11 + +from ..base import Request, Response +from ..exceptions import ( + ConnectTimeoutError, + NewConnectionError, + SubjectAltNameWarning, + SystemTimeWarning, + BadVersionError, + FailedTunnelError, + InvalidBodyError, + ProtocolError, +) +from ..packages import six +from ..util import ssl_ as ssl_util +from .._backends import SyncBackend +from .._backends._common import LoopAbort + +try: + import ssl +except ImportError: + ssl = None +# When updating RECENT_DATE, move it to +# within two years of the current date, and no +# earlier than 6 months ago. +RECENT_DATE = datetime.date(2016, 1, 1) +_SUPPORTED_VERSIONS = frozenset([b'1.0', b'1.1']) +# A sentinel object returned when some syscalls return EAGAIN. +_EAGAIN = object() + + +def _headers_to_native_string(headers): + """ + A temporary shim to convert received headers to native strings, to match + the behaviour of httplib. We will reconsider this later in the process. + """ + # TODO: revisit. 
+ # This works because fundamentally we know that all headers coming from + # h11 are bytes, so if they aren't of type `str` then we must be on Python + # 3 and need to decode the headers using Latin1. + for n, v in headers: + if not isinstance(n, str): + n = n.decode('latin1') + if not isinstance(v, str): + v = v.decode('latin1') + yield (n, v) + + +def _stringify_headers(headers): + """ + A generator that transforms headers so they're suitable for sending by h11. + """ + # TODO: revisit + for name, value in headers: + if isinstance(name, six.text_type): + name = name.encode('ascii') + if isinstance(value, six.text_type): + value = value.encode('latin-1') + elif isinstance(value, int): + value = str(value).encode('ascii') + yield (name, value) + + +def _read_readable(readable): + # TODO: reconsider this block size + blocksize = 8192 + while True: + datablock = readable.read(blocksize) + if not datablock: + break + + yield datablock + + + + +# XX this should return an async iterator +def _make_body_iterable(body): + """ + This function turns all possible body types that urllib3 supports into an + iterable of bytes. The goal is to expose a uniform structure to request + bodies so that they all appear to be identical to the low-level code. + + The basic logic here is: + - byte strings are turned into single-element lists + - readables are wrapped in an iterable that repeatedly calls read until + nothing is returned anymore + - other iterables are used directly + - anything else is not acceptable + + In particular, note that we do not support *text* data of any kind. This + is deliberate: users must make choices about the encoding of the data they + use. + """ + if body is None: + return [] + + elif isinstance(body, six.binary_type): + return [body] + + elif hasattr(body, "read"): + return _read_readable(body) + + elif isinstance(body, collections.Iterable) and not isinstance( + body, six.text_type + ): + return body + + else: + raise InvalidBodyError("Unacceptable body type: %s" % type(body)) + + + + +# XX this should return an async iterator +def _request_bytes_iterable(request, state_machine): + """ + An iterable that serialises a set of bytes for the body. + """ + h11_request = h11.Request( + method=request.method, + target=request.target, + headers=_stringify_headers(request.headers.items()), + ) + yield state_machine.send(h11_request) + + for chunk in _make_body_iterable(request.body): + yield state_machine.send(h11.Data(data=chunk)) + + yield state_machine.send(h11.EndOfMessage()) + + +def _response_from_h11(h11_response, body_object): + """ + Given a h11 Response object, build a urllib3 response object and return it. + """ + if h11_response.http_version not in _SUPPORTED_VERSIONS: + raise BadVersionError(h11_response.http_version) + + version = b'HTTP/' + h11_response.http_version + our_response = Response( + status_code=h11_response.status_code, + headers=_headers_to_native_string(h11_response.headers), + body=body_object, + version=version, + ) + return our_response + + +def _build_tunnel_request(host, port, headers): + """ + Builds a urllib3 Request object that is set up correctly to request a proxy + to establish a TCP tunnel to the remote host. 
+ """ + target = "%s:%d" % (host, port) + if not isinstance(target, bytes): + target = target.encode('latin1') + tunnel_request = Request(method=b"CONNECT", target=target, headers=headers) + tunnel_request.add_host(host=host, port=port, scheme='http') + return tunnel_request + + +async def _start_http_request(request, state_machine, conn): + """ + Send the request using the given state machine and connection, wait + for the response headers, and return them. + + If we get response headers early, then we stop sending and return + immediately, poisoning the state machine along the way so that we know + it can't be re-used. + + This is a standalone function because we use it both to set up both + CONNECT requests and real requests. + """ + # Before we begin, confirm that the state machine is ok. + if ( + state_machine.our_state is not h11.IDLE or + state_machine.their_state is not h11.IDLE + ): + raise ProtocolError("Invalid internal state transition") + + request_bytes_iterable = _request_bytes_iterable(request, state_machine) + # Hack around Python 2 lack of nonlocal + context = {'send_aborted': True, 'h11_response': None} + + async def next_bytes_to_send(): + try: + return next(request_bytes_iterable) + + except StopIteration: + # We successfully sent the whole body! + context['send_aborted'] = False + return None + + def consume_bytes(data): + state_machine.receive_data(data) + while True: + event = state_machine.next_event() + if event is h11.NEED_DATA: + break + + elif isinstance(event, h11.InformationalResponse): + # Ignore 1xx responses + continue + + elif isinstance(event, h11.Response): + # We have our response! Save it and get out of here. + context['h11_response'] = event + raise LoopAbort + + else: + # Can't happen + raise RuntimeError("Unexpected h11 event {}".format(event)) + + await conn.send_and_receive_for_a_while(next_bytes_to_send, consume_bytes) + assert context['h11_response'] is not None + if context['send_aborted']: + # Our state machine thinks we sent a bunch of data... but maybe we + # didn't! Maybe our send got cancelled while we were only half-way + # through sending the last chunk, and then h11 thinks we sent a + # complete request and we actually didn't. Then h11 might think we can + # re-use this connection, even though we can't. So record this in + # h11's state machine. + # XX need to implement this in h11 + # state_machine.poison() + # XX kluge for now + state_machine._cstate.process_error(state_machine.our_role) + return context['h11_response'] + + +async def _read_until_event(state_machine, conn): + """ + A loop that keeps issuing reads and feeding the data into h11 and + checking whether h11 has an event for us. The moment there is an event + other than h11.NEED_DATA, this function returns that event. + """ + while True: + event = state_machine.next_event() + if event is not h11.NEED_DATA: + return event + + state_machine.receive_data(await conn.receive_some()) + + +_DEFAULT_SOCKET_OPTIONS = object() + + +class HTTP1Connection(object): + """ + A wrapper around a single HTTP/1.1 connection. + + This wrapper manages connection state, ensuring that connections are + appropriately managed throughout the lifetime of a HTTP transaction. In + particular, this object understands the conditions in which connections + should be torn down, and also manages sending data and handling early + responses. + + This object can be iterated over to return the response body. 
When iterated + over it will return all of the data that is currently buffered, and if no + data is buffered it will issue one read syscall and return all of that + data. Buffering of response data must happen at a higher layer. + """ + # : Disable Nagle's algorithm by default. + #: ``[(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)]`` + default_socket_options = [(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)] + + def __init__( + self, + host, + port, + backend=None, + socket_options=_DEFAULT_SOCKET_OPTIONS, + source_address=None, + tunnel_host=None, + tunnel_port=None, + tunnel_headers=None, + ): + self.is_verified = False + self._backend = backend or SyncBackend() + self._host = host + self._port = port + self._socket_options = ( + socket_options if socket_options is not _DEFAULT_SOCKET_OPTIONS else self.default_socket_options + ) + self._source_address = source_address + self._tunnel_host = tunnel_host + self._tunnel_port = tunnel_port + self._tunnel_headers = tunnel_headers + self._sock = None + self._state_machine = h11.Connection(our_role=h11.CLIENT) + + async def _wrap_socket( + self, conn, ssl_context, fingerprint, assert_hostname + ): + """ + Handles extra logic to wrap the socket in TLS magic. + """ + is_time_off = datetime.date.today() < RECENT_DATE + if is_time_off: + warnings.warn( + ( + 'System time is way off (before {0}). This will probably ' + 'lead to SSL verification errors' + ).format( + RECENT_DATE + ), + SystemTimeWarning, + ) + # XX need to know whether this is the proxy or the final host that + # we just did a handshake with! + check_host = assert_hostname or self._tunnel_host or self._host + # Stripping trailing dots from the hostname is important because + # they indicate that this host is an absolute name (for DNS + # lookup), but are irrelevant to SSL hostname matching and in fact + # will break it. + check_host = check_host.rstrip(".") + conn = await conn.start_tls(check_host, ssl_context) + if fingerprint: + ssl_util.assert_fingerprint( + conn.getpeercert(binary_form=True), fingerprint + ) + elif ( + ssl_context.verify_mode != ssl.CERT_NONE and + assert_hostname is not False + ): + cert = conn.getpeercert() + if not cert.get('subjectAltName', ()): + warnings.warn( + ( + 'Certificate for {0} has no `subjectAltName`, falling ' + 'back to check for a `commonName` for now. This ' + 'feature is being removed by major browsers and ' + 'deprecated by RFC 2818. (See ' + 'https://github.com/shazow/urllib3/issues/497 for ' + 'details.)'.format(self._host) + ), + SubjectAltNameWarning, + ) + ssl_util.match_hostname(cert, check_host) + self.is_verified = ( + ssl_context.verify_mode == ssl.CERT_REQUIRED and + (assert_hostname is not False or fingerprint) + ) + return conn + + async def send_request(self, request, read_timeout): + """ + Given a Request object, performs the logic required to get a response. + """ + h11_response = await _start_http_request( + request, self._state_machine, self._sock + ) + return _response_from_h11(h11_response, self) + + async def _tunnel(self, conn): + """ + This method establishes a CONNECT tunnel shortly after connection. + """ + # Basic sanity check that _tunnel is only called at appropriate times. 
+ assert self._state_machine.our_state is h11.IDLE + tunnel_request = _build_tunnel_request( + self._tunnel_host, self._tunnel_port, self._tunnel_headers + ) + tunnel_state_machine = h11.Connection(our_role=h11.CLIENT) + h11_response = await _start_http_request( + tunnel_request, tunnel_state_machine, conn + ) + # XX this is wrong -- 'self' here will try to iterate using + # self._state_machine, not tunnel_state_machine. Also, we need to + # think about how this failure case interacts with the pool's + # connection lifecycle management. + tunnel_response = _response_from_h11(h11_response, self) + if h11_response.status_code != 200: + conn.forceful_close() + raise FailedTunnelError( + "Unable to establish CONNECT tunnel", tunnel_response + ) + + async def connect( + self, + ssl_context=None, + fingerprint=None, + assert_hostname=None, + connect_timeout=None, + ): + """ + Connect this socket to the server, applying the source address, any + relevant socket options, and the relevant connection timeout. + """ + if self._sock is not None: + # We're already connected, move on. + self._sock.set_readable_watch_state(False) + return + + extra_kw = {} + if self._source_address: + extra_kw['source_address'] = self._source_address + if self._socket_options: + extra_kw['socket_options'] = self._socket_options + # XX pass connect_timeout to backend + # This was factored out into a separate function to allow overriding + # by subclasses, but in the backend approach the way to to this is to + # provide a custom backend. (Composition >> inheritance.) + try: + conn = await self._backend.connect( + self._host, self._port, **extra_kw + ) + # XX these two error handling blocks needs to be re-done in a + # backend-agnostic way + except socket.timeout: + raise ConnectTimeoutError( + self, + "Connection to %s timed out. (connect timeout=%s)" % + (self._host, connect_timeout), + ) + + except socket.error as e: + raise NewConnectionError( + self, "Failed to establish a new connection: %s" % e + ) + + if ssl_context is not None: + if self._tunnel_host is not None: + self._tunnel(conn) + conn = await self._wrap_socket( + conn, ssl_context, fingerprint, assert_hostname + ) + # XX We should pick one of these names and use it consistently... + self._sock = conn + + def close(self): + """ + Close this connection. + """ + if self._sock is not None: + # Make sure self._sock is None even if closing raises an exception + sock, self._sock = self._sock, None + sock.forceful_close() + + def is_dropped(self): + """ + Returns True if the connection is closed: returns False otherwise. This + includes closures that do not mark the FD as closed, such as when the + remote peer has sent EOF but we haven't read it yet. + + Pre-condition: _reset must have been called. + """ + if self._sock is None: + return True + + # We check for droppedness by checking the socket for readability. If + # it's not readable, it's not dropped. If it is readable, then we + # assume that the thing we'd read from the socket is EOF. It might not + # be, but if it's not then the server has busted its HTTP/1.1 framing + # and so we want to drop the connection anyway. + return self._sock.is_readable() + + def _reset(self): + """ + Called once we hit EndOfMessage, and checks whether we can re-use this + state machine and connection or not, and if not, closes the socket and + state machine. 
+ """ + try: + self._state_machine.start_next_cycle() + except h11.LocalProtocolError: + # Not re-usable + self.close() + else: + # This connection can be returned to the connection pool, and + # eventually we'll take it out again and want to know if it's been + # dropped. + self._sock.set_readable_watch_state(True) + + @property + def complete(self): + """ + XX what is this supposed to do? check if the response has been fully + iterated over? check for that + the connection being reusable? + """ + our_state = self._state_machine.our_state + their_state = self._state_machine.their_state + return (our_state is h11.IDLE and their_state is h11.IDLE) + + def __aiter__(self): + return self + + def next(self): # Needed for Python 2 as __anext__ becomes __next__ + return self.__next__() + + async def __anext__(self): + """ + Iterate over the body bytes of the response until end of message. + """ + event = await _read_until_event(self._state_machine, self._sock) + if isinstance(event, h11.Data): + return bytes(event.data) + + elif isinstance(event, h11.EndOfMessage): + self._reset() + raise StopAsyncIteration + + else: + # can't happen + raise RuntimeError("Unexpected h11 event {}".format(event)) diff --git a/requests3/core/http_manager/_async/connectionpool.py b/requests3/core/http_manager/_async/connectionpool.py new file mode 100644 index 00000000..3c829c3c --- /dev/null +++ b/requests3/core/http_manager/_async/connectionpool.py @@ -0,0 +1,891 @@ +from __future__ import absolute_import +import errno +import logging +import sys +import warnings + +from socket import error as SocketError, timeout as SocketTimeout +import socket + +import h11 + + +from ..base import Request, DEFAULT_PORTS +from ..exceptions import ( + ClosedPoolError, + ProtocolError, + EmptyPoolError, + LocationValueError, + MaxRetryError, + ProxyError, + ReadTimeoutError, + SSLError, + TimeoutError, + InsecureRequestWarning, + NewConnectionError, +) +from ..packages.ssl_match_hostname import CertificateError +from ..packages import six +from ..packages.six.moves import queue +from ..request import RequestMethods +from .response import HTTPResponse +from .connection import HTTP1Connection + +from ..util.connection import is_connection_dropped +from ..util.request import set_file_position +from ..util.retry import Retry +from ..util.ssl_ import ( + create_urllib3_context, + merge_context_settings, + resolve_ssl_version, + resolve_cert_reqs, + BaseSSLError, +) +from ..util.timeout import Timeout +from ..util.url import get_host, Url + +try: + import ssl +except ImportError: + ssl = None +if six.PY2: + # Queue is imported for side effects on MS Windows + import Queue as _unused_module_Queue # noqa: F401 +xrange = six.moves.xrange +log = logging.getLogger(__name__) +_Default = object() + + +def _add_transport_headers(headers): + """ + Adds the transport framing headers, if needed. Naturally, this method + cannot add a content-length header, so if there is no content-length header + then it will add Transfer-Encoding: chunked instead. Should only be called + if there is a body to upload. + + This should be a bit smarter: in particular, it should allow for bad or + unexpected versions of these headers, particularly transfer-encoding. 
+ """ + transfer_headers = ('content-length', 'transfer-encoding') + for header_name in headers: + if header_name.lower() in transfer_headers: + return + + headers['transfer-encoding'] = 'chunked' + + +def _build_context( + context, keyfile, certfile, cert_reqs, ca_certs, ca_cert_dir, ssl_version +): + """ + Creates a urllib3 context suitable for a given request based on a + collection of possible properties of that context. + """ + if context is None: + context = create_urllib3_context( + ssl_version=resolve_ssl_version(ssl_version), + cert_reqs=resolve_cert_reqs(cert_reqs), + ) + context = merge_context_settings( + context, + keyfile=keyfile, + certfile=certfile, + cert_reqs=cert_reqs, + ca_certs=ca_certs, + ca_cert_dir=ca_cert_dir, + ) + return context + + + + +# Pool objects +class ConnectionPool(object): + """ + Base class for all connection pools, such as + :class:`.HTTPConnectionPool` and :class:`.HTTPSConnectionPool`. + """ + scheme = None + QueueCls = queue.LifoQueue + + def __init__(self, host, port=None): + if not host: + raise LocationValueError("No host specified.") + + self.host = _ipv6_host(host).lower() + self.port = port + + def __str__(self): + return '%s(host=%r, port=%r)' % ( + type(self).__name__, self.host, self.port + ) + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.close() + # Return False to re-raise any potential exceptions + return False + + def close(self): + """ + Close all pooled connections and disable the pool. + """ + pass + + +# This is taken from http://hg.python.org/cpython/file/7aaba721ebc0/Lib/socket.py#l252 +_blocking_errnos = set([errno.EAGAIN, errno.EWOULDBLOCK]) + + +class HTTPConnectionPool(ConnectionPool, RequestMethods): + """ + Thread-safe connection pool for one host. + + :param host: + Host used for this HTTP Connection (e.g. "localhost"), passed into + :class:`httplib.HTTPConnection`. + + :param port: + Port used for this HTTP Connection (None is equivalent to 80), passed + into :class:`httplib.HTTPConnection`. + + :param strict: + Causes BadStatusLine to be raised if the status line can't be parsed + as a valid HTTP/1.0 or 1.1 status line, passed into + :class:`httplib.HTTPConnection`. + + .. note:: + Only works in Python 2. This parameter is ignored in Python 3. + + :param timeout: + Socket timeout in seconds for each individual connection. This can + be a float or integer, which sets the timeout for the HTTP request, + or an instance of :class:`urllib3.util.Timeout` which gives you more + fine-grained control over request timeouts. After the constructor has + been parsed, this is always a `urllib3.util.Timeout` object. + + :param maxsize: + Number of connections to save that can be reused. More than 1 is useful + in multithreaded situations. If ``block`` is set to False, more + connections will be created but they will not be saved once they've + been used. + + :param block: + If set to True, no more than ``maxsize`` connections will be used at + a time. When no free connections are available, the call will block + until a connection has been released. This is a useful side effect for + particular multithreaded situations where one does not want to use more + than maxsize connections per host to prevent flooding. + + :param headers: + Headers to include with all requests, unless other headers are given + explicitly. + + :param retries: + Retry configuration to use by default with requests in this pool. 
+ + :param _proxy: + Parsed proxy URL, should not be used directly, instead, see + :class:`urllib3.connectionpool.ProxyManager`" + + :param _proxy_headers: + A dictionary with proxy headers, should not be used directly, + instead, see :class:`urllib3.connectionpool.ProxyManager`" + + :param \\**conn_kw: + Additional parameters are used to create fresh :class:`urllib3.connection.HTTPConnection`, + :class:`urllib3.connection.HTTPSConnection` instances. + """ + scheme = 'http' + ConnectionCls = HTTP1Connection + ResponseCls = HTTPResponse + + def __init__( + self, + host, + port=None, + timeout=Timeout.DEFAULT_TIMEOUT, + maxsize=1, + block=False, + headers=None, + retries=None, + _proxy=None, + _proxy_headers=None, + **conn_kw + ): + ConnectionPool.__init__(self, host, port) + RequestMethods.__init__(self, headers) + if not isinstance(timeout, Timeout): + timeout = Timeout.from_float(timeout) + if retries is None: + retries = Retry.DEFAULT + self.timeout = timeout + self.retries = retries + self.pool = self.QueueCls(maxsize) + self.block = block + self.proxy = _proxy + self.proxy_headers = _proxy_headers or {} + # Fill the queue up so that doing get() on it will block properly + for _ in xrange(maxsize): + self.pool.put(None) + # These are mostly for testing and debugging purposes. + self.num_connections = 0 + self.num_requests = 0 + self.conn_kw = conn_kw + if self.proxy: + # Enable Nagle's algorithm for proxies, to avoid packet fragmentation. + # We cannot know if the user has added default socket options, so we cannot replace the + # list. + self.conn_kw.setdefault('socket_options', []) + + def _new_conn(self): + """ + Return a fresh connection. + """ + self.num_connections += 1 + + # TODO: Huge hack. + for kw in ('strict',): + if kw in self.conn_kw: + self.conn_kw.pop(kw) + + log.debug( + "Starting new HTTP connection (%d): %s:%s", + self.num_connections, + self.host, + self.port or "80", + ) + conn = self.ConnectionCls( + host=self.host, port=self.port, ** self.conn_kw + ) + return conn + + async def _get_conn(self, timeout=None): + """ + Get a connection. Will return a pooled connection if one is available. + + If no connections are available and :prop:`.block` is ``False``, then a + fresh connection is returned. + + :param timeout: + Seconds to wait before giving up and raising + :class:`urllib3.exceptions.EmptyPoolError` if the pool is empty and + :prop:`.block` is ``True``. + """ + conn = None + try: + conn = self.pool.get(block=self.block, timeout=timeout) + except AttributeError: # self.pool is None + raise ClosedPoolError(self, "Pool is closed.") + + except queue.Empty: + if self.block: + raise EmptyPoolError( + self, + "Pool reached maximum size and no more " + "connections are allowed.", + ) + + pass # Oh well, we'll create a new connection then + # If this is a persistent connection, check if it got disconnected + if conn and is_connection_dropped(conn): + log.debug("Resetting dropped connection: %s", self.host) + conn.close() + return conn or self._new_conn() + + async def _put_conn(self, conn): + """ + Put a connection back into the pool. + + :param conn: + Connection object for the current host and port as returned by + :meth:`._new_conn` or :meth:`._get_conn`. + + If the pool is already full, the connection is closed and discarded + because we exceeded maxsize. If connections are discarded frequently, + then maxsize should be increased. + + If the pool is closed, then the connection will be closed and discarded. 
+ """ + try: + self.pool.put(conn, block=False) + return # Everything is dandy, done. + + except AttributeError: + # self.pool is None. + pass + except queue.Full: + # This should never happen if self.block == True + log.warning( + "Connection pool is full, discarding connection: %s", self.host + ) + # Connection never got put back into the pool, close it. + if conn: + conn.close() + + async def _start_conn(self, conn, connect_timeout): + """ + Called right before a request is made, after the socket is created. + """ + await conn.connect(connect_timeout=connect_timeout) + + def _get_timeout(self, timeout): + """ Helper that always returns a :class:`urllib3.util.Timeout` """ + if timeout is _Default: + return self.timeout.clone() + + if isinstance(timeout, Timeout): + return timeout.clone() + + else: + # User passed us an int/float. This is for backwards compatibility, + # can be removed later + return Timeout.from_float(timeout) + + def _raise_timeout(self, err, url, timeout_value): + """Is the error actually a timeout? Will raise a ReadTimeout or pass""" + if isinstance(err, SocketTimeout): + raise ReadTimeoutError( + self, url, "Read timed out. (read timeout=%s)" % timeout_value + ) + + # See the above comment about EAGAIN in Python 3. In Python 2 we have + # to specifically catch it and throw the timeout error + if hasattr(err, 'errno') and err.errno in _blocking_errnos: + raise ReadTimeoutError( + self, url, "Read timed out. (read timeout=%s)" % timeout_value + ) + + # Catch possible read timeouts thrown as SSL errors. If not the + # case, rethrow the original. We need to do this because of: + # http://bugs.python.org/issue10272 + # TODO: Can we remove this? + if 'timed out' in str(err) or 'did not complete (read)' in str( + err + ): # Python 2.6 + raise ReadTimeoutError( + self, url, "Read timed out. (read timeout=%s)" % timeout_value + ) + + async def _make_request( + self, conn, method, url, timeout=_Default, body=None, headers=None + ): + """ + Perform a request on a given urllib connection object taken from our + pool. + + :param conn: + a connection from one of our connection pools + + :param timeout: + Socket timeout in seconds for the request. This can be a + float or integer, which will set the same timeout value for + the socket connect and the socket read, or an instance of + :class:`urllib3.util.Timeout`, which gives you more fine-grained + control over your timeouts. + """ + self.num_requests += 1 + timeout_obj = self._get_timeout(timeout) + timeout_obj.start_connect() + # Trigger any extra validation we need to do. + try: + await self._start_conn(conn, timeout_obj.connect_timeout) + except (SocketTimeout, BaseSSLError) as e: + # Py2 raises this as a BaseSSLError, Py3 raises it as socket timeout. + self._raise_timeout(err=e, url=url, timeout_value=conn.timeout) + raise + + # TODO: We need to encapsulate our proxy logic in here somewhere. + request = Request( + method=method, target=url, headers=headers, body=body + ) + host = self.host + port = self.port + scheme = self.scheme + request.add_host(host, port, scheme) + # Reset the timeout for the recv() on the socket + read_timeout = timeout_obj.read_timeout + # In Python 3 socket.py will catch EAGAIN and return None when you + # try and read into the file pointer created by http.client, which + # instead raises a BadStatusLine exception. Instead of catching + # the exception and assuming all BadStatusLine exceptions are read + # timeouts, check for a zero timeout before making the request. 
+ if read_timeout == 0: + raise ReadTimeoutError( + self, url, "Read timed out. (read timeout=%s)" % read_timeout + ) + + if read_timeout is Timeout.DEFAULT_TIMEOUT: + read_timeout = socket.getdefaulttimeout() + # Receive the response from the server + try: + response = await conn.send_request( + request, read_timeout=read_timeout + ) + except (SocketTimeout, BaseSSLError, SocketError) as e: + self._raise_timeout(err=e, url=url, timeout_value=read_timeout) + raise + + # AppEngine doesn't have a version attr. + http_version = getattr(conn, '_http_vsn_str', 'HTTP/?') + log.debug( + "%s://%s:%s \"%s %s %s\" %s", + self.scheme, + self.host, + self.port, + method, + url, + http_version, + response.status_code, + ) + return response + + def _absolute_url(self, path): + return Url( + scheme=self.scheme, host=self.host, port=self.port, path=path + ).url + + def close(self): + """ + Close all pooled connections and disable the pool. + """ + if self.pool is None: + return + + # Disable access to the pool + old_pool, self.pool = self.pool, None + try: + while True: + conn = old_pool.get(block=False) + if conn: + conn.close() + except queue.Empty: + pass # Done. + + def is_same_host(self, url): + """ + Check if the given ``url`` is a member of the same host as this + connection pool. + """ + if url.startswith('/'): + return True + + # TODO: Add optional support for socket.gethostbyname checking. + scheme, host, port = get_host(url) + host = _ipv6_host(host).lower() + # Use explicit default port for comparison when none is given + if self.port and not port: + port = DEFAULT_PORTS.get(scheme) + elif not self.port and port == DEFAULT_PORTS.get(scheme): + port = None + return (scheme, host, port) == (self.scheme, self.host, self.port) + + async def urlopen( + self, + method, + url, + body=None, + headers=None, + retries=None, + timeout=_Default, + pool_timeout=None, + body_pos=None, + **response_kw + ): + """ + Get a connection from the pool and perform an HTTP request. This is the + lowest level call for making a request, so you'll need to specify all + the raw details. + + .. note:: + + More commonly, it's appropriate to use a convenience method provided + by :class:`.RequestMethods`, such as :meth:`request`. + + :param method: + HTTP request method (such as GET, POST, PUT, etc.) + + :param body: + Data to send in the request body (useful for creating + POST requests, see HTTPConnectionPool.post_url for + more convenience). + + :param headers: + Dictionary of custom headers to send, such as User-Agent, + If-None-Match, etc. If None, pool headers are used. If provided, + these headers completely replace any pool-specific headers. + + :param retries: + Configure the number of retries to allow before raising a + :class:`~urllib3.exceptions.MaxRetryError` exception. + + Pass ``None`` to retry until you receive a response. Pass a + :class:`~urllib3.util.retry.Retry` object for fine-grained control + over different types of retries. + Pass an integer number to retry connection errors that many times, + but no other types of errors. Pass zero to never retry. + + If ``False``, then retries are disabled and any exception is raised + immediately. Also, instead of raising a MaxRetryError on redirects, + the redirect response will be returned. + + :type retries: :class:`~urllib3.util.retry.Retry`, False, or an int. + + :param timeout: + If specified, overrides the default timeout for this one + request. It may be a float (in seconds) or an instance of + :class:`urllib3.util.Timeout`. 
+ + :param pool_timeout: + If set and the pool is set to block=True, then this method will + block for ``pool_timeout`` seconds and raise EmptyPoolError if no + connection is available within the time period. + + :param int body_pos: + Position to seek to in file-like body in the event of a retry or + redirect. Typically this won't need to be set because urllib3 will + auto-populate the value when needed. + + :param \\**response_kw: + Additional parameters are passed to + :meth:`urllib3.response.HTTPResponse.from_httplib` + """ + if headers is None: + headers = self.headers + if not isinstance(retries, Retry): + retries = Retry.from_int( + retries, default=self.retries, redirect=False + ) + conn = None + # Track whether `conn` needs to be released before + # returning/raising/recursing. + release_this_conn = False + # Merge the proxy headers. Only do this in HTTP. We have to copy the + # headers dict so we can safely change it without those changes being + # reflected in anyone else's copy. + if self.scheme == 'http': + headers = headers.copy() + headers.update(self.proxy_headers) + # Must keep the exception bound to a separate variable or else Python 3 + # complains about UnboundLocalError. + err = None + # Keep track of whether we cleanly exited the except block. This + # ensures we do proper cleanup in finally. + clean_exit = False + # Rewind body position, if needed. Record current position + # for future rewinds in the event of a redirect/retry. + body_pos = set_file_position(body, body_pos) + if body is not None: + _add_transport_headers(headers) + try: + # Request a connection from the queue. + timeout_obj = self._get_timeout(timeout) + conn = await self._get_conn(timeout=pool_timeout) + conn.timeout = timeout_obj.connect_timeout + # Make the request on the base connection object. + base_response = await self._make_request( + conn, + method, + url, + timeout=timeout_obj, + body=body, + headers=headers, + ) + # Pass method to Response for length checking + response_kw['request_method'] = method + # Import httplib's response into our own wrapper object + response = self.ResponseCls.from_base( + base_response, pool=self, retries=retries, **response_kw + ) + # Everything went great! + clean_exit = True + except queue.Empty: + # Timed out by queue. + raise EmptyPoolError(self, "No pool connections are available.") + + except ( + TimeoutError, + SocketError, + ProtocolError, + h11.ProtocolError, + BaseSSLError, + SSLError, + CertificateError, + ) as e: + # Discard the connection for these exceptions. It will be + # replaced during the next _get_conn() call. + clean_exit = False + if isinstance(e, (BaseSSLError, CertificateError)): + e = SSLError(e) + elif isinstance( + e, (SocketError, NewConnectionError) + ) and self.proxy: + e = ProxyError('Cannot connect to proxy.', e) + elif isinstance(e, (SocketError, h11.ProtocolError)): + e = ProtocolError('Connection aborted.', e) + retries = retries.increment( + method, url, error=e, _pool=self, _stacktrace=sys.exc_info()[2] + ) + retries.sleep() + # Keep track of the error for the retry warning. + err = e + finally: + if not clean_exit: + # We hit some kind of exception, handled or otherwise. We need + # to throw the connection away unless explicitly told not to. + # Close the connection, set the variable to None, and make sure + # we put the None back in the pool to avoid leaking it. + conn = conn and conn.close() + release_this_conn = True + if release_this_conn: + # Put the connection back to be reused. 
If the connection is + # expired then it will be None, which will get replaced with a + # fresh connection during _get_conn. + await self._put_conn(conn) + if not conn: + # Try again + log.warning( + "Retrying (%r) after connection " "broken by '%r': %s", + retries, + err, + url, + ) + return await self.urlopen( + method, + url, + body, + headers, + retries, + timeout=timeout, + pool_timeout=pool_timeout, + body_pos=body_pos, + **response_kw + ) + + def drain_and_release_conn(response): + try: + # discard any remaining response body, the connection will be + # released back to the pool once the entire response is read + response.read() + except ( + TimeoutError, + SocketError, + ProtocolError, + BaseSSLError, + SSLError, + ) as e: + pass + + # Check if we should retry the HTTP response. + has_retry_after = bool(response.getheader('Retry-After')) + if retries.is_retry(method, response.status, has_retry_after): + try: + retries = retries.increment( + method, url, response=response, _pool=self + ) + except MaxRetryError: + if retries.raise_on_status: + # Drain and release the connection for this response, since + # we're not returning it to be released manually. + drain_and_release_conn(response) + raise + + return response + + # drain and return the connection to the pool before recursing + drain_and_release_conn(response) + retries.sleep(response) + log.debug("Retry: %s", url) + return await self.urlopen( + method, + url, + body, + headers, + retries=retries, + timeout=timeout, + pool_timeout=pool_timeout, + body_pos=body_pos, + **response_kw + ) + + return response + + +class HTTPSConnectionPool(HTTPConnectionPool): + """ + Same as :class:`.HTTPConnectionPool`, but HTTPS. + + When Python is compiled with the :mod:`ssl` module, then + :class:`.VerifiedHTTPSConnection` is used, which *can* verify certificates, + instead of :class:`.HTTPSConnection`. + + :class:`.VerifiedHTTPSConnection` uses one of ``assert_fingerprint``, + ``assert_hostname`` and ``host`` in this order to verify connections. + If ``assert_hostname`` is False, no verification is done. + + The ``key_file``, ``cert_file``, ``cert_reqs``, ``ca_certs``, + ``ca_cert_dir``, and ``ssl_version`` are only used if :mod:`ssl` is + available and are fed into :meth:`urllib3.util.ssl_wrap_socket` to upgrade + the connection socket into an SSL socket. + """ + scheme = 'https' + + def __init__( + self, + host, + port=None, + timeout=Timeout.DEFAULT_TIMEOUT, + maxsize=1, + block=False, + headers=None, + retries=None, + _proxy=None, + _proxy_headers=None, + key_file=None, + cert_file=None, + cert_reqs=None, + ca_certs=None, + ssl_version=None, + assert_hostname=None, + assert_fingerprint=None, + ca_cert_dir=None, + ssl_context=None, + **conn_kw + ): + HTTPConnectionPool.__init__( + self, + host, + port, + timeout, + maxsize, + block, + headers, + retries, + _proxy, + _proxy_headers, + **conn_kw + ) + if ssl is None: + raise SSLError("SSL module is not available") + + if ca_certs and cert_reqs is None: + cert_reqs = 'CERT_REQUIRED' + self.ssl_context = _build_context( + ssl_context, + keyfile=key_file, + certfile=cert_file, + cert_reqs=cert_reqs, + ca_certs=ca_certs, + ca_cert_dir=ca_cert_dir, + ssl_version=ssl_version, + ) + self.assert_hostname = assert_hostname + self.assert_fingerprint = assert_fingerprint + + def _new_conn(self): + """ + Return a fresh connection. 
+ """ + self.num_connections += 1 + log.debug( + "Starting new HTTPS connection (%d): %s:%s", + self.num_connections, + self.host, + self.port or "443", + ) + actual_host = self.host + actual_port = self.port + tunnel_host = None + tunnel_port = None + tunnel_headers = None + if self.proxy is not None: + actual_host = self.proxy.host + actual_port = self.proxy.port + tunnel_host = self.host + tunnel_port = self.port + tunnel_headers = self.proxy_headers + + # TODO: Huge hack. + for kw in ('strict', 'redirect'): + if kw in self.conn_kw: + self.conn_kw.pop(kw) + + conn = self.ConnectionCls( + host=actual_host, + port=actual_port, + tunnel_host=tunnel_host, + tunnel_port=tunnel_port, + tunnel_headers=tunnel_headers, + ** self.conn_kw + ) + return conn + + async def _start_conn(self, conn, connect_timeout): + """ + Called right before a request is made, after the socket is created. + """ + await conn.connect( + ssl_context=self.ssl_context, + fingerprint=self.assert_fingerprint, + assert_hostname=self.assert_hostname, + connect_timeout=connect_timeout, + ) + if not conn.is_verified: + warnings.warn( + ( + 'Unverified HTTPS request is being made. ' + 'Adding certificate verification is strongly advised. See: ' + 'https://urllib3.readthedocs.io/en/latest/advanced-usage.html' + '#ssl-warnings' + ), + InsecureRequestWarning, + ) + + +def connection_from_url(url, **kw): + """ + Given a url, return an :class:`.ConnectionPool` instance of its host. + + This is a shortcut for not having to parse out the scheme, host, and port + of the url before creating an :class:`.ConnectionPool` instance. + + :param url: + Absolute URL string that must include the scheme. Port is optional. + + :param \\**kw: + Passes additional parameters to the constructor of the appropriate + :class:`.ConnectionPool`. Useful for specifying things like + timeout, maxsize, headers, etc. + + Example:: + + >>> conn = connection_from_url('http://google.com/') + >>> r = conn.request('GET', '/') + """ + scheme, host, port = get_host(url) + port = port or DEFAULT_PORTS.get(scheme, 80) + if scheme == 'https': + return HTTPSConnectionPool(host, port=port, **kw) + + else: + return HTTPConnectionPool(host, port=port, **kw) + + +def _ipv6_host(host): + """ + Process IPv6 address literals + """ + # httplib doesn't like it when we include brackets in IPv6 addresses + # Specifically, if we include brackets but also pass the port then + # httplib crazily doubles up the square brackets on the Host header. + # Instead, we need to make sure we never pass ``None`` as the port. + # However, for backward compatibility reasons we can't actually + # *assert* that. 
See http://bugs.python.org/issue28539 + # + # Also if an IPv6 address literal has a zone identifier, the + # percent sign might be URIencoded, convert it back into ASCII + if host.startswith('[') and host.endswith(']'): + host = host.replace('%25', '%').strip('[]') + return host diff --git a/requests3/core/http_manager/_async/poolmanager.py b/requests3/core/http_manager/_async/poolmanager.py new file mode 100644 index 00000000..0645a0f5 --- /dev/null +++ b/requests3/core/http_manager/_async/poolmanager.py @@ -0,0 +1,446 @@ +from __future__ import absolute_import +import collections +import functools +import logging + +from .._collections import RecentlyUsedContainer +from ..base import DEFAULT_PORTS +from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool +from ..exceptions import LocationValueError, MaxRetryError, ProxySchemeUnknown +from ..packages.six.moves.urllib.parse import urljoin +from ..request import RequestMethods +from ..util.url import parse_url +from ..util.request import set_file_position +from ..util.retry import Retry + +__all__ = ['PoolManager', 'ProxyManager', 'proxy_from_url'] +log = logging.getLogger(__name__) +SSL_KEYWORDS = ( + 'key_file', + 'cert_file', + 'cert_reqs', + 'ca_certs', + 'ssl_version', + 'ca_cert_dir', + 'ssl_context', +) +# All known keyword arguments that could be provided to the pool manager, its +# pools, or the underlying connections. This is used to construct a pool key. +_key_fields = ( + 'key_scheme', # str + 'key_host', # str + 'key_strict', + 'key_port', # int + 'key_timeout', # int or float or Timeout + 'key_retries', # int or Retry + 'key_block', # bool + 'key_source_address', # str + 'key_key_file', # str + 'key_cert_file', # str + 'key_cert_reqs', # str + 'key_ca_certs', # str + 'key_ssl_version', # str + 'key_ca_cert_dir', # str + 'key_ssl_context', # instance of ssl.SSLContext or urllib3.util.ssl_.SSLContext + 'key_maxsize', # int + 'key_headers', # dict + 'key__proxy', # parsed proxy url + 'key__proxy_headers', # dict + 'key_socket_options', # list of (level (int), optname (int), value (int or str)) tuples + 'key__socks_options', # dict + 'key_assert_hostname', # bool or string + 'key_assert_fingerprint', # str +) +# : The namedtuple class used to construct keys for the connection pool. +#: All custom key schemes should include the fields in this key at a minimum. +PoolKey = collections.namedtuple('PoolKey', _key_fields) + + +def _default_key_normalizer(key_class, request_context): + """ + Create a pool key out of a request context dictionary. + + According to RFC 3986, both the scheme and host are case-insensitive. + Therefore, this function normalizes both before constructing the pool + key for an HTTPS request. If you wish to change this behaviour, provide + alternate callables to ``key_fn_by_scheme``. + + :param key_class: + The class to use when constructing the key. This should be a namedtuple + with the ``scheme`` and ``host`` keys at a minimum. + :type key_class: namedtuple + :param request_context: + A dictionary-like object that contain the context for a request. + :type request_context: dict + + :return: A namedtuple that can be used as a connection pool key. 
+ :rtype: PoolKey + """ + # Since we mutate the dictionary, make a copy first + context = request_context.copy() + context['scheme'] = context['scheme'].lower() + context['host'] = context['host'].lower() + # These are both dictionaries and need to be transformed into frozensets + for key in ('headers', '_proxy_headers', '_socks_options'): + if key in context and context[key] is not None: + context[key] = frozenset(context[key].items()) + # The socket_options key may be a list and needs to be transformed into a + # tuple. + socket_opts = context.get('socket_options') + if socket_opts is not None: + context['socket_options'] = tuple(socket_opts) + # Map the kwargs to the names in the namedtuple - this is necessary since + # namedtuples can't have fields starting with '_'. + for key in list(context.keys()): + context['key_' + key] = context.pop(key) + # Default to ``None`` for keys missing from the context + for field in key_class._fields: + if field not in context: + context[field] = None + return key_class(**context) + + +# : A dictionary that maps a scheme to a callable that creates a pool key. +#: This can be used to alter the way pool keys are constructed, if desired. +#: Each PoolManager makes a copy of this dictionary so they can be configured +#: globally here, or individually on the instance. +key_fn_by_scheme = { + 'http': functools.partial(_default_key_normalizer, PoolKey), + 'https': functools.partial(_default_key_normalizer, PoolKey), +} +pool_classes_by_scheme = { + 'http': HTTPConnectionPool, 'https': HTTPSConnectionPool +} + + +class PoolManager(RequestMethods): + """ + Allows for arbitrary requests while transparently keeping track of + necessary connection pools for you. + + :param num_pools: + Number of connection pools to cache before discarding the least + recently used pool. + + :param headers: + Headers to include with all requests, unless other headers are given + explicitly. + + :param \\**connection_pool_kw: + Additional parameters are used to create fresh + :class:`urllib3.connectionpool.ConnectionPool` instances. + + Example:: + + >>> manager = PoolManager(num_pools=2) + >>> r = manager.request('GET', 'http://google.com/') + >>> r = manager.request('GET', 'http://google.com/mail') + >>> r = manager.request('GET', 'http://yahoo.com/') + >>> len(manager.pools) + 2 + + """ + proxy = None + + def __init__( + self, num_pools=10, headers=None, backend=None, **connection_pool_kw + ): + RequestMethods.__init__(self, headers) + self.connection_pool_kw = connection_pool_kw + self.pools = RecentlyUsedContainer( + num_pools, dispose_func=lambda p: p.close() + ) + # Locally set the pool classes and keys so other PoolManagers can + # override them. + self.pool_classes_by_scheme = pool_classes_by_scheme + self.key_fn_by_scheme = key_fn_by_scheme.copy() + self.backend = backend + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.clear() + # Return False to re-raise any potential exceptions + return False + + def _new_pool(self, scheme, host, port, request_context=None): + """ + Create a new :class:`ConnectionPool` based on host, port, scheme, and + any additional pool keyword arguments. + + If ``request_context`` is provided, it is provided as keyword arguments + to the pool class used. This method is used to actually create the + connection pools handed out by :meth:`connection_from_url` and + companion methods. It is intended to be overridden for customization. 
+ """ + pool_cls = self.pool_classes_by_scheme[scheme] + if request_context is None: + request_context = self.connection_pool_kw.copy() + # Although the context has everything necessary to create the pool, + # this function has historically only used the scheme, host, and port + # in the positional args. When an API change is acceptable these can + # be removed. + for key in ('scheme', 'host', 'port'): + request_context.pop(key, None) + if scheme == 'http': + for kw in SSL_KEYWORDS: + request_context.pop(kw, None) + return pool_cls(host, port, backend=self.backend, **request_context) + + def clear(self): + """ + Empty our store of pools and direct them all to close. + + This will not affect in-flight connections, but they will not be + re-used after completion. + """ + self.pools.clear() + + def connection_from_host( + self, host, port=None, scheme='http', pool_kwargs=None + ): + """ + Get a :class:`ConnectionPool` based on the host, port, and scheme. + + If ``port`` isn't given, it will be derived from the ``scheme`` using + ``urllib3.connectionpool.port_by_scheme``. If ``pool_kwargs`` is + provided, it is merged with the instance's ``connection_pool_kw`` + variable and used to create the new connection pool, if one is + needed. + """ + if not host: + raise LocationValueError("No host specified.") + + request_context = self._merge_pool_kwargs(pool_kwargs) + request_context['scheme'] = scheme or 'http' + if not port: + port = DEFAULT_PORTS.get(request_context['scheme'].lower(), 80) + request_context['port'] = port + request_context['host'] = host + return self.connection_from_context(request_context) + + def connection_from_context(self, request_context): + """ + Get a :class:`ConnectionPool` based on the request context. + + ``request_context`` must at least contain the ``scheme`` key and its + value must be a key in ``key_fn_by_scheme`` instance variable. + """ + scheme = request_context['scheme'].lower() + pool_key_constructor = self.key_fn_by_scheme[scheme] + pool_key = pool_key_constructor(request_context) + return self.connection_from_pool_key( + pool_key, request_context=request_context + ) + + def connection_from_pool_key(self, pool_key, request_context=None): + """ + Get a :class:`ConnectionPool` based on the provided pool key. + + ``pool_key`` should be a namedtuple that only contains immutable + objects. At a minimum it must have the ``scheme``, ``host``, and + ``port`` fields. + """ + with self.pools.lock: + # If the scheme, host, or port doesn't match existing open + # connections, open a new ConnectionPool. + pool = self.pools.get(pool_key) + if pool: + return pool + + # Make a fresh ConnectionPool of the desired type + scheme = request_context['scheme'] + host = request_context['host'] + port = request_context['port'] + pool = self._new_pool( + scheme, host, port, request_context=request_context + ) + self.pools[pool_key] = pool + return pool + + def connection_from_url(self, url, pool_kwargs=None): + """ + Similar to :func:`urllib3.connectionpool.connection_from_url`. + + If ``pool_kwargs`` is not provided and a new pool needs to be + constructed, ``self.connection_pool_kw`` is used to initialize + the :class:`urllib3.connectionpool.ConnectionPool`. If ``pool_kwargs`` + is provided, it is used instead. Note that if a new pool does not + need to be created for the request, the provided ``pool_kwargs`` are + not used. 
+ """ + u = parse_url(url) + return self.connection_from_host( + u.host, port=u.port, scheme=u.scheme, pool_kwargs=pool_kwargs + ) + + def _merge_pool_kwargs(self, override): + """ + Merge a dictionary of override values for self.connection_pool_kw. + + This does not modify self.connection_pool_kw and returns a new dict. + Any keys in the override dictionary with a value of ``None`` are + removed from the merged dictionary. + """ + base_pool_kwargs = self.connection_pool_kw.copy() + if override: + for key, value in override.items(): + if value is None: + try: + del base_pool_kwargs[key] + except KeyError: + pass + else: + base_pool_kwargs[key] = value + return base_pool_kwargs + + async def urlopen(self, method, url, redirect=True, **kw): + """ + Same as :meth:`urllib3.connectionpool.HTTPConnectionPool.urlopen` + with redirect logic and only sends the request-uri portion of the + ``url``. + + The given ``url`` parameter must be absolute, such that an appropriate + :class:`urllib3.connectionpool.ConnectionPool` can be chosen for it. + """ + u = parse_url(url) + conn = self.connection_from_host(u.host, port=u.port, scheme=u.scheme) + # Rewind body position, if needed. Record current position + # for future rewinds in the event of a redirect/retry. + body = kw.get('body') + body_pos = kw.get('body_pos') + kw['body_pos'] = set_file_position(body, body_pos) + if 'headers' not in kw: + kw['headers'] = self.headers + if self.proxy is not None and u.scheme == "http": + response = await conn.urlopen(method, url, **kw) + else: + response = await conn.urlopen(method, u.request_uri, **kw) + redirect_location = redirect and response.get_redirect_location() + if not redirect_location: + return response + + # Support relative URLs for redirecting. + redirect_location = urljoin(url, redirect_location) + # RFC 7231, Section 6.4.4 + if response.status == 303: + method = 'GET' + retries = kw.get('retries') + if not isinstance(retries, Retry): + retries = Retry.from_int(retries, redirect=redirect) + try: + retries = retries.increment( + method, url, response=response, _pool=conn + ) + except MaxRetryError: + if retries.raise_on_redirect: + raise + + return response + + kw['retries'] = retries + kw['redirect'] = redirect + retries.sleep_for_retry(response) + log.info("Redirecting %s -> %s", url, redirect_location) + return self.urlopen(method, redirect_location, **kw) + + +class ProxyManager(PoolManager): + """ + Behaves just like :class:`PoolManager`, but sends all requests through + the defined proxy, using the CONNECT method for HTTPS URLs. + + :param proxy_url: + The URL of the proxy to be used. + + :param proxy_headers: + A dictionary contaning headers that will be sent to the proxy. In case + of HTTP they are being sent with each request, while in the + HTTPS/CONNECT case they are sent only once. Could be used for proxy + authentication. 
+ + Example: + >>> proxy = urllib3.ProxyManager('http://localhost:3128/') + >>> r1 = proxy.request('GET', 'http://google.com/') + >>> r2 = proxy.request('GET', 'http://httpbin.org/') + >>> len(proxy.pools) + 1 + >>> r3 = proxy.request('GET', 'https://httpbin.org/') + >>> r4 = proxy.request('GET', 'https://twitter.com/') + >>> len(proxy.pools) + 3 + + """ + + def __init__( + self, + proxy_url, + num_pools=10, + headers=None, + proxy_headers=None, + **connection_pool_kw + ): + if isinstance(proxy_url, HTTPConnectionPool): + proxy_url = '%s://%s:%i' % ( + proxy_url.scheme, proxy_url.host, proxy_url.port + ) + proxy = parse_url(proxy_url) + if not proxy.port: + port = DEFAULT_PORTS.get(proxy.scheme, 80) + proxy = proxy._replace(port=port) + if proxy.scheme not in ("http", "https"): + raise ProxySchemeUnknown(proxy.scheme) + + self.proxy = proxy + self.proxy_headers = proxy_headers or {} + connection_pool_kw['_proxy'] = self.proxy + connection_pool_kw['_proxy_headers'] = self.proxy_headers + super(ProxyManager, self).__init__( + num_pools, headers, **connection_pool_kw + ) + + def connection_from_host( + self, host, port=None, scheme='http', pool_kwargs=None + ): + if scheme == "https": + return super(ProxyManager, self).connection_from_host( + host, port, scheme, pool_kwargs=pool_kwargs + ) + + return super(ProxyManager, self).connection_from_host( + self.proxy.host, + self.proxy.port, + self.proxy.scheme, + pool_kwargs=pool_kwargs, + ) + + def _set_proxy_headers(self, url, headers=None): + """ + Sets headers needed by proxies: specifically, the Accept and Host + headers. Only sets headers not provided by the user. + """ + headers_ = {'Accept': '*/*'} + netloc = parse_url(url).netloc + if netloc: + headers_['Host'] = netloc + if headers: + headers_.update(headers) + return headers_ + + def urlopen(self, method, url, redirect=True, **kw): + "Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute." + u = parse_url(url) + if u.scheme == "http": + # For proxied HTTPS requests, httplib sets the necessary headers + # on the CONNECT to the proxy. For HTTP, we'll definitely + # need to set 'Host' at the very least. 
+        headers = kw.get('headers', self.headers)
+        kw['headers'] = self._set_proxy_headers(url, headers)
+        return super(ProxyManager, self).urlopen(
+            method, url, redirect=redirect, **kw
+        )
+
+
+def proxy_from_url(url, **kw):
+    return ProxyManager(proxy_url=url, **kw)
diff --git a/requests3/core/http_manager/_async/response.py b/requests3/core/http_manager/_async/response.py
new file mode 100644
index 00000000..78e6c264
--- /dev/null
+++ b/requests3/core/http_manager/_async/response.py
@@ -0,0 +1,461 @@
+from __future__ import absolute_import
+from contextlib import contextmanager
+import zlib
+import io
+import logging
+from socket import timeout as SocketTimeout
+from socket import error as SocketError
+
+import h11
+
+from .._collections import HTTPHeaderDict
+from ..exceptions import (ProtocolError, DecodeError, ReadTimeoutError)
+from ..packages.six import string_types as basestring, binary_type
+from ..util.ssl_ import BaseSSLError
+
+log = logging.getLogger(__name__)
+
+
+class DeflateDecoder(object):
+
+    def __init__(self):
+        self._first_try = True
+        self._data = binary_type()
+        self._obj = zlib.decompressobj()
+
+    def __getattr__(self, name):
+        return getattr(self._obj, name)
+
+    def decompress(self, data):
+        if not data:
+            return data
+
+        if not self._first_try:
+            return self._obj.decompress(data)
+
+        self._data += data
+        try:
+            decompressed = self._obj.decompress(data)
+            if decompressed:
+                self._first_try = False
+                self._data = None
+            return decompressed
+
+        except zlib.error:
+            self._first_try = False
+            self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
+            try:
+                return self.decompress(self._data)
+
+            finally:
+                self._data = None
+
+
+class GzipDecoder(object):
+
+    def __init__(self):
+        self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
+
+    def __getattr__(self, name):
+        return getattr(self._obj, name)
+
+    def decompress(self, data):
+        if not data:
+            return data
+
+        return self._obj.decompress(data)
+
+
+def _get_decoder(mode):
+    if mode == 'gzip':
+        return GzipDecoder()
+
+    return DeflateDecoder()
+
+
+class HTTPResponse(io.IOBase):
+    """
+    HTTP Response container.
+
+    Backwards-compatible with httplib's HTTPResponse, but the response
+    ``body`` is loaded and decoded on-demand when the ``data`` property is
+    accessed. This class is also compatible with the Python standard
+    library's :mod:`io` module, and can hence be treated as a readable object
+    in the context of that framework.
+
+    Extra parameters for behaviour not present in httplib.HTTPResponse:
+
+    :param preload_content:
+        If True, the response's body will be preloaded during construction.
+
+    :param decode_content:
+        If False, attempts to decode specific content-encodings based on
+        headers (like 'gzip' and 'deflate') will be skipped and raw data will
+        be used instead.
+
+    :param retries:
+        The last :class:`~urllib3.util.retry.Retry` that
+        was used during the request.
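+
+    A hedged usage sketch (``pool`` stands in for a hypothetical async pool
+    that returns this response type)::
+
+        r = await pool.urlopen('GET', 'http://example.com/')
+        body = await r.read(decode_content=True)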
+ """ + CONTENT_DECODERS = ['gzip', 'deflate'] + REDIRECT_STATUSES = [301, 302, 303, 307, 308] + + def __init__( + self, + body='', + headers=None, + status=0, + version=0, + reason=None, + strict=0, + preload_content=True, + decode_content=True, + original_response=None, + pool=None, + connection=None, + retries=None, + request_method=None, + ): + if isinstance(headers, HTTPHeaderDict): + self.headers = headers + else: + self.headers = HTTPHeaderDict(headers) + self.status = status + self.version = version + self.reason = reason + self.strict = strict + self.decode_content = decode_content + self.retries = retries + self._decoder = None + self._body = None + self._fp = None + self._original_response = original_response + self._fp_bytes_read = 0 + self._buffer = b'' + if body and isinstance(body, (basestring, binary_type)): + self._body = body + else: + self._fp = body + self._pool = pool + self._connection = connection + # If requested, preload the body. + if preload_content and not self._body: + self._body = self.read(decode_content=decode_content) + + def get_redirect_location(self): + """ + Should we redirect and where to? + + :returns: Truthy redirect location string if we got a redirect status + code and valid location. ``None`` if redirect status and no + location. ``False`` if not a redirect status code. + """ + if self.status in self.REDIRECT_STATUSES: + return self.headers.get('location') + + return False + + async def release_conn(self): + if not self._pool or not self._connection: + return + + await self._pool._put_conn(self._connection) + self._connection = None + + @property + def data(self): + # For backwords-compat with earlier urllib3 0.4 and earlier. + if self._body is not None: + return self._body + + if self._fp: + return self.read(cache_content=True) + + @property + def connection(self): + return self._connection + + def tell(self): + """ + Obtain the number of bytes pulled over the wire so far. May differ from + the amount of content returned by :meth:``HTTPResponse.read`` if bytes + are encoded on the wire (e.g, compressed). + """ + return self._fp_bytes_read + + def _init_decoder(self): + """ + Set-up the _decoder attribute if necessary. + """ + # Note: content-encoding value should be case-insensitive, per RFC 7230 + # Section 3.2 + content_encoding = self.headers.get('content-encoding', '').lower() + if self._decoder is None and content_encoding in self.CONTENT_DECODERS: + self._decoder = _get_decoder(content_encoding) + + def _decode(self, data, decode_content, flush_decoder): + """ + Decode the data passed in and potentially flush the decoder. + """ + try: + if decode_content and self._decoder: + data = self._decoder.decompress(data) + except (IOError, zlib.error) as e: + content_encoding = self.headers.get('content-encoding', '').lower() + raise DecodeError( + "Received response with content-encoding: %s, but " + "failed to decode it." % content_encoding, + e, + ) + + if flush_decoder and decode_content: + data += self._flush_decoder() + return data + + def _flush_decoder(self): + """ + Flushes the decoder. Should only be called if the decoder is actually + being used. + """ + if self._decoder: + buf = self._decoder.decompress(b'') + return buf + self._decoder.flush() + + return b'' + + @contextmanager + def _error_catcher(self): + """ + Catch low-level python exceptions, instead re-raising urllib3 + variants, so that low-level exceptions are not leaked in the + high-level api. + + On exit, release the connection back to the pool. 
+ """ + clean_exit = False + try: + try: + yield + + except SocketTimeout: + # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but + # there is yet no clean way to get at it from this context. + raise ReadTimeoutError(self._pool, None, 'Read timed out.') + + except BaseSSLError as e: + # FIXME: Is there a better way to differentiate between SSLErrors? + if 'read operation timed out' not in str(e): # Defensive: + # This shouldn't happen but just in case we're missing an edge + # case, let's avoid swallowing SSL errors. + raise + + raise ReadTimeoutError(self._pool, None, 'Read timed out.') + + except (h11.ProtocolError, SocketError) as e: + # This includes IncompleteRead. + raise ProtocolError('Connection broken: %r' % e, e) + + except GeneratorExit: + # We swallow GeneratorExit when it is emitted: this allows the + # use of the error checker inside stream() + pass + # If no exception is thrown, we should avoid cleaning up + # unnecessarily. + clean_exit = True + finally: + # If we didn't terminate cleanly, we need to throw away our + # connection. + if not clean_exit: + self.close() + # If we hold the original response but it's finished now, we should + # return the connection back to the pool. + # XXX + if False and self._original_response and self._original_response.complete: + self.release_conn() + + async def read(self, amt=None, decode_content=None, cache_content=False): + """ + Similar to :meth:`httplib.HTTPResponse.read`, but with two additional + parameters: ``decode_content`` and ``cache_content``. + + :param amt: + How much of the content to read. If specified, caching is skipped + because it doesn't make sense to cache partial content as the full + response. + + :param decode_content: + If True, will attempt to decode the body based on the + 'content-encoding' header. + + :param cache_content: + If True, will save the returned data such that the same result is + returned despite of the state of the underlying file object. This + is useful if you want the ``.data`` property to continue working + after having ``.read()`` the file object. (Overridden if ``amt`` is + set.) + """ + # TODO: refactor this method to better handle buffered output. + # This method is a weird one. We treat this read() like a buffered + # read, meaning that it never reads "short" unless there is an EOF + # condition at work. However, we have a decompressor in play here, + # which means our read() returns decompressed data. + # + # This means the buffer can only meaningfully buffer decompressed data. + # This makes this method prone to over-reading, and forcing too much + # data into the buffer. That's unfortunate, but right now I'm not smart + # enough to come up with a way to solve that problem. + if self._fp is None and not self._buffer: + return b'' + + data = self._buffer + with self._error_catcher(): + if amt is None: + chunks = [] + async for chunk in self.stream(decode_content): + chunks.append(chunk) + data += b''.join(chunks) + self._buffer = b'' + # We only cache the body data for simple read calls. + self._body = data + else: + data_len = len(data) + chunks = [data] + streamer = self.stream(decode_content) + while data_len < amt: + try: + chunk = next(streamer) + except StopIteration: + break + + else: + chunks.append(chunk) + data_len += len(chunk) + data = b''.join(chunks) + self._buffer = data[amt:] + data = data[:amt] + return data + + async def stream(self, decode_content=None): + """ + A generator wrapper for the read() method. 
+ + :param decode_content: + If True, will attempt to decode the body based on the + 'content-encoding' header. + """ + # Short-circuit evaluation for exhausted responses. + if self._fp is None: + return + + self._init_decoder() + if decode_content is None: + decode_content = self.decode_content + with self._error_catcher(): + async for raw_chunk in self._fp: + self._fp_bytes_read += len(raw_chunk) + decoded_chunk = self._decode( + raw_chunk, decode_content, flush_decoder=False + ) + if decoded_chunk: + yield decoded_chunk + + # This branch is speculative: most decoders do not need to flush, + # and so this produces no output. However, it's here because + # anecdotally some platforms on which we do not test (like Jython) + # do require the flush. For this reason, we exclude this from code + # coverage. Happily, the code here is so simple that testing the + # branch we don't enter is basically entirely unnecessary (it's + # just a yield statement). + final_chunk = self._decode(b'', decode_content, flush_decoder=True) + if final_chunk: # Platform-specific: Jython + yield final_chunk + + self._fp = None + + @classmethod + def from_base(ResponseCls, r, **response_kw): + """ + Given an :class:`urllib3.base.Response` instance ``r``, return a + corresponding :class:`urllib3.response.HTTPResponse` object. + + Remaining parameters are passed to the HTTPResponse constructor, along + with ``original_response=r``. + """ + # TODO: Huge hack. + for kw in ('redirect', 'assert_same_host', 'enforce_content_length'): + if kw in response_kw: + response_kw.pop(kw) + + resp = ResponseCls( + body=r.body, + headers=r.headers, + status=r.status_code, + version=r.version, + original_response=r, + connection=r.body, + **response_kw + ) + return resp + + + # Backwards-compatibility methods for httplib.HTTPResponse + def getheaders(self): + return self.headers + + def getheader(self, name, default=None): + return self.headers.get(name, default) + + + # Backwards compatibility for http.cookiejar + def info(self): + return self.headers + + + # Overrides from io.IOBase + def close(self): + if not self.closed: + self._fp.close() + self._buffer = b'' + self._fp = None + if self._connection: + self._connection.close() + + @property + def closed(self): + # This method is required for `io` module compatibility. + if self._fp is None and not self._buffer: + return True + + elif hasattr(self._fp, 'complete'): + return self._fp.complete + + else: + return False + + def fileno(self): + # This method is required for `io` module compatibility. + if self._fp is None: + raise IOError("HTTPResponse has no file to get a fileno from") + + elif hasattr(self._fp, "fileno"): + return self._fp.fileno() + + else: + raise IOError( + "The file-like object this HTTPResponse is wrapped " + "around has no file descriptor" + ) + + def readable(self): + # This method is required for `io` module compatibility. + return True + + def readinto(self, b): + # This method is required for `io` module compatibility. 
+ temp = self.read(len(b)) + if len(temp) == 0: + return 0 + + else: + b[:len(temp)] = temp + return len(temp) diff --git a/requests3/core/http_manager/_backends/__init__.py b/requests3/core/http_manager/_backends/__init__.py new file mode 100644 index 00000000..dbcc879d --- /dev/null +++ b/requests3/core/http_manager/_backends/__init__.py @@ -0,0 +1,9 @@ +from ..packages import six +from .sync_backend import SyncBackend + +__all__ = ['SyncBackend'] +if six.PY3: + from .trio_backend import TrioBackend + + from .twisted_backend import TwistedBackend + __all__ += ['TrioBackend', 'TwistedBackend'] diff --git a/requests3/core/http_manager/_backends/_common.py b/requests3/core/http_manager/_backends/_common.py new file mode 100644 index 00000000..62ef8397 --- /dev/null +++ b/requests3/core/http_manager/_backends/_common.py @@ -0,0 +1,29 @@ +from ..util import selectors + +__all__ = ["DEFAULT_SELECTOR", "is_readable", "LoopAbort"] +# We only ever select on 1 fd at a time, so there's no point in messing around +# with epoll/kqueue. But we do want to use PollSelector on platforms that have +# it (= everything except Windows), since it has no limit on the numerical +# value of the fds it accepts. On Windows, we use SelectSelector, but that's +# OK, because on Windows select also has no limit on the numerical value of +# the handles it accepts. +try: + selectors.PollSelector().select(timeout=0) +except (OSError, AttributeError): + DEFAULT_SELECTOR = selectors.SelectSelector +else: + DEFAULT_SELECTOR = selectors.PollSelector + + +def is_readable(sock): + s = DEFAULT_SELECTOR() + s.register(sock, selectors.EVENT_READ) + events = s.select(timeout=0) + return bool(events) + + +class LoopAbort(Exception): + """ + Tell backends that enough bytes have been consumed + """ + pass diff --git a/requests3/core/http_manager/_backends/sync_backend.py b/requests3/core/http_manager/_backends/sync_backend.py new file mode 100644 index 00000000..6332ff42 --- /dev/null +++ b/requests3/core/http_manager/_backends/sync_backend.py @@ -0,0 +1,136 @@ +import errno +import select +import socket +import ssl +from ..util.connection import create_connection +from ..util.ssl_ import ssl_wrap_socket +from ..util import selectors + +from ._common import DEFAULT_SELECTOR, is_readable, LoopAbort + +__all__ = ["SyncBackend"] +BUFSIZE = 65536 + + +class SyncBackend(object): + + def __init__(self, connect_timeout=None, read_timeout=None): + self._connect_timeout = connect_timeout + self._read_timeout = read_timeout + + def connect(self, host, port, source_address=None, socket_options=None): + conn = create_connection( + (host, port), + self._connect_timeout, + source_address=source_address, + socket_options=socket_options, + ) + return SyncSocket(conn, self._read_timeout) + + +class SyncSocket(object): + + def __init__(self, sock, read_timeout): + self._sock = sock + self._read_timeout = read_timeout + # We keep the socket in non-blocking mode, except during connect() and + # during the SSL handshake: + self._sock.setblocking(False) + + def start_tls(self, server_hostname, ssl_context): + self._sock.setblocking(True) + wrapped = ssl_wrap_socket( + self._sock, + server_hostname=server_hostname, + ssl_context=ssl_context, + ) + wrapped.setblocking(False) + return SyncSocket(wrapped, self._read_timeout) + + + # Only for SSL-wrapped sockets + def getpeercert(self, binary=False): + return self._sock.getpeercert(binary_form=binary) + + def _wait(self, readable, writable): + assert readable or writable + s = DEFAULT_SELECTOR() + flags = 0 + 
if readable:
+            flags |= selectors.EVENT_READ
+        if writable:
+            flags |= selectors.EVENT_WRITE
+        s.register(self._sock, flags)
+        events = s.select(timeout=self._read_timeout)
+        if not events:
+            raise socket.timeout("XX FIXME timeout happened")
+
+        _, event = events[0]
+        return (event & selectors.EVENT_READ, event & selectors.EVENT_WRITE)
+
+    def receive_some(self):
+        while True:
+            try:
+                return self._sock.recv(BUFSIZE)
+
+            except ssl.SSLWantReadError:
+                self._wait(readable=True, writable=False)
+            except ssl.SSLWantWriteError:
+                self._wait(readable=False, writable=True)
+            except (OSError, socket.error) as exc:
+                if exc.errno in (errno.EWOULDBLOCK, errno.EAGAIN):
+                    self._wait(readable=True, writable=False)
+                else:
+                    raise
+
+    def send_and_receive_for_a_while(self, produce_bytes, consume_bytes):
+        outgoing_finished = False
+        outgoing = b""
+        try:
+            while True:
+                if not outgoing_finished and not outgoing:
+                    # Can exit loop here with error
+                    b = produce_bytes()
+                    if b is None:
+                        outgoing = None
+                        outgoing_finished = True
+                    else:
+                        outgoing = memoryview(b)
+                want_read = False
+                want_write = False
+                try:
+                    incoming = self._sock.recv(BUFSIZE)
+                except ssl.SSLWantReadError:
+                    want_read = True
+                except ssl.SSLWantWriteError:
+                    want_write = True
+                except (OSError, socket.error) as exc:
+                    if exc.errno in (errno.EWOULDBLOCK, errno.EAGAIN):
+                        want_read = True
+                    else:
+                        # Mirror receive_some(): anything that isn't a
+                        # would-block condition is a real error.
+                        raise
+                else:
+                    # Can exit loop here with LoopAbort
+                    consume_bytes(incoming)
+                if not outgoing_finished:
+                    try:
+                        sent = self._sock.send(outgoing)
+                        outgoing = outgoing[sent:]
+                    except ssl.SSLWantReadError:
+                        want_read = True
+                    except ssl.SSLWantWriteError:
+                        want_write = True
+                    except (OSError, socket.error) as exc:
+                        if exc.errno in (errno.EWOULDBLOCK, errno.EAGAIN):
+                            want_write = True
+                        else:
+                            raise
+                if want_read or want_write:
+                    self._wait(want_read, want_write)
+        except LoopAbort:
+            pass
+
+    def forceful_close(self):
+        self._sock.close()
+
+    def is_readable(self):
+        return is_readable(self._sock)
+
+    def set_readable_watch_state(self, enabled):
+        pass
diff --git a/requests3/core/http_manager/_backends/trio_backend.py b/requests3/core/http_manager/_backends/trio_backend.py
new file mode 100644
index 00000000..c2af2138
--- /dev/null
+++ b/requests3/core/http_manager/_backends/trio_backend.py
@@ -0,0 +1,102 @@
+import trio
+
+from ._common import is_readable, LoopAbort
+
+BUFSIZE = 65536
+
+
+class TrioBackend:
+
+    async def connect(
+        self, host, port, source_address=None, socket_options=None
+    ):
+        if source_address is not None:
+            # You can't really combine source_address= and happy eyeballs
+            # (can we get rid of source_address? or at least make it a source
+            # ip, no port?)
+            raise NotImplementedError(
+                "trio backend doesn't support setting source_address"
+            )
+
+        stream = await trio.open_tcp_stream(host, port)
+        # socket_options defaults to None, so guard before iterating.
+        if socket_options is not None:
+            for (level, optname, value) in socket_options:
+                stream.setsockopt(level, optname, value)
+        return TrioSocket(stream)
+
+    def __len__(self):
+        return 1
+
+    def __gt__(self, other):
+        return len(self) > other
+
+
+
+
+# XX it turns out that we don't need SSLStream to be robustified against
+# cancellation, but we probably should do something to detect when the stream
+# has been broken by cancellation (e.g. a timeout) and make is_readable return
+# True so the connection won't be reused.
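+# A hedged sketch of how a caller might drive this backend (the host and the
+# trio.run() entry point are illustrative, not part of this module):
+#
+#     async def main():
+#         sock = await TrioBackend().connect('example.com', 80)
+#         ...
+#
+#     trio.run(main)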
+class TrioSocket: + + def __init__(self, stream): + self._stream = stream + + async def start_tls(self, server_hostname, ssl_context): + wrapped = trio.ssl.SSLStream( + self._stream, + ssl_context, + server_hostname=server_hostname, + https_compatible=True, + ) + return TrioSocket(wrapped) + + def getpeercert(self, binary=False): + return self._stream.getpeercert(binary=binary) + + async def receive_some(self): + return await self._stream.receive_some(BUFSIZE) + + async def send_and_receive_for_a_while(self, produce_bytes, consume_bytes): + + async def sender(): + while True: + outgoing = await produce_bytes() + if outgoing is None: + break + + await self._stream.send_all(outgoing) + + async def receiver(): + while True: + incoming = await self._stream.receive_some(BUFSIZE) + consume_bytes(incoming) + + try: + async with trio.open_nursery() as nursery: + nursery.start_soon(sender) + nursery.start_soon(receiver) + except LoopAbort: + pass + + + # Pull out the underlying trio socket, because it turns out HTTP is not so + # great at respecting abstraction boundaries. + def _socket(self): + stream = self._stream + # Strip off any layers of SSLStream + while hasattr(stream, "transport_stream"): + stream = stream.transport_stream + # Now we have a SocketStream + return stream.socket + + + # We want this to be synchronous, and don't care about graceful teardown + # of the SSL/TLS layer. + def forceful_close(self): + self._socket().close() + + def is_readable(self): + return is_readable(self._socket()) + + def set_readable_watch_state(self, enabled): + pass diff --git a/requests3/core/http_manager/_backends/twisted_backend.py b/requests3/core/http_manager/_backends/twisted_backend.py new file mode 100644 index 00000000..974b0dca --- /dev/null +++ b/requests3/core/http_manager/_backends/twisted_backend.py @@ -0,0 +1,272 @@ +import socket +import OpenSSL.crypto +from twisted.internet import protocol, ssl +from twisted.internet.interfaces import IHandshakeListener +from twisted.internet.endpoints import HostnameEndpoint, connectProtocol +from twisted.internet.defer import ( + Deferred, DeferredList, CancelledError, ensureDeferred +) +from zope.interface import implementer + +from ..contrib.pyopenssl import get_subj_alt_name +from ._common import LoopAbort + + + +# XX need to add timeout support, esp. on connect +class TwistedBackend: + + def __init__(self, reactor): + self._reactor = reactor + + async def connect( + self, host, port, source_address=None, socket_options=None + ): + # HostnameEndpoint only supports setting source host, not source port + if source_address is not None: + raise NotImplementedError( + "twisted backend doesn't support setting source_address" + ) + + # factory = protocol.Factory.forProtocol(TwistedSocketProtocol) + endpoint = HostnameEndpoint(self._reactor, host, port) + d = connectProtocol(endpoint, TwistedSocketProtocol()) + # XX d.addTimeout(...) 
+ protocol = await d + if socket_options is not None: + for opt in socket_options: + if opt[:2] == (socket.IPPROTO_TCP, socket.TCP_NODELAY): + protocol.transport.setTcpNoDelay(opt[2]) + else: + raise NotImplementedError( + "unrecognized socket option for twisted backend" + ) + + return TwistedSocket(protocol) + + + + +# enums +class _DATA_RECEIVED: + pass + + +class _RESUME_PRODUCING: + pass + + +class _HANDSHAKE_COMPLETED: + pass + + +@implementer(IHandshakeListener) +class TwistedSocketProtocol(protocol.Protocol): + + def connectionMade(self): + self._receive_buffer = bytearray() + self.transport.pauseProducing() + self.transport.registerProducer(self, True) + self._producing = True + self._readable_watch_state_enabled = False + self._is_readable = False + self._events = {} + self._connection_lost = False + + def _signal(self, event): + if event in self._events: + # The first thing callback() will do is remove the deferred from + # self._events (see cleanup() in _wait_for() below). + self._events[event].callback(None) + + async def _wait_for(self, event): + assert event not in self._events + d = Deferred() + + # We might get callbacked, we might get cancelled; either way we want + # to clean up then pass through the result: + def cleanup(obj): + assert self._events[event] is d + del self._events[event] + return obj + + d.addBoth(cleanup) + self._events[event] = d + await d + + def dataReceived(self, data): + if self._readable_watch_state_enabled: + self._is_readable = True + self.transport.pauseProducing() + return + + self._receive_buffer += data + self._signal(_DATA_RECEIVED) + + def connectionLost(self, reason): + if self._readable_watch_state_enabled: + self._is_readable = True + self.transport.pauseProducing() + return + + self._connection_lost = True + self._signal(_DATA_RECEIVED) + + def pauseProducing(self): + self._producing = False + + def resumeProducing(self): + self._producing = True + self._signal(_RESUME_PRODUCING) + + def stopProducing(self): + pass + + def handshakeCompleted(self): + self._signal(_HANDSHAKE_COMPLETED) + + async def start_tls(self, server_hostname, ssl_context): + # XX ssl_context? 
+ self.transport.startTLS(ssl.optionsForClientTLS(server_hostname)) + await self._wait_for(_HANDSHAKE_COMPLETED) + + async def receive_some(self): + assert not self._readable_watch_state_enabled + while not self._receive_buffer and not self._connection_lost: + self.transport.resumeProducing() + try: + await self._wait_for(_DATA_RECEIVED) + finally: + self.transport.pauseProducing() + got = self._receive_buffer + self._receive_buffer = bytearray() + return got + + async def send_all(self, data): + assert not self._readable_watch_state_enabled + while not self._producing: + await self._wait_for(_RESUME_PRODUCING) + self.transport.write(data) + + def is_readable(self): + assert self._readable_watch_state_enabled + return self._is_readable + + def set_readable_watch_state(self, enabled): + self._readable_watch_state_enabled = enabled + if self._readable_watch_state_enabled: + self.transport.resumeProducing() + else: + self.transport.pauseProducing() + + +class DoubleError(Exception): + + def __init__(self, exc1, exc2): + self.exc1 = exc1 + self.exc2 = exc2 + + def __str__(self): + return "{}, {}".format(self.exc1, self.exc2) + + +class TwistedSocket: + + def __init__(self, protocol): + self._protocol = protocol + + async def start_tls(self, server_hostname, ssl_context): + await self._protocol.start_tls(server_hostname, ssl_context) + + def getpeercert(self, binary=False): + # Cribbed from urllib3.contrib.pyopenssl.WrappedSocket.getpeercert + x509 = self._protocol.transport.getPeerCertificate() + if not x509: + return x509 + + if binary: + return OpenSSL.crypto.dump_certificate( + OpenSSL.crypto.FILETYPE_ASN1, x509 + ) + + return { + "subject": ((("commonName", x509.get_subject().CN),),), + "subjectAltName": get_subj_alt_name(x509), + } + + async def receive_some(self): + return await self._protocol.receive_some() + + async def send_and_receive_for_a_while(self, produce_bytes, consume_bytes): + + async def sender(): + while True: + outgoing = await produce_bytes() + if outgoing is None: + break + + await self._protocol.send_all(outgoing) + + async def receiver(): + while True: + incoming = await self._protocol.receive_some() + try: + consume_bytes(incoming) + except LoopAbort: + break + + # Run the two async functions concurrently + send_loop = ensureDeferred(sender()) + receive_loop = ensureDeferred(receiver()) + + # If the send_loop errors out, then cancel receive_loop and preserve + # the failure + @send_loop.addErrback + def send_loop_errback(failure): + receive_loop.cancel() + return failure + + + # If the receive_loop errors out *or* exits cleanly due to LoopAbort, + # then cancel the send_loop and preserve the result + @receive_loop.addBoth + def receive_loop_allback(result): + send_loop.cancel() + return result + + # Wait for both to finish, and then figure out if we need to raise an + # exception. + results = await DeferredList([send_loop, receive_loop]) + # First, find the failure objects - but since we've almost always + # cancelled one of the deferreds, which causes it to raise + # CancelledError, we can't treat these at face value. + failures = [] + for success, result in results: + if not success: + failures.append(result) + # First, loop over and remove at most 1 CancelledError, since that's + # the most that we ever generate. (If *we* were cancelled, then there + # will be 2 CancelledErrors, and that's fine; in that case we want to + # preserve 1 of them and then re-raise it.) 
+        for i in range(len(failures)):
+            if isinstance(failures[i].value, CancelledError):
+                del failures[i]
+                break
+
+        # Now whatever's left is what we need to re-raise
+        if len(failures) == 0:
+            return
+
+        elif len(failures) == 1:
+            failures[0].raiseException()
+        else:
+            raise DoubleError(*failures)
+
+    def forceful_close(self):
+        self._protocol.transport.abortConnection()
+
+    def is_readable(self):
+        return self._protocol.is_readable()
+
+    def set_readable_watch_state(self, enabled):
+        return self._protocol.set_readable_watch_state(enabled)
diff --git a/requests3/core/http_manager/_collections.py b/requests3/core/http_manager/_collections.py
new file mode 100644
index 00000000..8021ae20
--- /dev/null
+++ b/requests3/core/http_manager/_collections.py
@@ -0,0 +1,334 @@
+from __future__ import absolute_import
+
+try:
+    from collections.abc import Mapping, MutableMapping
+except ImportError:
+    from collections import Mapping, MutableMapping
+try:
+    from threading import RLock
+except ImportError:  # Platform-specific: No threads available
+
+    class RLock:
+
+        def __enter__(self):
+            pass
+
+        def __exit__(self, exc_type, exc_value, traceback):
+            pass
+
+
+try:  # Python 2.7+
+    from collections import OrderedDict
+except ImportError:
+    from .packages.ordered_dict import OrderedDict
+from .exceptions import InvalidHeader
+from .packages.six import iterkeys, itervalues, PY3

+__all__ = ['RecentlyUsedContainer', 'HTTPHeaderDict']
+_Null = object()
+
+
+class RecentlyUsedContainer(MutableMapping):
+    """
+    Provides a thread-safe dict-like container which maintains up to
+    ``maxsize`` keys while throwing away the least-recently-used keys beyond
+    ``maxsize``.
+
+    :param maxsize:
+        Maximum number of recent elements to retain.
+
+    :param dispose_func:
+        Every time an item is evicted from the container,
+        ``dispose_func(value)`` is called on the evicted value.
+    """
+    ContainerCls = OrderedDict
+
+    def __init__(self, maxsize=10, dispose_func=None):
+        self._maxsize = maxsize
+        self.dispose_func = dispose_func
+        self._container = self.ContainerCls()
+        self.lock = RLock()
+
+    def __getitem__(self, key):
+        # Re-insert the item, moving it to the end of the eviction line.
+        with self.lock:
+            item = self._container.pop(key)
+            self._container[key] = item
+        return item
+
+    def __setitem__(self, key, value):
+        evicted_value = _Null
+        with self.lock:
+            # Possibly evict the existing value of 'key'
+            evicted_value = self._container.get(key, _Null)
+            self._container[key] = value
+            # If we didn't evict an existing value, we might have to evict the
+            # least recently used item from the beginning of the container.
+            if len(self._container) > self._maxsize:
+                _key, evicted_value = self._container.popitem(last=False)
+        if self.dispose_func and evicted_value is not _Null:
+            self.dispose_func(evicted_value)
+
+    def __delitem__(self, key):
+        with self.lock:
+            value = self._container.pop(key)
+        if self.dispose_func:
+            self.dispose_func(value)
+
+    def __len__(self):
+        with self.lock:
+            return len(self._container)
+
+    def __iter__(self):
+        raise NotImplementedError(
+            'Iteration over this class is unlikely to be threadsafe.'
+ ) + + def clear(self): + with self.lock: + # Copy pointers to all values, then wipe the mapping + values = list(itervalues(self._container)) + self._container.clear() + if self.dispose_func: + for value in values: + self.dispose_func(value) + + def keys(self): + with self.lock: + return list(iterkeys(self._container)) + + +class HTTPHeaderDict(MutableMapping): + """ + :param headers: + An iterable of field-value pairs. Must not contain multiple field names + when compared case-insensitively. + + :param kwargs: + Additional field-value pairs to pass in to ``dict.update``. + + A ``dict`` like container for storing HTTP Headers. + + Field names are stored and compared case-insensitively in compliance with + RFC 7230. Iteration provides the first case-sensitive key seen for each + case-insensitive pair. + + Using ``__setitem__`` syntax overwrites fields that compare equal + case-insensitively in order to maintain ``dict``'s api. For fields that + compare equal, instead create a new ``HTTPHeaderDict`` and use ``.add`` + in a loop. + + If multiple fields that are equal case-insensitively are passed to the + constructor or ``.update``, the behavior is undefined and some will be + lost. + + >>> headers = HTTPHeaderDict() + >>> headers.add('Set-Cookie', 'foo=bar') + >>> headers.add('set-cookie', 'baz=quxx') + >>> headers['content-length'] = '7' + >>> headers['SET-cookie'] + 'foo=bar, baz=quxx' + >>> headers['Content-Length'] + '7' + """ + + def __init__(self, headers=None, **kwargs): + super(HTTPHeaderDict, self).__init__() + self._container = OrderedDict() + if headers is not None: + if isinstance(headers, HTTPHeaderDict): + self._copy_from(headers) + else: + self.extend(headers) + if kwargs: + self.extend(kwargs) + + def __setitem__(self, key, val): + self._container[key.lower()] = [key, val] + return self._container[key.lower()] + + def __getitem__(self, key): + val = self._container[key.lower()] + return ', '.join(val[1:]) + + def __delitem__(self, key): + del self._container[key.lower()] + + def __contains__(self, key): + return key.lower() in self._container + + def __eq__(self, other): + if not isinstance(other, Mapping) and not hasattr(other, 'keys'): + return False + + if not isinstance(other, type(self)): + other = type(self)(other) + return ( + dict((k.lower(), v) for k, v in self.itermerged()) == + dict((k.lower(), v) for k, v in other.itermerged()) + ) + + def __ne__(self, other): + return not self.__eq__(other) + + if not PY3: # Python 2 + iterkeys = MutableMapping.iterkeys + itervalues = MutableMapping.itervalues + __marker = object() + + def __len__(self): + return len(self._container) + + def __iter__(self): + # Only provide the originally cased names + for vals in self._container.values(): + yield vals[0] + + def pop(self, key, default=__marker): + '''D.pop(k[,d]) -> v, remove specified key and return the corresponding value. + If key is not found, d is returned if given, otherwise KeyError is raised. + ''' + # Using the MutableMapping function directly fails due to the private marker. + # Using ordinary dict.pop would expose the internal structures. + # So let's reinvent the wheel. + try: + value = self[key] + except KeyError: + if default is self.__marker: + raise + + return default + + else: + del self[key] + return value + + def discard(self, key): + try: + del self[key] + except KeyError: + pass + + def add(self, key, val): + """Adds a (name, value) pair, doesn't overwrite the value if it already + exists. 
+ + >>> headers = HTTPHeaderDict(foo='bar') + >>> headers.add('Foo', 'baz') + >>> headers['foo'] + 'bar, baz' + """ + key_lower = key.lower() + new_vals = [key, val] + # Keep the common case aka no item present as fast as possible + vals = self._container.setdefault(key_lower, new_vals) + if new_vals is not vals: + vals.append(val) + + def extend(self, *args, **kwargs): + """Generic import function for any type of header-like object. + Adapted version of MutableMapping.update in order to insert items + with self.add instead of self.__setitem__ + """ + if len(args) > 1: + raise TypeError( + "extend() takes at most 1 positional " + "arguments ({0} given)".format(len(args)) + ) + + other = args[0] if len(args) >= 1 else () + if isinstance(other, HTTPHeaderDict): + for key, val in other.iteritems(): + self.add(key, val) + elif isinstance(other, Mapping): + for key in other: + self.add(key, other[key]) + elif hasattr(other, "keys"): + for key in other.keys(): + self.add(key, other[key]) + else: + for key, value in other: + self.add(key, value) + for key, value in kwargs.items(): + self.add(key, value) + + def getlist(self, key, default=__marker): + """Returns a list of all the values for the named field. Returns an + empty list if the key doesn't exist.""" + try: + vals = self._container[key.lower()] + except KeyError: + if default is self.__marker: + return [] + + return default + + else: + return vals[1:] + + # Backwards compatibility for httplib + getheaders = getlist + getallmatchingheaders = getlist + iget = getlist + # Backwards compatibility for http.cookiejar + get_all = getlist + + def __repr__(self): + return "%s(%s)" % (type(self).__name__, dict(self.itermerged())) + + def _copy_from(self, other): + for key in other: + val = other.getlist(key) + if isinstance(val, list): + # Don't need to convert tuples + val = list(val) + self._container[key.lower()] = [key] + val + + def copy(self): + clone = type(self)() + clone._copy_from(self) + return clone + + def iteritems(self): + """Iterate over all header lines, including duplicate ones.""" + for key in self: + vals = self._container[key.lower()] + for val in vals[1:]: + yield vals[0], val + + def itermerged(self): + """Iterate over all headers, merging duplicate ones together.""" + for key in self: + val = self._container[key.lower()] + yield val[0], ', '.join(val[1:]) + + def items(self): + return list(self.iteritems()) + + @classmethod + def from_httplib(cls, message): # Python 2 + """Read headers from a Python 2 httplib message object.""" + # python2.7 does not expose a proper API for exporting multiheaders + # efficiently. This function re-reads raw lines from the message + # object and extracts the multiheaders properly. + obs_fold_continued_leaders = (' ', '\t') + headers = [] + for line in message.headers: + if line.startswith(obs_fold_continued_leaders): + if not headers: + # We received a header line that starts with OWS as described + # in RFC-7230 S3.2.4. This indicates a multiline header, but + # there exists no previous header to which we can attach it. 
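+                    # For example (hypothetical wire data), the raw lines
+                    #     X-Debug: abc
+                    #      def
+                    # are merged below into ('X-Debug', 'abc def'); a message
+                    # whose *first* line is folded has nothing to merge into.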
+ raise InvalidHeader( + 'Header continuation with no previous header: %s' % + line + ) + + else: + key, value = headers[-1] + headers[-1] = (key, value + ' ' + line.strip()) + continue + + key, value = line.split(':', 1) + headers.append((key, value.strip())) + return cls(headers) diff --git a/requests3/core/http_manager/_sync/__init__.py b/requests3/core/http_manager/_sync/__init__.py new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/requests3/core/http_manager/_sync/__init__.py diff --git a/requests3/core/http_manager/_sync/connection.py b/requests3/core/http_manager/_sync/connection.py new file mode 100644 index 00000000..fbfa5ab9 --- /dev/null +++ b/requests3/core/http_manager/_sync/connection.py @@ -0,0 +1,526 @@ +# -*- coding: utf-8 -*- +""" +This module implements the connection management logic. + +Unlike in http.client, the connection here is an object that is responsible +for a very small number of tasks: + + 1. Serializing/deserializing data to/from the network. + 2. Being able to do basic parsing of HTTP and maintaining the framing. + 3. Understanding connection state. + +This object knows very little about the semantics of HTTP in terms of how to +construct HTTP requests and responses. It mostly manages the socket itself. +""" +from __future__ import absolute_import + +import collections +import datetime +import socket +import warnings + +import h11 + +from ..base import Request, Response +from ..exceptions import ( + ConnectTimeoutError, + NewConnectionError, + SubjectAltNameWarning, + SystemTimeWarning, + BadVersionError, + FailedTunnelError, + InvalidBodyError, + ProtocolError, +) +from ..packages import six +from ..util import ssl_ as ssl_util +from .._backends import SyncBackend +from .._backends._common import LoopAbort + +try: + import ssl +except ImportError: + ssl = None +# When updating RECENT_DATE, move it to +# within two years of the current date, and no +# earlier than 6 months ago. +RECENT_DATE = datetime.date(2016, 1, 1) +_SUPPORTED_VERSIONS = frozenset([b'1.0', b'1.1']) +# A sentinel object returned when some syscalls return EAGAIN. +_EAGAIN = object() + + +def _headers_to_native_string(headers): + """ + A temporary shim to convert received headers to native strings, to match + the behaviour of httplib. We will reconsider this later in the process. + """ + # TODO: revisit. + # This works because fundamentally we know that all headers coming from + # h11 are bytes, so if they aren't of type `str` then we must be on Python + # 3 and need to decode the headers using Latin1. + for n, v in headers: + if not isinstance(n, str): + n = n.decode('latin1') + if not isinstance(v, str): + v = v.decode('latin1') + yield (n, v) + + +def _stringify_headers(headers): + """ + A generator that transforms headers so they're suitable for sending by h11. + """ + # TODO: revisit + for name, value in headers: + if isinstance(name, six.text_type): + name = name.encode('ascii') + if isinstance(value, six.text_type): + value = value.encode('latin-1') + elif isinstance(value, int): + value = str(value).encode('ascii') + yield (name, value) + + +def _read_readable(readable): + # TODO: reconsider this block size + blocksize = 8192 + while True: + datablock = readable.read(blocksize) + if not datablock: + break + + yield datablock + + + + +# XX this should return an async iterator +def _make_body_iterable(body): + """ + This function turns all possible body types that urllib3 supports into an + iterable of bytes. 
The goal is to expose a uniform structure to request
+    bodies so that they all appear to be identical to the low-level code.
+
+    The basic logic here is:
+    - byte strings are turned into single-element lists
+    - readables are wrapped in an iterable that repeatedly calls read until
+      nothing is returned anymore
+    - other iterables are used directly
+    - anything else is not acceptable
+
+    In particular, note that we do not support *text* data of any kind. This
+    is deliberate: users must make choices about the encoding of the data they
+    use.
+    """
+    if body is None:
+        return []
+
+    elif isinstance(body, six.binary_type):
+        return [body]
+
+    elif hasattr(body, "read"):
+        return _read_readable(body)
+
+    elif isinstance(body, collections.Iterable) and not isinstance(
+        body, six.text_type
+    ):
+        return body
+
+    else:
+        raise InvalidBodyError("Unacceptable body type: %s" % type(body))
+
+
+
+
+# XX this should return an async iterator
+def _request_bytes_iterable(request, state_machine):
+    """
+    A generator that serialises the whole request: first the request line and
+    headers, then any body chunks, then the end-of-message marker.
+    """
+    h11_request = h11.Request(
+        method=request.method,
+        target=request.target,
+        headers=_stringify_headers(request.headers.items()),
+    )
+    yield state_machine.send(h11_request)
+
+    for chunk in _make_body_iterable(request.body):
+        yield state_machine.send(h11.Data(data=chunk))
+
+    yield state_machine.send(h11.EndOfMessage())
+
+
+def _response_from_h11(h11_response, body_object):
+    """
+    Given an h11 Response object, build a urllib3 response object and return
+    it.
+    """
+    if h11_response.http_version not in _SUPPORTED_VERSIONS:
+        raise BadVersionError(h11_response.http_version)
+
+    version = b'HTTP/' + h11_response.http_version
+    our_response = Response(
+        status_code=h11_response.status_code,
+        headers=_headers_to_native_string(h11_response.headers),
+        body=body_object,
+        version=version,
+    )
+    return our_response
+
+
+def _build_tunnel_request(host, port, headers):
+    """
+    Builds a urllib3 Request object that is set up correctly to request a proxy
+    to establish a TCP tunnel to the remote host.
+    """
+    target = "%s:%d" % (host, port)
+    if not isinstance(target, bytes):
+        target = target.encode('latin1')
+    tunnel_request = Request(method=b"CONNECT", target=target, headers=headers)
+    tunnel_request.add_host(host=host, port=port, scheme='http')
+    return tunnel_request
+
+
+def _start_http_request(request, state_machine, conn):
+    """
+    Send the request using the given state machine and connection, wait
+    for the response headers, and return them.
+
+    If we get response headers early, then we stop sending and return
+    immediately, poisoning the state machine along the way so that we know
+    it can't be re-used.
+
+    This is a standalone function because we use it to set up both CONNECT
+    requests and real requests.
+    """
+    # Before we begin, confirm that the state machine is ok.
+    if (
+        state_machine.our_state is not h11.IDLE or
+        state_machine.their_state is not h11.IDLE
+    ):
+        raise ProtocolError("Invalid internal state transition")
+
+    request_bytes_iterable = _request_bytes_iterable(request, state_machine)
+    # Hack around Python 2 lack of nonlocal
+    context = {'send_aborted': True, 'h11_response': None}
+
+    def next_bytes_to_send():
+        try:
+            return next(request_bytes_iterable)
+
+        except StopIteration:
+            # We successfully sent the whole body!
+ context['send_aborted'] = False + return None + + def consume_bytes(data): + state_machine.receive_data(data) + while True: + event = state_machine.next_event() + if event is h11.NEED_DATA: + break + + elif isinstance(event, h11.InformationalResponse): + # Ignore 1xx responses + continue + + elif isinstance(event, h11.Response): + # We have our response! Save it and get out of here. + context['h11_response'] = event + raise LoopAbort + + else: + # Can't happen + raise RuntimeError("Unexpected h11 event {}".format(event)) + + conn.send_and_receive_for_a_while(next_bytes_to_send, consume_bytes) + assert context['h11_response'] is not None + if context['send_aborted']: + # Our state machine thinks we sent a bunch of data... but maybe we + # didn't! Maybe our send got cancelled while we were only half-way + # through sending the last chunk, and then h11 thinks we sent a + # complete request and we actually didn't. Then h11 might think we can + # re-use this connection, even though we can't. So record this in + # h11's state machine. + # XX need to implement this in h11 + # state_machine.poison() + # XX kluge for now + state_machine._cstate.process_error(state_machine.our_role) + return context['h11_response'] + + +def _read_until_event(state_machine, conn): + """ + A loop that keeps issuing reads and feeding the data into h11 and + checking whether h11 has an event for us. The moment there is an event + other than h11.NEED_DATA, this function returns that event. + """ + while True: + event = state_machine.next_event() + if event is not h11.NEED_DATA: + return event + + state_machine.receive_data(conn.receive_some()) + + +_DEFAULT_SOCKET_OPTIONS = object() + + +class HTTP1Connection(object): + """ + A wrapper around a single HTTP/1.1 connection. + + This wrapper manages connection state, ensuring that connections are + appropriately managed throughout the lifetime of a HTTP transaction. In + particular, this object understands the conditions in which connections + should be torn down, and also manages sending data and handling early + responses. + + This object can be iterated over to return the response body. When iterated + over it will return all of the data that is currently buffered, and if no + data is buffered it will issue one read syscall and return all of that + data. Buffering of response data must happen at a higher layer. + """ + # : Disable Nagle's algorithm by default. + #: ``[(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)]`` + default_socket_options = [(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)] + + def __init__( + self, + host, + port, + backend=None, + socket_options=_DEFAULT_SOCKET_OPTIONS, + source_address=None, + tunnel_host=None, + tunnel_port=None, + tunnel_headers=None, + ): + self.is_verified = False + self._backend = backend or SyncBackend() + self._host = host + self._port = port + self._socket_options = ( + socket_options if socket_options is not _DEFAULT_SOCKET_OPTIONS else self.default_socket_options + ) + self._source_address = source_address + self._tunnel_host = tunnel_host + self._tunnel_port = tunnel_port + self._tunnel_headers = tunnel_headers + self._sock = None + self._state_machine = h11.Connection(our_role=h11.CLIENT) + + def _wrap_socket( + self, conn, ssl_context, fingerprint, assert_hostname + ): + """ + Handles extra logic to wrap the socket in TLS magic. + """ + is_time_off = datetime.date.today() < RECENT_DATE + if is_time_off: + warnings.warn( + ( + 'System time is way off (before {0}). 
This will probably '
+                    'lead to SSL verification errors'
+                ).format(
+                    RECENT_DATE
+                ),
+                SystemTimeWarning,
+            )
+        # XX need to know whether this is the proxy or the final host that
+        # we just did a handshake with!
+        check_host = assert_hostname or self._tunnel_host or self._host
+        # Stripping trailing dots from the hostname is important because
+        # they indicate that this host is an absolute name (for DNS
+        # lookup), but are irrelevant to SSL hostname matching and in fact
+        # will break it.
+        check_host = check_host.rstrip(".")
+        conn = conn.start_tls(check_host, ssl_context)
+        if fingerprint:
+            # The backend sockets expose getpeercert(binary=...), not the
+            # stdlib's binary_form= keyword.
+            ssl_util.assert_fingerprint(
+                conn.getpeercert(binary=True), fingerprint
+            )
+        elif (
+            ssl_context.verify_mode != ssl.CERT_NONE and
+            assert_hostname is not False
+        ):
+            cert = conn.getpeercert()
+            if not cert.get('subjectAltName', ()):
+                warnings.warn(
+                    (
+                        'Certificate for {0} has no `subjectAltName`, falling '
+                        'back to check for a `commonName` for now. This '
+                        'feature is being removed by major browsers and '
+                        'deprecated by RFC 2818. (See '
+                        'https://github.com/shazow/urllib3/issues/497 for '
+                        'details.)'.format(self._host)
+                    ),
+                    SubjectAltNameWarning,
+                )
+            ssl_util.match_hostname(cert, check_host)
+        self.is_verified = (
+            ssl_context.verify_mode == ssl.CERT_REQUIRED and
+            (assert_hostname is not False or fingerprint)
+        )
+        return conn
+
+    def send_request(self, request, read_timeout):
+        """
+        Given a Request object, performs the logic required to get a response.
+        """
+        h11_response = _start_http_request(
+            request, self._state_machine, self._sock
+        )
+        return _response_from_h11(h11_response, self)
+
+    def _tunnel(self, conn):
+        """
+        This method establishes a CONNECT tunnel shortly after connection.
+        """
+        # Basic sanity check that _tunnel is only called at appropriate times.
+        assert self._state_machine.our_state is h11.IDLE
+        tunnel_request = _build_tunnel_request(
+            self._tunnel_host, self._tunnel_port, self._tunnel_headers
+        )
+        tunnel_state_machine = h11.Connection(our_role=h11.CLIENT)
+        h11_response = _start_http_request(
+            tunnel_request, tunnel_state_machine, conn
+        )
+        # XX this is wrong -- 'self' here will try to iterate using
+        # self._state_machine, not tunnel_state_machine. Also, we need to
+        # think about how this failure case interacts with the pool's
+        # connection lifecycle management.
+        tunnel_response = _response_from_h11(h11_response, self)
+        if h11_response.status_code != 200:
+            conn.forceful_close()
+            raise FailedTunnelError(
+                "Unable to establish CONNECT tunnel", tunnel_response
+            )
+
+    def connect(
+        self,
+        ssl_context=None,
+        fingerprint=None,
+        assert_hostname=None,
+        connect_timeout=None,
+    ):
+        """
+        Connect this socket to the server, applying the source address, any
+        relevant socket options, and the relevant connection timeout.
+        """
+        if self._sock is not None:
+            # We're already connected, move on.
+            self._sock.set_readable_watch_state(False)
+            return
+
+        extra_kw = {}
+        if self._source_address:
+            extra_kw['source_address'] = self._source_address
+        if self._socket_options:
+            extra_kw['socket_options'] = self._socket_options
+        # XX pass connect_timeout to backend
+        # This was factored out into a separate function to allow overriding
+        # by subclasses, but in the backend approach the way to do this is to
+        # provide a custom backend. (Composition >> inheritance.)
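+        # E.g. (hypothetical): HTTP1Connection('example.com', 80,
+        # backend=TrioBackend()) would route this connect through trio
+        # instead of the default SyncBackend.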
+ try: + conn = self._backend.connect( + self._host, self._port, **extra_kw + ) + # XX these two error handling blocks needs to be re-done in a + # backend-agnostic way + except socket.timeout: + raise ConnectTimeoutError( + self, + "Connection to %s timed out. (connect timeout=%s)" % + (self._host, connect_timeout), + ) + + except socket.error as e: + raise NewConnectionError( + self, "Failed to establish a new connection: %s" % e + ) + + if ssl_context is not None: + if self._tunnel_host is not None: + self._tunnel(conn) + conn = self._wrap_socket( + conn, ssl_context, fingerprint, assert_hostname + ) + # XX We should pick one of these names and use it consistently... + self._sock = conn + + def close(self): + """ + Close this connection. + """ + if self._sock is not None: + # Make sure self._sock is None even if closing raises an exception + sock, self._sock = self._sock, None + sock.forceful_close() + + def is_dropped(self): + """ + Returns True if the connection is closed: returns False otherwise. This + includes closures that do not mark the FD as closed, such as when the + remote peer has sent EOF but we haven't read it yet. + + Pre-condition: _reset must have been called. + """ + if self._sock is None: + return True + + # We check for droppedness by checking the socket for readability. If + # it's not readable, it's not dropped. If it is readable, then we + # assume that the thing we'd read from the socket is EOF. It might not + # be, but if it's not then the server has busted its HTTP/1.1 framing + # and so we want to drop the connection anyway. + return self._sock.is_readable() + + def _reset(self): + """ + Called once we hit EndOfMessage, and checks whether we can re-use this + state machine and connection or not, and if not, closes the socket and + state machine. + """ + try: + self._state_machine.start_next_cycle() + except h11.LocalProtocolError: + # Not re-usable + self.close() + else: + # This connection can be returned to the connection pool, and + # eventually we'll take it out again and want to know if it's been + # dropped. + self._sock.set_readable_watch_state(True) + + @property + def complete(self): + """ + XX what is this supposed to do? check if the response has been fully + iterated over? check for that + the connection being reusable? + """ + our_state = self._state_machine.our_state + their_state = self._state_machine.their_state + return (our_state is h11.IDLE and their_state is h11.IDLE) + + def __iter__(self): + return self + + def next(self): # Needed for Python 2 as __anext__ becomes __next__ + return self.__next__() + + def __next__(self): + """ + Iterate over the body bytes of the response until end of message. 
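+
+        A sketch of the intended use (``conn`` is a hypothetical connected
+        instance whose response headers have already been read)::
+
+            body = b''.join(chunk for chunk in conn)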
+ """ + event = _read_until_event(self._state_machine, self._sock) + if isinstance(event, h11.Data): + return bytes(event.data) + + elif isinstance(event, h11.EndOfMessage): + self._reset() + raise StopIteration + + else: + # can't happen + raise RuntimeError("Unexpected h11 event {}".format(event)) diff --git a/requests3/core/http_manager/_sync/connectionpool.py b/requests3/core/http_manager/_sync/connectionpool.py new file mode 100644 index 00000000..e0bf5290 --- /dev/null +++ b/requests3/core/http_manager/_sync/connectionpool.py @@ -0,0 +1,891 @@ +from __future__ import absolute_import +import errno +import logging +import sys +import warnings + +from socket import error as SocketError, timeout as SocketTimeout +import socket + +import h11 + + +from ..base import Request, DEFAULT_PORTS +from ..exceptions import ( + ClosedPoolError, + ProtocolError, + EmptyPoolError, + LocationValueError, + MaxRetryError, + ProxyError, + ReadTimeoutError, + SSLError, + TimeoutError, + InsecureRequestWarning, + NewConnectionError, +) +from ..packages.ssl_match_hostname import CertificateError +from ..packages import six +from ..packages.six.moves import queue +from ..request import RequestMethods +from .response import HTTPResponse +from .connection import HTTP1Connection + +from ..util.connection import is_connection_dropped +from ..util.request import set_file_position +from ..util.retry import Retry +from ..util.ssl_ import ( + create_urllib3_context, + merge_context_settings, + resolve_ssl_version, + resolve_cert_reqs, + BaseSSLError, +) +from ..util.timeout import Timeout +from ..util.url import get_host, Url + +try: + import ssl +except ImportError: + ssl = None +if six.PY2: + # Queue is imported for side effects on MS Windows + import Queue as _unused_module_Queue # noqa: F401 +xrange = six.moves.xrange +log = logging.getLogger(__name__) +_Default = object() + + +def _add_transport_headers(headers): + """ + Adds the transport framing headers, if needed. Naturally, this method + cannot add a content-length header, so if there is no content-length header + then it will add Transfer-Encoding: chunked instead. Should only be called + if there is a body to upload. + + This should be a bit smarter: in particular, it should allow for bad or + unexpected versions of these headers, particularly transfer-encoding. + """ + transfer_headers = ('content-length', 'transfer-encoding') + for header_name in headers: + if header_name.lower() in transfer_headers: + return + + headers['transfer-encoding'] = 'chunked' + + +def _build_context( + context, keyfile, certfile, cert_reqs, ca_certs, ca_cert_dir, ssl_version +): + """ + Creates a urllib3 context suitable for a given request based on a + collection of possible properties of that context. + """ + if context is None: + context = create_urllib3_context( + ssl_version=resolve_ssl_version(ssl_version), + cert_reqs=resolve_cert_reqs(cert_reqs), + ) + context = merge_context_settings( + context, + keyfile=keyfile, + certfile=certfile, + cert_reqs=cert_reqs, + ca_certs=ca_certs, + ca_cert_dir=ca_cert_dir, + ) + return context + + + + +# Pool objects +class ConnectionPool(object): + """ + Base class for all connection pools, such as + :class:`.HTTPConnectionPool` and :class:`.HTTPSConnectionPool`. 
+ """ + scheme = None + QueueCls = queue.LifoQueue + + def __init__(self, host, port=None): + if not host: + raise LocationValueError("No host specified.") + + self.host = _ipv6_host(host).lower() + self.port = port + + def __str__(self): + return '%s(host=%r, port=%r)' % ( + type(self).__name__, self.host, self.port + ) + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.close() + # Return False to re-raise any potential exceptions + return False + + def close(self): + """ + Close all pooled connections and disable the pool. + """ + pass + + +# This is taken from http://hg.python.org/cpython/file/7aaba721ebc0/Lib/socket.py#l252 +_blocking_errnos = set([errno.EAGAIN, errno.EWOULDBLOCK]) + + +class HTTPConnectionPool(ConnectionPool, RequestMethods): + """ + Thread-safe connection pool for one host. + + :param host: + Host used for this HTTP Connection (e.g. "localhost"), passed into + :class:`httplib.HTTPConnection`. + + :param port: + Port used for this HTTP Connection (None is equivalent to 80), passed + into :class:`httplib.HTTPConnection`. + + :param strict: + Causes BadStatusLine to be raised if the status line can't be parsed + as a valid HTTP/1.0 or 1.1 status line, passed into + :class:`httplib.HTTPConnection`. + + .. note:: + Only works in Python 2. This parameter is ignored in Python 3. + + :param timeout: + Socket timeout in seconds for each individual connection. This can + be a float or integer, which sets the timeout for the HTTP request, + or an instance of :class:`urllib3.util.Timeout` which gives you more + fine-grained control over request timeouts. After the constructor has + been parsed, this is always a `urllib3.util.Timeout` object. + + :param maxsize: + Number of connections to save that can be reused. More than 1 is useful + in multithreaded situations. If ``block`` is set to False, more + connections will be created but they will not be saved once they've + been used. + + :param block: + If set to True, no more than ``maxsize`` connections will be used at + a time. When no free connections are available, the call will block + until a connection has been released. This is a useful side effect for + particular multithreaded situations where one does not want to use more + than maxsize connections per host to prevent flooding. + + :param headers: + Headers to include with all requests, unless other headers are given + explicitly. + + :param retries: + Retry configuration to use by default with requests in this pool. + + :param _proxy: + Parsed proxy URL, should not be used directly, instead, see + :class:`urllib3.connectionpool.ProxyManager`" + + :param _proxy_headers: + A dictionary with proxy headers, should not be used directly, + instead, see :class:`urllib3.connectionpool.ProxyManager`" + + :param \\**conn_kw: + Additional parameters are used to create fresh :class:`urllib3.connection.HTTPConnection`, + :class:`urllib3.connection.HTTPSConnection` instances. 
+ """ + scheme = 'http' + ConnectionCls = HTTP1Connection + ResponseCls = HTTPResponse + + def __init__( + self, + host, + port=None, + timeout=Timeout.DEFAULT_TIMEOUT, + maxsize=1, + block=False, + headers=None, + retries=None, + _proxy=None, + _proxy_headers=None, + **conn_kw + ): + ConnectionPool.__init__(self, host, port) + RequestMethods.__init__(self, headers) + if not isinstance(timeout, Timeout): + timeout = Timeout.from_float(timeout) + if retries is None: + retries = Retry.DEFAULT + self.timeout = timeout + self.retries = retries + self.pool = self.QueueCls(maxsize) + self.block = block + self.proxy = _proxy + self.proxy_headers = _proxy_headers or {} + # Fill the queue up so that doing get() on it will block properly + for _ in xrange(maxsize): + self.pool.put(None) + # These are mostly for testing and debugging purposes. + self.num_connections = 0 + self.num_requests = 0 + self.conn_kw = conn_kw + if self.proxy: + # Enable Nagle's algorithm for proxies, to avoid packet fragmentation. + # We cannot know if the user has added default socket options, so we cannot replace the + # list. + self.conn_kw.setdefault('socket_options', []) + + def _new_conn(self): + """ + Return a fresh connection. + """ + self.num_connections += 1 + + # TODO: Huge hack. + for kw in ('strict',): + if kw in self.conn_kw: + self.conn_kw.pop(kw) + + log.debug( + "Starting new HTTP connection (%d): %s:%s", + self.num_connections, + self.host, + self.port or "80", + ) + conn = self.ConnectionCls( + host=self.host, port=self.port, ** self.conn_kw + ) + return conn + + def _get_conn(self, timeout=None): + """ + Get a connection. Will return a pooled connection if one is available. + + If no connections are available and :prop:`.block` is ``False``, then a + fresh connection is returned. + + :param timeout: + Seconds to wait before giving up and raising + :class:`urllib3.exceptions.EmptyPoolError` if the pool is empty and + :prop:`.block` is ``True``. + """ + conn = None + try: + conn = self.pool.get(block=self.block, timeout=timeout) + except AttributeError: # self.pool is None + raise ClosedPoolError(self, "Pool is closed.") + + except queue.Empty: + if self.block: + raise EmptyPoolError( + self, + "Pool reached maximum size and no more " + "connections are allowed.", + ) + + pass # Oh well, we'll create a new connection then + # If this is a persistent connection, check if it got disconnected + if conn and is_connection_dropped(conn): + log.debug("Resetting dropped connection: %s", self.host) + conn.close() + return conn or self._new_conn() + + def _put_conn(self, conn): + """ + Put a connection back into the pool. + + :param conn: + Connection object for the current host and port as returned by + :meth:`._new_conn` or :meth:`._get_conn`. + + If the pool is already full, the connection is closed and discarded + because we exceeded maxsize. If connections are discarded frequently, + then maxsize should be increased. + + If the pool is closed, then the connection will be closed and discarded. + """ + try: + self.pool.put(conn, block=False) + return # Everything is dandy, done. + + except AttributeError: + # self.pool is None. + pass + except queue.Full: + # This should never happen if self.block == True + log.warning( + "Connection pool is full, discarding connection: %s", self.host + ) + # Connection never got put back into the pool, close it. + if conn: + conn.close() + + def _start_conn(self, conn, connect_timeout): + """ + Called right before a request is made, after the socket is created. 
+ """ + conn.connect(connect_timeout=connect_timeout) + + def _get_timeout(self, timeout): + """ Helper that always returns a :class:`urllib3.util.Timeout` """ + if timeout is _Default: + return self.timeout.clone() + + if isinstance(timeout, Timeout): + return timeout.clone() + + else: + # User passed us an int/float. This is for backwards compatibility, + # can be removed later + return Timeout.from_float(timeout) + + def _raise_timeout(self, err, url, timeout_value): + """Is the error actually a timeout? Will raise a ReadTimeout or pass""" + if isinstance(err, SocketTimeout): + raise ReadTimeoutError( + self, url, "Read timed out. (read timeout=%s)" % timeout_value + ) + + # See the above comment about EAGAIN in Python 3. In Python 2 we have + # to specifically catch it and throw the timeout error + if hasattr(err, 'errno') and err.errno in _blocking_errnos: + raise ReadTimeoutError( + self, url, "Read timed out. (read timeout=%s)" % timeout_value + ) + + # Catch possible read timeouts thrown as SSL errors. If not the + # case, rethrow the original. We need to do this because of: + # http://bugs.python.org/issue10272 + # TODO: Can we remove this? + if 'timed out' in str(err) or 'did not complete (read)' in str( + err + ): # Python 2.6 + raise ReadTimeoutError( + self, url, "Read timed out. (read timeout=%s)" % timeout_value + ) + + def _make_request( + self, conn, method, url, timeout=_Default, body=None, headers=None + ): + """ + Perform a request on a given urllib connection object taken from our + pool. + + :param conn: + a connection from one of our connection pools + + :param timeout: + Socket timeout in seconds for the request. This can be a + float or integer, which will set the same timeout value for + the socket connect and the socket read, or an instance of + :class:`urllib3.util.Timeout`, which gives you more fine-grained + control over your timeouts. + """ + self.num_requests += 1 + timeout_obj = self._get_timeout(timeout) + timeout_obj.start_connect() + # Trigger any extra validation we need to do. + try: + self._start_conn(conn, timeout_obj.connect_timeout) + except (SocketTimeout, BaseSSLError) as e: + # Py2 raises this as a BaseSSLError, Py3 raises it as socket timeout. + self._raise_timeout(err=e, url=url, timeout_value=conn.timeout) + raise + + # TODO: We need to encapsulate our proxy logic in here somewhere. + request = Request( + method=method, target=url, headers=headers, body=body + ) + host = self.host + port = self.port + scheme = self.scheme + request.add_host(host, port, scheme) + # Reset the timeout for the recv() on the socket + read_timeout = timeout_obj.read_timeout + # In Python 3 socket.py will catch EAGAIN and return None when you + # try and read into the file pointer created by http.client, which + # instead raises a BadStatusLine exception. Instead of catching + # the exception and assuming all BadStatusLine exceptions are read + # timeouts, check for a zero timeout before making the request. + if read_timeout == 0: + raise ReadTimeoutError( + self, url, "Read timed out. (read timeout=%s)" % read_timeout + ) + + if read_timeout is Timeout.DEFAULT_TIMEOUT: + read_timeout = socket.getdefaulttimeout() + # Receive the response from the server + try: + response = conn.send_request( + request, read_timeout=read_timeout + ) + except (SocketTimeout, BaseSSLError, SocketError) as e: + self._raise_timeout(err=e, url=url, timeout_value=read_timeout) + raise + + # AppEngine doesn't have a version attr. 
+ http_version = getattr(conn, '_http_vsn_str', 'HTTP/?') + log.debug( + "%s://%s:%s \"%s %s %s\" %s", + self.scheme, + self.host, + self.port, + method, + url, + http_version, + response.status_code, + ) + return response + + def _absolute_url(self, path): + return Url( + scheme=self.scheme, host=self.host, port=self.port, path=path + ).url + + def close(self): + """ + Close all pooled connections and disable the pool. + """ + if self.pool is None: + return + + # Disable access to the pool + old_pool, self.pool = self.pool, None + try: + while True: + conn = old_pool.get(block=False) + if conn: + conn.close() + except queue.Empty: + pass # Done. + + def is_same_host(self, url): + """ + Check if the given ``url`` is a member of the same host as this + connection pool. + """ + if url.startswith('/'): + return True + + # TODO: Add optional support for socket.gethostbyname checking. + scheme, host, port = get_host(url) + host = _ipv6_host(host).lower() + # Use explicit default port for comparison when none is given + if self.port and not port: + port = DEFAULT_PORTS.get(scheme) + elif not self.port and port == DEFAULT_PORTS.get(scheme): + port = None + return (scheme, host, port) == (self.scheme, self.host, self.port) + + def urlopen( + self, + method, + url, + body=None, + headers=None, + retries=None, + timeout=_Default, + pool_timeout=None, + body_pos=None, + **response_kw + ): + """ + Get a connection from the pool and perform an HTTP request. This is the + lowest level call for making a request, so you'll need to specify all + the raw details. + + .. note:: + + More commonly, it's appropriate to use a convenience method provided + by :class:`.RequestMethods`, such as :meth:`request`. + + :param method: + HTTP request method (such as GET, POST, PUT, etc.) + + :param body: + Data to send in the request body (useful for creating + POST requests, see HTTPConnectionPool.post_url for + more convenience). + + :param headers: + Dictionary of custom headers to send, such as User-Agent, + If-None-Match, etc. If None, pool headers are used. If provided, + these headers completely replace any pool-specific headers. + + :param retries: + Configure the number of retries to allow before raising a + :class:`~urllib3.exceptions.MaxRetryError` exception. + + Pass ``None`` to retry until you receive a response. Pass a + :class:`~urllib3.util.retry.Retry` object for fine-grained control + over different types of retries. + Pass an integer number to retry connection errors that many times, + but no other types of errors. Pass zero to never retry. + + If ``False``, then retries are disabled and any exception is raised + immediately. Also, instead of raising a MaxRetryError on redirects, + the redirect response will be returned. + + :type retries: :class:`~urllib3.util.retry.Retry`, False, or an int. + + :param timeout: + If specified, overrides the default timeout for this one + request. It may be a float (in seconds) or an instance of + :class:`urllib3.util.Timeout`. + + :param pool_timeout: + If set and the pool is set to block=True, then this method will + block for ``pool_timeout`` seconds and raise EmptyPoolError if no + connection is available within the time period. + + :param int body_pos: + Position to seek to in file-like body in the event of a retry or + redirect. Typically this won't need to be set because urllib3 will + auto-populate the value when needed. 
+ + :param \\**response_kw: + Additional parameters are passed to + :meth:`urllib3.response.HTTPResponse.from_httplib` + """ + if headers is None: + headers = self.headers + if not isinstance(retries, Retry): + retries = Retry.from_int( + retries, default=self.retries, redirect=False + ) + conn = None + # Track whether `conn` needs to be released before + # returning/raising/recursing. + release_this_conn = False + # Merge the proxy headers. Only do this in HTTP. We have to copy the + # headers dict so we can safely change it without those changes being + # reflected in anyone else's copy. + if self.scheme == 'http': + headers = headers.copy() + headers.update(self.proxy_headers) + # Must keep the exception bound to a separate variable or else Python 3 + # complains about UnboundLocalError. + err = None + # Keep track of whether we cleanly exited the except block. This + # ensures we do proper cleanup in finally. + clean_exit = False + # Rewind body position, if needed. Record current position + # for future rewinds in the event of a redirect/retry. + body_pos = set_file_position(body, body_pos) + if body is not None: + _add_transport_headers(headers) + try: + # Request a connection from the queue. + timeout_obj = self._get_timeout(timeout) + conn = self._get_conn(timeout=pool_timeout) + conn.timeout = timeout_obj.connect_timeout + # Make the request on the base connection object. + base_response = self._make_request( + conn, + method, + url, + timeout=timeout_obj, + body=body, + headers=headers, + ) + # Pass method to Response for length checking + response_kw['request_method'] = method + # Import httplib's response into our own wrapper object + response = self.ResponseCls.from_base( + base_response, pool=self, retries=retries, **response_kw + ) + # Everything went great! + clean_exit = True + except queue.Empty: + # Timed out by queue. + raise EmptyPoolError(self, "No pool connections are available.") + + except ( + TimeoutError, + SocketError, + ProtocolError, + h11.ProtocolError, + BaseSSLError, + SSLError, + CertificateError, + ) as e: + # Discard the connection for these exceptions. It will be + # replaced during the next _get_conn() call. + clean_exit = False + if isinstance(e, (BaseSSLError, CertificateError)): + e = SSLError(e) + elif isinstance( + e, (SocketError, NewConnectionError) + ) and self.proxy: + e = ProxyError('Cannot connect to proxy.', e) + elif isinstance(e, (SocketError, h11.ProtocolError)): + e = ProtocolError('Connection aborted.', e) + retries = retries.increment( + method, url, error=e, _pool=self, _stacktrace=sys.exc_info()[2] + ) + retries.sleep() + # Keep track of the error for the retry warning. + err = e + finally: + if not clean_exit: + # We hit some kind of exception, handled or otherwise. We need + # to throw the connection away unless explicitly told not to. + # Close the connection, set the variable to None, and make sure + # we put the None back in the pool to avoid leaking it. + conn = conn and conn.close() + release_this_conn = True + if release_this_conn: + # Put the connection back to be reused. If the connection is + # expired then it will be None, which will get replaced with a + # fresh connection during _get_conn. 
+ self._put_conn(conn) + if not conn: + # Try again + log.warning( + "Retrying (%r) after connection " "broken by '%r': %s", + retries, + err, + url, + ) + return self.urlopen( + method, + url, + body, + headers, + retries, + timeout=timeout, + pool_timeout=pool_timeout, + body_pos=body_pos, + **response_kw + ) + + def drain_and_release_conn(response): + try: + # discard any remaining response body, the connection will be + # released back to the pool once the entire response is read + response.read() + except ( + TimeoutError, + SocketError, + ProtocolError, + BaseSSLError, + SSLError, + ) as e: + pass + + # Check if we should retry the HTTP response. + has_retry_after = bool(response.getheader('Retry-After')) + if retries.is_retry(method, response.status, has_retry_after): + try: + retries = retries.increment( + method, url, response=response, _pool=self + ) + except MaxRetryError: + if retries.raise_on_status: + # Drain and release the connection for this response, since + # we're not returning it to be released manually. + drain_and_release_conn(response) + raise + + return response + + # drain and return the connection to the pool before recursing + drain_and_release_conn(response) + retries.sleep(response) + log.debug("Retry: %s", url) + return self.urlopen( + method, + url, + body, + headers, + retries=retries, + timeout=timeout, + pool_timeout=pool_timeout, + body_pos=body_pos, + **response_kw + ) + + return response + + +class HTTPSConnectionPool(HTTPConnectionPool): + """ + Same as :class:`.HTTPConnectionPool`, but HTTPS. + + When Python is compiled with the :mod:`ssl` module, then + :class:`.VerifiedHTTPSConnection` is used, which *can* verify certificates, + instead of :class:`.HTTPSConnection`. + + :class:`.VerifiedHTTPSConnection` uses one of ``assert_fingerprint``, + ``assert_hostname`` and ``host`` in this order to verify connections. + If ``assert_hostname`` is False, no verification is done. + + The ``key_file``, ``cert_file``, ``cert_reqs``, ``ca_certs``, + ``ca_cert_dir``, and ``ssl_version`` are only used if :mod:`ssl` is + available and are fed into :meth:`urllib3.util.ssl_wrap_socket` to upgrade + the connection socket into an SSL socket. + """ + scheme = 'https' + + def __init__( + self, + host, + port=None, + timeout=Timeout.DEFAULT_TIMEOUT, + maxsize=1, + block=False, + headers=None, + retries=None, + _proxy=None, + _proxy_headers=None, + key_file=None, + cert_file=None, + cert_reqs=None, + ca_certs=None, + ssl_version=None, + assert_hostname=None, + assert_fingerprint=None, + ca_cert_dir=None, + ssl_context=None, + **conn_kw + ): + HTTPConnectionPool.__init__( + self, + host, + port, + timeout, + maxsize, + block, + headers, + retries, + _proxy, + _proxy_headers, + **conn_kw + ) + if ssl is None: + raise SSLError("SSL module is not available") + + if ca_certs and cert_reqs is None: + cert_reqs = 'CERT_REQUIRED' + self.ssl_context = _build_context( + ssl_context, + keyfile=key_file, + certfile=cert_file, + cert_reqs=cert_reqs, + ca_certs=ca_certs, + ca_cert_dir=ca_cert_dir, + ssl_version=ssl_version, + ) + self.assert_hostname = assert_hostname + self.assert_fingerprint = assert_fingerprint + + def _new_conn(self): + """ + Return a fresh connection. 
+ """ + self.num_connections += 1 + log.debug( + "Starting new HTTPS connection (%d): %s:%s", + self.num_connections, + self.host, + self.port or "443", + ) + actual_host = self.host + actual_port = self.port + tunnel_host = None + tunnel_port = None + tunnel_headers = None + if self.proxy is not None: + actual_host = self.proxy.host + actual_port = self.proxy.port + tunnel_host = self.host + tunnel_port = self.port + tunnel_headers = self.proxy_headers + + # TODO: Huge hack. + for kw in ('strict', 'redirect'): + if kw in self.conn_kw: + self.conn_kw.pop(kw) + + conn = self.ConnectionCls( + host=actual_host, + port=actual_port, + tunnel_host=tunnel_host, + tunnel_port=tunnel_port, + tunnel_headers=tunnel_headers, + ** self.conn_kw + ) + return conn + + def _start_conn(self, conn, connect_timeout): + """ + Called right before a request is made, after the socket is created. + """ + conn.connect( + ssl_context=self.ssl_context, + fingerprint=self.assert_fingerprint, + assert_hostname=self.assert_hostname, + connect_timeout=connect_timeout, + ) + if not conn.is_verified: + warnings.warn( + ( + 'Unverified HTTPS request is being made. ' + 'Adding certificate verification is strongly advised. See: ' + 'https://urllib3.readthedocs.io/en/latest/advanced-usage.html' + '#ssl-warnings' + ), + InsecureRequestWarning, + ) + + +def connection_from_url(url, **kw): + """ + Given a url, return an :class:`.ConnectionPool` instance of its host. + + This is a shortcut for not having to parse out the scheme, host, and port + of the url before creating an :class:`.ConnectionPool` instance. + + :param url: + Absolute URL string that must include the scheme. Port is optional. + + :param \\**kw: + Passes additional parameters to the constructor of the appropriate + :class:`.ConnectionPool`. Useful for specifying things like + timeout, maxsize, headers, etc. + + Example:: + + >>> conn = connection_from_url('http://google.com/') + >>> r = conn.request('GET', '/') + """ + scheme, host, port = get_host(url) + port = port or DEFAULT_PORTS.get(scheme, 80) + if scheme == 'https': + return HTTPSConnectionPool(host, port=port, **kw) + + else: + return HTTPConnectionPool(host, port=port, **kw) + + +def _ipv6_host(host): + """ + Process IPv6 address literals + """ + # httplib doesn't like it when we include brackets in IPv6 addresses + # Specifically, if we include brackets but also pass the port then + # httplib crazily doubles up the square brackets on the Host header. + # Instead, we need to make sure we never pass ``None`` as the port. + # However, for backward compatibility reasons we can't actually + # *assert* that. 
See http://bugs.python.org/issue28539
+ #
+ # Also if an IPv6 address literal has a zone identifier, the
+ # percent sign might be URI-encoded; convert it back into ASCII
+ if host.startswith('[') and host.endswith(']'):
+ host = host.replace('%25', '%').strip('[]')
+ return host
diff --git a/requests3/core/http_manager/_sync/poolmanager.py b/requests3/core/http_manager/_sync/poolmanager.py
new file mode 100644
index 00000000..9e0b4af1
--- /dev/null
+++ b/requests3/core/http_manager/_sync/poolmanager.py
@@ -0,0 +1,446 @@
+from __future__ import absolute_import
+import collections
+import functools
+import logging
+
+from .._collections import RecentlyUsedContainer
+from ..base import DEFAULT_PORTS
+from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool
+from ..exceptions import LocationValueError, MaxRetryError, ProxySchemeUnknown
+from ..packages.six.moves.urllib.parse import urljoin
+from ..request import RequestMethods
+from ..util.url import parse_url
+from ..util.request import set_file_position
+from ..util.retry import Retry
+
+__all__ = ['PoolManager', 'ProxyManager', 'proxy_from_url']
+log = logging.getLogger(__name__)
+SSL_KEYWORDS = (
+ 'key_file',
+ 'cert_file',
+ 'cert_reqs',
+ 'ca_certs',
+ 'ssl_version',
+ 'ca_cert_dir',
+ 'ssl_context',
+)
+# All known keyword arguments that could be provided to the pool manager, its
+# pools, or the underlying connections. This is used to construct a pool key.
+_key_fields = (
+ 'key_scheme', # str
+ 'key_host', # str
+ 'key_strict',
+ 'key_port', # int
+ 'key_timeout', # int or float or Timeout
+ 'key_retries', # int or Retry
+ 'key_block', # bool
+ 'key_source_address', # str
+ 'key_key_file', # str
+ 'key_cert_file', # str
+ 'key_cert_reqs', # str
+ 'key_ca_certs', # str
+ 'key_ssl_version', # str
+ 'key_ca_cert_dir', # str
+ 'key_ssl_context', # instance of ssl.SSLContext or urllib3.util.ssl_.SSLContext
+ 'key_maxsize', # int
+ 'key_headers', # dict
+ 'key__proxy', # parsed proxy url
+ 'key__proxy_headers', # dict
+ 'key_socket_options', # list of (level (int), optname (int), value (int or str)) tuples
+ 'key__socks_options', # dict
+ 'key_assert_hostname', # bool or string
+ 'key_assert_fingerprint', # str
+)
+#: The namedtuple class used to construct keys for the connection pool.
+#: All custom key schemes should include the fields in this key at a minimum.
+PoolKey = collections.namedtuple('PoolKey', _key_fields)
+
+
+def _default_key_normalizer(key_class, request_context):
+ """
+ Create a pool key out of a request context dictionary.
+
+ According to RFC 3986, both the scheme and host are case-insensitive.
+ Therefore, this function normalizes both before constructing the pool
+ key for an HTTPS request. If you wish to change this behaviour, provide
+ alternate callables to ``key_fn_by_scheme``.
+
+ :param key_class:
+ The class to use when constructing the key. This should be a namedtuple
+ with the ``scheme`` and ``host`` keys at a minimum.
+ :type key_class: namedtuple
+ :param request_context:
+ A dictionary-like object that contains the context for a request.
+ :type request_context: dict
+
+ :return: A namedtuple that can be used as a connection pool key.
+ :rtype: PoolKey + """ + # Since we mutate the dictionary, make a copy first + context = request_context.copy() + context['scheme'] = context['scheme'].lower() + context['host'] = context['host'].lower() + # These are both dictionaries and need to be transformed into frozensets + for key in ('headers', '_proxy_headers', '_socks_options'): + if key in context and context[key] is not None: + context[key] = frozenset(context[key].items()) + # The socket_options key may be a list and needs to be transformed into a + # tuple. + socket_opts = context.get('socket_options') + if socket_opts is not None: + context['socket_options'] = tuple(socket_opts) + # Map the kwargs to the names in the namedtuple - this is necessary since + # namedtuples can't have fields starting with '_'. + for key in list(context.keys()): + context['key_' + key] = context.pop(key) + # Default to ``None`` for keys missing from the context + for field in key_class._fields: + if field not in context: + context[field] = None + return key_class(**context) + + +# : A dictionary that maps a scheme to a callable that creates a pool key. +#: This can be used to alter the way pool keys are constructed, if desired. +#: Each PoolManager makes a copy of this dictionary so they can be configured +#: globally here, or individually on the instance. +key_fn_by_scheme = { + 'http': functools.partial(_default_key_normalizer, PoolKey), + 'https': functools.partial(_default_key_normalizer, PoolKey), +} +pool_classes_by_scheme = { + 'http': HTTPConnectionPool, 'https': HTTPSConnectionPool +} + + +class PoolManager(RequestMethods): + """ + Allows for arbitrary requests while transparently keeping track of + necessary connection pools for you. + + :param num_pools: + Number of connection pools to cache before discarding the least + recently used pool. + + :param headers: + Headers to include with all requests, unless other headers are given + explicitly. + + :param \\**connection_pool_kw: + Additional parameters are used to create fresh + :class:`urllib3.connectionpool.ConnectionPool` instances. + + Example:: + + >>> manager = PoolManager(num_pools=2) + >>> r = manager.request('GET', 'http://google.com/') + >>> r = manager.request('GET', 'http://google.com/mail') + >>> r = manager.request('GET', 'http://yahoo.com/') + >>> len(manager.pools) + 2 + + """ + proxy = None + + def __init__( + self, num_pools=10, headers=None, backend=None, **connection_pool_kw + ): + RequestMethods.__init__(self, headers) + self.connection_pool_kw = connection_pool_kw + self.pools = RecentlyUsedContainer( + num_pools, dispose_func=lambda p: p.close() + ) + # Locally set the pool classes and keys so other PoolManagers can + # override them. + self.pool_classes_by_scheme = pool_classes_by_scheme + self.key_fn_by_scheme = key_fn_by_scheme.copy() + self.backend = backend + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.clear() + # Return False to re-raise any potential exceptions + return False + + def _new_pool(self, scheme, host, port, request_context=None): + """ + Create a new :class:`ConnectionPool` based on host, port, scheme, and + any additional pool keyword arguments. + + If ``request_context`` is provided, it is provided as keyword arguments + to the pool class used. This method is used to actually create the + connection pools handed out by :meth:`connection_from_url` and + companion methods. It is intended to be overridden for customization. 
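+
+ For instance, a subclass might hook pool creation like this (a
+ sketch; the class name is illustrative)::
+
+ >>> class LoggingPoolManager(PoolManager):
+ ... def _new_pool(self, scheme, host, port, request_context=None):
+ ... log.debug("new pool: %s://%s:%s", scheme, host, port)
+ ... return super(LoggingPoolManager, self)._new_pool(
+ ... scheme, host, port, request_context=request_context)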
+ """ + pool_cls = self.pool_classes_by_scheme[scheme] + if request_context is None: + request_context = self.connection_pool_kw.copy() + # Although the context has everything necessary to create the pool, + # this function has historically only used the scheme, host, and port + # in the positional args. When an API change is acceptable these can + # be removed. + for key in ('scheme', 'host', 'port'): + request_context.pop(key, None) + if scheme == 'http': + for kw in SSL_KEYWORDS: + request_context.pop(kw, None) + return pool_cls(host, port, backend=self.backend, **request_context) + + def clear(self): + """ + Empty our store of pools and direct them all to close. + + This will not affect in-flight connections, but they will not be + re-used after completion. + """ + self.pools.clear() + + def connection_from_host( + self, host, port=None, scheme='http', pool_kwargs=None + ): + """ + Get a :class:`ConnectionPool` based on the host, port, and scheme. + + If ``port`` isn't given, it will be derived from the ``scheme`` using + ``urllib3.connectionpool.port_by_scheme``. If ``pool_kwargs`` is + provided, it is merged with the instance's ``connection_pool_kw`` + variable and used to create the new connection pool, if one is + needed. + """ + if not host: + raise LocationValueError("No host specified.") + + request_context = self._merge_pool_kwargs(pool_kwargs) + request_context['scheme'] = scheme or 'http' + if not port: + port = DEFAULT_PORTS.get(request_context['scheme'].lower(), 80) + request_context['port'] = port + request_context['host'] = host + return self.connection_from_context(request_context) + + def connection_from_context(self, request_context): + """ + Get a :class:`ConnectionPool` based on the request context. + + ``request_context`` must at least contain the ``scheme`` key and its + value must be a key in ``key_fn_by_scheme`` instance variable. + """ + scheme = request_context['scheme'].lower() + pool_key_constructor = self.key_fn_by_scheme[scheme] + pool_key = pool_key_constructor(request_context) + return self.connection_from_pool_key( + pool_key, request_context=request_context + ) + + def connection_from_pool_key(self, pool_key, request_context=None): + """ + Get a :class:`ConnectionPool` based on the provided pool key. + + ``pool_key`` should be a namedtuple that only contains immutable + objects. At a minimum it must have the ``scheme``, ``host``, and + ``port`` fields. + """ + with self.pools.lock: + # If the scheme, host, or port doesn't match existing open + # connections, open a new ConnectionPool. + pool = self.pools.get(pool_key) + if pool: + return pool + + # Make a fresh ConnectionPool of the desired type + scheme = request_context['scheme'] + host = request_context['host'] + port = request_context['port'] + pool = self._new_pool( + scheme, host, port, request_context=request_context + ) + self.pools[pool_key] = pool + return pool + + def connection_from_url(self, url, pool_kwargs=None): + """ + Similar to :func:`urllib3.connectionpool.connection_from_url`. + + If ``pool_kwargs`` is not provided and a new pool needs to be + constructed, ``self.connection_pool_kw`` is used to initialize + the :class:`urllib3.connectionpool.ConnectionPool`. If ``pool_kwargs`` + is provided, it is used instead. Note that if a new pool does not + need to be created for the request, the provided ``pool_kwargs`` are + not used. 
+ """ + u = parse_url(url) + return self.connection_from_host( + u.host, port=u.port, scheme=u.scheme, pool_kwargs=pool_kwargs + ) + + def _merge_pool_kwargs(self, override): + """ + Merge a dictionary of override values for self.connection_pool_kw. + + This does not modify self.connection_pool_kw and returns a new dict. + Any keys in the override dictionary with a value of ``None`` are + removed from the merged dictionary. + """ + base_pool_kwargs = self.connection_pool_kw.copy() + if override: + for key, value in override.items(): + if value is None: + try: + del base_pool_kwargs[key] + except KeyError: + pass + else: + base_pool_kwargs[key] = value + return base_pool_kwargs + + def urlopen(self, method, url, redirect=True, **kw): + """ + Same as :meth:`urllib3.connectionpool.HTTPConnectionPool.urlopen` + with redirect logic and only sends the request-uri portion of the + ``url``. + + The given ``url`` parameter must be absolute, such that an appropriate + :class:`urllib3.connectionpool.ConnectionPool` can be chosen for it. + """ + u = parse_url(url) + conn = self.connection_from_host(u.host, port=u.port, scheme=u.scheme) + # Rewind body position, if needed. Record current position + # for future rewinds in the event of a redirect/retry. + body = kw.get('body') + body_pos = kw.get('body_pos') + kw['body_pos'] = set_file_position(body, body_pos) + if 'headers' not in kw: + kw['headers'] = self.headers + if self.proxy is not None and u.scheme == "http": + response = conn.urlopen(method, url, **kw) + else: + response = conn.urlopen(method, u.request_uri, **kw) + redirect_location = redirect and response.get_redirect_location() + if not redirect_location: + return response + + # Support relative URLs for redirecting. + redirect_location = urljoin(url, redirect_location) + # RFC 7231, Section 6.4.4 + if response.status == 303: + method = 'GET' + retries = kw.get('retries') + if not isinstance(retries, Retry): + retries = Retry.from_int(retries, redirect=redirect) + try: + retries = retries.increment( + method, url, response=response, _pool=conn + ) + except MaxRetryError: + if retries.raise_on_redirect: + raise + + return response + + kw['retries'] = retries + kw['redirect'] = redirect + retries.sleep_for_retry(response) + log.info("Redirecting %s -> %s", url, redirect_location) + return self.urlopen(method, redirect_location, **kw) + + +class ProxyManager(PoolManager): + """ + Behaves just like :class:`PoolManager`, but sends all requests through + the defined proxy, using the CONNECT method for HTTPS URLs. + + :param proxy_url: + The URL of the proxy to be used. + + :param proxy_headers: + A dictionary contaning headers that will be sent to the proxy. In case + of HTTP they are being sent with each request, while in the + HTTPS/CONNECT case they are sent only once. Could be used for proxy + authentication. 
+ + Example: + >>> proxy = urllib3.ProxyManager('http://localhost:3128/') + >>> r1 = proxy.request('GET', 'http://google.com/') + >>> r2 = proxy.request('GET', 'http://httpbin.org/') + >>> len(proxy.pools) + 1 + >>> r3 = proxy.request('GET', 'https://httpbin.org/') + >>> r4 = proxy.request('GET', 'https://twitter.com/') + >>> len(proxy.pools) + 3 + + """ + + def __init__( + self, + proxy_url, + num_pools=10, + headers=None, + proxy_headers=None, + **connection_pool_kw + ): + if isinstance(proxy_url, HTTPConnectionPool): + proxy_url = '%s://%s:%i' % ( + proxy_url.scheme, proxy_url.host, proxy_url.port + ) + proxy = parse_url(proxy_url) + if not proxy.port: + port = DEFAULT_PORTS.get(proxy.scheme, 80) + proxy = proxy._replace(port=port) + if proxy.scheme not in ("http", "https"): + raise ProxySchemeUnknown(proxy.scheme) + + self.proxy = proxy + self.proxy_headers = proxy_headers or {} + connection_pool_kw['_proxy'] = self.proxy + connection_pool_kw['_proxy_headers'] = self.proxy_headers + super(ProxyManager, self).__init__( + num_pools, headers, **connection_pool_kw + ) + + def connection_from_host( + self, host, port=None, scheme='http', pool_kwargs=None + ): + if scheme == "https": + return super(ProxyManager, self).connection_from_host( + host, port, scheme, pool_kwargs=pool_kwargs + ) + + return super(ProxyManager, self).connection_from_host( + self.proxy.host, + self.proxy.port, + self.proxy.scheme, + pool_kwargs=pool_kwargs, + ) + + def _set_proxy_headers(self, url, headers=None): + """ + Sets headers needed by proxies: specifically, the Accept and Host + headers. Only sets headers not provided by the user. + """ + headers_ = {'Accept': '*/*'} + netloc = parse_url(url).netloc + if netloc: + headers_['Host'] = netloc + if headers: + headers_.update(headers) + return headers_ + + def urlopen(self, method, url, redirect=True, **kw): + "Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute." + u = parse_url(url) + if u.scheme == "http": + # For proxied HTTPS requests, httplib sets the necessary headers + # on the CONNECT to the proxy. For HTTP, we'll definitely + # need to set 'Host' at the very least. 
+ headers = kw.get('headers', self.headers)
+ kw['headers'] = self._set_proxy_headers(url, headers)
+ return super(ProxyManager, self).urlopen(
+ method, url, redirect=redirect, **kw
+ )
+
+
+def proxy_from_url(url, **kw):
+ return ProxyManager(proxy_url=url, **kw)
diff --git a/requests3/core/http_manager/_sync/response.py b/requests3/core/http_manager/_sync/response.py
new file mode 100644
index 00000000..d3f59556
--- /dev/null
+++ b/requests3/core/http_manager/_sync/response.py
@@ -0,0 +1,461 @@
+from __future__ import absolute_import
+from contextlib import contextmanager
+import zlib
+import io
+import logging
+from socket import timeout as SocketTimeout
+from socket import error as SocketError
+
+import h11
+
+from .._collections import HTTPHeaderDict
+from ..exceptions import (ProtocolError, DecodeError, ReadTimeoutError)
+from ..packages.six import string_types as basestring, binary_type
+from ..util.ssl_ import BaseSSLError
+
+log = logging.getLogger(__name__)
+
+
+class DeflateDecoder(object):
+
+ def __init__(self):
+ self._first_try = True
+ self._data = binary_type()
+ self._obj = zlib.decompressobj()
+
+ def __getattr__(self, name):
+ return getattr(self._obj, name)
+
+ def decompress(self, data):
+ if not data:
+ return data
+
+ if not self._first_try:
+ return self._obj.decompress(data)
+
+ self._data += data
+ try:
+ decompressed = self._obj.decompress(data)
+ if decompressed:
+ self._first_try = False
+ self._data = None
+ return decompressed
+
+ except zlib.error:
+ self._first_try = False
+ self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
+ try:
+ return self.decompress(self._data)
+
+ finally:
+ self._data = None
+
+
+class GzipDecoder(object):
+
+ def __init__(self):
+ self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
+
+ def __getattr__(self, name):
+ return getattr(self._obj, name)
+
+ def decompress(self, data):
+ if not data:
+ return data
+
+ return self._obj.decompress(data)
+
+
+def _get_decoder(mode):
+ if mode == 'gzip':
+ return GzipDecoder()
+
+ return DeflateDecoder()
+
+
+class HTTPResponse(io.IOBase):
+ """
+ HTTP Response container.
+
+ Backwards-compatible with httplib's HTTPResponse but the response ``body`` is
+ loaded and decoded on-demand when the ``data`` property is accessed. This
+ class is also compatible with the Python standard library's :mod:`io`
+ module, and can hence be treated as a readable object in the context of that
+ framework.
+
+ Extra parameters for behaviour not present in httplib.HTTPResponse:
+
+ :param preload_content:
+ If True, the response's body will be preloaded during construction.
+
+ :param decode_content:
+ If True, attempts to decode the body based on the 'content-encoding'
+ header (e.g. 'gzip' and 'deflate'); if False, decoding is skipped and
+ the raw data is returned instead.
+
+ :param retries:
+ The retries contains the last :class:`~urllib3.util.retry.Retry` that
+ was used during the request.
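+
+ Example of streaming a body rather than preloading it (a sketch;
+ ``pool`` is assumed to be an existing connection pool)::
+
+ >>> r = pool.urlopen('GET', '/', preload_content=False)
+ >>> for chunk in r.stream():
+ ... print(len(chunk))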
+ """ + CONTENT_DECODERS = ['gzip', 'deflate'] + REDIRECT_STATUSES = [301, 302, 303, 307, 308] + + def __init__( + self, + body='', + headers=None, + status=0, + version=0, + reason=None, + strict=0, + preload_content=True, + decode_content=True, + original_response=None, + pool=None, + connection=None, + retries=None, + request_method=None, + ): + if isinstance(headers, HTTPHeaderDict): + self.headers = headers + else: + self.headers = HTTPHeaderDict(headers) + self.status = status + self.version = version + self.reason = reason + self.strict = strict + self.decode_content = decode_content + self.retries = retries + self._decoder = None + self._body = None + self._fp = None + self._original_response = original_response + self._fp_bytes_read = 0 + self._buffer = b'' + if body and isinstance(body, (basestring, binary_type)): + self._body = body + else: + self._fp = body + self._pool = pool + self._connection = connection + # If requested, preload the body. + if preload_content and not self._body: + self._body = self.read(decode_content=decode_content) + + def get_redirect_location(self): + """ + Should we redirect and where to? + + :returns: Truthy redirect location string if we got a redirect status + code and valid location. ``None`` if redirect status and no + location. ``False`` if not a redirect status code. + """ + if self.status in self.REDIRECT_STATUSES: + return self.headers.get('location') + + return False + + def release_conn(self): + if not self._pool or not self._connection: + return + + self._pool._put_conn(self._connection) + self._connection = None + + @property + def data(self): + # For backwords-compat with earlier urllib3 0.4 and earlier. + if self._body is not None: + return self._body + + if self._fp: + return self.read(cache_content=True) + + @property + def connection(self): + return self._connection + + def tell(self): + """ + Obtain the number of bytes pulled over the wire so far. May differ from + the amount of content returned by :meth:``HTTPResponse.read`` if bytes + are encoded on the wire (e.g, compressed). + """ + return self._fp_bytes_read + + def _init_decoder(self): + """ + Set-up the _decoder attribute if necessary. + """ + # Note: content-encoding value should be case-insensitive, per RFC 7230 + # Section 3.2 + content_encoding = self.headers.get('content-encoding', '').lower() + if self._decoder is None and content_encoding in self.CONTENT_DECODERS: + self._decoder = _get_decoder(content_encoding) + + def _decode(self, data, decode_content, flush_decoder): + """ + Decode the data passed in and potentially flush the decoder. + """ + try: + if decode_content and self._decoder: + data = self._decoder.decompress(data) + except (IOError, zlib.error) as e: + content_encoding = self.headers.get('content-encoding', '').lower() + raise DecodeError( + "Received response with content-encoding: %s, but " + "failed to decode it." % content_encoding, + e, + ) + + if flush_decoder and decode_content: + data += self._flush_decoder() + return data + + def _flush_decoder(self): + """ + Flushes the decoder. Should only be called if the decoder is actually + being used. + """ + if self._decoder: + buf = self._decoder.decompress(b'') + return buf + self._decoder.flush() + + return b'' + + @contextmanager + def _error_catcher(self): + """ + Catch low-level python exceptions, instead re-raising urllib3 + variants, so that low-level exceptions are not leaked in the + high-level api. + + On exit, release the connection back to the pool. 
+ """ + clean_exit = False + try: + try: + yield + + except SocketTimeout: + # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but + # there is yet no clean way to get at it from this context. + raise ReadTimeoutError(self._pool, None, 'Read timed out.') + + except BaseSSLError as e: + # FIXME: Is there a better way to differentiate between SSLErrors? + if 'read operation timed out' not in str(e): # Defensive: + # This shouldn't happen but just in case we're missing an edge + # case, let's avoid swallowing SSL errors. + raise + + raise ReadTimeoutError(self._pool, None, 'Read timed out.') + + except (h11.ProtocolError, SocketError) as e: + # This includes IncompleteRead. + raise ProtocolError('Connection broken: %r' % e, e) + + except GeneratorExit: + # We swallow GeneratorExit when it is emitted: this allows the + # use of the error checker inside stream() + pass + # If no exception is thrown, we should avoid cleaning up + # unnecessarily. + clean_exit = True + finally: + # If we didn't terminate cleanly, we need to throw away our + # connection. + if not clean_exit: + self.close() + # If we hold the original response but it's finished now, we should + # return the connection back to the pool. + # XXX + if False and self._original_response and self._original_response.complete: + self.release_conn() + + def read(self, amt=None, decode_content=None, cache_content=False): + """ + Similar to :meth:`httplib.HTTPResponse.read`, but with two additional + parameters: ``decode_content`` and ``cache_content``. + + :param amt: + How much of the content to read. If specified, caching is skipped + because it doesn't make sense to cache partial content as the full + response. + + :param decode_content: + If True, will attempt to decode the body based on the + 'content-encoding' header. + + :param cache_content: + If True, will save the returned data such that the same result is + returned despite of the state of the underlying file object. This + is useful if you want the ``.data`` property to continue working + after having ``.read()`` the file object. (Overridden if ``amt`` is + set.) + """ + # TODO: refactor this method to better handle buffered output. + # This method is a weird one. We treat this read() like a buffered + # read, meaning that it never reads "short" unless there is an EOF + # condition at work. However, we have a decompressor in play here, + # which means our read() returns decompressed data. + # + # This means the buffer can only meaningfully buffer decompressed data. + # This makes this method prone to over-reading, and forcing too much + # data into the buffer. That's unfortunate, but right now I'm not smart + # enough to come up with a way to solve that problem. + if self._fp is None and not self._buffer: + return b'' + + data = self._buffer + with self._error_catcher(): + if amt is None: + chunks = [] + for chunk in self.stream(decode_content): + chunks.append(chunk) + data += b''.join(chunks) + self._buffer = b'' + # We only cache the body data for simple read calls. + self._body = data + else: + data_len = len(data) + chunks = [data] + streamer = self.stream(decode_content) + while data_len < amt: + try: + chunk = next(streamer) + except StopIteration: + break + + else: + chunks.append(chunk) + data_len += len(chunk) + data = b''.join(chunks) + self._buffer = data[amt:] + data = data[:amt] + return data + + def stream(self, decode_content=None): + """ + A generator wrapper for the read() method. 
+ + :param decode_content: + If True, will attempt to decode the body based on the + 'content-encoding' header. + """ + # Short-circuit evaluation for exhausted responses. + if self._fp is None: + return + + self._init_decoder() + if decode_content is None: + decode_content = self.decode_content + with self._error_catcher(): + for raw_chunk in self._fp: + self._fp_bytes_read += len(raw_chunk) + decoded_chunk = self._decode( + raw_chunk, decode_content, flush_decoder=False + ) + if decoded_chunk: + yield decoded_chunk + + # This branch is speculative: most decoders do not need to flush, + # and so this produces no output. However, it's here because + # anecdotally some platforms on which we do not test (like Jython) + # do require the flush. For this reason, we exclude this from code + # coverage. Happily, the code here is so simple that testing the + # branch we don't enter is basically entirely unnecessary (it's + # just a yield statement). + final_chunk = self._decode(b'', decode_content, flush_decoder=True) + if final_chunk: # Platform-specific: Jython + yield final_chunk + + self._fp = None + + @classmethod + def from_base(ResponseCls, r, **response_kw): + """ + Given an :class:`urllib3.base.Response` instance ``r``, return a + corresponding :class:`urllib3.response.HTTPResponse` object. + + Remaining parameters are passed to the HTTPResponse constructor, along + with ``original_response=r``. + """ + # TODO: Huge hack. + for kw in ('redirect', 'assert_same_host', 'enforce_content_length'): + if kw in response_kw: + response_kw.pop(kw) + + resp = ResponseCls( + body=r.body, + headers=r.headers, + status=r.status_code, + version=r.version, + original_response=r, + connection=r.body, + **response_kw + ) + return resp + + + # Backwards-compatibility methods for httplib.HTTPResponse + def getheaders(self): + return self.headers + + def getheader(self, name, default=None): + return self.headers.get(name, default) + + + # Backwards compatibility for http.cookiejar + def info(self): + return self.headers + + + # Overrides from io.IOBase + def close(self): + if not self.closed: + self._fp.close() + self._buffer = b'' + self._fp = None + if self._connection: + self._connection.close() + + @property + def closed(self): + # This method is required for `io` module compatibility. + if self._fp is None and not self._buffer: + return True + + elif hasattr(self._fp, 'complete'): + return self._fp.complete + + else: + return False + + def fileno(self): + # This method is required for `io` module compatibility. + if self._fp is None: + raise IOError("HTTPResponse has no file to get a fileno from") + + elif hasattr(self._fp, "fileno"): + return self._fp.fileno() + + else: + raise IOError( + "The file-like object this HTTPResponse is wrapped " + "around has no file descriptor" + ) + + def readable(self): + # This method is required for `io` module compatibility. + return True + + def readinto(self, b): + # This method is required for `io` module compatibility. + temp = self.read(len(b)) + if len(temp) == 0: + return 0 + + else: + b[:len(temp)] = temp + return len(temp) diff --git a/requests3/core/http_manager/base.py b/requests3/core/http_manager/base.py new file mode 100644 index 00000000..1dbe94a6 --- /dev/null +++ b/requests3/core/http_manager/base.py @@ -0,0 +1,100 @@ +# -*- coding: utf-8 -*- +""" +This module provides the base structure of the Request/Response objects that +urllib3 passes around to manage its HTTP semantic layer. 
+
+These objects are the lowest common denominator: that is, they define the
+Request/Response functionality that is always supported by urllib3. This means
+they do not include any of the extra machinery required for asynchrony: that
+functionality is handled elsewhere. Every part of urllib3 is required to be
+able to work with one of these objects.
+"""
+from ._collections import HTTPHeaderDict
+
+# This dictionary is used to store the default ports for specific schemes to
+# control whether the port is inserted into the Host header.
+DEFAULT_PORTS = {"http": 80, "https": 443}
+
+
+class Request(object):
+ """
+ The base, common, Request object.
+
+ This object provides a *semantic* representation of an HTTP request. It
+ includes all the magical parts of an HTTP request that we have come to know
+ and love: it has a method, a target (the path & query portions of a URI),
+ some headers, and optionally a body.
+
+ All of urllib3 manipulates these Request objects, passing them around and
+ changing them as necessary. The low-level layers know how to send these
+ objects.
+ """
+
+ def __init__(self, method, target, headers=None, body=None):
+ # : The HTTP method in use. Must be a byte string.
+ self.method = method
+ # : The request target: that is, the path and query portions of the URI.
+ self.target = target
+ # : The request headers. These are always stored as a HTTPHeaderDict.
+ self.headers = HTTPHeaderDict(headers)
+ # : The request body. This is allowed to be one of a few kinds of objects:
+ #: - A byte string.
+ #: - A "readable" object.
+ #: - An iterable of byte strings.
+ #: - A text string (not recommended, auto-encoded to UTF-8)
+ self.body = body
+
+ def add_host(self, host, port, scheme):
+ """
+ Add the Host header, as needed.
+
+ This helper method exists to circumvent an ordering problem: the best
+ layer to add the Host header is the bottom layer, but it is the layer
+ that will add headers last. That means that they will appear at the
+ bottom of the header block.
+
+ Proxies, caches, and other intermediaries *hate* it when clients do
+ that because the Host header is routing information, and they'd like to
+ see it as early as possible. For this reason, this method ensures that
+ the Host header will be the first one emitted. It also ensures that we
+ do not duplicate the host header: if there already is one, we just use
+ that one.
+ """
+ if b'host' not in self.headers:
+ # We test against a sentinel object here to forcibly always insert
+ # the port for schemes we don't understand.
+ if port is DEFAULT_PORTS.get(scheme, object()):
+ header = host
+ else:
+ header = "{}:{}".format(host, port)
+ headers = HTTPHeaderDict(host=header)
+ headers._copy_from(self.headers)
+ self.headers = headers
+
+
+class Response(object):
+ """
+ The abstract low-level Response object that urllib3 works on. This is not
+ the high-level helpful Response object that is exposed at the higher layers
+ of urllib3: it's just a simple object that exposes the lowest-level
+ HTTP semantics to allow processing by the higher levels.
+ """
+
+ def __init__(self, status_code, headers, body, version):
+ # : The HTTP status code of the response.
+ self.status_code = status_code
+ # : The headers on the response, as a HTTPHeaderDict.
+ self.headers = HTTPHeaderDict(headers)
+ # : The response body. This is an iterable of bytes, and *must* be
+ #: iterated if the connection is to be preserved.
+ self.body = body
+ # : The HTTP version of the response. Stored as a bytestring.
+ self.version = version + + @property + def complete(self): + """ + If the response can be safely returned to the connection pool, returns + True. + """ + return self.body.complete diff --git a/requests3/core/http_manager/connection.py b/requests3/core/http_manager/connection.py new file mode 100644 index 00000000..14989de4 --- /dev/null +++ b/requests3/core/http_manager/connection.py @@ -0,0 +1,406 @@ +from __future__ import absolute_import +import datetime +import logging +import os +import sys +import socket +from socket import error as SocketError, timeout as SocketTimeout +import warnings +from .packages import six +from .packages.six.moves.http_client import HTTPConnection as _HTTPConnection +from .packages.six.moves.http_client import HTTPException # noqa: F401 + +try: # Compiled with SSL? + import ssl + + BaseSSLError = ssl.SSLError +except (ImportError, AttributeError): # Platform-specific: No SSL. + ssl = None + + class BaseSSLError(BaseException): + pass + + +try: # Python 3: + # Not a no-op, we're adding this to the namespace so it can be imported. + ConnectionError = ConnectionError +except NameError: # Python 2: + + class ConnectionError(Exception): + pass + + +from .exceptions import ( + NewConnectionError, + ConnectTimeoutError, + SubjectAltNameWarning, + SystemTimeWarning, +) +from .packages.ssl_match_hostname import match_hostname, CertificateError + +from .util.ssl_ import ( + resolve_cert_reqs, + resolve_ssl_version, + assert_fingerprint, + create_urllib3_context, + ssl_wrap_socket, +) + + +from .util import connection + +from ._collections import HTTPHeaderDict + +log = logging.getLogger(__name__) +port_by_scheme = {'http': 80, 'https': 443} +# When updating RECENT_DATE, move it to within two years of the current date, +# and not less than 6 months ago. +# Example: if Today is 2018-01-01, then RECENT_DATE should be any date on or +# after 2016-01-01 (today - 2 years) AND before 2017-07-01 (today - 6 months) +RECENT_DATE = datetime.date(2017, 6, 30) + + +class DummyConnection(object): + """Used to detect a failed ConnectionCls import.""" + pass + + +class HTTPConnection(_HTTPConnection, object): + """ + Based on httplib.HTTPConnection but provides an extra constructor + backwards-compatibility layer between older and newer Pythons. + + Additional keyword parameters are used to configure attributes of the connection. + Accepted parameters include: + + - ``strict``: See the documentation on :class:`urllib3.connectionpool.HTTPConnectionPool` + - ``source_address``: Set the source address for the current connection. + + .. note:: This is ignored for Python 2.6. It is only applied for 2.7 and 3.x + + - ``socket_options``: Set specific options on the underlying socket. If not specified, then + defaults are loaded from ``HTTPConnection.default_socket_options`` which includes disabling + Nagle's algorithm (sets TCP_NODELAY to 1) unless the connection is behind a proxy. + + For example, if you wish to enable TCP Keep Alive in addition to the defaults, + you might pass:: + + HTTPConnection.default_socket_options + [ + (socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1), + ] + + Or you may want to disable the defaults by passing an empty list (e.g., ``[]``). + """ + default_port = port_by_scheme['http'] + # : Disable Nagle's algorithm by default. + #: ``[(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)]`` + default_socket_options = [(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)] + # : Whether this connection verifies the host's certificate. 
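# [Editor's sketch -- not part of this diff.] Applying a socket-options list
# of the shape described in the docstring above: each entry is a
# (level, optname, value) triple passed straight to setsockopt(). This
# generic example enables keep-alive on top of the documented TCP_NODELAY
# default; it does not touch the connection class itself.
import socket

options = [
    (socket.IPPROTO_TCP, socket.TCP_NODELAY, 1),   # the documented default
    (socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1),   # an extra option
]
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
for level, optname, value in options:
    sock.setsockopt(level, optname, value)
sock.close()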
+ is_verified = False + + def __init__(self, *args, **kw): + if six.PY3: # Python 3 + kw.pop('strict', None) + # Pre-set source_address in case we have an older Python like 2.6. + self.source_address = kw.get('source_address') + if sys.version_info < (2, 7): # Python 2.6 + # _HTTPConnection on Python 2.6 will balk at this keyword arg, but + # not newer versions. We can still use it when creating a + # connection though, so we pop it *after* we have saved it as + # self.source_address. + kw.pop('source_address', None) + # : The socket options provided by the user. If no options are + #: provided, we use the default options. + self.socket_options = kw.pop( + 'socket_options', self.default_socket_options + ) + # Superclass also sets self.source_address in Python 2.7+. + _HTTPConnection.__init__(self, *args, **kw) + + @property + def host(self): + """ + Getter method to remove any trailing dots that indicate the hostname is an FQDN. + + In general, SSL certificates don't include the trailing dot indicating a + fully-qualified domain name, and thus, they don't validate properly when + checked against a domain name that includes the dot. In addition, some + servers may not expect to receive the trailing dot when provided. + + However, the hostname with trailing dot is critical to DNS resolution; doing a + lookup with the trailing dot will properly only resolve the appropriate FQDN, + whereas a lookup without a trailing dot will search the system's search domain + list. Thus, it's important to keep the original host around for use only in + those cases where it's appropriate (i.e., when doing DNS lookup to establish the + actual TCP connection across which we're going to send HTTP requests). + """ + return self._dns_host.rstrip('.') + + @host.setter + def host(self, value): + """ + Setter for the `host` property. + + We assume that only urllib3 uses the _dns_host attribute; httplib itself + only uses `host`, and it seems reasonable that other libraries follow suit. + """ + self._dns_host = value + + def _new_conn(self): + """ Establish a socket connection and set nodelay settings on it. + + :return: New socket connection. + """ + extra_kw = {} + if self.source_address: + extra_kw['source_address'] = self.source_address + if self.socket_options: + extra_kw['socket_options'] = self.socket_options + try: + conn = connection.create_connection( + (self._dns_host, self.port), self.timeout, **extra_kw + ) + except SocketTimeout as e: + raise ConnectTimeoutError( + self, + "Connection to %s timed out. (connect timeout=%s)" % + (self.host, self.timeout), + ) + + except SocketError as e: + raise NewConnectionError( + self, "Failed to establish a new connection: %s" % e + ) + + return conn + + def _prepare_conn(self, conn): + self.sock = conn + # the _tunnel_host attribute was added in python 2.6.3 (via + # http://hg.python.org/cpython/rev/0f57b30a152f) so pythons 2.6(0-2) do + # not have them. + if getattr(self, '_tunnel_host', None): + # TODO: Fix tunnel so it doesn't depend on self.sock state. 
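# [Editor's sketch -- not part of this diff.] The FQDN split described in the
# `host` property above: DNS resolution keeps the trailing dot (so the lookup
# bypasses the system's search-domain list), while certificate matching and
# the Host header use the stripped form.
dns_host = "example.com."          # kept for the actual socket connection
tls_host = dns_host.rstrip('.')    # matched against the certificate
assert tls_host == "example.com"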
+ self._tunnel() + # Mark this connection as not reusable + self.auto_open = 0 + + def connect(self): + conn = self._new_conn() + self._prepare_conn(conn) + + def request_chunked(self, method, url, body=None, headers=None): + """ + Alternative to the common request method, which sends the + body with chunked encoding and not as one block + """ + headers = HTTPHeaderDict(headers if headers is not None else {}) + skip_accept_encoding = 'accept-encoding' in headers + skip_host = 'host' in headers + self.putrequest( + method, + url, + skip_accept_encoding=skip_accept_encoding, + skip_host=skip_host, + ) + for header, value in headers.items(): + self.putheader(header, value) + if 'transfer-encoding' not in headers: + self.putheader('Transfer-Encoding', 'chunked') + self.endheaders() + if body is not None: + stringish_types = six.string_types + (six.binary_type,) + if isinstance(body, stringish_types): + body = (body,) + for chunk in body: + if not chunk: + continue + + if not isinstance(chunk, six.binary_type): + chunk = chunk.encode('utf8') + len_str = hex(len(chunk))[2:] + self.send(len_str.encode('utf-8')) + self.send(b'\r\n') + self.send(chunk) + self.send(b'\r\n') + # After the if clause, to always have a closed body + self.send(b'0\r\n\r\n') + + +class HTTPSConnection(HTTPConnection): + default_port = port_by_scheme['https'] + ssl_version = None + + def __init__( + self, + host, + port=None, + key_file=None, + cert_file=None, + strict=None, + timeout=socket._GLOBAL_DEFAULT_TIMEOUT, + ssl_context=None, + **kw + ): + HTTPConnection.__init__( + self, host, port, strict=strict, timeout=timeout, **kw + ) + self.key_file = key_file + self.cert_file = cert_file + self.ssl_context = ssl_context + # Required property for Google AppEngine 1.9.0 which otherwise causes + # HTTPS requests to go out as HTTP. (See Issue #356) + self._protocol = 'https' + + def connect(self): + conn = self._new_conn() + self._prepare_conn(conn) + if self.ssl_context is None: + self.ssl_context = create_urllib3_context( + ssl_version=resolve_ssl_version(None), + cert_reqs=resolve_cert_reqs(None), + ) + self.sock = ssl_wrap_socket( + sock=conn, + keyfile=self.key_file, + certfile=self.cert_file, + ssl_context=self.ssl_context, + ) + + +class VerifiedHTTPSConnection(HTTPSConnection): + """ + Based on httplib.HTTPSConnection but wraps the socket with + SSL certification. + """ + cert_reqs = None + ca_certs = None + ca_cert_dir = None + ssl_version = None + assert_fingerprint = None + + def set_cert( + self, + key_file=None, + cert_file=None, + cert_reqs=None, + ca_certs=None, + assert_hostname=None, + assert_fingerprint=None, + ca_cert_dir=None, + ): + """ + This method should only be called once, before the connection is used. + """ + # If cert_reqs is not provided, we can try to guess. If the user gave + # us a cert database, we assume they want to use it: otherwise, if + # they gave us an SSL Context object we should use whatever is set for + # it. 
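# [Editor's sketch -- not part of this diff.] The chunked transfer-encoding
# framing that request_chunked() above writes by hand: each chunk is its
# length in hex, CRLF, the payload, CRLF; a zero-length chunk terminates the
# body. Empty chunks are skipped because emitting one would end the body
# prematurely.
def frame_chunks(chunks):
    out = b''
    for chunk in chunks:
        if not chunk:
            continue
        out += hex(len(chunk))[2:].encode('utf-8') + b'\r\n' + chunk + b'\r\n'
    return out + b'0\r\n\r\n'

assert frame_chunks([b'hello', b'', b'world!']) == (
    b'5\r\nhello\r\n6\r\nworld!\r\n0\r\n\r\n'
)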
+ if cert_reqs is None: + if ca_certs or ca_cert_dir: + cert_reqs = 'CERT_REQUIRED' + elif self.ssl_context is not None: + cert_reqs = self.ssl_context.verify_mode + self.key_file = key_file + self.cert_file = cert_file + self.cert_reqs = cert_reqs + self.assert_hostname = assert_hostname + self.assert_fingerprint = assert_fingerprint + self.ca_certs = ca_certs and os.path.expanduser(ca_certs) + self.ca_cert_dir = ca_cert_dir and os.path.expanduser(ca_cert_dir) + + def connect(self): + # Add certificate verification + conn = self._new_conn() + hostname = self.host + if getattr(self, '_tunnel_host', None): + # _tunnel_host was added in Python 2.6.3 + # (See: http://hg.python.org/cpython/rev/0f57b30a152f) + self.sock = conn + # Calls self._set_hostport(), so self.host is + # self._tunnel_host below. + self._tunnel() + # Mark this connection as not reusable + self.auto_open = 0 + # Override the host with the one we're requesting data from. + hostname = self._tunnel_host + is_time_off = datetime.date.today() < RECENT_DATE + if is_time_off: + warnings.warn( + ( + 'System time is way off (before {0}). This will probably ' + 'lead to SSL verification errors' + ).format( + RECENT_DATE + ), + SystemTimeWarning, + ) + # Wrap socket using verification with the root certs in + # trusted_root_certs + if self.ssl_context is None: + self.ssl_context = create_urllib3_context( + ssl_version=resolve_ssl_version(self.ssl_version), + cert_reqs=resolve_cert_reqs(self.cert_reqs), + ) + context = self.ssl_context + context.verify_mode = resolve_cert_reqs(self.cert_reqs) + self.sock = ssl_wrap_socket( + sock=conn, + keyfile=self.key_file, + certfile=self.cert_file, + ca_certs=self.ca_certs, + ca_cert_dir=self.ca_cert_dir, + server_hostname=hostname, + ssl_context=context, + ) + if self.assert_fingerprint: + assert_fingerprint( + self.sock.getpeercert(binary_form=True), + self.assert_fingerprint, + ) + elif context.verify_mode != ssl.CERT_NONE and not getattr( + context, 'check_hostname', False + ) and self.assert_hostname is not False: + # While urllib3 attempts to always turn off hostname matching from + # the TLS library, this cannot always be done. So we check whether + # the TLS Library still thinks it's matching hostnames. + cert = self.sock.getpeercert() + if not cert.get('subjectAltName', ()): + warnings.warn( + ( + 'Certificate for {0} has no `subjectAltName`, falling back to check for a ' + '`commonName` for now. This feature is being removed by major browsers and ' + 'deprecated by RFC 2818. (See https://github.com/shazow/urllib3/issues/497 ' + 'for details.)'.format(hostname) + ), + SubjectAltNameWarning, + ) + _match_hostname(cert, self.assert_hostname or hostname) + self.is_verified = ( + context.verify_mode == ssl.CERT_REQUIRED or + self.assert_fingerprint is not None + ) + + +def _match_hostname(cert, asserted_hostname): + try: + match_hostname(cert, asserted_hostname) + except CertificateError as e: + log.error( + 'Certificate did not match expected hostname: %s. ' + 'Certificate: %s', + asserted_hostname, + cert, + ) + # Add cert to exception and reraise so client code can inspect + # the cert when catching the exception, if they want to + e._peer_cert = cert + raise + + +if ssl: + # Make a copy for testing. 
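# [Editor's sketch -- not part of this diff.] The verification predicate that
# connect() above ends with: a connection counts as verified when the context
# required certificates, or when a fingerprint was explicitly pinned.
import ssl

def connection_is_verified(verify_mode, pinned_fingerprint):
    return verify_mode == ssl.CERT_REQUIRED or pinned_fingerprint is not None

assert connection_is_verified(ssl.CERT_REQUIRED, None)
assert connection_is_verified(ssl.CERT_NONE, 'AA:BB:CC')   # pinned cert
assert not connection_is_verified(ssl.CERT_NONE, None)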
+ UnverifiedHTTPSConnection = HTTPSConnection + HTTPSConnection = VerifiedHTTPSConnection +else: + HTTPSConnection = DummyConnection diff --git a/requests3/core/http_manager/connectionpool.py b/requests3/core/http_manager/connectionpool.py new file mode 100644 index 00000000..7705c4d3 --- /dev/null +++ b/requests3/core/http_manager/connectionpool.py @@ -0,0 +1,13 @@ +from ._sync.connectionpool import ( + ConnectionPool, + HTTPConnectionPool, + HTTPSConnectionPool, + connection_from_url, +) + +__all__ = [ + 'ConnectionPool', + 'HTTPConnectionPool', + 'HTTPSConnectionPool', + 'connection_from_url', +] diff --git a/requests3/core/http_manager/contrib/__init__.py b/requests3/core/http_manager/contrib/__init__.py new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/requests3/core/http_manager/contrib/__init__.py diff --git a/requests3/core/http_manager/contrib/_securetransport/__init__.py b/requests3/core/http_manager/contrib/_securetransport/__init__.py new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/requests3/core/http_manager/contrib/_securetransport/__init__.py diff --git a/requests3/core/http_manager/contrib/_securetransport/bindings.py b/requests3/core/http_manager/contrib/_securetransport/bindings.py new file mode 100644 index 00000000..fbba2915 --- /dev/null +++ b/requests3/core/http_manager/contrib/_securetransport/bindings.py @@ -0,0 +1,417 @@ +""" +This module uses ctypes to bind a whole bunch of functions and constants from +SecureTransport. The goal here is to provide the low-level API to +SecureTransport. These are essentially the C-level functions and constants, and +they're pretty gross to work with. + +This code is a bastardised version of the code found in Will Bond's oscrypto +library. An enormous debt is owed to him for blazing this trail for us. For +that reason, this code should be considered to be covered both by urllib3's +license and by oscrypto's: + + Copyright (c) 2015-2016 Will Bond <will@wbond.net> + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the "Software"), + to deal in the Software without restriction, including without limitation + the rights to use, copy, modify, merge, publish, distribute, sublicense, + and/or sell copies of the Software, and to permit persons to whom the + Software is furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. 
+""" +from __future__ import absolute_import + +import platform +from ctypes.util import find_library +from ctypes import ( + c_void_p, + c_int32, + c_char_p, + c_size_t, + c_byte, + c_uint32, + c_ulong, + c_long, + c_bool, +) +from ctypes import CDLL, POINTER, CFUNCTYPE + +security_path = find_library('Security') +if not security_path: + raise ImportError('The library Security could not be found') + +core_foundation_path = find_library('CoreFoundation') +if not core_foundation_path: + raise ImportError('The library CoreFoundation could not be found') + +version = platform.mac_ver()[0] +version_info = tuple(map(int, version.split('.'))) +if version_info < (10, 8): + raise OSError( + 'Only OS X 10.8 and newer are supported, not %s.%s' % + (version_info[0], version_info[1]) + ) + +Security = CDLL(security_path, use_errno=True) +CoreFoundation = CDLL(core_foundation_path, use_errno=True) +Boolean = c_bool +CFIndex = c_long +CFStringEncoding = c_uint32 +CFData = c_void_p +CFString = c_void_p +CFArray = c_void_p +CFMutableArray = c_void_p +CFDictionary = c_void_p +CFError = c_void_p +CFType = c_void_p +CFTypeID = c_ulong +CFTypeRef = POINTER(CFType) +CFAllocatorRef = c_void_p +OSStatus = c_int32 +CFDataRef = POINTER(CFData) +CFStringRef = POINTER(CFString) +CFArrayRef = POINTER(CFArray) +CFMutableArrayRef = POINTER(CFMutableArray) +CFDictionaryRef = POINTER(CFDictionary) +CFArrayCallBacks = c_void_p +CFDictionaryKeyCallBacks = c_void_p +CFDictionaryValueCallBacks = c_void_p +SecCertificateRef = POINTER(c_void_p) +SecExternalFormat = c_uint32 +SecExternalItemType = c_uint32 +SecIdentityRef = POINTER(c_void_p) +SecItemImportExportFlags = c_uint32 +SecItemImportExportKeyParameters = c_void_p +SecKeychainRef = POINTER(c_void_p) +SSLProtocol = c_uint32 +SSLCipherSuite = c_uint32 +SSLContextRef = POINTER(c_void_p) +SecTrustRef = POINTER(c_void_p) +SSLConnectionRef = c_uint32 +SecTrustResultType = c_uint32 +SecTrustOptionFlags = c_uint32 +SSLProtocolSide = c_uint32 +SSLConnectionType = c_uint32 +SSLSessionOption = c_uint32 +try: + Security.SecItemImport.argtypes = [ + CFDataRef, + CFStringRef, + POINTER(SecExternalFormat), + POINTER(SecExternalItemType), + SecItemImportExportFlags, + POINTER(SecItemImportExportKeyParameters), + SecKeychainRef, + POINTER(CFArrayRef), + ] + Security.SecItemImport.restype = OSStatus + Security.SecCertificateGetTypeID.argtypes = [] + Security.SecCertificateGetTypeID.restype = CFTypeID + Security.SecIdentityGetTypeID.argtypes = [] + Security.SecIdentityGetTypeID.restype = CFTypeID + Security.SecKeyGetTypeID.argtypes = [] + Security.SecKeyGetTypeID.restype = CFTypeID + Security.SecCertificateCreateWithData.argtypes = [ + CFAllocatorRef, CFDataRef + ] + Security.SecCertificateCreateWithData.restype = SecCertificateRef + Security.SecCertificateCopyData.argtypes = [SecCertificateRef] + Security.SecCertificateCopyData.restype = CFDataRef + Security.SecCopyErrorMessageString.argtypes = [OSStatus, c_void_p] + Security.SecCopyErrorMessageString.restype = CFStringRef + Security.SecIdentityCreateWithCertificate.argtypes = [ + CFTypeRef, SecCertificateRef, POINTER(SecIdentityRef) + ] + Security.SecIdentityCreateWithCertificate.restype = OSStatus + Security.SecKeychainCreate.argtypes = [ + c_char_p, + c_uint32, + c_void_p, + Boolean, + c_void_p, + POINTER(SecKeychainRef), + ] + Security.SecKeychainCreate.restype = OSStatus + Security.SecKeychainDelete.argtypes = [SecKeychainRef] + Security.SecKeychainDelete.restype = OSStatus + Security.SecPKCS12Import.argtypes = [ + CFDataRef, 
CFDictionaryRef, POINTER(CFArrayRef) + ] + Security.SecPKCS12Import.restype = OSStatus + SSLReadFunc = CFUNCTYPE( + OSStatus, SSLConnectionRef, c_void_p, POINTER(c_size_t) + ) + SSLWriteFunc = CFUNCTYPE( + OSStatus, SSLConnectionRef, POINTER(c_byte), POINTER(c_size_t) + ) + Security.SSLSetIOFuncs.argtypes = [ + SSLContextRef, SSLReadFunc, SSLWriteFunc + ] + Security.SSLSetIOFuncs.restype = OSStatus + Security.SSLSetPeerID.argtypes = [SSLContextRef, c_char_p, c_size_t] + Security.SSLSetPeerID.restype = OSStatus + Security.SSLSetCertificate.argtypes = [SSLContextRef, CFArrayRef] + Security.SSLSetCertificate.restype = OSStatus + Security.SSLSetCertificateAuthorities.argtypes = [ + SSLContextRef, CFTypeRef, Boolean + ] + Security.SSLSetCertificateAuthorities.restype = OSStatus + Security.SSLSetConnection.argtypes = [SSLContextRef, SSLConnectionRef] + Security.SSLSetConnection.restype = OSStatus + Security.SSLSetPeerDomainName.argtypes = [ + SSLContextRef, c_char_p, c_size_t + ] + Security.SSLSetPeerDomainName.restype = OSStatus + Security.SSLHandshake.argtypes = [SSLContextRef] + Security.SSLHandshake.restype = OSStatus + Security.SSLRead.argtypes = [ + SSLContextRef, c_char_p, c_size_t, POINTER(c_size_t) + ] + Security.SSLRead.restype = OSStatus + Security.SSLWrite.argtypes = [ + SSLContextRef, c_char_p, c_size_t, POINTER(c_size_t) + ] + Security.SSLWrite.restype = OSStatus + Security.SSLClose.argtypes = [SSLContextRef] + Security.SSLClose.restype = OSStatus + Security.SSLGetNumberSupportedCiphers.argtypes = [ + SSLContextRef, POINTER(c_size_t) + ] + Security.SSLGetNumberSupportedCiphers.restype = OSStatus + Security.SSLGetSupportedCiphers.argtypes = [ + SSLContextRef, POINTER(SSLCipherSuite), POINTER(c_size_t) + ] + Security.SSLGetSupportedCiphers.restype = OSStatus + Security.SSLSetEnabledCiphers.argtypes = [ + SSLContextRef, POINTER(SSLCipherSuite), c_size_t + ] + Security.SSLSetEnabledCiphers.restype = OSStatus + Security.SSLGetNumberEnabledCiphers.argtype = [ + SSLContextRef, POINTER(c_size_t) + ] + Security.SSLGetNumberEnabledCiphers.restype = OSStatus + Security.SSLGetEnabledCiphers.argtypes = [ + SSLContextRef, POINTER(SSLCipherSuite), POINTER(c_size_t) + ] + Security.SSLGetEnabledCiphers.restype = OSStatus + Security.SSLGetNegotiatedCipher.argtypes = [ + SSLContextRef, POINTER(SSLCipherSuite) + ] + Security.SSLGetNegotiatedCipher.restype = OSStatus + Security.SSLGetNegotiatedProtocolVersion.argtypes = [ + SSLContextRef, POINTER(SSLProtocol) + ] + Security.SSLGetNegotiatedProtocolVersion.restype = OSStatus + Security.SSLCopyPeerTrust.argtypes = [SSLContextRef, POINTER(SecTrustRef)] + Security.SSLCopyPeerTrust.restype = OSStatus + Security.SecTrustSetAnchorCertificates.argtypes = [SecTrustRef, CFArrayRef] + Security.SecTrustSetAnchorCertificates.restype = OSStatus + Security.SecTrustSetAnchorCertificatesOnly.argstypes = [ + SecTrustRef, Boolean + ] + Security.SecTrustSetAnchorCertificatesOnly.restype = OSStatus + Security.SecTrustEvaluate.argtypes = [ + SecTrustRef, POINTER(SecTrustResultType) + ] + Security.SecTrustEvaluate.restype = OSStatus + Security.SecTrustGetCertificateCount.argtypes = [SecTrustRef] + Security.SecTrustGetCertificateCount.restype = CFIndex + Security.SecTrustGetCertificateAtIndex.argtypes = [SecTrustRef, CFIndex] + Security.SecTrustGetCertificateAtIndex.restype = SecCertificateRef + Security.SSLCreateContext.argtypes = [ + CFAllocatorRef, SSLProtocolSide, SSLConnectionType + ] + Security.SSLCreateContext.restype = SSLContextRef + 
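# [Editor's aside -- not part of this diff.] Every declaration above follows
# the same ctypes pattern: assign `argtypes`/`restype` so ctypes converts and
# checks arguments at call time. Note that two declarations in this block are
# misspelled (`argtype` on SSLGetNumberEnabledCiphers, `argstypes` on
# SecTrustSetAnchorCertificatesOnly); ctypes only honors the `argtypes`
# attribute, so those two functions are silently left undeclared. A minimal,
# generic example of the pattern against libc (assumes a Unix-like platform):
import ctypes
import ctypes.util

libc = ctypes.CDLL(ctypes.util.find_library('c'), use_errno=True)
libc.strlen.argtypes = [ctypes.c_char_p]   # declare the C signature...
libc.strlen.restype = ctypes.c_size_t      # ...so ctypes checks/converts
assert libc.strlen(b'urllib3') == 7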
Security.SSLSetSessionOption.argtypes = [ + SSLContextRef, SSLSessionOption, Boolean + ] + Security.SSLSetSessionOption.restype = OSStatus + Security.SSLSetProtocolVersionMin.argtypes = [SSLContextRef, SSLProtocol] + Security.SSLSetProtocolVersionMin.restype = OSStatus + Security.SSLSetProtocolVersionMax.argtypes = [SSLContextRef, SSLProtocol] + Security.SSLSetProtocolVersionMax.restype = OSStatus + Security.SecCopyErrorMessageString.argtypes = [OSStatus, c_void_p] + Security.SecCopyErrorMessageString.restype = CFStringRef + Security.SSLReadFunc = SSLReadFunc + Security.SSLWriteFunc = SSLWriteFunc + Security.SSLContextRef = SSLContextRef + Security.SSLProtocol = SSLProtocol + Security.SSLCipherSuite = SSLCipherSuite + Security.SecIdentityRef = SecIdentityRef + Security.SecKeychainRef = SecKeychainRef + Security.SecTrustRef = SecTrustRef + Security.SecTrustResultType = SecTrustResultType + Security.SecExternalFormat = SecExternalFormat + Security.OSStatus = OSStatus + Security.kSecImportExportPassphrase = CFStringRef.in_dll( + Security, 'kSecImportExportPassphrase' + ) + Security.kSecImportItemIdentity = CFStringRef.in_dll( + Security, 'kSecImportItemIdentity' + ) + # CoreFoundation time! + CoreFoundation.CFRetain.argtypes = [CFTypeRef] + CoreFoundation.CFRetain.restype = CFTypeRef + CoreFoundation.CFRelease.argtypes = [CFTypeRef] + CoreFoundation.CFRelease.restype = None + CoreFoundation.CFGetTypeID.argtypes = [CFTypeRef] + CoreFoundation.CFGetTypeID.restype = CFTypeID + CoreFoundation.CFStringCreateWithCString.argtypes = [ + CFAllocatorRef, c_char_p, CFStringEncoding + ] + CoreFoundation.CFStringCreateWithCString.restype = CFStringRef + CoreFoundation.CFStringGetCStringPtr.argtypes = [ + CFStringRef, CFStringEncoding + ] + CoreFoundation.CFStringGetCStringPtr.restype = c_char_p + CoreFoundation.CFStringGetCString.argtypes = [ + CFStringRef, c_char_p, CFIndex, CFStringEncoding + ] + CoreFoundation.CFStringGetCString.restype = c_bool + CoreFoundation.CFDataCreate.argtypes = [CFAllocatorRef, c_char_p, CFIndex] + CoreFoundation.CFDataCreate.restype = CFDataRef + CoreFoundation.CFDataGetLength.argtypes = [CFDataRef] + CoreFoundation.CFDataGetLength.restype = CFIndex + CoreFoundation.CFDataGetBytePtr.argtypes = [CFDataRef] + CoreFoundation.CFDataGetBytePtr.restype = c_void_p + CoreFoundation.CFDictionaryCreate.argtypes = [ + CFAllocatorRef, + POINTER(CFTypeRef), + POINTER(CFTypeRef), + CFIndex, + CFDictionaryKeyCallBacks, + CFDictionaryValueCallBacks, + ] + CoreFoundation.CFDictionaryCreate.restype = CFDictionaryRef + CoreFoundation.CFDictionaryGetValue.argtypes = [CFDictionaryRef, CFTypeRef] + CoreFoundation.CFDictionaryGetValue.restype = CFTypeRef + CoreFoundation.CFArrayCreate.argtypes = [ + CFAllocatorRef, POINTER(CFTypeRef), CFIndex, CFArrayCallBacks + ] + CoreFoundation.CFArrayCreate.restype = CFArrayRef + CoreFoundation.CFArrayCreateMutable.argtypes = [ + CFAllocatorRef, CFIndex, CFArrayCallBacks + ] + CoreFoundation.CFArrayCreateMutable.restype = CFMutableArrayRef + CoreFoundation.CFArrayAppendValue.argtypes = [CFMutableArrayRef, c_void_p] + CoreFoundation.CFArrayAppendValue.restype = None + CoreFoundation.CFArrayGetCount.argtypes = [CFArrayRef] + CoreFoundation.CFArrayGetCount.restype = CFIndex + CoreFoundation.CFArrayGetValueAtIndex.argtypes = [CFArrayRef, CFIndex] + CoreFoundation.CFArrayGetValueAtIndex.restype = c_void_p + CoreFoundation.kCFAllocatorDefault = CFAllocatorRef.in_dll( + CoreFoundation, 'kCFAllocatorDefault' + ) + CoreFoundation.kCFTypeArrayCallBacks = c_void_p.in_dll( 
+ CoreFoundation, 'kCFTypeArrayCallBacks' + ) + CoreFoundation.kCFTypeDictionaryKeyCallBacks = c_void_p.in_dll( + CoreFoundation, 'kCFTypeDictionaryKeyCallBacks' + ) + CoreFoundation.kCFTypeDictionaryValueCallBacks = c_void_p.in_dll( + CoreFoundation, 'kCFTypeDictionaryValueCallBacks' + ) + CoreFoundation.CFTypeRef = CFTypeRef + CoreFoundation.CFArrayRef = CFArrayRef + CoreFoundation.CFStringRef = CFStringRef + CoreFoundation.CFDictionaryRef = CFDictionaryRef +except (AttributeError): + raise ImportError('Error initializing ctypes') + + +class CFConst(object): + """ + A class object that acts as essentially a namespace for CoreFoundation + constants. + """ + kCFStringEncodingUTF8 = CFStringEncoding(0x08000100) + + +class SecurityConst(object): + """ + A class object that acts as essentially a namespace for Security constants. + """ + kSSLSessionOptionBreakOnServerAuth = 0 + kSSLProtocol2 = 1 + kSSLProtocol3 = 2 + kTLSProtocol1 = 4 + kTLSProtocol11 = 7 + kTLSProtocol12 = 8 + kSSLClientSide = 1 + kSSLStreamType = 0 + kSecFormatPEMSequence = 10 + kSecTrustResultInvalid = 0 + kSecTrustResultProceed = 1 + # This gap is present on purpose: this was kSecTrustResultConfirm, which + # is deprecated. + kSecTrustResultDeny = 3 + kSecTrustResultUnspecified = 4 + kSecTrustResultRecoverableTrustFailure = 5 + kSecTrustResultFatalTrustFailure = 6 + kSecTrustResultOtherError = 7 + errSSLProtocol = -9800 + errSSLWouldBlock = -9803 + errSSLClosedGraceful = -9805 + errSSLClosedNoNotify = -9816 + errSSLClosedAbort = -9806 + errSSLXCertChainInvalid = -9807 + errSSLCrypto = -9809 + errSSLInternal = -9810 + errSSLCertExpired = -9814 + errSSLCertNotYetValid = -9815 + errSSLUnknownRootCert = -9812 + errSSLNoRootCert = -9813 + errSSLHostNameMismatch = -9843 + errSSLPeerHandshakeFail = -9824 + errSSLPeerUserCancelled = -9839 + errSSLWeakPeerEphemeralDHKey = -9850 + errSSLServerAuthCompleted = -9841 + errSSLRecordOverflow = -9847 + errSecVerifyFailed = -67808 + errSecNoTrustSettings = -25263 + errSecItemNotFound = -25300 + errSecInvalidTrustSettings = -25262 + # Cipher suites. We only pick the ones our default cipher string allows. 
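# [Editor's sketch -- not part of this diff.] The errSSL*/errSec* constants
# above are raw OSStatus values returned by SecureTransport calls. A
# hypothetical sketch of turning a non-zero status into a readable SSLError,
# mirroring the _assert_no_error() helper defined later in this diff:
import ssl

_ERROR_NAMES = {
    -9803: 'errSSLWouldBlock',
    -9806: 'errSSLClosedAbort',
    -9843: 'errSSLHostNameMismatch',
}

def check_status(status):
    if status == 0:
        return
    raise ssl.SSLError(_ERROR_NAMES.get(status, 'OSStatus %d' % status))

check_status(0)                    # success: no exception
try:
    check_status(-9843)
except ssl.SSLError as exc:
    assert 'errSSLHostNameMismatch' in str(exc)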
+ TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384 = 0xC02C + TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 = 0xC030 + TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256 = 0xC02B + TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256 = 0xC02F + TLS_DHE_DSS_WITH_AES_256_GCM_SHA384 = 0x00A3 + TLS_DHE_RSA_WITH_AES_256_GCM_SHA384 = 0x009F + TLS_DHE_DSS_WITH_AES_128_GCM_SHA256 = 0x00A2 + TLS_DHE_RSA_WITH_AES_128_GCM_SHA256 = 0x009E + TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384 = 0xC024 + TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384 = 0xC028 + TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA = 0xC00A + TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA = 0xC014 + TLS_DHE_RSA_WITH_AES_256_CBC_SHA256 = 0x006B + TLS_DHE_DSS_WITH_AES_256_CBC_SHA256 = 0x006A + TLS_DHE_RSA_WITH_AES_256_CBC_SHA = 0x0039 + TLS_DHE_DSS_WITH_AES_256_CBC_SHA = 0x0038 + TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256 = 0xC023 + TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 = 0xC027 + TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA = 0xC009 + TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA = 0xC013 + TLS_DHE_RSA_WITH_AES_128_CBC_SHA256 = 0x0067 + TLS_DHE_DSS_WITH_AES_128_CBC_SHA256 = 0x0040 + TLS_DHE_RSA_WITH_AES_128_CBC_SHA = 0x0033 + TLS_DHE_DSS_WITH_AES_128_CBC_SHA = 0x0032 + TLS_RSA_WITH_AES_256_GCM_SHA384 = 0x009D + TLS_RSA_WITH_AES_128_GCM_SHA256 = 0x009C + TLS_RSA_WITH_AES_256_CBC_SHA256 = 0x003D + TLS_RSA_WITH_AES_128_CBC_SHA256 = 0x003C + TLS_RSA_WITH_AES_256_CBC_SHA = 0x0035 + TLS_RSA_WITH_AES_128_CBC_SHA = 0x002F + TLS_AES_128_GCM_SHA256 = 0x1301 + TLS_AES_256_GCM_SHA384 = 0x1302 + TLS_CHACHA20_POLY1305_SHA256 = 0x1303 diff --git a/requests3/core/http_manager/contrib/_securetransport/low_level.py b/requests3/core/http_manager/contrib/_securetransport/low_level.py new file mode 100644 index 00000000..3c7cee3e --- /dev/null +++ b/requests3/core/http_manager/contrib/_securetransport/low_level.py @@ -0,0 +1,313 @@ +""" +Low-level helpers for the SecureTransport bindings. + +These are Python functions that are not directly related to the high-level APIs +but are necessary to get them to work. They include a whole bunch of low-level +CoreFoundation messing about and memory management. The concerns in this module +are almost entirely about trying to avoid memory leaks and providing +appropriate and useful assistance to the higher-level code. +""" +import base64 +import ctypes +import itertools +import re +import os +import ssl +import tempfile + +from .bindings import Security, CoreFoundation, CFConst + +# This regular expression is used to grab PEM data out of a PEM bundle. +_PEM_CERTS_RE = re.compile( + b"-----BEGIN CERTIFICATE-----\n(.*?)\n-----END CERTIFICATE-----", re.DOTALL +) + + +def _cf_data_from_bytes(bytestring): + """ + Given a bytestring, create a CFData object from it. This CFData object must + be CFReleased by the caller. + """ + return CoreFoundation.CFDataCreate( + CoreFoundation.kCFAllocatorDefault, bytestring, len(bytestring) + ) + + +def _cf_dictionary_from_tuples(tuples): + """ + Given a list of Python tuples, create an associated CFDictionary. + """ + dictionary_size = len(tuples) + # We need to get the dictionary keys and values out in the same order. 
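# [Editor's sketch -- not part of this diff.] The _PEM_CERTS_RE pattern
# defined above, exercised in isolation: it pulls each base64 body out of a
# PEM bundle so the DER bytes can be handed to SecCertificateCreateWithData.
import base64
import re

pem_re = re.compile(
    b"-----BEGIN CERTIFICATE-----\n(.*?)\n-----END CERTIFICATE-----",
    re.DOTALL,
)
bundle = (
    b"-----BEGIN CERTIFICATE-----\nQUJD\n-----END CERTIFICATE-----\n"
    b"-----BEGIN CERTIFICATE-----\nREVG\n-----END CERTIFICATE-----\n"
)
bodies = [m.group(1) for m in pem_re.finditer(bundle)]
assert [base64.b64decode(b) for b in bodies] == [b'ABC', b'DEF']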
+ keys = (t[0] for t in tuples) + values = (t[1] for t in tuples) + cf_keys = (CoreFoundation.CFTypeRef * dictionary_size)(*keys) + cf_values = (CoreFoundation.CFTypeRef * dictionary_size)(*values) + return CoreFoundation.CFDictionaryCreate( + CoreFoundation.kCFAllocatorDefault, + cf_keys, + cf_values, + dictionary_size, + CoreFoundation.kCFTypeDictionaryKeyCallBacks, + CoreFoundation.kCFTypeDictionaryValueCallBacks, + ) + + +def _cf_string_to_unicode(value): + """ + Creates a Unicode string from a CFString object. Used entirely for error + reporting. + + Yes, it annoys me quite a lot that this function is this complex. + """ + value_as_void_p = ctypes.cast(value, ctypes.POINTER(ctypes.c_void_p)) + string = CoreFoundation.CFStringGetCStringPtr( + value_as_void_p, CFConst.kCFStringEncodingUTF8 + ) + if string is None: + buffer = ctypes.create_string_buffer(1024) + result = CoreFoundation.CFStringGetCString( + value_as_void_p, buffer, 1024, CFConst.kCFStringEncodingUTF8 + ) + if not result: + raise OSError('Error copying C string from CFStringRef') + + string = buffer.value + if string is not None: + string = string.decode('utf-8') + return string + + +def _assert_no_error(error, exception_class=None): + """ + Checks the return code and throws an exception if there is an error to + report + """ + if error == 0: + return + + cf_error_string = Security.SecCopyErrorMessageString(error, None) + output = _cf_string_to_unicode(cf_error_string) + CoreFoundation.CFRelease(cf_error_string) + if output is None or output == u'': + output = u'OSStatus %s' % error + if exception_class is None: + exception_class = ssl.SSLError + raise exception_class(output) + + +def _cert_array_from_pem(pem_bundle): + """ + Given a bundle of certs in PEM format, turns them into a CFArray of certs + that can be used to validate a cert chain. + """ + der_certs = [ + base64.b64decode(match.group(1)) + for match in _PEM_CERTS_RE.finditer(pem_bundle) + ] + if not der_certs: + raise ssl.SSLError("No root certificates specified") + + cert_array = CoreFoundation.CFArrayCreateMutable( + CoreFoundation.kCFAllocatorDefault, + 0, + ctypes.byref(CoreFoundation.kCFTypeArrayCallBacks), + ) + if not cert_array: + raise ssl.SSLError("Unable to allocate memory!") + + try: + for der_bytes in der_certs: + certdata = _cf_data_from_bytes(der_bytes) + if not certdata: + raise ssl.SSLError("Unable to allocate memory!") + + cert = Security.SecCertificateCreateWithData( + CoreFoundation.kCFAllocatorDefault, certdata + ) + CoreFoundation.CFRelease(certdata) + if not cert: + raise ssl.SSLError("Unable to build cert object!") + + CoreFoundation.CFArrayAppendValue(cert_array, cert) + CoreFoundation.CFRelease(cert) + except Exception: + # We need to free the array before the exception bubbles further. + # We only want to do that if an error occurs: otherwise, the caller + # should free. + CoreFoundation.CFRelease(cert_array) + return cert_array + + +def _is_cert(item): + """ + Returns True if a given CFTypeRef is a certificate. + """ + expected = Security.SecCertificateGetTypeID() + return CoreFoundation.CFGetTypeID(item) == expected + + +def _is_identity(item): + """ + Returns True if a given CFTypeRef is an identity. + """ + expected = Security.SecIdentityGetTypeID() + return CoreFoundation.CFGetTypeID(item) == expected + + +def _temporary_keychain(): + """ + This function creates a temporary Mac keychain that we can use to work with + credentials. This keychain uses a one-time password and a temporary file to + store the data. 
We expect to have one keychain per socket. The returned + SecKeychainRef must be freed by the caller, including calling + SecKeychainDelete. + + Returns a tuple of the SecKeychainRef and the path to the temporary + directory that contains it. + """ + # Unfortunately, SecKeychainCreate requires a path to a keychain. This + # means we cannot use mkstemp to use a generic temporary file. Instead, + # we're going to create a temporary directory and a filename to use there. + # This filename will be 8 random bytes expanded into base64. We also need + # some random bytes to password-protect the keychain we're creating, so we + # ask for 40 random bytes. + random_bytes = os.urandom(40) + filename = base64.b64encode(random_bytes[:8]).decode('utf-8') + password = base64.b64encode(random_bytes[8:]) # Must be valid UTF-8 + tempdirectory = tempfile.mkdtemp() + keychain_path = os.path.join(tempdirectory, filename).encode('utf-8') + # We now want to create the keychain itself. + keychain = Security.SecKeychainRef() + status = Security.SecKeychainCreate( + keychain_path, + len(password), + password, + False, + None, + ctypes.byref(keychain), + ) + _assert_no_error(status) + # Having created the keychain, we want to pass it off to the caller. + return keychain, tempdirectory + + +def _load_items_from_file(keychain, path): + """ + Given a single file, loads all the trust objects from it into arrays and + the keychain. + Returns a tuple of lists: the first list is a list of identities, the + second a list of certs. + """ + certificates = [] + identities = [] + result_array = None + with open(path, 'rb') as f: + raw_filedata = f.read() + try: + filedata = CoreFoundation.CFDataCreate( + CoreFoundation.kCFAllocatorDefault, raw_filedata, len(raw_filedata) + ) + result_array = CoreFoundation.CFArrayRef() + result = Security.SecItemImport( + filedata, # cert data + None, # Filename, leaving it out for now + None, # What the type of the file is, we don't care + None, # what's in the file, we don't care + 0, # import flags + None, # key params, can include passphrase in the future + keychain, # The keychain to insert into + ctypes.byref(result_array), # Results + ) + _assert_no_error(result) + # A CFArray is not very useful to us as an intermediary + # representation, so we are going to extract the objects we want + # and then free the array. We don't need to keep hold of keys: the + # keychain already has them! + result_count = CoreFoundation.CFArrayGetCount(result_array) + for index in range(result_count): + item = CoreFoundation.CFArrayGetValueAtIndex(result_array, index) + item = ctypes.cast(item, CoreFoundation.CFTypeRef) + if _is_cert(item): + CoreFoundation.CFRetain(item) + certificates.append(item) + elif _is_identity(item): + CoreFoundation.CFRetain(item) + identities.append(item) + finally: + if result_array: + CoreFoundation.CFRelease(result_array) + CoreFoundation.CFRelease(filedata) + return (identities, certificates) + + +def _load_client_cert_chain(keychain, *paths): + """ + Load certificates and maybe keys from a number of files. Has the end goal + of returning a CFArray containing one SecIdentityRef, and then zero or more + SecCertificateRef objects, suitable for use as a client certificate trust + chain. + """ + # Ok, the strategy. + # + # This relies on knowing that macOS will not give you a SecIdentityRef + # unless you have imported a key into a keychain. 
This is a somewhat + # artificial limitation of macOS (for example, it doesn't necessarily + # affect iOS), but there is nothing inside Security.framework that lets you + # get a SecIdentityRef without having a key in a keychain. + # + # So the policy here is we take all the files and iterate them in order. + # Each one will use SecItemImport to have one or more objects loaded from + # it. We will also point at a keychain that macOS can use to work with the + # private key. + # + # Once we have all the objects, we'll check what we actually have. If we + # already have a SecIdentityRef in hand, fab: we'll use that. Otherwise, + # we'll take the first certificate (which we assume to be our leaf) and + # ask the keychain to give us a SecIdentityRef with that cert's associated + # key. + # + # We'll then return a CFArray containing the trust chain: one + # SecIdentityRef and then zero-or-more SecCertificateRef objects. The + # responsibility for freeing this CFArray will be with the caller. This + # CFArray must remain alive for the entire connection, so in practice it + # will be stored with a single SSLSocket, along with the reference to the + # keychain. + certificates = [] + identities = [] + # Filter out bad paths. + paths = (path for path in paths if path) + try: + for file_path in paths: + new_identities, new_certs = _load_items_from_file( + keychain, file_path + ) + identities.extend(new_identities) + certificates.extend(new_certs) + # Ok, we have everything. The question is: do we have an identity? If + # not, we want to grab one from the first cert we have. + if not identities: + new_identity = Security.SecIdentityRef() + status = Security.SecIdentityCreateWithCertificate( + keychain, certificates[0], ctypes.byref(new_identity) + ) + _assert_no_error(status) + identities.append(new_identity) + # We now want to release the original certificate, as we no longer + # need it. + CoreFoundation.CFRelease(certificates.pop(0)) + # We now need to build a new CFArray that holds the trust chain. + trust_chain = CoreFoundation.CFArrayCreateMutable( + CoreFoundation.kCFAllocatorDefault, + 0, + ctypes.byref(CoreFoundation.kCFTypeArrayCallBacks), + ) + for item in itertools.chain(identities, certificates): + # ArrayAppendValue does a CFRetain on the item. That's fine, + # because the finally block will release our other refs to them. + CoreFoundation.CFArrayAppendValue(trust_chain, item) + return trust_chain + + finally: + for obj in itertools.chain(identities, certificates): + CoreFoundation.CFRelease(obj) diff --git a/requests3/core/http_manager/contrib/appengine.py b/requests3/core/http_manager/contrib/appengine.py new file mode 100644 index 00000000..62d58fb6 --- /dev/null +++ b/requests3/core/http_manager/contrib/appengine.py @@ -0,0 +1,332 @@ +""" +This module provides a pool manager that uses Google App Engine's +`URLFetch Service <https://cloud.google.com/appengine/docs/python/urlfetch>`_. + +Example usage:: + + from urllib3 import PoolManager + from urllib3.contrib.appengine import AppEngineManager, is_appengine_sandbox + + if is_appengine_sandbox(): + # AppEngineManager uses AppEngine's URLFetch API behind the scenes + http = AppEngineManager() + else: + # PoolManager uses a socket-level API behind the scenes + http = PoolManager() + + r = http.request('GET', 'https://google.com/') + +There are `limitations <https://cloud.google.com/appengine/docs/python/\ +urlfetch/#Python_Quotas_and_limits>`_ to the URLFetch service and it may not be +the best choice for your application. 
There are three options for using +urllib3 on Google App Engine: + +1. You can use :class:`AppEngineManager` with URLFetch. URLFetch is + cost-effective in many circumstances as long as your usage is within the + limitations. +2. You can use a normal :class:`~urllib3.PoolManager` by enabling sockets. + Sockets also have `limitations and restrictions + <https://cloud.google.com/appengine/docs/python/sockets/\ + #limitations-and-restrictions>`_ and have a lower free quota than URLFetch. + To use sockets, be sure to specify the following in your ``app.yaml``:: + + env_variables: + GAE_USE_SOCKETS_HTTPLIB : 'true' + +3. If you are using `App Engine Flexible +<https://cloud.google.com/appengine/docs/flexible/>`_, you can use the standard +:class:`PoolManager` without any configuration or special environment variables. +""" + +from __future__ import absolute_import +import logging +import os +import warnings +from ..packages.six.moves.urllib.parse import urljoin + +from ..exceptions import ( + HTTPError, + HTTPWarning, + MaxRetryError, + ProtocolError, + TimeoutError, + SSLError, +) + +from ..packages.six import BytesIO +from ..request import RequestMethods +from ..response import HTTPResponse +from ..util.timeout import Timeout +from ..util.retry import Retry + +try: + from google.appengine.api import urlfetch +except ImportError: + urlfetch = None +log = logging.getLogger(__name__) + + +class AppEnginePlatformWarning(HTTPWarning): + pass + + +class AppEnginePlatformError(HTTPError): + pass + + +class AppEngineManager(RequestMethods): + """ + Connection manager for Google App Engine sandbox applications. + + This manager uses the URLFetch service directly instead of using the + emulated httplib, and is subject to URLFetch limitations as described in + the App Engine documentation `here + <https://cloud.google.com/appengine/docs/python/urlfetch>`_. + + Notably it will raise an :class:`AppEnginePlatformError` if: + * URLFetch is not available. + * If you attempt to use this on App Engine Flexible, as full socket + support is available. + * If a request size is more than 10 megabytes. + * If a response size is more than 32 megabtyes. + * If you use an unsupported request method such as OPTIONS. + + Beyond those cases, it will raise normal urllib3 errors. + """ + + def __init__( + self, + headers=None, + retries=None, + validate_certificate=True, + urlfetch_retries=True, + ): + if not urlfetch: + raise AppEnginePlatformError( + "URLFetch is not available in this environment." + ) + + if is_prod_appengine_mvms(): + raise AppEnginePlatformError( + "Use normal urllib3.PoolManager instead of AppEngineManager" + "on Managed VMs, as using URLFetch is not necessary in " + "this environment." + ) + + warnings.warn( + "urllib3 is using URLFetch on Google App Engine sandbox instead " + "of sockets. 
To use sockets directly instead of URLFetch see " + "https://urllib3.readthedocs.io/en/latest/reference/urllib3.contrib.html.", + AppEnginePlatformWarning, + ) + RequestMethods.__init__(self, headers) + self.validate_certificate = validate_certificate + self.urlfetch_retries = urlfetch_retries + self.retries = retries or Retry.DEFAULT + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + # Return False to re-raise any potential exceptions + return False + + def urlopen( + self, + method, + url, + body=None, + headers=None, + retries=None, + redirect=True, + timeout=Timeout.DEFAULT_TIMEOUT, + **response_kw + ): + retries = self._get_retries(retries, redirect) + try: + follow_redirects = ( + redirect and retries.redirect != 0 and retries.total + ) + response = urlfetch.fetch( + url, + payload=body, + method=method, + headers=headers or {}, + allow_truncated=False, + follow_redirects=self.urlfetch_retries and follow_redirects, + deadline=self._get_absolute_timeout(timeout), + validate_certificate=self.validate_certificate, + ) + except urlfetch.DeadlineExceededError as e: + raise TimeoutError(self, e) + + except urlfetch.InvalidURLError as e: + if 'too large' in str(e): + raise AppEnginePlatformError( + "URLFetch request too large, URLFetch only " + "supports requests up to 10mb in size.", + e, + ) + + raise ProtocolError(e) + + except urlfetch.DownloadError as e: + if 'Too many redirects' in str(e): + raise MaxRetryError(self, url, reason=e) + + raise ProtocolError(e) + + except urlfetch.ResponseTooLargeError as e: + raise AppEnginePlatformError( + "URLFetch response too large, URLFetch only supports" + "responses up to 32mb in size.", + e, + ) + + except urlfetch.SSLCertificateError as e: + raise SSLError(e) + + except urlfetch.InvalidMethodError as e: + raise AppEnginePlatformError( + "URLFetch does not support method: %s" % method, e + ) + + http_response = self._urlfetch_response_to_http_response( + response, retries=retries, **response_kw + ) + # Handle redirect? + redirect_location = redirect and http_response.get_redirect_location() + if redirect_location: + # Check for redirect response + if (self.urlfetch_retries and retries.raise_on_redirect): + raise MaxRetryError(self, url, "too many redirects") + + else: + if http_response.status == 303: + method = 'GET' + try: + retries = retries.increment( + method, url, response=http_response, _pool=self + ) + except MaxRetryError: + if retries.raise_on_redirect: + raise MaxRetryError(self, url, "too many redirects") + + return http_response + + retries.sleep_for_retry(http_response) + log.debug("Redirecting %s -> %s", url, redirect_location) + redirect_url = urljoin(url, redirect_location) + return self.urlopen( + method, + redirect_url, + body, + headers, + retries=retries, + redirect=redirect, + timeout=timeout, + **response_kw + ) + + # Check if we should retry the HTTP response. 
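# [Editor's sketch -- not part of this diff.] The redirect rule applied just
# above: a 303 See Other downgrades the follow-up request to GET, while other
# redirect codes re-use the original method.
def redirect_method(original_method, status):
    return 'GET' if status == 303 else original_method

assert redirect_method('POST', 303) == 'GET'
assert redirect_method('POST', 307) == 'POST'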
+ has_retry_after = bool(http_response.getheader('Retry-After')) + if retries.is_retry(method, http_response.status, has_retry_after): + retries = retries.increment( + method, url, response=http_response, _pool=self + ) + log.debug("Retry: %s", url) + retries.sleep(http_response) + return self.urlopen( + method, + url, + body=body, + headers=headers, + retries=retries, + redirect=redirect, + timeout=timeout, + **response_kw + ) + + return http_response + + def _urlfetch_response_to_http_response( + self, urlfetch_resp, **response_kw + ): + if is_prod_appengine(): + # Production GAE handles deflate encoding automatically, but does + # not remove the encoding header. + content_encoding = urlfetch_resp.headers.get('content-encoding') + if content_encoding == 'deflate': + del urlfetch_resp.headers['content-encoding'] + transfer_encoding = urlfetch_resp.headers.get('transfer-encoding') + # We have a full response's content, + # so let's make sure we don't report ourselves as chunked data. + if transfer_encoding == 'chunked': + encodings = transfer_encoding.split(",") + encodings.remove('chunked') + urlfetch_resp.headers['transfer-encoding'] = ','.join(encodings) + return HTTPResponse( + # In order for decoding to work, we must present the content as + # a file-like object. + body=BytesIO(urlfetch_resp.content), + headers=urlfetch_resp.headers, + status=urlfetch_resp.status_code, + **response_kw + ) + + def _get_absolute_timeout(self, timeout): + if timeout is Timeout.DEFAULT_TIMEOUT: + return None # Defer to URLFetch's default. + + if isinstance(timeout, Timeout): + if timeout._read is not None or timeout._connect is not None: + warnings.warn( + "URLFetch does not support granular timeout settings, " + "reverting to total or default URLFetch timeout.", + AppEnginePlatformWarning, + ) + return timeout.total + + return timeout + + def _get_retries(self, retries, redirect): + if not isinstance(retries, Retry): + retries = Retry.from_int( + retries, redirect=redirect, default=self.retries + ) + if retries.connect or retries.read or retries.redirect: + warnings.warn( + "URLFetch only supports total retries and does not " + "recognize connect, read, or redirect retry parameters.", + AppEnginePlatformWarning, + ) + return retries + + +def is_appengine(): + return ( + is_local_appengine() or is_prod_appengine() or is_prod_appengine_mvms() + ) + + +def is_appengine_sandbox(): + return is_appengine() and not is_prod_appengine_mvms() + + +def is_local_appengine(): + return ( + 'APPENGINE_RUNTIME' in os.environ and + 'Development/' in os.environ['SERVER_SOFTWARE'] + ) + + +def is_prod_appengine(): + return ( + 'APPENGINE_RUNTIME' in os.environ and + 'Google App Engine/' in os.environ['SERVER_SOFTWARE'] and + not is_prod_appengine_mvms() + ) + + +def is_prod_appengine_mvms(): + return os.environ.get('GAE_VM', False) == 'true' diff --git a/requests3/core/http_manager/contrib/pyopenssl.py b/requests3/core/http_manager/contrib/pyopenssl.py new file mode 100644 index 00000000..c7884b0c --- /dev/null +++ b/requests3/core/http_manager/contrib/pyopenssl.py @@ -0,0 +1,485 @@ +""" +SSL with SNI_-support for Python 2. Follow these instructions if you would +like to verify SSL certificates in Python 2. Note, the default libraries do +*not* do certificate checking; you need to do additional work to validate +certificates yourself. 
+ +This needs the following packages installed: + +* pyOpenSSL (tested with 16.0.0) +* cryptography (minimum 1.3.4, from pyopenssl) +* idna (minimum 2.0, from cryptography) + +However, pyopenssl depends on cryptography, which depends on idna, so while we +use all three directly here we end up having relatively few packages required. + +You can install them with the following command: + + pip install pyopenssl cryptography idna + +To activate certificate checking, call +:func:`~urllib3.contrib.pyopenssl.inject_into_urllib3` from your Python code +before you begin making HTTP requests. This can be done in a ``sitecustomize`` +module, or at any other time before your application begins using ``urllib3``, +like this:: + + try: + import urllib3.contrib.pyopenssl + urllib3.contrib.pyopenssl.inject_into_urllib3() + except ImportError: + pass + +Now you can use :mod:`urllib3` as you normally would, and it will support SNI +when the required modules are installed. + +Activating this module also has the positive side effect of disabling SSL/TLS +compression in Python 2 (see `CRIME attack`_). + +If you want to configure the default list of supported cipher suites, you can +set the ``urllib3.contrib.pyopenssl.DEFAULT_SSL_CIPHER_LIST`` variable. + +.. _sni: https://en.wikipedia.org/wiki/Server_Name_Indication +.. _crime attack: https://en.wikipedia.org/wiki/CRIME_(security_exploit) +""" +from __future__ import absolute_import + +import OpenSSL.SSL +from cryptography import x509 +from cryptography.hazmat.backends.openssl import backend as openssl_backend +from cryptography.hazmat.backends.openssl.x509 import _Certificate + +from socket import timeout, error as SocketError +from io import BytesIO + +try: # Platform-specific: Python 2 + from socket import _fileobject +except ImportError: # Platform-specific: Python 3 + _fileobject = None + from ..packages.backports.makefile import backport_makefile +import logging +import ssl +from ..packages import six +import sys + +from .. import util + +__all__ = ['inject_into_urllib3', 'extract_from_urllib3'] +# SNI always works. +HAS_SNI = True +# Map from urllib3 to PyOpenSSL compatible parameter-values. +_openssl_versions = { + ssl.PROTOCOL_SSLv23: OpenSSL.SSL.SSLv23_METHOD, + ssl.PROTOCOL_TLSv1: OpenSSL.SSL.TLSv1_METHOD, +} +if hasattr(ssl, 'PROTOCOL_TLSv1_1') and hasattr(OpenSSL.SSL, 'TLSv1_1_METHOD'): + _openssl_versions[ssl.PROTOCOL_TLSv1_1] = OpenSSL.SSL.TLSv1_1_METHOD +if hasattr(ssl, 'PROTOCOL_TLSv1_2') and hasattr(OpenSSL.SSL, 'TLSv1_2_METHOD'): + _openssl_versions[ssl.PROTOCOL_TLSv1_2] = OpenSSL.SSL.TLSv1_2_METHOD +try: + _openssl_versions.update({ssl.PROTOCOL_SSLv3: OpenSSL.SSL.SSLv3_METHOD}) +except AttributeError: + pass +_stdlib_to_openssl_verify = { + ssl.CERT_NONE: OpenSSL.SSL.VERIFY_NONE, + ssl.CERT_OPTIONAL: OpenSSL.SSL.VERIFY_PEER, + ssl.CERT_REQUIRED: OpenSSL.SSL.VERIFY_PEER + + OpenSSL.SSL.VERIFY_FAIL_IF_NO_PEER_CERT, +} +_openssl_to_stdlib_verify = dict( + (v, k) for k, v in _stdlib_to_openssl_verify.items() +) +# OpenSSL will only write 16K at a time +SSL_WRITE_BLOCKSIZE = 16384 +orig_util_HAS_SNI = util.HAS_SNI +orig_util_SSLContext = util.ssl_.SSLContext +log = logging.getLogger(__name__) + + +def inject_into_urllib3(): + 'Monkey-patch urllib3 with PyOpenSSL-backed SSL-support.' 
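# [Editor's aside -- not part of this diff.] inject_into_urllib3() here and
# extract_from_urllib3() below follow a plain save-and-restore monkey-patching
# pattern: the originals are captured at import time (orig_util_HAS_SNI and
# orig_util_SSLContext above) so the patch can be undone. A generic,
# self-contained sketch of the same idea -- the patched module and the
# replacement used here are purely illustrative:
import math

_orig_sqrt = math.sqrt               # save the original at import time

def inject():
    math.sqrt = lambda x: 42.0       # swap in the replacement

def extract():
    math.sqrt = _orig_sqrt           # undo the patch

inject()
assert math.sqrt(4) == 42.0
extract()
assert math.sqrt(4) == 2.0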
+ _validate_dependencies_met() + util.ssl_.SSLContext = PyOpenSSLContext + util.HAS_SNI = HAS_SNI + util.ssl_.HAS_SNI = HAS_SNI + util.IS_PYOPENSSL = True + util.ssl_.IS_PYOPENSSL = True + + +def extract_from_urllib3(): + 'Undo monkey-patching by :func:`inject_into_urllib3`.' + util.ssl_.SSLContext = orig_util_SSLContext + util.HAS_SNI = orig_util_HAS_SNI + util.ssl_.HAS_SNI = orig_util_HAS_SNI + util.IS_PYOPENSSL = False + util.ssl_.IS_PYOPENSSL = False + + +def _validate_dependencies_met(): + """ + Verifies that PyOpenSSL's package-level dependencies have been met. + Throws `ImportError` if they are not met. + """ + # Method added in `cryptography==1.1`; not available in older versions + from cryptography.x509.extensions import Extensions + + if getattr(Extensions, "get_extension_for_class", None) is None: + raise ImportError( + "'cryptography' module missing required functionality. " + "Try upgrading to v1.3.4 or newer." + ) + + # pyOpenSSL 0.14 and above use cryptography for OpenSSL bindings. The _x509 + # attribute is only present on those versions. + from OpenSSL.crypto import X509 + + x509 = X509() + if getattr(x509, "_x509", None) is None: + raise ImportError( + "'pyOpenSSL' module missing required functionality. " + "Try upgrading to v0.14 or newer." + ) + + +def _dnsname_to_stdlib(name): + """ + Converts a dNSName SubjectAlternativeName field to the form used by the + standard library on the given Python version. + + Cryptography produces a dNSName as a unicode string that was idna-decoded + from ASCII bytes. We need to idna-encode that string to get it back, and + then on Python 3 we also need to convert to unicode via UTF-8 (the stdlib + uses PyUnicode_FromStringAndSize on it, which decodes via UTF-8). + """ + + def idna_encode(name): + """ + Borrowed wholesale from the Python Cryptography Project. It turns out + that we can't just safely call `idna.encode`: it can explode for + wildcard names. This avoids that problem. + """ + import idna + + for prefix in [u'*.', u'.']: + if name.startswith(prefix): + name = name[len(prefix):] + return prefix.encode('ascii') + idna.encode(name) + + return idna.encode(name) + + name = idna_encode(name) + if sys.version_info >= (3, 0): + name = name.decode('utf-8') + return name + + +def get_subj_alt_name(peer_cert): + """ + Given an PyOpenSSL certificate, provides all the subject alternative names. + """ + # Pass the cert to cryptography, which has much better APIs for this. + if hasattr(peer_cert, "to_cryptography"): + cert = peer_cert.to_cryptography() + else: + # This is technically using private APIs, but should work across all + # relevant versions before PyOpenSSL got a proper API for this. + cert = _Certificate(openssl_backend, peer_cert._x509) + # We want to find the SAN extension. Ask Cryptography to locate it (it's + # faster than looping in Python) + try: + ext = cert.extensions.get_extension_for_class( + x509.SubjectAlternativeName + ).value + except x509.ExtensionNotFound: + # No such extension, return the empty list. + return [] + + except ( + x509.DuplicateExtension, + x509.UnsupportedExtension, + x509.UnsupportedGeneralNameType, + UnicodeError, + ) as e: + # A problem has been found with the quality of the certificate. Assume + # no SAN field is present. + log.warning( + "A problem was encountered with the certificate that prevented " + "urllib3 from finding the SubjectAlternativeName field. This can " + "affect certificate validation. The error was %s", + e, + ) + return [] + + # We want to return dNSName and iPAddress fields. 
We need to cast the IPs + # back to strings because the match_hostname function wants them as + # strings. + # Sadly the DNS names need to be idna encoded and then, on Python 3, UTF-8 + # decoded. This is pretty frustrating, but that's what the standard library + # does with certificates, and so we need to attempt to do the same. + names = [ + ('DNS', _dnsname_to_stdlib(name)) + for name in ext.get_values_for_type(x509.DNSName) + ] + names.extend( + ('IP Address', str(name)) + for name in ext.get_values_for_type(x509.IPAddress) + ) + return names + + +class WrappedSocket(object): + '''API-compatibility wrapper for Python OpenSSL's Connection-class. + + Note: _makefile_refs, _drop() and _reuse() are needed for the garbage + collector of pypy. + ''' + + def __init__(self, connection, socket, suppress_ragged_eofs=True): + self.connection = connection + self.socket = socket + self.suppress_ragged_eofs = suppress_ragged_eofs + self._makefile_refs = 0 + self._closed = False + + def fileno(self): + return self.socket.fileno() + + + # Copy-pasted from Python 3.5 source code + def _decref_socketios(self): + if self._makefile_refs > 0: + self._makefile_refs -= 1 + if self._closed: + self.close() + + def recv(self, *args, **kwargs): + try: + data = self.connection.recv(*args, **kwargs) + except OpenSSL.SSL.SysCallError as e: + if self.suppress_ragged_eofs and e.args == (-1, 'Unexpected EOF'): + return b'' + + else: + raise SocketError(str(e)) + + except OpenSSL.SSL.ZeroReturnError as e: + if self.connection.get_shutdown() == OpenSSL.SSL.RECEIVED_SHUTDOWN: + return b'' + + else: + raise + + except OpenSSL.SSL.WantReadError: + rd = util.wait_for_read(self.socket, self.socket.gettimeout()) + if not rd: + raise timeout('The read operation timed out') + + else: + return self.recv(*args, **kwargs) + + else: + return data + + def recv_into(self, *args, **kwargs): + try: + return self.connection.recv_into(*args, **kwargs) + + except OpenSSL.SSL.SysCallError as e: + if self.suppress_ragged_eofs and e.args == (-1, 'Unexpected EOF'): + return 0 + + else: + raise SocketError(str(e)) + + except OpenSSL.SSL.ZeroReturnError as e: + if self.connection.get_shutdown() == OpenSSL.SSL.RECEIVED_SHUTDOWN: + return 0 + + else: + raise + + except OpenSSL.SSL.WantReadError: + rd = util.wait_for_read(self.socket, self.socket.gettimeout()) + if not rd: + raise timeout('The read operation timed out') + + else: + return self.recv_into(*args, **kwargs) + + def settimeout(self, timeout): + return self.socket.settimeout(timeout) + + def _send_until_done(self, data): + while True: + try: + return self.connection.send(data) + + except OpenSSL.SSL.WantWriteError: + wr = util.wait_for_write(self.socket, self.socket.gettimeout()) + if not wr: + raise timeout() + + continue + + except OpenSSL.SSL.SysCallError as e: + raise SocketError(str(e)) + + def send(self, data): + return self._send_until_done(data) + + def sendall(self, data): + total_sent = 0 + while total_sent < len(data): + sent = self._send_until_done( + data[total_sent:total_sent + SSL_WRITE_BLOCKSIZE] + ) + total_sent += sent + + def shutdown(self): + # FIXME rethrow compatible exceptions should we ever use this + self.connection.shutdown() + + def close(self): + if self._makefile_refs < 1: + try: + self._closed = True + return self.connection.close() + + except OpenSSL.SSL.Error: + return + + else: + self._makefile_refs -= 1 + + def getpeercert(self, binary_form=False): + x509 = self.connection.get_peer_certificate() + if not x509: + return x509 + + if binary_form: + return 
OpenSSL.crypto.dump_certificate(
+                OpenSSL.crypto.FILETYPE_ASN1, x509
+            )
+
+        return {
+            'subject': ((('commonName', x509.get_subject().CN),),),
+            'subjectAltName': get_subj_alt_name(x509),
+        }
+
+    def setblocking(self, flag):
+        return self.connection.setblocking(flag)
+
+    def _reuse(self):
+        self._makefile_refs += 1
+
+    def _drop(self):
+        if self._makefile_refs < 1:
+            self.close()
+        else:
+            self._makefile_refs -= 1
+
+
+if _fileobject:  # Platform-specific: Python 2
+
+    def makefile(self, mode, bufsize=-1):
+        self._makefile_refs += 1
+        return _fileobject(self, mode, bufsize, close=True)
+
+
+else:  # Platform-specific: Python 3
+    makefile = backport_makefile
+WrappedSocket.makefile = makefile
+
+
+class PyOpenSSLContext(object):
+    """
+    I am a wrapper class for the PyOpenSSL ``Context`` object. I am responsible
+    for translating the interface of the standard library ``SSLContext`` object
+    to calls into PyOpenSSL.
+    """
+
+    def __init__(self, protocol):
+        self.protocol = _openssl_versions[protocol]
+        self._ctx = OpenSSL.SSL.Context(self.protocol)
+        self._options = 0
+        self.check_hostname = False
+
+    @property
+    def options(self):
+        return self._options
+
+    @options.setter
+    def options(self, value):
+        self._options = value
+        self._ctx.set_options(value)
+
+    @property
+    def verify_mode(self):
+        return _openssl_to_stdlib_verify[self._ctx.get_verify_mode()]
+
+    @verify_mode.setter
+    def verify_mode(self, value):
+        self._ctx.set_verify(
+            _stdlib_to_openssl_verify[value], _verify_callback
+        )
+
+    def set_default_verify_paths(self):
+        self._ctx.set_default_verify_paths()
+
+    def set_ciphers(self, ciphers):
+        if isinstance(ciphers, six.text_type):
+            ciphers = ciphers.encode('utf-8')
+        self._ctx.set_cipher_list(ciphers)
+
+    def load_verify_locations(self, cafile=None, capath=None, cadata=None):
+        if cafile is not None:
+            cafile = cafile.encode('utf-8')
+        if capath is not None:
+            capath = capath.encode('utf-8')
+        self._ctx.load_verify_locations(cafile, capath)
+        if cadata is not None:
+            self._ctx.load_verify_locations(BytesIO(cadata))
+
+    def load_cert_chain(self, certfile, keyfile=None, password=None):
+        self._ctx.use_certificate_chain_file(certfile)
+        if password is not None:
+            self._ctx.set_passwd_cb(
+                lambda max_length, prompt_twice, userdata: password
+            )
+        self._ctx.use_privatekey_file(keyfile or certfile)
+
+    def wrap_socket(
+        self,
+        sock,
+        server_side=False,
+        do_handshake_on_connect=True,
+        suppress_ragged_eofs=True,
+        server_hostname=None,
+    ):
+        cnx = OpenSSL.SSL.Connection(self._ctx, sock)
+        if isinstance(
+            server_hostname, six.text_type
+        ):  # Platform-specific: Python 3
+            server_hostname = server_hostname.encode('utf-8')
+        if server_hostname is not None:
+            cnx.set_tlsext_host_name(server_hostname)
+        cnx.set_connect_state()
+        while True:
+            try:
+                cnx.do_handshake()
+            except OpenSSL.SSL.WantReadError:
+                rd = util.wait_for_read(sock, sock.gettimeout())
+                if not rd:
+                    raise timeout('select timed out')
+
+                continue
+
+            except OpenSSL.SSL.Error as e:
+                raise ssl.SSLError('bad handshake: %r' % e)
+
+            break
+
+        return WrappedSocket(cnx, sock)
+
+
+def _verify_callback(cnx, x509, err_no, err_depth, return_code):
+    return err_no == 0
diff --git a/requests3/core/http_manager/contrib/securetransport.py b/requests3/core/http_manager/contrib/securetransport.py
new file mode 100644
index 00000000..4a92ad75
--- /dev/null
+++ b/requests3/core/http_manager/contrib/securetransport.py
@@ -0,0 +1,807 @@
+"""
+SecureTransport support for urllib3 via ctypes.
+
+This makes platform-native TLS available to urllib3 users on macOS without the
+use of a compiler. This is an important feature because the Python Package
+Index is moving to become a TLSv1.2-or-higher server, and the default OpenSSL
+that ships with macOS is not capable of doing TLSv1.2. The only way to resolve
+this is to give macOS users an alternative solution to the problem, and that
+solution is to use SecureTransport.
+
+We use ctypes here because this solution must not require a compiler. That's
+because pip is not allowed to require a compiler either.
+
+This is not intended to be a seriously long-term solution to this problem.
+The hope is that PEP 543 will eventually solve this issue for us, at which
+point we can retire this contrib module. But in the short term, we need to
+solve the impending tire fire that is Python on Mac without this kind of
+contrib module. So...here we are.
+
+To use this module, simply import and inject it::
+
+    import urllib3.contrib.securetransport
+    urllib3.contrib.securetransport.inject_into_urllib3()
+
+Happy TLSing!
+"""
+from __future__ import absolute_import
+
+import contextlib
+import ctypes
+import errno
+import os.path
+import shutil
+import socket
+import ssl
+import threading
+import weakref
+
+from .. import util
+from ._securetransport.bindings import (
+    Security, SecurityConst, CoreFoundation
+)
+from ._securetransport.low_level import (
+    _assert_no_error,
+    _cert_array_from_pem,
+    _temporary_keychain,
+    _load_client_cert_chain,
+)
+
+try:  # Platform-specific: Python 2
+    from socket import _fileobject
+except ImportError:  # Platform-specific: Python 3
+    _fileobject = None
+    from ..packages.backports.makefile import backport_makefile
+try:
+    memoryview(b'')
+except NameError:
+    raise ImportError("SecureTransport only works on Pythons with memoryview")
+
+__all__ = ['inject_into_urllib3', 'extract_from_urllib3']
+# SNI always works
+HAS_SNI = True
+orig_util_HAS_SNI = util.HAS_SNI
+orig_util_SSLContext = util.ssl_.SSLContext
+# This dictionary is used by the read callback to obtain a handle to the
+# calling wrapped socket. This is a pretty silly approach, but for now it'll
+# do. I feel like I should be able to smuggle a handle to the wrapped socket
+# directly in the SSLConnectionRef, but for now this approach will work I
+# guess.
+#
+# We need to lock around this structure for inserts, but we don't do it for
+# reads/writes in the callbacks. The reasoning here goes as follows:
+#
+#    1. It is not possible to call into the callbacks before the dictionary is
+#       populated, so once in the callback the id must be in the dictionary.
+#    2. The callbacks don't mutate the dictionary, they only read from it, and
+#       so cannot conflict with any of the insertions.
+#
+# This is good: if we had to lock in the callbacks we'd drastically slow down
+# the performance of this code.
+_connection_refs = weakref.WeakValueDictionary()
+_connection_ref_lock = threading.Lock()
+# Limit writes to 16kB. This is OpenSSL's limit, but we'll cargo-cult it over
+# for no better reason than we need *a* limit, and this one is right there.
+SSL_WRITE_BLOCKSIZE = 16384
+# This is our equivalent of util.ssl_.DEFAULT_CIPHERS, but expanded out to
+# individual cipher suites. We need to do this because this is how
+# SecureTransport wants them.
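+# (Roughly ordered by preference: TLS 1.3 suites first, then ECDHE and DHE
+# suites, with plain-RSA key-exchange suites as a last resort.)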
+CIPHER_SUITES = [ + SecurityConst.TLS_AES_256_GCM_SHA384, + SecurityConst.TLS_CHACHA20_POLY1305_SHA256, + SecurityConst.TLS_AES_128_GCM_SHA256, + SecurityConst.TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384, + SecurityConst.TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384, + SecurityConst.TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256, + SecurityConst.TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256, + SecurityConst.TLS_DHE_DSS_WITH_AES_256_GCM_SHA384, + SecurityConst.TLS_DHE_RSA_WITH_AES_256_GCM_SHA384, + SecurityConst.TLS_DHE_DSS_WITH_AES_128_GCM_SHA256, + SecurityConst.TLS_DHE_RSA_WITH_AES_128_GCM_SHA256, + SecurityConst.TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384, + SecurityConst.TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384, + SecurityConst.TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA, + SecurityConst.TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA, + SecurityConst.TLS_DHE_RSA_WITH_AES_256_CBC_SHA256, + SecurityConst.TLS_DHE_DSS_WITH_AES_256_CBC_SHA256, + SecurityConst.TLS_DHE_RSA_WITH_AES_256_CBC_SHA, + SecurityConst.TLS_DHE_DSS_WITH_AES_256_CBC_SHA, + SecurityConst.TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256, + SecurityConst.TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256, + SecurityConst.TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA, + SecurityConst.TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA, + SecurityConst.TLS_DHE_RSA_WITH_AES_128_CBC_SHA256, + SecurityConst.TLS_DHE_DSS_WITH_AES_128_CBC_SHA256, + SecurityConst.TLS_DHE_RSA_WITH_AES_128_CBC_SHA, + SecurityConst.TLS_DHE_DSS_WITH_AES_128_CBC_SHA, + SecurityConst.TLS_RSA_WITH_AES_256_GCM_SHA384, + SecurityConst.TLS_RSA_WITH_AES_128_GCM_SHA256, + SecurityConst.TLS_RSA_WITH_AES_256_CBC_SHA256, + SecurityConst.TLS_RSA_WITH_AES_128_CBC_SHA256, + SecurityConst.TLS_RSA_WITH_AES_256_CBC_SHA, + SecurityConst.TLS_RSA_WITH_AES_128_CBC_SHA, +] +# Basically this is simple: for PROTOCOL_SSLv23 we turn it into a low of +# TLSv1 and a high of TLSv1.2. For everything else, we pin to that version. +_protocol_to_min_max = { + ssl.PROTOCOL_SSLv23: ( + SecurityConst.kTLSProtocol1, SecurityConst.kTLSProtocol12 + ) +} +if hasattr(ssl, "PROTOCOL_SSLv2"): + _protocol_to_min_max[ssl.PROTOCOL_SSLv2] = ( + SecurityConst.kSSLProtocol2, SecurityConst.kSSLProtocol2 + ) +if hasattr(ssl, "PROTOCOL_SSLv3"): + _protocol_to_min_max[ssl.PROTOCOL_SSLv3] = ( + SecurityConst.kSSLProtocol3, SecurityConst.kSSLProtocol3 + ) +if hasattr(ssl, "PROTOCOL_TLSv1"): + _protocol_to_min_max[ssl.PROTOCOL_TLSv1] = ( + SecurityConst.kTLSProtocol1, SecurityConst.kTLSProtocol1 + ) +if hasattr(ssl, "PROTOCOL_TLSv1_1"): + _protocol_to_min_max[ssl.PROTOCOL_TLSv1_1] = ( + SecurityConst.kTLSProtocol11, SecurityConst.kTLSProtocol11 + ) +if hasattr(ssl, "PROTOCOL_TLSv1_2"): + _protocol_to_min_max[ssl.PROTOCOL_TLSv1_2] = ( + SecurityConst.kTLSProtocol12, SecurityConst.kTLSProtocol12 + ) +if hasattr(ssl, "PROTOCOL_TLS"): + _protocol_to_min_max[ssl.PROTOCOL_TLS] = _protocol_to_min_max[ + ssl.PROTOCOL_SSLv23 + ] + + +def inject_into_urllib3(): + """ + Monkey-patch urllib3 with SecureTransport-backed SSL-support. + """ + util.ssl_.SSLContext = SecureTransportContext + util.HAS_SNI = HAS_SNI + util.ssl_.HAS_SNI = HAS_SNI + util.IS_SECURETRANSPORT = True + util.ssl_.IS_SECURETRANSPORT = True + + +def extract_from_urllib3(): + """ + Undo monkey-patching by :func:`inject_into_urllib3`. + """ + util.ssl_.SSLContext = orig_util_SSLContext + util.HAS_SNI = orig_util_HAS_SNI + util.ssl_.HAS_SNI = orig_util_HAS_SNI + util.IS_SECURETRANSPORT = False + util.ssl_.IS_SECURETRANSPORT = False + + +def _read_callback(connection_id, data_buffer, data_length_pointer): + """ + SecureTransport read callback. 
This is called by ST to request that data + be returned from the socket. + """ + wrapped_socket = None + try: + wrapped_socket = _connection_refs.get(connection_id) + if wrapped_socket is None: + return SecurityConst.errSSLInternal + + base_socket = wrapped_socket.socket + requested_length = data_length_pointer[0] + timeout = wrapped_socket.gettimeout() + error = None + read_count = 0 + buffer = (ctypes.c_char * requested_length).from_address(data_buffer) + buffer_view = memoryview(buffer) + try: + while read_count < requested_length: + if timeout is None or timeout >= 0: + readables = util.wait_for_read([base_socket], timeout) + if not readables: + raise socket.error(errno.EAGAIN, 'timed out') + + # We need to tell ctypes that we have a buffer that can be + # written to. Upsettingly, we do that like this: + chunk_size = base_socket.recv_into( + buffer_view[read_count:requested_length] + ) + read_count += chunk_size + if not chunk_size: + if not read_count: + return SecurityConst.errSSLClosedGraceful + + break + + except (socket.error) as e: + error = e.errno + if error is not None and error != errno.EAGAIN: + if error == errno.ECONNRESET: + return SecurityConst.errSSLClosedAbort + + raise + + data_length_pointer[0] = read_count + if read_count != requested_length: + return SecurityConst.errSSLWouldBlock + + return 0 + + except Exception as e: + if wrapped_socket is not None: + wrapped_socket._exception = e + return SecurityConst.errSSLInternal + + +def _write_callback(connection_id, data_buffer, data_length_pointer): + """ + SecureTransport write callback. This is called by ST to request that data + actually be sent on the network. + """ + wrapped_socket = None + try: + wrapped_socket = _connection_refs.get(connection_id) + if wrapped_socket is None: + return SecurityConst.errSSLInternal + + base_socket = wrapped_socket.socket + bytes_to_write = data_length_pointer[0] + data = ctypes.string_at(data_buffer, bytes_to_write) + timeout = wrapped_socket.gettimeout() + error = None + sent = 0 + try: + while sent < bytes_to_write: + if timeout is None or timeout >= 0: + writables = util.wait_for_write([base_socket], timeout) + if not writables: + raise socket.error(errno.EAGAIN, 'timed out') + + chunk_sent = base_socket.send(data) + sent += chunk_sent + # This has some needless copying here, but I'm not sure there's + # much value in optimising this data path. + data = data[chunk_sent:] + except (socket.error) as e: + error = e.errno + if error is not None and error != errno.EAGAIN: + if error == errno.ECONNRESET: + return SecurityConst.errSSLClosedAbort + + raise + + data_length_pointer[0] = sent + if sent != bytes_to_write: + return SecurityConst.errSSLWouldBlock + + return 0 + + except Exception as e: + if wrapped_socket is not None: + wrapped_socket._exception = e + return SecurityConst.errSSLInternal + + +# We need to keep these two objects references alive: if they get GC'd while +# in use then SecureTransport could attempt to call a function that is in freed +# memory. That would be...uh...bad. Yeah, that's the word. Bad. +_read_callback_pointer = Security.SSLReadFunc(_read_callback) +_write_callback_pointer = Security.SSLWriteFunc(_write_callback) + + +class WrappedSocket(object): + """ + API-compatibility wrapper for Python's OpenSSL wrapped socket object. + + Note: _makefile_refs, _drop(), and _reuse() are needed for the garbage + collector of PyPy. 
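+
+    Note: reads and writes on this object are serviced by the module-level
+    _read_callback and _write_callback above, which look the socket up in
+    _connection_refs via the handle that handshake() registers.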
+    """
+
+    def __init__(self, socket):
+        self.socket = socket
+        self.context = None
+        self._makefile_refs = 0
+        self._closed = False
+        self._exception = None
+        self._keychain = None
+        self._keychain_dir = None
+        self._client_cert_chain = None
+        # We save off the previously-configured timeout and then set it to
+        # zero. This is done because we use select and friends to handle the
+        # timeouts, but if we leave the timeout set on the lower socket then
+        # Python will "kindly" call select on that socket again for us. Avoid
+        # that by forcing the timeout to zero.
+        self._timeout = self.socket.gettimeout()
+        self.socket.settimeout(0)
+
+    @contextlib.contextmanager
+    def _raise_on_error(self):
+        """
+        A context manager that can be used to wrap calls that do I/O from
+        SecureTransport. If any of the I/O callbacks hit an exception, this
+        context manager will correctly propagate the exception after the fact.
+        This avoids silently swallowing those exceptions.
+
+        It also correctly forces the socket closed.
+        """
+        self._exception = None
+        # We explicitly don't catch around this yield because in the unlikely
+        # event that an exception was hit in the block we don't want to swallow
+        # it.
+        yield
+
+        if self._exception is not None:
+            exception, self._exception = self._exception, None
+            self.close()
+            raise exception
+
+    def _set_ciphers(self):
+        """
+        Sets up the allowed ciphers. By default this matches the set in
+        util.ssl_.DEFAULT_CIPHERS, at least as supported by macOS. This is done
+        custom and doesn't allow changing at this time, mostly because parsing
+        OpenSSL cipher strings is going to be a freaking nightmare.
+        """
+        ciphers = (Security.SSLCipherSuite * len(CIPHER_SUITES))(
+            *CIPHER_SUITES
+        )
+        result = Security.SSLSetEnabledCiphers(
+            self.context, ciphers, len(CIPHER_SUITES)
+        )
+        _assert_no_error(result)
+
+    def _custom_validate(self, verify, trust_bundle):
+        """
+        Called when we have set custom validation. We do this in two cases:
+        first, when cert validation is entirely disabled; and second, when
+        using a custom trust DB.
+        """
+        # If we disabled cert validation, just say: cool.
+        if not verify:
+            return
+
+        # We want data in memory, so load it up.
+        if os.path.isfile(trust_bundle):
+            with open(trust_bundle, 'rb') as f:
+                trust_bundle = f.read()
+        cert_array = None
+        trust = Security.SecTrustRef()
+        try:
+            # Get a CFArray that contains the certs we want.
+            cert_array = _cert_array_from_pem(trust_bundle)
+            # Ok, now the hard part. We want to get the SecTrustRef that ST has
+            # created for this connection, shove our CAs into it, tell ST to
+            # ignore everything else it knows, and then ask if it can build a
+            # chain. This is a buuuunch of code.
+            result = Security.SSLCopyPeerTrust(
+                self.context, ctypes.byref(trust)
+            )
+            _assert_no_error(result)
+            if not trust:
+                raise ssl.SSLError("Failed to copy trust reference")
+
+            result = Security.SecTrustSetAnchorCertificates(trust, cert_array)
+            _assert_no_error(result)
+            result = Security.SecTrustSetAnchorCertificatesOnly(trust, True)
+            _assert_no_error(result)
+            trust_result = Security.SecTrustResultType()
+            result = Security.SecTrustEvaluate(
+                trust, ctypes.byref(trust_result)
+            )
+            _assert_no_error(result)
+        finally:
+            if trust:
+                CoreFoundation.CFRelease(trust)
+            if cert_array is not None:
+                CoreFoundation.CFRelease(cert_array)
+        # Ok, now we can look at what the result was.
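+        # Per Apple's Security framework documentation,
+        # kSecTrustResultUnspecified means evaluation succeeded with no
+        # explicit user trust override, and kSecTrustResultProceed means the
+        # user explicitly chose to trust this certificate; everything else is
+        # treated as a validation failure below.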
+ successes = ( + SecurityConst.kSecTrustResultUnspecified, + SecurityConst.kSecTrustResultProceed, + ) + if trust_result.value not in successes: + raise ssl.SSLError( + "certificate verify failed, error code: %d" % + trust_result.value + ) + + def handshake( + self, + server_hostname, + verify, + trust_bundle, + min_version, + max_version, + client_cert, + client_key, + client_key_passphrase, + ): + """ + Actually performs the TLS handshake. This is run automatically by + wrapped socket, and shouldn't be needed in user code. + """ + # First, we do the initial bits of connection setup. We need to create + # a context, set its I/O funcs, and set the connection reference. + self.context = Security.SSLCreateContext( + None, SecurityConst.kSSLClientSide, SecurityConst.kSSLStreamType + ) + result = Security.SSLSetIOFuncs( + self.context, _read_callback_pointer, _write_callback_pointer + ) + _assert_no_error(result) + # Here we need to compute the handle to use. We do this by taking the + # id of self modulo 2**31 - 1. If this is already in the dictionary, we + # just keep incrementing by one until we find a free space. + with _connection_ref_lock: + handle = id(self) % 2147483647 + while handle in _connection_refs: + handle = (handle + 1) % 2147483647 + _connection_refs[handle] = self + result = Security.SSLSetConnection(self.context, handle) + _assert_no_error(result) + # If we have a server hostname, we should set that too. + if server_hostname: + if not isinstance(server_hostname, bytes): + server_hostname = server_hostname.encode('utf-8') + result = Security.SSLSetPeerDomainName( + self.context, server_hostname, len(server_hostname) + ) + _assert_no_error(result) + # Setup the ciphers. + self._set_ciphers() + # Set the minimum and maximum TLS versions. + result = Security.SSLSetProtocolVersionMin(self.context, min_version) + _assert_no_error(result) + result = Security.SSLSetProtocolVersionMax(self.context, max_version) + _assert_no_error(result) + # If there's a trust DB, we need to use it. We do that by telling + # SecureTransport to break on server auth. We also do that if we don't + # want to validate the certs at all: we just won't actually do any + # authing in that case. + if not verify or trust_bundle is not None: + result = Security.SSLSetSessionOption( + self.context, + SecurityConst.kSSLSessionOptionBreakOnServerAuth, + True, + ) + _assert_no_error(result) + # If there's a client cert, we need to use it. + if client_cert: + self._keychain, self._keychain_dir = _temporary_keychain() + self._client_cert_chain = _load_client_cert_chain( + self._keychain, client_cert, client_key + ) + result = Security.SSLSetCertificate( + self.context, self._client_cert_chain + ) + _assert_no_error(result) + while True: + with self._raise_on_error(): + result = Security.SSLHandshake(self.context) + if result == SecurityConst.errSSLWouldBlock: + raise socket.timeout("handshake timed out") + + elif result == SecurityConst.errSSLServerAuthCompleted: + self._custom_validate(verify, trust_bundle) + continue + + else: + _assert_no_error(result) + break + + def fileno(self): + return self.socket.fileno() + + + # Copy-pasted from Python 3.5 source code + def _decref_socketios(self): + if self._makefile_refs > 0: + self._makefile_refs -= 1 + if self._closed: + self.close() + + def recv(self, bufsiz): + buffer = ctypes.create_string_buffer(bufsiz) + bytes_read = self.recv_into(buffer, bufsiz) + data = buffer[:bytes_read] + return data + + def recv_into(self, buffer, nbytes=None): + # Read short on EOF. 
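+        # (A zero return from recv_into is the standard socket-API way of
+        # signalling end-of-stream to the caller.)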
+ if self._closed: + return 0 + + if nbytes is None: + nbytes = len(buffer) + buffer = (ctypes.c_char * nbytes).from_buffer(buffer) + processed_bytes = ctypes.c_size_t(0) + with self._raise_on_error(): + result = Security.SSLRead( + self.context, buffer, nbytes, ctypes.byref(processed_bytes) + ) + # There are some result codes that we want to treat as "not always + # errors". Specifically, those are errSSLWouldBlock, + # errSSLClosedGraceful, and errSSLClosedNoNotify. + if (result == SecurityConst.errSSLWouldBlock): + # If we didn't process any bytes, then this was just a time out. + # However, we can get errSSLWouldBlock in situations when we *did* + # read some data, and in those cases we should just read "short" + # and return. + if processed_bytes.value == 0: + # Timed out, no data read. + raise socket.timeout("recv timed out") + + elif result in ( + SecurityConst.errSSLClosedGraceful, + SecurityConst.errSSLClosedNoNotify, + ): + # The remote peer has closed this connection. We should do so as + # well. Note that we don't actually return here because in + # principle this could actually be fired along with return data. + # It's unlikely though. + self.close() + else: + _assert_no_error(result) + # Ok, we read and probably succeeded. We should return whatever data + # was actually read. + return processed_bytes.value + + def settimeout(self, timeout): + self._timeout = timeout + + def gettimeout(self): + return self._timeout + + def send(self, data): + processed_bytes = ctypes.c_size_t(0) + with self._raise_on_error(): + result = Security.SSLWrite( + self.context, data, len(data), ctypes.byref(processed_bytes) + ) + if result == SecurityConst.errSSLWouldBlock and processed_bytes.value == 0: + # Timed out + raise socket.timeout("send timed out") + + else: + _assert_no_error(result) + # We sent, and probably succeeded. Tell them how much we sent. + return processed_bytes.value + + def sendall(self, data): + total_sent = 0 + while total_sent < len(data): + sent = self.send(data[total_sent:total_sent + SSL_WRITE_BLOCKSIZE]) + total_sent += sent + + def shutdown(self): + with self._raise_on_error(): + Security.SSLClose(self.context) + + def close(self): + # TODO: should I do clean shutdown here? Do I have to? + if self._makefile_refs < 1: + self._closed = True + if self.context: + CoreFoundation.CFRelease(self.context) + self.context = None + if self._client_cert_chain: + CoreFoundation.CFRelease(self._client_cert_chain) + self._client_cert_chain = None + if self._keychain: + Security.SecKeychainDelete(self._keychain) + CoreFoundation.CFRelease(self._keychain) + shutil.rmtree(self._keychain_dir) + self._keychain = self._keychain_dir = None + return self.socket.close() + + else: + self._makefile_refs -= 1 + + def getpeercert(self, binary_form=False): + # Urgh, annoying. + # + # Here's how we do this: + # + # 1. Call SSLCopyPeerTrust to get hold of the trust object for this + # connection. + # 2. Call SecTrustGetCertificateAtIndex for index 0 to get the leaf. + # 3. To get the CN, call SecCertificateCopyCommonName and process that + # string so that it's of the appropriate type. + # 4. To get the SAN, we need to do something a bit more complex: + # a. Call SecCertificateCopyValues to get the data, requesting + # kSecOIDSubjectAltName. + # b. Mess about with this dictionary to try to get the SANs out. + # + # This is gross. Really gross. It's going to be a few hundred LoC extra + # just to repeat something that SecureTransport can *already do*. 
So my + # operating assumption at this time is that what we want to do is + # instead to just flag to urllib3 that it shouldn't do its own hostname + # validation when using SecureTransport. + if not binary_form: + raise ValueError( + "SecureTransport only supports dumping binary certs" + ) + + trust = Security.SecTrustRef() + certdata = None + der_bytes = None + try: + # Grab the trust store. + result = Security.SSLCopyPeerTrust( + self.context, ctypes.byref(trust) + ) + _assert_no_error(result) + if not trust: + # Probably we haven't done the handshake yet. No biggie. + return None + + cert_count = Security.SecTrustGetCertificateCount(trust) + if not cert_count: + # Also a case that might happen if we haven't handshaked. + # Handshook? Handshaken? + return None + + leaf = Security.SecTrustGetCertificateAtIndex(trust, 0) + assert leaf + # Ok, now we want the DER bytes. + certdata = Security.SecCertificateCopyData(leaf) + assert certdata + data_length = CoreFoundation.CFDataGetLength(certdata) + data_buffer = CoreFoundation.CFDataGetBytePtr(certdata) + der_bytes = ctypes.string_at(data_buffer, data_length) + finally: + if certdata: + CoreFoundation.CFRelease(certdata) + if trust: + CoreFoundation.CFRelease(trust) + return der_bytes + + def _reuse(self): + self._makefile_refs += 1 + + def _drop(self): + if self._makefile_refs < 1: + self.close() + else: + self._makefile_refs -= 1 + + +if _fileobject: # Platform-specific: Python 2 + + def makefile(self, mode, bufsize=-1): + self._makefile_refs += 1 + return _fileobject(self, mode, bufsize, close=True) + + +else: # Platform-specific: Python 3 + + def makefile(self, mode="r", buffering=None, *args, **kwargs): + # We disable buffering with SecureTransport because it conflicts with + # the buffering that ST does internally (see issue #1153 for more). + buffering = 0 + return backport_makefile(self, mode, buffering, *args, **kwargs) + + +WrappedSocket.makefile = makefile + + +class SecureTransportContext(object): + """ + I am a wrapper class for the SecureTransport library, to translate the + interface of the standard library ``SSLContext`` object to calls into + SecureTransport. + """ + + def __init__(self, protocol): + self._min_version, self._max_version = _protocol_to_min_max[protocol] + self._options = 0 + self._verify = False + self._trust_bundle = None + self._client_cert = None + self._client_key = None + self._client_key_passphrase = None + + @property + def check_hostname(self): + """ + SecureTransport cannot have its hostname checking disabled. For more, + see the comment on getpeercert() in this file. + """ + return True + + @check_hostname.setter + def check_hostname(self, value): + """ + SecureTransport cannot have its hostname checking disabled. For more, + see the comment on getpeercert() in this file. + """ + pass + + @property + def options(self): + # TODO: Well, crap. + # + # So this is the bit of the code that is the most likely to cause us + # trouble. Essentially we need to enumerate all of the SSL options that + # users might want to use and try to see if we can sensibly translate + # them, or whether we should just ignore them. + return self._options + + @options.setter + def options(self, value): + # TODO: Update in line with above. 
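+        # For the moment the value is only stored so that reads of .options
+        # round-trip; it is not translated into SecureTransport session
+        # options yet.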
+        self._options = value
+
+    @property
+    def verify_mode(self):
+        return ssl.CERT_REQUIRED if self._verify else ssl.CERT_NONE
+
+    @verify_mode.setter
+    def verify_mode(self, value):
+        self._verify = True if value == ssl.CERT_REQUIRED else False
+
+    def set_default_verify_paths(self):
+        # So, this has to do something a bit weird. Specifically, what it does
+        # is nothing.
+        #
+        # This means that, if we had previously had load_verify_locations
+        # called, this does not undo that. We need to do that because it turns
+        # out that the rest of the urllib3 code will attempt to load the
+        # default verify paths if it hasn't been told about any paths, even if
+        # the context itself was configured sometime earlier. We resolve that
+        # by just ignoring it.
+        pass
+
+    def load_default_certs(self):
+        return self.set_default_verify_paths()
+
+    def set_ciphers(self, ciphers):
+        # For now, we just require the default cipher string.
+        if ciphers != util.ssl_.DEFAULT_CIPHERS:
+            raise ValueError(
+                "SecureTransport doesn't support custom cipher strings"
+            )
+
+    def load_verify_locations(self, cafile=None, capath=None, cadata=None):
+        # OK, we only really support cadata and cafile.
+        if capath is not None:
+            raise ValueError(
+                "SecureTransport does not support cert directories"
+            )
+
+        self._trust_bundle = cafile or cadata
+
+    def load_cert_chain(self, certfile, keyfile=None, password=None):
+        self._client_cert = certfile
+        self._client_key = keyfile
+        self._client_key_passphrase = password
+
+    def wrap_socket(
+        self,
+        sock,
+        server_side=False,
+        do_handshake_on_connect=True,
+        suppress_ragged_eofs=True,
+        server_hostname=None,
+    ):
+        # So, what do we do here? Firstly, we assert some properties. This is a
+        # stripped down shim, so there is some functionality we don't support.
+        # See PEP 543 for the real deal.
+        assert not server_side
+        assert do_handshake_on_connect
+        assert suppress_ragged_eofs
+        # Ok, we're good to go. Now we want to create the wrapped socket object
+        # and store it in the appropriate place.
+        wrapped_socket = WrappedSocket(sock)
+        # Now we can handshake
+        wrapped_socket.handshake(
+            server_hostname,
+            self._verify,
+            self._trust_bundle,
+            self._min_version,
+            self._max_version,
+            self._client_cert,
+            self._client_key,
+            self._client_key_passphrase,
+        )
+        return wrapped_socket
diff --git a/requests3/core/http_manager/contrib/socks.py b/requests3/core/http_manager/contrib/socks.py
new file mode 100644
index 00000000..bdabcb08
--- /dev/null
+++ b/requests3/core/http_manager/contrib/socks.py
@@ -0,0 +1,171 @@
+# -*- coding: utf-8 -*-
+"""
+This module contains provisional support for SOCKS proxies from within
+urllib3. This module supports SOCKS4 (specifically the SOCKS4A variant) and
+SOCKS5. To enable its functionality, either install PySocks or install this
+module with the ``socks`` extra.
+
+The SOCKS implementation supports the full range of urllib3 features. It also
+supports the following SOCKS features:
+
+- SOCKS4
+- SOCKS4a
+- SOCKS5
+- Usernames and passwords for the SOCKS proxy
+
+Known Limitations:
+
+- Currently PySocks does not support contacting remote websites via literal
+  IPv6 addresses. Any such connection attempt will fail. You must use a domain
+  name.
+- Currently PySocks does not support IPv6 connections to the SOCKS proxy. Any
+  such connection attempt will fail.
+""" +from __future__ import absolute_import + +try: + import socks +except ImportError: + import warnings + from ..exceptions import DependencyWarning + + warnings.warn( + ( + 'SOCKS support in urllib3 requires the installation of optional ' + 'dependencies: specifically, PySocks. For more information, see ' + 'https://urllib3.readthedocs.io/en/latest/contrib.html#socks-proxies' + ), + DependencyWarning, + ) + raise + +from socket import error as SocketError, timeout as SocketTimeout + +from .._sync.connection import (HTTP1Connection) +from ..connectionpool import (HTTPConnectionPool, HTTPSConnectionPool) +from ..exceptions import ConnectTimeoutError, NewConnectionError +from ..poolmanager import PoolManager +from ..util.url import parse_url + + +class SOCKSConnection(HTTP1Connection): + """ + A HTTP connection that connects via a SOCKS proxy. + """ + + def __init__(self, *args, **kwargs): + self._socks_options = kwargs.pop('_socks_options') + super(SOCKSConnection, self).__init__(*args, **kwargs) + + def _do_socket_connect(self, connect_timeout, connect_kw): + """ + Establish a new connection via the SOCKS proxy. + """ + try: + conn = socks.create_connection( + (self._host, self._port), + proxy_type=self._socks_options['socks_version'], + proxy_addr=self._socks_options['proxy_host'], + proxy_port=self._socks_options['proxy_port'], + proxy_username=self._socks_options['username'], + proxy_password=self._socks_options['password'], + proxy_rdns=self._socks_options['rdns'], + timeout=connect_timeout, + **connect_kw + ) + except SocketTimeout as e: + raise ConnectTimeoutError( + self, + "Connection to %s timed out. (connect timeout=%s)" % + (self._host, connect_timeout), + ) + + except socks.ProxyError as e: + # This is fragile as hell, but it seems to be the only way to raise + # useful errors here. + if e.socket_err: + error = e.socket_err + if isinstance(error, SocketTimeout): + raise ConnectTimeoutError( + self, + "Connection to %s timed out. (connect timeout=%s)" % + (self._host, connect_timeout), + ) + + else: + raise NewConnectionError( + self, + "Failed to establish a new connection: %s" % error, + ) + + else: + raise NewConnectionError( + self, "Failed to establish a new connection: %s" % e + ) + + except SocketError as e: # Defensive: PySocks should catch all these. + raise NewConnectionError( + self, "Failed to establish a new connection: %s" % e + ) + + return conn + + +class SOCKSHTTPConnectionPool(HTTPConnectionPool): + ConnectionCls = SOCKSConnection + + +class SOCKSHTTPSConnectionPool(HTTPSConnectionPool): + ConnectionCls = SOCKSConnection + + +class SOCKSProxyManager(PoolManager): + """ + A version of the urllib3 ProxyManager that routes connections via the + defined SOCKS proxy. 
+ """ + pool_classes_by_scheme = { + 'http': SOCKSHTTPConnectionPool, 'https': SOCKSHTTPSConnectionPool + } + + def __init__( + self, + proxy_url, + username=None, + password=None, + num_pools=10, + headers=None, + **connection_pool_kw + ): + parsed = parse_url(proxy_url) + if parsed.scheme == 'socks5': + socks_version = socks.PROXY_TYPE_SOCKS5 + rdns = False + elif parsed.scheme == 'socks5h': + socks_version = socks.PROXY_TYPE_SOCKS5 + rdns = True + elif parsed.scheme == 'socks4': + socks_version = socks.PROXY_TYPE_SOCKS4 + rdns = False + elif parsed.scheme == 'socks4a': + socks_version = socks.PROXY_TYPE_SOCKS4 + rdns = True + else: + raise ValueError( + "Unable to determine SOCKS version from %s" % proxy_url + ) + + self.proxy_url = proxy_url + socks_options = { + 'socks_version': socks_version, + 'proxy_host': parsed.host, + 'proxy_port': parsed.port, + 'username': username, + 'password': password, + 'rdns': rdns, + } + connection_pool_kw['_socks_options'] = socks_options + super(SOCKSProxyManager, self).__init__( + num_pools, headers, **connection_pool_kw + ) + self.pool_classes_by_scheme = SOCKSProxyManager.pool_classes_by_scheme diff --git a/requests3/core/http_manager/exceptions.py b/requests3/core/http_manager/exceptions.py new file mode 100644 index 00000000..743a6927 --- /dev/null +++ b/requests3/core/http_manager/exceptions.py @@ -0,0 +1,238 @@ +from __future__ import absolute_import + + + +# Base Exceptions +class HTTPError(Exception): + "Base exception used by this module." + pass + + +class HTTPWarning(Warning): + "Base warning used by this module." + pass + + +class PoolError(HTTPError): + "Base exception for errors caused within a pool." + + def __init__(self, pool, message): + self.pool = pool + HTTPError.__init__(self, "%s: %s" % (pool, message)) + + def __reduce__(self): + # For pickling purposes. + return self.__class__, (None, None) + + +class RequestError(PoolError): + "Base exception for PoolErrors that have associated URLs." + + def __init__(self, pool, url, message): + self.url = url + PoolError.__init__(self, pool, message) + + def __reduce__(self): + # For pickling purposes. + return self.__class__, (None, self.url, None) + + +class SSLError(HTTPError): + "Raised when SSL certificate fails in an HTTPS connection." + pass + + +class ProxyError(HTTPError): + "Raised when the connection to a proxy fails." + pass + + +class DecodeError(HTTPError): + "Raised when automatic decoding based on Content-Type fails." + pass + + +class ProtocolError(HTTPError): + "Raised when something unexpected happens mid-request/response." + pass + + +# : Renamed to ProtocolError but aliased for backwards compatibility. +ConnectionError = ProtocolError + + +# Leaf Exceptions +class MaxRetryError(RequestError): + """Raised when the maximum number of retries is exceeded. + + :param pool: The connection pool + :type pool: :class:`~urllib3.connectionpool.HTTPConnectionPool` + :param string url: The requested Url + :param exceptions.Exception reason: The underlying error + + """ + + def __init__(self, pool, url, reason=None): + self.reason = reason + message = "Max retries exceeded with url: %s (Caused by %r)" % ( + url, reason + ) + RequestError.__init__(self, pool, url, message) + + +class TimeoutStateError(HTTPError): + """ Raised when passing an invalid state to a timeout """ + pass + + +class TimeoutError(HTTPError): + """ Raised when a socket timeout error occurs. 
+
+    Catching this error will catch both :exc:`ReadTimeoutErrors
+    <ReadTimeoutError>` and :exc:`ConnectTimeoutErrors <ConnectTimeoutError>`.
+    """
+    pass
+
+
+class ReadTimeoutError(TimeoutError, RequestError):
+    "Raised when a socket timeout occurs while receiving data from a server."
+    pass
+
+
+# This timeout error does not have a URL attached and needs to inherit from the
+# base HTTPError
+class ConnectTimeoutError(TimeoutError):
+    "Raised when a socket timeout occurs while connecting to a server."
+    pass
+
+
+class NewConnectionError(ConnectTimeoutError, PoolError):
+    "Raised when we fail to establish a new connection. Usually ECONNREFUSED."
+    pass
+
+
+class EmptyPoolError(PoolError):
+    "Raised when a pool runs out of connections and no more are allowed."
+    pass
+
+
+class ClosedPoolError(PoolError):
+    "Raised when a request enters a pool after the pool has been closed."
+    pass
+
+
+class LocationValueError(ValueError, HTTPError):
+    "Raised when there is something wrong with a given URL input."
+    pass
+
+
+class LocationParseError(LocationValueError):
+    "Raised when get_host or similar fails to parse the URL input."
+
+    def __init__(self, location):
+        message = "Failed to parse: %s" % location
+        HTTPError.__init__(self, message)
+        self.location = location
+
+
+class ResponseError(HTTPError):
+    "Used as a container for an error reason supplied in a MaxRetryError."
+    GENERIC_ERROR = 'too many error responses'
+    SPECIFIC_ERROR = 'too many {status_code} error responses'
+
+
+class SecurityWarning(HTTPWarning):
+    "Warned when performing security-reducing actions."
+    pass
+
+
+class SubjectAltNameWarning(SecurityWarning):
+    "Warned when connecting to a host with a certificate missing a SAN."
+    pass
+
+
+class InsecureRequestWarning(SecurityWarning):
+    "Warned when making an unverified HTTPS request."
+    pass
+
+
+class SystemTimeWarning(SecurityWarning):
+    "Warned when system time is suspected to be wrong."
+    pass
+
+
+class InsecurePlatformWarning(SecurityWarning):
+    "Warned when certain SSL configuration is not available on a platform."
+    pass
+
+
+class SNIMissingWarning(HTTPWarning):
+    "Warned when making an HTTPS request without SNI available."
+    pass
+
+
+class DependencyWarning(HTTPWarning):
+    """
+    Warned when an attempt is made to import a module with missing optional
+    dependencies.
+    """
+    pass
+
+
+class InvalidHeader(HTTPError):
+    "The header provided was somehow invalid."
+    pass
+
+
+class BadVersionError(ProtocolError):
+    """
+    The HTTP version in the response is unsupported.
+    """

+    def __init__(self, version):
+        message = "HTTP version {} is unsupported".format(version)
+        super(BadVersionError, self).__init__(message)
+
+
+class ProxySchemeUnknown(AssertionError, ValueError):
+    "ProxyManager does not support the supplied scheme."
+
+    # TODO(t-8ch): Stop inheriting from AssertionError in v2.0.
+    def __init__(self, scheme):
+        message = "Not supported proxy scheme %s" % scheme
+        super(ProxySchemeUnknown, self).__init__(message)
+
+
+class HeaderParsingError(HTTPError):
+    "Raised by assert_header_parsing, but we convert it to a log.warning statement."
+
+    def __init__(self, defects, unparsed_data):
+        message = '%s, unparsed data: %r' % (
+            defects or 'Unknown', unparsed_data
+        )
+        super(HeaderParsingError, self).__init__(message)
+
+
+class UnrewindableBodyError(HTTPError):
+    "urllib3 encountered an error when trying to rewind a body."
+    pass
+
+
+class FailedTunnelError(HTTPError):
+    """
+    An attempt was made to set up a CONNECT tunnel, but that attempt failed.
+ """ + + def __init__(self, message, response): + super(FailedTunnelError, self).__init__(message) + self.response = response + + +class InvalidBodyError(HTTPError): + """ + An attempt was made to send a request with a body object that urllib3 does + not support. + """ + pass diff --git a/requests3/core/http_manager/fields.py b/requests3/core/http_manager/fields.py new file mode 100644 index 00000000..f1808f0e --- /dev/null +++ b/requests3/core/http_manager/fields.py @@ -0,0 +1,183 @@ +from __future__ import absolute_import +import email.utils +import mimetypes + +from .packages import six + + +def guess_content_type(filename, default='application/octet-stream'): + """ + Guess the "Content-Type" of a file. + + :param filename: + The filename to guess the "Content-Type" of using :mod:`mimetypes`. + :param default: + If no "Content-Type" can be guessed, default to `default`. + """ + if filename: + return mimetypes.guess_type(filename)[0] or default + + return default + + +def format_header_param(name, value): + """ + Helper function to format and quote a single header parameter. + + Particularly useful for header parameters which might contain + non-ASCII values, like file names. This follows RFC 2231, as + suggested by RFC 2388 Section 4.4. + + :param name: + The name of the parameter, a string expected to be ASCII only. + :param value: + The value of the parameter, provided as a unicode string. + """ + if not any(ch in value for ch in '"\\\r\n'): + result = '%s="%s"' % (name, value) + try: + result.encode('ascii') + except (UnicodeEncodeError, UnicodeDecodeError): + pass + else: + return result + + if not six.PY3 and isinstance(value, six.text_type): # Python 2: + value = value.encode('utf-8') + value = email.utils.encode_rfc2231(value, 'utf-8') + value = '%s*=%s' % (name, value) + return value + + +class RequestField(object): + """ + A data container for request body parameters. + + :param name: + The name of this request field. + :param data: + The data/value body. + :param filename: + An optional filename of the request field. + :param headers: + An optional dict-like object of headers to initially use for the field. + """ + + def __init__(self, name, data, filename=None, headers=None): + self._name = name + self._filename = filename + self.data = data + self.headers = {} + if headers: + self.headers = dict(headers) + + @classmethod + def from_tuples(cls, fieldname, value): + """ + A :class:`~urllib3.fields.RequestField` factory from old-style tuple parameters. + + Supports constructing :class:`~urllib3.fields.RequestField` from + parameter of key/value strings AND key/filetuple. A filetuple is a + (filename, data, MIME type) tuple where the MIME type is optional. + For example:: + + 'foo': 'bar', + 'fakefile': ('foofile.txt', 'contents of foofile'), + 'realfile': ('barfile.txt', open('realfile').read()), + 'typedfile': ('bazfile.bin', open('bazfile').read(), 'image/jpeg'), + 'nonamefile': 'contents of nonamefile field', + + Field names and filenames must be unicode. + """ + if isinstance(value, tuple): + if len(value) == 3: + filename, data, content_type = value + else: + filename, data = value + content_type = guess_content_type(filename) + else: + filename = None + content_type = None + data = value + request_param = cls(fieldname, data, filename=filename) + request_param.make_multipart(content_type=content_type) + return request_param + + def _render_part(self, name, value): + """ + Overridable helper function to format a single header parameter. 
+
+        :param name:
+            The name of the parameter, a string expected to be ASCII only.
+        :param value:
+            The value of the parameter, provided as a unicode string.
+        """
+        return format_header_param(name, value)
+
+    def _render_parts(self, header_parts):
+        """
+        Helper function to format and quote a single header.
+
+        Useful for single headers that are composed of multiple items. E.g.,
+        'Content-Disposition' fields.
+
+        :param header_parts:
+            A sequence of (k, v) tuples or a :class:`dict` of (k, v) to format
+            as `k1="v1"; k2="v2"; ...`.
+        """
+        parts = []
+        iterable = header_parts
+        if isinstance(header_parts, dict):
+            iterable = header_parts.items()
+        for name, value in iterable:
+            if value is not None:
+                parts.append(self._render_part(name, value))
+        return '; '.join(parts)
+
+    def render_headers(self):
+        """
+        Renders the headers for this request field.
+        """
+        lines = []
+        sort_keys = ['Content-Disposition', 'Content-Type', 'Content-Location']
+        for sort_key in sort_keys:
+            if self.headers.get(sort_key, False):
+                lines.append('%s: %s' % (sort_key, self.headers[sort_key]))
+        for header_name, header_value in self.headers.items():
+            if header_name not in sort_keys:
+                if header_value:
+                    lines.append('%s: %s' % (header_name, header_value))
+        lines.append('\r\n')
+        return '\r\n'.join(lines)
+
+    def make_multipart(
+        self,
+        content_disposition=None,
+        content_type=None,
+        content_location=None,
+    ):
+        """
+        Makes this request field into a multipart request field.
+
+        This method sets the "Content-Disposition", "Content-Type" and
+        "Content-Location" headers on the request parameter.
+
+        :param content_type:
+            The 'Content-Type' of the request body.
+        :param content_location:
+            The 'Content-Location' of the request body.
+
+        """
+        self.headers[
+            'Content-Disposition'
+        ] = content_disposition or 'form-data'
+        self.headers['Content-Disposition'] += '; '.join(
+            [
+                '',
+                self._render_parts(
+                    (('name', self._name), ('filename', self._filename))
+                ),
+            ]
+        )
+        self.headers['Content-Type'] = content_type
+        self.headers['Content-Location'] = content_location
diff --git a/requests3/core/http_manager/filepost.py b/requests3/core/http_manager/filepost.py
new file mode 100644
index 00000000..6b05b747
--- /dev/null
+++ b/requests3/core/http_manager/filepost.py
@@ -0,0 +1,93 @@
+from __future__ import absolute_import
+import codecs
+
+from io import BytesIO
+
+from .packages import six
+from .packages.six import b
+from .fields import RequestField
+
+writer = codecs.lookup('utf-8')[3]
+
+
+def choose_boundary():
+    """
+    Our embarrassingly-simple replacement for mimetools.choose_boundary.
+
+    We are lazily loading uuid here, because we don't want its issues
+
+    https://bugs.python.org/issue5885
+    https://bugs.python.org/issue11063
+
+    to affect our entire library.
+    """
+    from uuid import uuid4
+    return uuid4().hex
+
+
+def iter_field_objects(fields):
+    """
+    Iterate over fields.
+
+    Supports list of (k, v) tuples and dicts, and lists of
+    :class:`~urllib3.fields.RequestField`.
+
+    """
+    if isinstance(fields, dict):
+        i = six.iteritems(fields)
+    else:
+        i = iter(fields)
+    for field in i:
+        if isinstance(field, RequestField):
+            yield field
+
+        else:
+            yield RequestField.from_tuples(*field)
+
+
+def iter_fields(fields):
+    """
+    .. deprecated:: 1.6
+
+    Iterate over fields.
+
+    The addition of :class:`~urllib3.fields.RequestField` makes this function
+    obsolete. Instead, use :func:`iter_field_objects`, which returns
+    :class:`~urllib3.fields.RequestField` objects.
+
+    Supports list of (k, v) tuples and dicts.
+ """ + if isinstance(fields, dict): + return ((k, v) for k, v in six.iteritems(fields)) + + return ((k, v) for k, v in fields) + + +def encode_multipart_formdata(fields, boundary=None): + """ + Encode a dictionary of ``fields`` using the multipart/form-data MIME format. + + :param fields: + Dictionary of fields or list of (key, :class:`~urllib3.fields.RequestField`). + + :param boundary: + If not specified, then a random boundary will be generated using + :func:`mimetools.choose_boundary`. + """ + body = BytesIO() + if boundary is None: + boundary = choose_boundary() + for field in iter_field_objects(fields): + body.write(b('--%s\r\n' % (boundary))) + writer(body).write(field.render_headers()) + data = field.data + if isinstance(data, int): + data = str(data) # Backwards compatibility + if isinstance(data, six.text_type): + writer(body).write(data) + else: + body.write(data) + body.write(b'\r\n') + body.write(b('--%s--\r\n' % (boundary))) + content_type = str('multipart/form-data; boundary=%s' % boundary) + return body.getvalue(), content_type diff --git a/requests3/core/http_manager/packages/__init__.py b/requests3/core/http_manager/packages/__init__.py new file mode 100644 index 00000000..b3e85f85 --- /dev/null +++ b/requests3/core/http_manager/packages/__init__.py @@ -0,0 +1,5 @@ +from __future__ import absolute_import + +from . import ssl_match_hostname + +__all__ = ('ssl_match_hostname',) diff --git a/requests3/core/http_manager/packages/backports/__init__.py b/requests3/core/http_manager/packages/backports/__init__.py new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/requests3/core/http_manager/packages/backports/__init__.py diff --git a/requests3/core/http_manager/packages/backports/makefile.py b/requests3/core/http_manager/packages/backports/makefile.py new file mode 100644 index 00000000..160f0666 --- /dev/null +++ b/requests3/core/http_manager/packages/backports/makefile.py @@ -0,0 +1,56 @@ +# -*- coding: utf-8 -*- +""" +backports.makefile +~~~~~~~~~~~~~~~~~~ + +Backports the Python 3 ``socket.makefile`` method for use with anything that +wants to create a "fake" socket object. +""" +import io + +from socket import SocketIO + + +def backport_makefile( + self, mode="r", buffering=None, encoding=None, errors=None, newline=None +): + """ + Backport of ``socket.makefile`` from Python 3.5. 
+ """ + if not set(mode) <= set(["r", "w", "b"]): + raise ValueError("invalid mode %r (only r, w, b allowed)" % (mode,)) + + writing = "w" in mode + reading = "r" in mode or not writing + assert reading or writing + binary = "b" in mode + rawmode = "" + if reading: + rawmode += "r" + if writing: + rawmode += "w" + raw = SocketIO(self, rawmode) + self._makefile_refs += 1 + if buffering is None: + buffering = -1 + if buffering < 0: + buffering = io.DEFAULT_BUFFER_SIZE + if buffering == 0: + if not binary: + raise ValueError("unbuffered streams must be binary") + + return raw + + if reading and writing: + buffer = io.BufferedRWPair(raw, raw, buffering) + elif reading: + buffer = io.BufferedReader(raw, buffering) + else: + assert writing + buffer = io.BufferedWriter(raw, buffering) + if binary: + return buffer + + text = io.TextIOWrapper(buffer, encoding, errors, newline) + text.mode = mode + return text diff --git a/requests3/core/http_manager/packages/ordered_dict.py b/requests3/core/http_manager/packages/ordered_dict.py new file mode 100644 index 00000000..74845861 --- /dev/null +++ b/requests3/core/http_manager/packages/ordered_dict.py @@ -0,0 +1,272 @@ +# Backport of OrderedDict() class that runs on Python 2.4, 2.5, 2.6, 2.7 and pypy. +# Passes Python2.7's test suite and incorporates all the latest updates. +# Copyright 2009 Raymond Hettinger, released under the MIT License. +# http://code.activestate.com/recipes/576693/ +try: + from thread import get_ident as _get_ident +except ImportError: + from dummy_thread import get_ident as _get_ident +try: + from _abcoll import KeysView, ValuesView, ItemsView +except ImportError: + pass + + +class OrderedDict(dict): + 'Dictionary that remembers insertion order' + + # An inherited dict maps keys to values. + # The inherited dict provides __getitem__, __len__, __contains__, and get. + # The remaining methods are order-aware. + # Big-O running times for all methods are the same as for regular dictionaries. + # The internal self.__map dictionary maps keys to links in a doubly linked list. + # The circular doubly linked list starts and ends with a sentinel element. + # The sentinel element never gets deleted (this simplifies the algorithm). + # Each link is stored as a list of length three: [PREV, NEXT, KEY]. + def __init__(self, *args, **kwds): + '''Initialize an ordered dictionary. Signature is the same as for + regular dictionaries, but keyword arguments are not recommended + because their insertion order is arbitrary. + + ''' + if len(args) > 1: + raise TypeError('expected at most 1 arguments, got %d' % len(args)) + + try: + self.__root + except AttributeError: + self.__root = root = [] # sentinel node + root[:] = [root, root, None] + self.__map = {} + self.__update(*args, **kwds) + + def __setitem__(self, key, value, dict_setitem=dict.__setitem__): + 'od.__setitem__(i, y) <==> od[i]=y' + # Setting a new item creates a new link which goes at the end of the linked + # list, and the inherited dictionary is updated with the new key/value pair. + if key not in self: + root = self.__root + last = root[0] + last[1] = root[0] = self.__map[key] = [last, root, key] + dict_setitem(self, key, value) + + def __delitem__(self, key, dict_delitem=dict.__delitem__): + 'od.__delitem__(y) <==> del od[y]' + # Deleting an existing item uses self.__map to find the link which is + # then removed by updating the links in the predecessor and successor nodes. 
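+        # For example, deleting B from A <-> B <-> C splices the neighbours
+        # together, leaving A <-> C; the sentinel root keeps the list circular
+        # even when it becomes empty.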
+ dict_delitem(self, key) + link_prev, link_next, key = self.__map.pop(key) + link_prev[1] = link_next + link_next[0] = link_prev + + def __iter__(self): + 'od.__iter__() <==> iter(od)' + root = self.__root + curr = root[1] + while curr is not root: + yield curr[2] + + curr = curr[1] + + def __reversed__(self): + 'od.__reversed__() <==> reversed(od)' + root = self.__root + curr = root[0] + while curr is not root: + yield curr[2] + + curr = curr[0] + + def clear(self): + 'od.clear() -> None. Remove all items from od.' + try: + for node in self.__map.itervalues(): + del node[:] + root = self.__root + root[:] = [root, root, None] + self.__map.clear() + except AttributeError: + pass + dict.clear(self) + + def popitem(self, last=True): + '''od.popitem() -> (k, v), return and remove a (key, value) pair. + Pairs are returned in LIFO order if last is true or FIFO order if false. + + ''' + if not self: + raise KeyError('dictionary is empty') + + root = self.__root + if last: + link = root[0] + link_prev = link[0] + link_prev[1] = root + root[0] = link_prev + else: + link = root[1] + link_next = link[1] + root[1] = link_next + link_next[0] = root + key = link[2] + del self.__map[key] + value = dict.pop(self, key) + return key, value + + + # -- the following methods do not depend on the internal structure -- + def keys(self): + 'od.keys() -> list of keys in od' + return list(self) + + def values(self): + 'od.values() -> list of values in od' + return [self[key] for key in self] + + def items(self): + 'od.items() -> list of (key, value) pairs in od' + return [(key, self[key]) for key in self] + + def iterkeys(self): + 'od.iterkeys() -> an iterator over the keys in od' + return iter(self) + + def itervalues(self): + 'od.itervalues -> an iterator over the values in od' + for k in self: + yield self[k] + + def iteritems(self): + 'od.iteritems -> an iterator over the (key, value) items in od' + for k in self: + yield (k, self[k]) + + def update(*args, **kwds): + '''od.update(E, **F) -> None. Update od from dict/iterable E and F. + + If E is a dict instance, does: for k in E: od[k] = E[k] + If E has a .keys() method, does: for k in E.keys(): od[k] = E[k] + Or if E is an iterable of items, does: for k, v in E: od[k] = v + In either case, this is followed by: for k, v in F.items(): od[k] = v + + ''' + if len(args) > 2: + raise TypeError( + 'update() takes at most 2 positional ' + 'arguments (%d given)' % (len(args),) + ) + + elif not args: + raise TypeError('update() takes at least 1 argument (0 given)') + + self = args[0] + # Make progressively weaker assumptions about "other" + other = () + if len(args) == 2: + other = args[1] + if isinstance(other, dict): + for key in other: + self[key] = other[key] + elif hasattr(other, 'keys'): + for key in other.keys(): + self[key] = other[key] + else: + for key, value in other: + self[key] = value + for key, value in kwds.items(): + self[key] = value + + __update = update # let subclasses override update without breaking __init__ + __marker = object() + + def pop(self, key, default=__marker): + '''od.pop(k[,d]) -> v, remove specified key and return the corresponding value. + If key is not found, d is returned if given, otherwise KeyError is raised. 
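+
+        Example (illustrative)::
+
+            >>> od = OrderedDict([('a', 1), ('b', 2)])
+            >>> od.pop('a')
+            1
+            >>> od.pop('missing', 'fallback')
+            'fallback'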
+ + ''' + if key in self: + result = self[key] + del self[key] + return result + + if default is self.__marker: + raise KeyError(key) + + return default + + def setdefault(self, key, default=None): + 'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od' + if key in self: + return self[key] + + self[key] = default + return default + + def __repr__(self, _repr_running={}): + 'od.__repr__() <==> repr(od)' + call_key = id(self), _get_ident() + if call_key in _repr_running: + return '...' + + _repr_running[call_key] = 1 + try: + if not self: + return '%s()' % (self.__class__.__name__,) + + return '%s(%r)' % (self.__class__.__name__, self.items()) + + finally: + del _repr_running[call_key] + + def __reduce__(self): + 'Return state information for pickling' + items = [[k, self[k]] for k in self] + inst_dict = vars(self).copy() + for k in vars(OrderedDict()): + inst_dict.pop(k, None) + if inst_dict: + return (self.__class__, (items,), inst_dict) + + return self.__class__, (items,) + + def copy(self): + 'od.copy() -> a shallow copy of od' + return self.__class__(self) + + @classmethod + def fromkeys(cls, iterable, value=None): + '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S + and values equal to v (which defaults to None). + + ''' + d = cls() + for key in iterable: + d[key] = value + return d + + def __eq__(self, other): + '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive + while comparison to a regular mapping is order-insensitive. + + ''' + if isinstance(other, OrderedDict): + return len(self) == len(other) and self.items() == other.items() + + return dict.__eq__(self, other) + + def __ne__(self, other): + return not self == other + + + # -- the following methods are only used in Python 2.7 -- + def viewkeys(self): + "od.viewkeys() -> a set-like object providing a view on od's keys" + return KeysView(self) + + def viewvalues(self): + "od.viewvalues() -> an object providing a view on od's values" + return ValuesView(self) + + def viewitems(self): + "od.viewitems() -> a set-like object providing a view on od's items" + return ItemsView(self) diff --git a/requests3/core/http_manager/packages/six.py b/requests3/core/http_manager/packages/six.py new file mode 100644 index 00000000..af378941 --- /dev/null +++ b/requests3/core/http_manager/packages/six.py @@ -0,0 +1,935 @@ +"""Utilities for writing code that runs on Python 2 and 3""" +# Copyright (c) 2010-2015 Benjamin Peterson +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
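+
+# Note: this is a vendored copy of six (1.10.0, per ``__version__`` below),
+# bundled under ``packages`` so the http_manager code does not depend on a
+# separately installed six distribution.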
+from __future__ import absolute_import + +import functools +import itertools +import operator +import sys +import types + +__author__ = "Benjamin Peterson <benjamin@python.org>" +__version__ = "1.10.0" +# Useful for very coarse version differentiation. +PY2 = sys.version_info[0] == 2 +PY3 = sys.version_info[0] == 3 +PY34 = sys.version_info[0:2] >= (3, 4) +if PY3: + string_types = str, + integer_types = int, + class_types = type, + text_type = str + binary_type = bytes + MAXSIZE = sys.maxsize +else: + string_types = basestring, + integer_types = (int, long) + class_types = (type, types.ClassType) + text_type = unicode + binary_type = str + if sys.platform.startswith("java"): + # Jython always uses 32 bits. + MAXSIZE = int((1 << 31) - 1) + else: + + # It's possible to have sizeof(long) != sizeof(Py_ssize_t). + class X(object): + + def __len__(self): + return 1 << 31 + + try: + len(X()) + except OverflowError: + # 32-bit + MAXSIZE = int((1 << 31) - 1) + else: + # 64-bit + MAXSIZE = int((1 << 63) - 1) + del X + + +def _add_doc(func, doc): + """Add documentation to a function.""" + func.__doc__ = doc + + +def _import_module(name): + """Import module, returning the module after the last dot.""" + __import__(name) + return sys.modules[name] + + +class _LazyDescr(object): + + def __init__(self, name): + self.name = name + + def __get__(self, obj, tp): + result = self._resolve() + setattr(obj, self.name, result) # Invokes __set__. + try: + # This is a bit ugly, but it avoids running this again by + # removing this descriptor. + delattr(obj.__class__, self.name) + except AttributeError: + pass + return result + + +class MovedModule(_LazyDescr): + + def __init__(self, name, old, new=None): + super(MovedModule, self).__init__(name) + if PY3: + if new is None: + new = name + self.mod = new + else: + self.mod = old + + def _resolve(self): + return _import_module(self.mod) + + def __getattr__(self, attr): + _module = self._resolve() + value = getattr(_module, attr) + setattr(self, attr, value) + return value + + +class _LazyModule(types.ModuleType): + + def __init__(self, name): + super(_LazyModule, self).__init__(name) + self.__doc__ = self.__class__.__doc__ + + def __dir__(self): + attrs = ["__doc__", "__name__"] + attrs += [attr.name for attr in self._moved_attributes] + return attrs + + # Subclasses should override this + _moved_attributes = [] + + +class MovedAttribute(_LazyDescr): + + def __init__(self, name, old_mod, new_mod, old_attr=None, new_attr=None): + super(MovedAttribute, self).__init__(name) + if PY3: + if new_mod is None: + new_mod = name + self.mod = new_mod + if new_attr is None: + if old_attr is None: + new_attr = name + else: + new_attr = old_attr + self.attr = new_attr + else: + self.mod = old_mod + if old_attr is None: + old_attr = name + self.attr = old_attr + + def _resolve(self): + module = _import_module(self.mod) + return getattr(module, self.attr) + + +class _SixMetaPathImporter(object): + """ + A meta path importer to import six.moves and its submodules. + + This class implements a PEP302 finder and loader. It should be compatible + with Python 2.5 and all existing versions of Python3 + """ + + def __init__(self, six_module_name): + self.name = six_module_name + self.known_modules = {} + + def _add_module(self, mod, *fullnames): + for fullname in fullnames: + self.known_modules[self.name + "." + fullname] = mod + + def _get_module(self, fullname): + return self.known_modules[self.name + "." 
+ fullname] + + def find_module(self, fullname, path=None): + if fullname in self.known_modules: + return self + + return None + + def __get_module(self, fullname): + try: + return self.known_modules[fullname] + + except KeyError: + raise ImportError("This loader does not know module " + fullname) + + def load_module(self, fullname): + try: + # in case of a reload + return sys.modules[fullname] + + except KeyError: + pass + mod = self.__get_module(fullname) + if isinstance(mod, MovedModule): + mod = mod._resolve() + else: + mod.__loader__ = self + sys.modules[fullname] = mod + return mod + + def is_package(self, fullname): + """ + Return true, if the named module is a package. + + We need this method to get correct spec objects with + Python 3.4 (see PEP451) + """ + return hasattr(self.__get_module(fullname), "__path__") + + def get_code(self, fullname): + """Return None + + Required, if is_package is implemented""" + self.__get_module(fullname) # eventually raises ImportError + return None + + get_source = get_code # same as get_code + + +_importer = _SixMetaPathImporter(__name__) + + +class _MovedItems(_LazyModule): + """Lazy loading of moved objects""" + __path__ = [] # mark as package + + +_moved_attributes = [ + MovedAttribute("cStringIO", "cStringIO", "io", "StringIO"), + MovedAttribute("filter", "itertools", "builtins", "ifilter", "filter"), + MovedAttribute( + "filterfalse", "itertools", "itertools", "ifilterfalse", "filterfalse" + ), + MovedAttribute("input", "__builtin__", "builtins", "raw_input", "input"), + MovedAttribute("intern", "__builtin__", "sys"), + MovedAttribute("map", "itertools", "builtins", "imap", "map"), + MovedAttribute("getcwd", "os", "os", "getcwdu", "getcwd"), + MovedAttribute("getcwdb", "os", "os", "getcwd", "getcwdb"), + MovedAttribute("range", "__builtin__", "builtins", "xrange", "range"), + MovedAttribute( + "reload_module", + "__builtin__", + "importlib" if PY34 else "imp", + "reload", + ), + MovedAttribute("reduce", "__builtin__", "functools"), + MovedAttribute("shlex_quote", "pipes", "shlex", "quote"), + MovedAttribute("StringIO", "StringIO", "io"), + MovedAttribute("UserDict", "UserDict", "collections"), + MovedAttribute("UserList", "UserList", "collections"), + MovedAttribute("UserString", "UserString", "collections"), + MovedAttribute("xrange", "__builtin__", "builtins", "xrange", "range"), + MovedAttribute("zip", "itertools", "builtins", "izip", "zip"), + MovedAttribute( + "zip_longest", "itertools", "itertools", "izip_longest", "zip_longest" + ), + MovedModule("builtins", "__builtin__"), + MovedModule("configparser", "ConfigParser"), + MovedModule("copyreg", "copy_reg"), + MovedModule("dbm_gnu", "gdbm", "dbm.gnu"), + MovedModule("_dummy_thread", "dummy_thread", "_dummy_thread"), + MovedModule("http_cookiejar", "cookielib", "http.cookiejar"), + MovedModule("http_cookies", "Cookie", "http.cookies"), + MovedModule("html_entities", "htmlentitydefs", "html.entities"), + MovedModule("html_parser", "HTMLParser", "html.parser"), + MovedModule("http_client", "httplib", "http.client"), + MovedModule( + "email_mime_multipart", "email.MIMEMultipart", "email.mime.multipart" + ), + MovedModule( + "email_mime_nonmultipart", + "email.MIMENonMultipart", + "email.mime.nonmultipart", + ), + MovedModule("email_mime_text", "email.MIMEText", "email.mime.text"), + MovedModule("email_mime_base", "email.MIMEBase", "email.mime.base"), + MovedModule("BaseHTTPServer", "BaseHTTPServer", "http.server"), + MovedModule("CGIHTTPServer", "CGIHTTPServer", "http.server"), + 
MovedModule("SimpleHTTPServer", "SimpleHTTPServer", "http.server"), + MovedModule("cPickle", "cPickle", "pickle"), + MovedModule("queue", "Queue"), + MovedModule("reprlib", "repr"), + MovedModule("socketserver", "SocketServer"), + MovedModule("_thread", "thread", "_thread"), + MovedModule("tkinter", "Tkinter"), + MovedModule("tkinter_dialog", "Dialog", "tkinter.dialog"), + MovedModule("tkinter_filedialog", "FileDialog", "tkinter.filedialog"), + MovedModule( + "tkinter_scrolledtext", "ScrolledText", "tkinter.scrolledtext" + ), + MovedModule( + "tkinter_simpledialog", "SimpleDialog", "tkinter.simpledialog" + ), + MovedModule("tkinter_tix", "Tix", "tkinter.tix"), + MovedModule("tkinter_ttk", "ttk", "tkinter.ttk"), + MovedModule("tkinter_constants", "Tkconstants", "tkinter.constants"), + MovedModule("tkinter_dnd", "Tkdnd", "tkinter.dnd"), + MovedModule( + "tkinter_colorchooser", "tkColorChooser", "tkinter.colorchooser" + ), + MovedModule( + "tkinter_commondialog", "tkCommonDialog", "tkinter.commondialog" + ), + MovedModule("tkinter_tkfiledialog", "tkFileDialog", "tkinter.filedialog"), + MovedModule("tkinter_font", "tkFont", "tkinter.font"), + MovedModule("tkinter_messagebox", "tkMessageBox", "tkinter.messagebox"), + MovedModule( + "tkinter_tksimpledialog", "tkSimpleDialog", "tkinter.simpledialog" + ), + MovedModule( + "urllib_parse", __name__ + ".moves.urllib_parse", "urllib.parse" + ), + MovedModule( + "urllib_error", __name__ + ".moves.urllib_error", "urllib.error" + ), + MovedModule( + "urllib", __name__ + ".moves.urllib", __name__ + ".moves.urllib" + ), + MovedModule("urllib_robotparser", "robotparser", "urllib.robotparser"), + MovedModule("xmlrpc_client", "xmlrpclib", "xmlrpc.client"), + MovedModule("xmlrpc_server", "SimpleXMLRPCServer", "xmlrpc.server"), +] +# Add windows specific modules. +if sys.platform == "win32": + _moved_attributes += [MovedModule("winreg", "_winreg")] +for attr in _moved_attributes: + setattr(_MovedItems, attr.name, attr) + if isinstance(attr, MovedModule): + _importer._add_module(attr, "moves." 
+ attr.name) +del attr +_MovedItems._moved_attributes = _moved_attributes +moves = _MovedItems(__name__ + ".moves") +_importer._add_module(moves, "moves") + + +class Module_six_moves_urllib_parse(_LazyModule): + """Lazy loading of moved objects in six.moves.urllib_parse""" + + +_urllib_parse_moved_attributes = [ + MovedAttribute("ParseResult", "urlparse", "urllib.parse"), + MovedAttribute("SplitResult", "urlparse", "urllib.parse"), + MovedAttribute("parse_qs", "urlparse", "urllib.parse"), + MovedAttribute("parse_qsl", "urlparse", "urllib.parse"), + MovedAttribute("urldefrag", "urlparse", "urllib.parse"), + MovedAttribute("urljoin", "urlparse", "urllib.parse"), + MovedAttribute("urlparse", "urlparse", "urllib.parse"), + MovedAttribute("urlsplit", "urlparse", "urllib.parse"), + MovedAttribute("urlunparse", "urlparse", "urllib.parse"), + MovedAttribute("urlunsplit", "urlparse", "urllib.parse"), + MovedAttribute("quote", "urllib", "urllib.parse"), + MovedAttribute("quote_plus", "urllib", "urllib.parse"), + MovedAttribute("unquote", "urllib", "urllib.parse"), + MovedAttribute("unquote_plus", "urllib", "urllib.parse"), + MovedAttribute("urlencode", "urllib", "urllib.parse"), + MovedAttribute("splitquery", "urllib", "urllib.parse"), + MovedAttribute("splittag", "urllib", "urllib.parse"), + MovedAttribute("splituser", "urllib", "urllib.parse"), + MovedAttribute("uses_fragment", "urlparse", "urllib.parse"), + MovedAttribute("uses_netloc", "urlparse", "urllib.parse"), + MovedAttribute("uses_params", "urlparse", "urllib.parse"), + MovedAttribute("uses_query", "urlparse", "urllib.parse"), + MovedAttribute("uses_relative", "urlparse", "urllib.parse"), +] +for attr in _urllib_parse_moved_attributes: + setattr(Module_six_moves_urllib_parse, attr.name, attr) +del attr +Module_six_moves_urllib_parse._moved_attributes = _urllib_parse_moved_attributes +_importer._add_module( + Module_six_moves_urllib_parse(__name__ + ".moves.urllib_parse"), + "moves.urllib_parse", + "moves.urllib.parse", +) + + +class Module_six_moves_urllib_error(_LazyModule): + """Lazy loading of moved objects in six.moves.urllib_error""" + + +_urllib_error_moved_attributes = [ + MovedAttribute("URLError", "urllib2", "urllib.error"), + MovedAttribute("HTTPError", "urllib2", "urllib.error"), + MovedAttribute("ContentTooShortError", "urllib", "urllib.error"), +] +for attr in _urllib_error_moved_attributes: + setattr(Module_six_moves_urllib_error, attr.name, attr) +del attr +Module_six_moves_urllib_error._moved_attributes = _urllib_error_moved_attributes +_importer._add_module( + Module_six_moves_urllib_error(__name__ + ".moves.urllib.error"), + "moves.urllib_error", + "moves.urllib.error", +) + + +class Module_six_moves_urllib_request(_LazyModule): + """Lazy loading of moved objects in six.moves.urllib_request""" + + +_urllib_request_moved_attributes = [ + MovedAttribute("urlopen", "urllib2", "urllib.request"), + MovedAttribute("install_opener", "urllib2", "urllib.request"), + MovedAttribute("build_opener", "urllib2", "urllib.request"), + MovedAttribute("pathname2url", "urllib", "urllib.request"), + MovedAttribute("url2pathname", "urllib", "urllib.request"), + MovedAttribute("getproxies", "urllib", "urllib.request"), + MovedAttribute("Request", "urllib2", "urllib.request"), + MovedAttribute("OpenerDirector", "urllib2", "urllib.request"), + MovedAttribute("HTTPDefaultErrorHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPRedirectHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPCookieProcessor", "urllib2", 
"urllib.request"), + MovedAttribute("ProxyHandler", "urllib2", "urllib.request"), + MovedAttribute("BaseHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPPasswordMgr", "urllib2", "urllib.request"), + MovedAttribute( + "HTTPPasswordMgrWithDefaultRealm", "urllib2", "urllib.request" + ), + MovedAttribute("AbstractBasicAuthHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPBasicAuthHandler", "urllib2", "urllib.request"), + MovedAttribute("ProxyBasicAuthHandler", "urllib2", "urllib.request"), + MovedAttribute("AbstractDigestAuthHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPDigestAuthHandler", "urllib2", "urllib.request"), + MovedAttribute("ProxyDigestAuthHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPSHandler", "urllib2", "urllib.request"), + MovedAttribute("FileHandler", "urllib2", "urllib.request"), + MovedAttribute("FTPHandler", "urllib2", "urllib.request"), + MovedAttribute("CacheFTPHandler", "urllib2", "urllib.request"), + MovedAttribute("UnknownHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPErrorProcessor", "urllib2", "urllib.request"), + MovedAttribute("urlretrieve", "urllib", "urllib.request"), + MovedAttribute("urlcleanup", "urllib", "urllib.request"), + MovedAttribute("URLopener", "urllib", "urllib.request"), + MovedAttribute("FancyURLopener", "urllib", "urllib.request"), + MovedAttribute("proxy_bypass", "urllib", "urllib.request"), +] +for attr in _urllib_request_moved_attributes: + setattr(Module_six_moves_urllib_request, attr.name, attr) +del attr +Module_six_moves_urllib_request._moved_attributes = _urllib_request_moved_attributes +_importer._add_module( + Module_six_moves_urllib_request(__name__ + ".moves.urllib.request"), + "moves.urllib_request", + "moves.urllib.request", +) + + +class Module_six_moves_urllib_response(_LazyModule): + """Lazy loading of moved objects in six.moves.urllib_response""" + + +_urllib_response_moved_attributes = [ + MovedAttribute("addbase", "urllib", "urllib.response"), + MovedAttribute("addclosehook", "urllib", "urllib.response"), + MovedAttribute("addinfo", "urllib", "urllib.response"), + MovedAttribute("addinfourl", "urllib", "urllib.response"), +] +for attr in _urllib_response_moved_attributes: + setattr(Module_six_moves_urllib_response, attr.name, attr) +del attr +Module_six_moves_urllib_response._moved_attributes = _urllib_response_moved_attributes +_importer._add_module( + Module_six_moves_urllib_response(__name__ + ".moves.urllib.response"), + "moves.urllib_response", + "moves.urllib.response", +) + + +class Module_six_moves_urllib_robotparser(_LazyModule): + """Lazy loading of moved objects in six.moves.urllib_robotparser""" + + +_urllib_robotparser_moved_attributes = [ + MovedAttribute("RobotFileParser", "robotparser", "urllib.robotparser") +] +for attr in _urllib_robotparser_moved_attributes: + setattr(Module_six_moves_urllib_robotparser, attr.name, attr) +del attr +Module_six_moves_urllib_robotparser._moved_attributes = _urllib_robotparser_moved_attributes +_importer._add_module( + Module_six_moves_urllib_robotparser( + __name__ + ".moves.urllib.robotparser" + ), + "moves.urllib_robotparser", + "moves.urllib.robotparser", +) + + +class Module_six_moves_urllib(types.ModuleType): + """Create a six.moves.urllib namespace that resembles the Python 3 namespace""" + __path__ = [] # mark as package + parse = _importer._get_module("moves.urllib_parse") + error = _importer._get_module("moves.urllib_error") + request = 
_importer._get_module("moves.urllib_request") + response = _importer._get_module("moves.urllib_response") + robotparser = _importer._get_module("moves.urllib_robotparser") + + def __dir__(self): + return ['parse', 'error', 'request', 'response', 'robotparser'] + + +_importer._add_module( + Module_six_moves_urllib(__name__ + ".moves.urllib"), "moves.urllib" +) + + +def add_move(move): + """Add an item to six.moves.""" + setattr(_MovedItems, move.name, move) + + +def remove_move(name): + """Remove item from six.moves.""" + try: + delattr(_MovedItems, name) + except AttributeError: + try: + del moves.__dict__[name] + except KeyError: + raise AttributeError("no such move, %r" % (name,)) + + +if PY3: + _meth_func = "__func__" + _meth_self = "__self__" + _func_closure = "__closure__" + _func_code = "__code__" + _func_defaults = "__defaults__" + _func_globals = "__globals__" +else: + _meth_func = "im_func" + _meth_self = "im_self" + _func_closure = "func_closure" + _func_code = "func_code" + _func_defaults = "func_defaults" + _func_globals = "func_globals" +try: + advance_iterator = next +except NameError: + + def advance_iterator(it): + return it.next() + + +next = advance_iterator +try: + callable = callable +except NameError: + + def callable(obj): + return any("__call__" in klass.__dict__ for klass in type(obj).__mro__) + + +if PY3: + + def get_unbound_function(unbound): + return unbound + + create_bound_method = types.MethodType + + def create_unbound_method(func, cls): + return func + + Iterator = object +else: + + def get_unbound_function(unbound): + return unbound.im_func + + def create_bound_method(func, obj): + return types.MethodType(func, obj, obj.__class__) + + def create_unbound_method(func, cls): + return types.MethodType(func, None, cls) + + class Iterator(object): + + def next(self): + return type(self).__next__(self) + + callable = callable +_add_doc( + get_unbound_function, + """Get the function out of a possibly unbound function""", +) +get_method_function = operator.attrgetter(_meth_func) +get_method_self = operator.attrgetter(_meth_self) +get_function_closure = operator.attrgetter(_func_closure) +get_function_code = operator.attrgetter(_func_code) +get_function_defaults = operator.attrgetter(_func_defaults) +get_function_globals = operator.attrgetter(_func_globals) +if PY3: + + def iterkeys(d, **kw): + return iter(d.keys(**kw)) + + def itervalues(d, **kw): + return iter(d.values(**kw)) + + def iteritems(d, **kw): + return iter(d.items(**kw)) + + def iterlists(d, **kw): + return iter(d.lists(**kw)) + + viewkeys = operator.methodcaller("keys") + viewvalues = operator.methodcaller("values") + viewitems = operator.methodcaller("items") +else: + + def iterkeys(d, **kw): + return d.iterkeys(**kw) + + def itervalues(d, **kw): + return d.itervalues(**kw) + + def iteritems(d, **kw): + return d.iteritems(**kw) + + def iterlists(d, **kw): + return d.iterlists(**kw) + + viewkeys = operator.methodcaller("viewkeys") + viewvalues = operator.methodcaller("viewvalues") + viewitems = operator.methodcaller("viewitems") +_add_doc(iterkeys, "Return an iterator over the keys of a dictionary.") +_add_doc(itervalues, "Return an iterator over the values of a dictionary.") +_add_doc( + iteritems, + "Return an iterator over the (key, value) pairs of a dictionary.", +) +_add_doc( + iterlists, + "Return an iterator over the (key, [values]) pairs of a dictionary.", +) +if PY3: + + def b(s): + return s.encode("latin-1") + + def u(s): + return s + + unichr = chr + import struct + + int2byte = 
struct.Struct(">B").pack + del struct + byte2int = operator.itemgetter(0) + indexbytes = operator.getitem + iterbytes = iter + import io + + StringIO = io.StringIO + BytesIO = io.BytesIO + _assertCountEqual = "assertCountEqual" + if sys.version_info[1] <= 1: + _assertRaisesRegex = "assertRaisesRegexp" + _assertRegex = "assertRegexpMatches" + else: + _assertRaisesRegex = "assertRaisesRegex" + _assertRegex = "assertRegex" +else: + + def b(s): + return s + + + # Workaround for standalone backslash + def u(s): + return unicode(s.replace(r'\\', r'\\\\'), "unicode_escape") + + unichr = unichr + int2byte = chr + + def byte2int(bs): + return ord(bs[0]) + + def indexbytes(buf, i): + return ord(buf[i]) + + iterbytes = functools.partial(itertools.imap, ord) + import StringIO + + StringIO = BytesIO = StringIO.StringIO + _assertCountEqual = "assertItemsEqual" + _assertRaisesRegex = "assertRaisesRegexp" + _assertRegex = "assertRegexpMatches" +_add_doc(b, """Byte literal""") +_add_doc(u, """Text literal""") + + +def assertCountEqual(self, *args, **kwargs): + return getattr(self, _assertCountEqual)(*args, **kwargs) + + +def assertRaisesRegex(self, *args, **kwargs): + return getattr(self, _assertRaisesRegex)(*args, **kwargs) + + +def assertRegex(self, *args, **kwargs): + return getattr(self, _assertRegex)(*args, **kwargs) + + +if PY3: + exec_ = getattr(moves.builtins, "exec") + + def reraise(tp, value, tb=None): + if value is None: + value = tp() + if value.__traceback__ is not tb: + raise value.with_traceback(tb) + + raise value + + +else: + + def exec_(_code_, _globs_=None, _locs_=None): + """Execute code in a namespace.""" + if _globs_ is None: + frame = sys._getframe(1) + _globs_ = frame.f_globals + if _locs_ is None: + _locs_ = frame.f_locals + del frame + elif _locs_ is None: + _locs_ = _globs_ + exec ("""exec _code_ in _globs_, _locs_""") + + exec_( + """def reraise(tp, value, tb=None): + raise tp, value, tb +""" + ) +if sys.version_info[:2] == (3, 2): + exec_( + """def raise_from(value, from_value): + if from_value is None: + raise value + raise value from from_value +""" + ) +elif sys.version_info[:2] > (3, 2): + exec_( + """def raise_from(value, from_value): + raise value from from_value +""" + ) +else: + + def raise_from(value, from_value): + raise value + + +print_ = getattr(moves.builtins, "print", None) +if print_ is None: + + def print_(*args, **kwargs): + """The new-style print function for Python 2.4 and 2.5.""" + fp = kwargs.pop("file", sys.stdout) + if fp is None: + return + + def write(data): + if not isinstance(data, basestring): + data = str(data) + # If the file has an encoding, encode unicode with it. 
+ if ( + isinstance(fp, file) and + isinstance(data, unicode) and + fp.encoding is not None + ): + errors = getattr(fp, "errors", None) + if errors is None: + errors = "strict" + data = data.encode(fp.encoding, errors) + fp.write(data) + + want_unicode = False + sep = kwargs.pop("sep", None) + if sep is not None: + if isinstance(sep, unicode): + want_unicode = True + elif not isinstance(sep, str): + raise TypeError("sep must be None or a string") + + end = kwargs.pop("end", None) + if end is not None: + if isinstance(end, unicode): + want_unicode = True + elif not isinstance(end, str): + raise TypeError("end must be None or a string") + + if kwargs: + raise TypeError("invalid keyword arguments to print()") + + if not want_unicode: + for arg in args: + if isinstance(arg, unicode): + want_unicode = True + break + + if want_unicode: + newline = unicode("\n") + space = unicode(" ") + else: + newline = "\n" + space = " " + if sep is None: + sep = space + if end is None: + end = newline + for i, arg in enumerate(args): + if i: + write(sep) + write(arg) + write(end) + + +if sys.version_info[:2] < (3, 3): + _print = print_ + + def print_(*args, **kwargs): + fp = kwargs.get("file", sys.stdout) + flush = kwargs.pop("flush", False) + _print(*args, **kwargs) + if flush and fp is not None: + fp.flush() + + +_add_doc(reraise, """Reraise an exception.""") +if sys.version_info[0:2] < (3, 4): + + def wraps( + wrapped, + assigned=functools.WRAPPER_ASSIGNMENTS, + updated=functools.WRAPPER_UPDATES, + ): + + def wrapper(f): + f = functools.wraps(wrapped, assigned, updated)(f) + f.__wrapped__ = wrapped + return f + + return wrapper + + +else: + wraps = functools.wraps + + +def with_metaclass(meta, *bases): + """Create a base class with a metaclass.""" + + # This requires a bit of explanation: the basic idea is to make a dummy + # metaclass for one level of class instantiation that replaces itself with + # the actual metaclass. + class metaclass(meta): + + def __new__(cls, name, this_bases, d): + return meta(name, bases, d) + + return type.__new__(metaclass, 'temporary_class', (), {}) + + +def add_metaclass(metaclass): + """Class decorator for creating a class with a metaclass.""" + + def wrapper(cls): + orig_vars = cls.__dict__.copy() + slots = orig_vars.get('__slots__') + if slots is not None: + if isinstance(slots, str): + slots = [slots] + for slots_var in slots: + orig_vars.pop(slots_var) + orig_vars.pop('__dict__', None) + orig_vars.pop('__weakref__', None) + return metaclass(cls.__name__, cls.__bases__, orig_vars) + + return wrapper + + +def python_2_unicode_compatible(klass): + """ + A decorator that defines __unicode__ and __str__ methods under Python 2. + Under Python 3 it does nothing. + + To support Python 2 and 3 with a single code base, define a __str__ method + returning text and apply this decorator to the class. + """ + if PY2: + if '__str__' not in klass.__dict__: + raise ValueError( + "@python_2_unicode_compatible cannot be applied " + "to %s because it doesn't define __str__()." % klass.__name__ + ) + + klass.__unicode__ = klass.__str__ + klass.__str__ = lambda self: self.__unicode__().encode('utf-8') + return klass + + +# Complete the moves implementation. +# This code is at the end of this module to speed up module loading. +# Turn this module into a package. 
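+# Setting __path__ makes this module a package, so ``import six.moves.xyz``
+# is resolved by the _SixMetaPathImporter appended to sys.meta_path below.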
+__path__ = [] # required for PEP 302 and PEP 451 +__package__ = __name__ # see PEP 366 @ReservedAssignment +if globals().get("__spec__") is not None: + __spec__.submodule_search_locations = [] # PEP 451 @UndefinedVariable +# Remove other six meta path importers, since they cause problems. This can +# happen if six is removed from sys.modules and then reloaded. (Setuptools does +# this for some reason.) +if sys.meta_path: + for i, importer in enumerate(sys.meta_path): + # Here's some real nastiness: Another "instance" of the six module might + # be floating around. Therefore, we can't use isinstance() to check for + # the six meta path importer, since the other six instance will have + # inserted an importer with different class. + if ( + type(importer).__name__ == "_SixMetaPathImporter" and + importer.name == __name__ + ): + del sys.meta_path[i] + break + + del i, importer +# Finally, add the importer to the meta path import hook. +sys.meta_path.append(_importer) diff --git a/requests3/core/http_manager/packages/ssl_match_hostname/__init__.py b/requests3/core/http_manager/packages/ssl_match_hostname/__init__.py new file mode 100644 index 00000000..612b8a0b --- /dev/null +++ b/requests3/core/http_manager/packages/ssl_match_hostname/__init__.py @@ -0,0 +1,18 @@ +import sys + +try: + # Our match_hostname function is the same as 3.5's, so we only want to + # import the match_hostname function if it's at least that good. + if sys.version_info < (3, 5): + raise ImportError("Fallback to vendored code") + + from ssl import CertificateError, match_hostname +except ImportError: + try: + # Backport of the function from a pypi module + from backports.ssl_match_hostname import CertificateError, match_hostname + except ImportError: + # Our vendored copy + from ._implementation import CertificateError, match_hostname +# Not needed, but documenting what we provide. +__all__ = ('CertificateError', 'match_hostname') diff --git a/requests3/core/http_manager/packages/ssl_match_hostname/_implementation.py b/requests3/core/http_manager/packages/ssl_match_hostname/_implementation.py new file mode 100644 index 00000000..925bad60 --- /dev/null +++ b/requests3/core/http_manager/packages/ssl_match_hostname/_implementation.py @@ -0,0 +1,165 @@ +"""The match_hostname() function from Python 3.3.3, essential when using SSL.""" +# Note: This file is under the PSF license as the code comes from the python +# stdlib. http://docs.python.org/3/license.html +import re +import sys + +# ipaddress has been backported to 2.6+ in pypi. If it is installed on the +# system, use it to handle IPAddress ServerAltnames (this was added in +# python-3.5) otherwise only do DNS matching. This allows +# backports.ssl_match_hostname to continue to be used all the way back to +# python-2.4. +try: + import ipaddress +except ImportError: + ipaddress = None +__version__ = '3.5.0.1' + + +class CertificateError(ValueError): + pass + + +def _dnsname_match(dn, hostname, max_wildcards=1): + """Matching according to RFC 6125, section 6.4.3 + + http://tools.ietf.org/html/rfc6125#section-6.4.3 + """ + pats = [] + if not dn: + return False + + # Ported from python3-syntax: + # leftmost, *remainder = dn.split(r'.') + parts = dn.split(r'.') + leftmost = parts[0] + remainder = parts[1:] + wildcards = leftmost.count('*') + if wildcards > max_wildcards: + # Issue #17980: avoid denials of service by refusing more + # than one wildcard per fragment. A survey of established + # policy among SSL implementations showed it to be a + # reasonable choice. 
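+        # (For example '**.example.com', with two wildcards in its left-most
+        # label, fails this check under the default max_wildcards=1.)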
+ raise CertificateError( + "too many wildcards in certificate DNS name: " + repr(dn) + ) + + # speed up common case w/o wildcards + if not wildcards: + return dn.lower() == hostname.lower() + + # RFC 6125, section 6.4.3, subitem 1. + # The client SHOULD NOT attempt to match a presented identifier in which + # the wildcard character comprises a label other than the left-most label. + if leftmost == '*': + # When '*' is a fragment by itself, it matches a non-empty dotless + # fragment. + pats.append('[^.]+') + elif leftmost.startswith('xn--') or hostname.startswith('xn--'): + # RFC 6125, section 6.4.3, subitem 3. + # The client SHOULD NOT attempt to match a presented identifier + # where the wildcard character is embedded within an A-label or + # U-label of an internationalized domain name. + pats.append(re.escape(leftmost)) + else: + # Otherwise, '*' matches any dotless string, e.g. www* + pats.append(re.escape(leftmost).replace(r'\*', '[^.]*')) + # add the remaining fragments, ignore any wildcards + for frag in remainder: + pats.append(re.escape(frag)) + pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE) + return pat.match(hostname) + + +def _to_unicode(obj): + if isinstance(obj, str) and sys.version_info < (3,): + obj = unicode(obj, encoding='ascii', errors='strict') + return obj + + +def _ipaddress_match(ipname, host_ip): + """Exact matching of IP addresses. + + RFC 6125 explicitly doesn't define an algorithm for this + (section 1.7.2 - "Out of Scope"). + """ + # OpenSSL may add a trailing newline to a subjectAltName's IP address + # Divergence from upstream: ipaddress can't handle byte str + ip = ipaddress.ip_address(_to_unicode(ipname).rstrip()) + return ip == host_ip + + +def match_hostname(cert, hostname): + """Verify that *cert* (in decoded format as returned by + SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125 + rules are followed, but IP addresses are not accepted for *hostname*. + + CertificateError is raised on failure. On success, the function + returns nothing. + """ + if not cert: + raise ValueError( + "empty or no certificate, match_hostname needs a " + "SSL socket or SSL context with either " + "CERT_OPTIONAL or CERT_REQUIRED" + ) + + try: + # Divergence from upstream: ipaddress can't handle byte str + host_ip = ipaddress.ip_address(_to_unicode(hostname)) + except ValueError: + # Not an IP address (common case) + host_ip = None + except UnicodeError: + # Divergence from upstream: Have to deal with ipaddress not taking + # byte strings. addresses should be all ascii, so we consider it not + # an ipaddress in this case + host_ip = None + except AttributeError: + # Divergence from upstream: Make ipaddress library optional + if ipaddress is None: + host_ip = None + else: + raise + + dnsnames = [] + san = cert.get('subjectAltName', ()) + for key, value in san: + if key == 'DNS': + if host_ip is None and _dnsname_match(value, hostname): + return + + dnsnames.append(value) + elif key == 'IP Address': + if host_ip is not None and _ipaddress_match(value, host_ip): + return + + dnsnames.append(value) + if not dnsnames: + # The subject is only checked when there is no dNSName entry + # in subjectAltName + for sub in cert.get('subject', ()): + for key, value in sub: + # XXX according to RFC 2818, the most specific Common Name + # must be used. 
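+            # This code instead tries every commonName it encounters and
+            # records each one for the error message below.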
+ if key == 'commonName': + if _dnsname_match(value, hostname): + return + + dnsnames.append(value) + if len(dnsnames) > 1: + raise CertificateError( + "hostname %r " + "doesn't match either of %s" % + (hostname, ', '.join(map(repr, dnsnames))) + ) + + elif len(dnsnames) == 1: + raise CertificateError( + "hostname %r " "doesn't match %r" % (hostname, dnsnames[0]) + ) + + else: + raise CertificateError( + "no appropriate commonName or " "subjectAltName fields were found" + ) diff --git a/requests3/core/http_manager/poolmanager.py b/requests3/core/http_manager/poolmanager.py new file mode 100644 index 00000000..62bf7dd8 --- /dev/null +++ b/requests3/core/http_manager/poolmanager.py @@ -0,0 +1,3 @@ +from ._sync.poolmanager import PoolManager, ProxyManager, proxy_from_url + +__all__ = ['PoolManager', 'ProxyManager', 'proxy_from_url'] diff --git a/requests3/core/http_manager/request.py b/requests3/core/http_manager/request.py new file mode 100644 index 00000000..bc2e0cbb --- /dev/null +++ b/requests3/core/http_manager/request.py @@ -0,0 +1,163 @@ +from __future__ import absolute_import + +from .filepost import encode_multipart_formdata +from .packages import six +from .packages.six.moves.urllib.parse import urlencode + +__all__ = ['RequestMethods'] + + +class RequestMethods(object): + """ + Convenience mixin for classes who implement a :meth:`urlopen` method, such + as :class:`~urllib3.connectionpool.HTTPConnectionPool` and + :class:`~urllib3.poolmanager.PoolManager`. + + Provides behavior for making common types of HTTP request methods and + decides which type of request field encoding to use. + + Specifically, + + :meth:`.request_encode_url` is for sending requests whose fields are + encoded in the URL (such as GET, HEAD, DELETE). + + :meth:`.request_encode_body` is for sending requests whose fields are + encoded in the *body* of the request using multipart or www-form-urlencoded + (such as for POST, PUT, PATCH). + + :meth:`.request` is for making any kind of request, it will look up the + appropriate encoding format and use one of the above two methods to make + the request. + + Initializer parameters: + + :param headers: + Headers to include with all requests, unless other headers are given + explicitly. + """ + _encode_url_methods = set(['DELETE', 'GET', 'HEAD', 'OPTIONS']) + + def __init__(self, headers=None): + self.headers = headers or {} + + def urlopen( + self, + method, + url, + body=None, + headers=None, + encode_multipart=True, + multipart_boundary=None, + **kw + ): # Abstract + raise NotImplementedError( + "Classes extending RequestMethods must implement " + "their own ``urlopen`` method." + ) + + def request(self, method, url, fields=None, headers=None, **urlopen_kw): + """ + Make a request using :meth:`urlopen` with the appropriate encoding of + ``fields`` based on the ``method`` used. + + This is a convenience method that requires the least amount of manual + effort. It can be used in most situations, while still having the + option to drop down to more specific methods when necessary, such as + :meth:`request_encode_url`, :meth:`request_encode_body`, + or even the lowest level :meth:`urlopen`. 
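+
+        Example (a sketch; assumes ``http`` is an instance of a subclass
+        such as :class:`~urllib3.poolmanager.PoolManager`)::
+
+            r = http.request('GET', 'http://example.com/', fields={'q': 'x'})
+            r = http.request('POST', 'http://example.com/', fields={'q': 'x'})
+
+        The first call encodes ``fields`` into the query string, the second
+        into a multipart request body.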
+ """ + method = method.upper() + if method in self._encode_url_methods: + return self.request_encode_url( + method, url, fields=fields, headers=headers, **urlopen_kw + ) + + else: + return self.request_encode_body( + method, url, fields=fields, headers=headers, **urlopen_kw + ) + + def request_encode_url( + self, method, url, fields=None, headers=None, **urlopen_kw + ): + """ + Make a request using :meth:`urlopen` with the ``fields`` encoded in + the url. This is useful for request methods like GET, HEAD, DELETE, etc. + """ + if headers is None: + headers = self.headers + extra_kw = {'headers': headers} + extra_kw.update(urlopen_kw) + if fields: + url += '?' + urlencode(fields) + return self.urlopen(method, url, **extra_kw) + + def request_encode_body( + self, + method, + url, + fields=None, + headers=None, + encode_multipart=True, + multipart_boundary=None, + **urlopen_kw + ): + """ + Make a request using :meth:`urlopen` with the ``fields`` encoded in + the body. This is useful for request methods like POST, PUT, PATCH, etc. + + When ``encode_multipart=True`` (default), then + :meth:`urllib3.filepost.encode_multipart_formdata` is used to encode + the payload with the appropriate content type. Otherwise + :meth:`urllib.urlencode` is used with the + 'application/x-www-form-urlencoded' content type. + + Multipart encoding must be used when posting files, and it's reasonably + safe to use it in other times too. However, it may break request + signing, such as with OAuth. + + Supports an optional ``fields`` parameter of key/value strings AND + key/filetuple. A filetuple is a (filename, data, MIME type) tuple where + the MIME type is optional. For example:: + + fields = { + 'foo': 'bar', + 'fakefile': ('foofile.txt', 'contents of foofile'), + 'realfile': ('barfile.txt', open('realfile').read()), + 'typedfile': ('bazfile.bin', open('bazfile').read(), + 'image/jpeg'), + 'nonamefile': 'contents of nonamefile field', + } + + When uploading a file, providing a filename (the first parameter of the + tuple) is optional but recommended to best mimick behavior of browsers. + + Note that if ``headers`` are supplied, the 'Content-Type' header will + be overwritten because it depends on the dynamic random boundary string + which is used to compose the body of the request. The random boundary + string can be explicitly set with the ``multipart_boundary`` parameter. + """ + if headers is None: + headers = self.headers + extra_kw = {'headers': {}} + if fields: + if 'body' in urlopen_kw: + raise TypeError( + "request got values for both 'fields' and 'body', can only specify one." 
+ ) + + if encode_multipart: + body, content_type = encode_multipart_formdata( + fields, boundary=multipart_boundary + ) + else: + body, content_type = urlencode( + fields + ), 'application/x-www-form-urlencoded' + if isinstance(body, six.text_type): + body = body.encode('utf-8') + extra_kw['body'] = body + extra_kw['headers'] = {'Content-Type': content_type} + extra_kw['headers'].update(headers) + extra_kw.update(urlopen_kw) + return self.urlopen(method, url, **extra_kw) diff --git a/requests3/core/http_manager/response.py b/requests3/core/http_manager/response.py new file mode 100644 index 00000000..1e95d13d --- /dev/null +++ b/requests3/core/http_manager/response.py @@ -0,0 +1,3 @@ +from ._sync.response import DeflateDecoder, GzipDecoder, HTTPResponse + +__all__ = ['DeflateDecoder', 'GzipDecoder', 'HTTPResponse'] diff --git a/requests3/core/http_manager/util/__init__.py b/requests3/core/http_manager/util/__init__.py new file mode 100644 index 00000000..9014131b --- /dev/null +++ b/requests3/core/http_manager/util/__init__.py @@ -0,0 +1,44 @@ +from __future__ import absolute_import + +# For backwards compatibility, provide imports that used to be here. +from .connection import is_connection_dropped +from .request import make_headers +from .response import is_fp_closed +from .ssl_ import ( + SSLContext, + HAS_SNI, + IS_PYOPENSSL, + IS_SECURETRANSPORT, + assert_fingerprint, + resolve_cert_reqs, + resolve_ssl_version, + ssl_wrap_socket, +) +from .timeout import (current_time, Timeout) + +from .retry import Retry +from .url import (get_host, parse_url, split_first, Url) +from .wait import (wait_for_read, wait_for_write) + +__all__ = ( + 'HAS_SNI', + 'IS_PYOPENSSL', + 'IS_SECURETRANSPORT', + 'SSLContext', + 'Retry', + 'Timeout', + 'Url', + 'assert_fingerprint', + 'current_time', + 'is_connection_dropped', + 'is_fp_closed', + 'get_host', + 'parse_url', + 'make_headers', + 'resolve_cert_reqs', + 'resolve_ssl_version', + 'split_first', + 'ssl_wrap_socket', + 'wait_for_read', + 'wait_for_write', +) diff --git a/requests3/core/http_manager/util/connection.py b/requests3/core/http_manager/util/connection.py new file mode 100644 index 00000000..89a1ca32 --- /dev/null +++ b/requests3/core/http_manager/util/connection.py @@ -0,0 +1,108 @@ +from __future__ import absolute_import +import socket + + +def is_connection_dropped(conn): # Platform-specific + """ + Returns True if the connection is dropped and should be closed. + """ + # TODO: Need to restore AppEngine behaviour here at some point. + return conn.is_dropped() + + + + +# This function is copied from socket.py in the Python 2.7 standard +# library test suite. Added to its signature is only `socket_options`. +# One additional modification is that we avoid binding to IPv6 servers +# discovered in DNS if the system doesn't have IPv6 functionality. +def create_connection( + address, + timeout=socket._GLOBAL_DEFAULT_TIMEOUT, + source_address=None, + socket_options=None, +): + """Connect to *address* and return the socket object. + + Convenience function. Connect to *address* (a 2-tuple ``(host, + port)``) and return the socket object. Passing the optional + *timeout* parameter will set the timeout on the socket instance + before attempting to connect. If no *timeout* is supplied, the + global default timeout setting returned by :func:`getdefaulttimeout` + is used. If *source_address* is set it must be a tuple of (host, port) + for the socket to bind as a source address before making the connection. 
+ An host of '' or port 0 tells the OS to use the default. + """ + host, port = address + if host.startswith('['): + host = host.strip('[]') + err = None + # Using the value from allowed_gai_family() in the context of getaddrinfo lets + # us select whether to work with IPv4 DNS records, IPv6 records, or both. + # The original create_connection function always returns all records. + family = allowed_gai_family() + for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM): + af, socktype, proto, canonname, sa = res + sock = None + try: + sock = socket.socket(af, socktype, proto) + # If provided, set socket level options before connecting. + _set_socket_options(sock, socket_options) + if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT: + sock.settimeout(timeout) + if source_address: + sock.bind(source_address) + sock.connect(sa) + return sock + + except socket.error as e: + err = e + if sock is not None: + sock.close() + sock = None + if err is not None: + raise err + + raise socket.error("getaddrinfo returns an empty list") + + +def _set_socket_options(sock, options): + if options is None: + return + + for opt in options: + sock.setsockopt(*opt) + + +def allowed_gai_family(): + """This function is designed to work in the context of + getaddrinfo, where family=socket.AF_UNSPEC is the default and + will perform a DNS search for both IPv6 and IPv4 records.""" + family = socket.AF_INET + if HAS_IPV6: + family = socket.AF_UNSPEC + return family + + +def _has_ipv6(host): + """ Returns True if the system can bind an IPv6 address. """ + sock = None + has_ipv6 = False + if socket.has_ipv6: + # has_ipv6 returns true if cPython was compiled with IPv6 support. + # It does not tell us if the system has IPv6 support enabled. To + # determine that we must bind to an IPv6 address. + # https://github.com/shazow/urllib3/pull/611 + # https://bugs.python.org/issue658327 + try: + sock = socket.socket(socket.AF_INET6) + sock.bind((host, 0)) + has_ipv6 = True + except Exception: + pass + if sock: + sock.close() + return has_ipv6 + + +HAS_IPV6 = _has_ipv6('::1') diff --git a/requests3/core/http_manager/util/request.py b/requests3/core/http_manager/util/request.py new file mode 100644 index 00000000..43102ff1 --- /dev/null +++ b/requests3/core/http_manager/util/request.py @@ -0,0 +1,129 @@ +from __future__ import absolute_import +from base64 import b64encode + +from ..packages.six import b, integer_types +from ..exceptions import UnrewindableBodyError + +ACCEPT_ENCODING = 'gzip,deflate' +_FAILEDTELL = object() + + +def make_headers( + keep_alive=None, + accept_encoding=None, + user_agent=None, + basic_auth=None, + proxy_basic_auth=None, + disable_cache=None, +): + """ + Shortcuts for generating request headers. + + :param keep_alive: + If ``True``, adds 'connection: keep-alive' header. + + :param accept_encoding: + Can be a boolean, list, or string. + ``True`` translates to 'gzip,deflate'. + List will get joined by comma. + String will be used as provided. + + :param user_agent: + String representing the user-agent you want, such as + "python-urllib3/0.6" + + :param basic_auth: + Colon-separated username:password string for 'authorization: basic ...' + auth header. + + :param proxy_basic_auth: + Colon-separated username:password string for 'proxy-authorization: basic ...' + auth header. + + :param disable_cache: + If ``True``, adds 'cache-control: no-cache' header. 
+ + Example:: + + >>> make_headers(keep_alive=True, user_agent="Batman/1.0") + {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'} + >>> make_headers(accept_encoding=True) + {'accept-encoding': 'gzip,deflate'} + """ + headers = {} + if accept_encoding: + if isinstance(accept_encoding, str): + pass + elif isinstance(accept_encoding, list): + accept_encoding = ','.join(accept_encoding) + else: + accept_encoding = ACCEPT_ENCODING + headers['accept-encoding'] = accept_encoding + if user_agent: + headers['user-agent'] = user_agent + if keep_alive: + headers['connection'] = 'keep-alive' + if basic_auth: + headers['authorization'] = 'Basic ' + b64encode(b(basic_auth)).decode( + 'utf-8' + ) + if proxy_basic_auth: + headers['proxy-authorization'] = 'Basic ' + b64encode( + b(proxy_basic_auth) + ).decode( + 'utf-8' + ) + if disable_cache: + headers['cache-control'] = 'no-cache' + return headers + + +def set_file_position(body, pos): + """ + If a position is provided, move file to that point. + Otherwise, we'll attempt to record a position for future use. + """ + if pos is not None: + rewind_body(body, pos) + elif getattr(body, 'tell', None) is not None: + try: + pos = body.tell() + except (IOError, OSError): + # This differentiates from None, allowing us to catch + # a failed `tell()` later when trying to rewind the body. + pos = _FAILEDTELL + return pos + + +def rewind_body(body, body_pos): + """ + Attempt to rewind body to a certain position. + Primarily used for request redirects and retries. + + :param body: + File-like object that supports seek. + + :param int pos: + Position to seek to in file. + """ + body_seek = getattr(body, 'seek', None) + if body_seek is not None and isinstance(body_pos, integer_types): + try: + body_seek(body_pos) + except (IOError, OSError): + raise UnrewindableBodyError( + "An error occurred when rewinding request " + "body for redirect/retry." + ) + + elif body_pos is _FAILEDTELL: + raise UnrewindableBodyError( + "Unable to record file position for rewinding " + "request body during a redirect/retry." + ) + + else: + raise ValueError( + "body_pos must be of type integer, " + "instead it was %s." % type(body_pos) + ) diff --git a/requests3/core/http_manager/util/response.py b/requests3/core/http_manager/util/response.py new file mode 100644 index 00000000..4f31a85f --- /dev/null +++ b/requests3/core/http_manager/util/response.py @@ -0,0 +1,30 @@ +from __future__ import absolute_import + + +def is_fp_closed(obj): + """ + Checks whether a given file-like object is closed. + + :param obj: + The file-like object to check. + """ + try: + # Check for our own base response class. + return obj.complete + + except AttributeError: + pass + try: + # Check via the official file-like-object way. + return obj.closed + + except AttributeError: + pass + try: + # Check if the object is a container for another file-like object that + # gets released on exhaustion (e.g. HTTPResponse). 
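+        # httplib's HTTPResponse, for example, sets ``fp`` to None once the
+        # body has been read to completion.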
+ return obj.fp is None + + except AttributeError: + pass + raise ValueError("Unable to determine whether fp is closed.") diff --git a/requests3/core/http_manager/util/retry.py b/requests3/core/http_manager/util/retry.py new file mode 100644 index 00000000..01157f95 --- /dev/null +++ b/requests3/core/http_manager/util/retry.py @@ -0,0 +1,432 @@ +from __future__ import absolute_import +import time +import logging +from collections import namedtuple +from itertools import takewhile +import email +import re + +from ..exceptions import ( + ConnectTimeoutError, + MaxRetryError, + ProtocolError, + ReadTimeoutError, + ResponseError, + InvalidHeader, +) +from ..packages import six + +log = logging.getLogger(__name__) +# Data structure for representing the metadata of requests that result in a retry. +RequestHistory = namedtuple( + 'RequestHistory', ["method", "url", "error", "status", "redirect_location"] +) + + +class Retry(object): + """ Retry configuration. + + Each retry attempt will create a new Retry object with updated values, so + they can be safely reused. + + Retries can be defined as a default for a pool:: + + retries = Retry(connect=5, read=2, redirect=5) + http = PoolManager(retries=retries) + response = http.request('GET', 'http://example.com/') + + Or per-request (which overrides the default for the pool):: + + response = http.request('GET', 'http://example.com/', retries=Retry(10)) + + Retries can be disabled by passing ``False``:: + + response = http.request('GET', 'http://example.com/', retries=False) + + Errors will be wrapped in :class:`~urllib3.exceptions.MaxRetryError` unless + retries are disabled, in which case the causing exception will be raised. + + :param int total: + Total number of retries to allow. Takes precedence over other counts. + + Set to ``None`` to remove this constraint and fall back on other + counts. It's a good idea to set this to some sensibly-high value to + account for unexpected edge cases and avoid infinite retry loops. + + Set to ``0`` to fail on the first retry. + + Set to ``False`` to disable and imply ``raise_on_redirect=False``. + + :param int connect: + How many connection-related errors to retry on. + + These are errors raised before the request is sent to the remote server, + which we assume has not triggered the server to process the request. + + Set to ``0`` to fail on the first retry of this type. + + :param int read: + How many times to retry on read errors. + + These errors are raised after the request was sent to the server, so the + request may have side-effects. + + Set to ``0`` to fail on the first retry of this type. + + :param int redirect: + How many redirects to perform. Limit this to avoid infinite redirect + loops. + + A redirect is a HTTP response with a status code 301, 302, 303, 307 or + 308. + + Set to ``0`` to fail on the first retry of this type. + + Set to ``False`` to disable and imply ``raise_on_redirect=False``. + + :param int status: + How many times to retry on bad status codes. + + These are retries made on responses, where status code matches + ``status_forcelist``. + + Set to ``0`` to fail on the first retry of this type. + + :param iterable method_whitelist: + Set of uppercased HTTP method verbs that we should retry on. + + By default, we only retry on methods which are considered to be + idempotent (multiple requests with the same parameters end with the + same state). See :attr:`Retry.DEFAULT_METHOD_WHITELIST`. + + Set to a ``False`` value to retry on any verb. 
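+
+        For example, ``Retry(total=3, method_whitelist=False)`` will also
+        retry ``POST`` requests; only use this when repeating the request
+        is known to be safe.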
+ + :param iterable status_forcelist: + A set of integer HTTP status codes that we should force a retry on. + A retry is initiated if the request method is in ``method_whitelist`` + and the response status code is in ``status_forcelist``. + + By default, this is disabled with ``None``. + + :param float backoff_factor: + A backoff factor to apply between attempts after the second try + (most errors are resolved immediately by a second try without a + delay). urllib3 will sleep for:: + + {backoff factor} * (2 ^ ({number of total retries} - 1)) + + seconds. If the backoff_factor is 0.1, then :func:`.sleep` will sleep + for [0.0s, 0.2s, 0.4s, ...] between retries. It will never be longer + than :attr:`Retry.BACKOFF_MAX`. + + By default, backoff is disabled (set to 0). + + :param bool raise_on_redirect: Whether, if the number of redirects is + exhausted, to raise a MaxRetryError, or to return a response with a + response code in the 3xx range. + + :param bool raise_on_status: Similar meaning to ``raise_on_redirect``: + whether we should raise an exception, or return a response, + if status falls in ``status_forcelist`` range and retries have + been exhausted. + + :param tuple history: The history of the request encountered during + each call to :meth:`~Retry.increment`. The list is in the order + the requests occurred. Each list item is of class :class:`RequestHistory`. + + :param bool respect_retry_after_header: + Whether to respect Retry-After header on status codes defined as + :attr:`Retry.RETRY_AFTER_STATUS_CODES` or not. + + """ + DEFAULT_METHOD_WHITELIST = frozenset( + ['HEAD', 'GET', 'PUT', 'DELETE', 'OPTIONS', 'TRACE'] + ) + RETRY_AFTER_STATUS_CODES = frozenset([413, 429, 503]) + # : Maximum backoff time. + BACKOFF_MAX = 120 + + def __init__( + self, + total=10, + connect=None, + read=None, + redirect=None, + status=None, + method_whitelist=DEFAULT_METHOD_WHITELIST, + status_forcelist=None, + backoff_factor=0, + raise_on_redirect=True, + raise_on_status=True, + history=None, + respect_retry_after_header=True, + ): + self.total = total + self.connect = connect + self.read = read + self.status = status + if redirect is False or total is False: + redirect = 0 + raise_on_redirect = False + self.redirect = redirect + self.status_forcelist = status_forcelist or set() + self.method_whitelist = method_whitelist + self.backoff_factor = backoff_factor + self.raise_on_redirect = raise_on_redirect + self.raise_on_status = raise_on_status + self.history = history or tuple() + self.respect_retry_after_header = respect_retry_after_header + + def new(self, **kw): + params = dict( + total=self.total, + connect=self.connect, + read=self.read, + redirect=self.redirect, + status=self.status, + method_whitelist=self.method_whitelist, + status_forcelist=self.status_forcelist, + backoff_factor=self.backoff_factor, + raise_on_redirect=self.raise_on_redirect, + raise_on_status=self.raise_on_status, + history=self.history, + ) + params.update(kw) + return type(self)(**params) + + @classmethod + def from_int(cls, retries, redirect=True, default=None): + """ Backwards-compatibility for the old retries format.""" + if retries is None: + retries = default if default is not None else cls.DEFAULT + if isinstance(retries, Retry): + return retries + + redirect = bool(redirect) and None + new_retries = cls(retries, redirect=redirect) + log.debug("Converted retries value: %r -> %r", retries, new_retries) + return new_retries + + def get_backoff_time(self): + """ Formula for computing the current backoff + + :rtype: float 
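+
+        Worked example: with ``backoff_factor=0.5`` and three consecutive
+        errors in the history, this returns ``0.5 * (2 ** 2) == 2.0``
+        seconds, capped at :attr:`BACKOFF_MAX`.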
+ """ + # We want to consider only the last consecutive errors sequence (Ignore redirects). + consecutive_errors_len = len( + list( + takewhile( + lambda x: x.redirect_location is None, + reversed(self.history), + ) + ) + ) + if consecutive_errors_len <= 1: + return 0 + + backoff_value = self.backoff_factor * ( + 2 ** (consecutive_errors_len - 1) + ) + return min(self.BACKOFF_MAX, backoff_value) + + def parse_retry_after(self, retry_after): + # Whitespace: https://tools.ietf.org/html/rfc7230#section-3.2.4 + if re.match(r"^\s*[0-9]+\s*$", retry_after): + seconds = int(retry_after) + else: + retry_date_tuple = email.utils.parsedate(retry_after) + if retry_date_tuple is None: + raise InvalidHeader( + "Invalid Retry-After header: %s" % retry_after + ) + + retry_date = time.mktime(retry_date_tuple) + seconds = retry_date - time.time() + if seconds < 0: + seconds = 0 + return seconds + + def get_retry_after(self, response): + """ Get the value of Retry-After in seconds. """ + retry_after = response.getheader("Retry-After") + if retry_after is None: + return None + + return self.parse_retry_after(retry_after) + + def sleep_for_retry(self, response=None): + retry_after = self.get_retry_after(response) + if retry_after: + time.sleep(retry_after) + return True + + return False + + def _sleep_backoff(self): + backoff = self.get_backoff_time() + if backoff <= 0: + return + + time.sleep(backoff) + + def sleep(self, response=None): + """ Sleep between retry attempts. + + This method will respect a server's ``Retry-After`` response header + and sleep the duration of the time requested. If that is not present, it + will use an exponential backoff. By default, the backoff factor is 0 and + this method will return immediately. + """ + if response: + slept = self.sleep_for_retry(response) + if slept: + return + + self._sleep_backoff() + + def _is_connection_error(self, err): + """ Errors when we're fairly sure that the server did not receive the + request, so it should be safe to retry. + """ + return isinstance(err, ConnectTimeoutError) + + def _is_read_error(self, err): + """ Errors that occur after the request has been started, so we should + assume that the server began processing it. + """ + return isinstance(err, (ReadTimeoutError, ProtocolError)) + + def _is_method_retryable(self, method): + """ Checks if a given HTTP method should be retried upon, depending if + it is included on the method whitelist. + """ + if self.method_whitelist and method.upper( + ) not in self.method_whitelist: + return False + + return True + + def is_retry(self, method, status_code, has_retry_after=False): + """ Is this method/status code retryable? (Based on whitelists and control + variables such as the number of total retries to allow, whether to + respect the Retry-After header, whether this header is present, and + whether the returned status code is on the list of status codes to + be retried upon on the presence of the aforementioned header) + """ + if not self._is_method_retryable(method): + return False + + if self.status_forcelist and status_code in self.status_forcelist: + return True + + return ( + self.total and + self.respect_retry_after_header and + has_retry_after and + (status_code in self.RETRY_AFTER_STATUS_CODES) + ) + + def is_exhausted(self): + """ Are we out of retries? 
""" + retry_counts = ( + self.total, self.connect, self.read, self.redirect, self.status + ) + retry_counts = list(filter(None, retry_counts)) + if not retry_counts: + return False + + return min(retry_counts) < 0 + + def increment( + self, + method=None, + url=None, + response=None, + error=None, + _pool=None, + _stacktrace=None, + ): + """ Return a new Retry object with incremented retry counters. + + :param response: A response object, or None, if the server did not + return a response. + :type response: :class:`~urllib3.response.HTTPResponse` + :param Exception error: An error encountered during the request, or + None if the response was received successfully. + + :return: A new ``Retry`` object. + """ + if self.total is False and error: + # Disabled, indicate to re-raise the error. + raise six.reraise(type(error), error, _stacktrace) + + total = self.total + if total is not None: + total -= 1 + connect = self.connect + read = self.read + redirect = self.redirect + status_count = self.status + cause = 'unknown' + status = None + redirect_location = None + if error and self._is_connection_error(error): + # Connect retry? + if connect is False: + raise six.reraise(type(error), error, _stacktrace) + + elif connect is not None: + connect -= 1 + elif error and self._is_read_error(error): + # Read retry? + if read is False or not self._is_method_retryable(method): + raise six.reraise(type(error), error, _stacktrace) + + elif read is not None: + read -= 1 + elif response and response.get_redirect_location(): + # Redirect retry? + if redirect is not None: + redirect -= 1 + cause = 'too many redirects' + redirect_location = response.get_redirect_location() + status = response.status + else: + # Incrementing because of a server error like a 500 in + # status_forcelist and a the given method is in the whitelist + cause = ResponseError.GENERIC_ERROR + if response and response.status: + if status_count is not None: + status_count -= 1 + cause = ResponseError.SPECIFIC_ERROR.format( + status_code=response.status + ) + status = response.status + history = self.history + ( + RequestHistory(method, url, error, status, redirect_location), + ) + new_retry = self.new( + total=total, + connect=connect, + read=read, + redirect=redirect, + status=status_count, + history=history, + ) + if new_retry.is_exhausted(): + raise MaxRetryError(_pool, url, error or ResponseError(cause)) + + log.debug("Incremented Retry for (url='%s'): %r", url, new_retry) + return new_retry + + def __repr__(self): + return ( + '{cls.__name__}(total={self.total}, connect={self.connect}, ' + 'read={self.read}, redirect={self.redirect}, status={self.status})' + ).format( + cls=type(self), self=self + ) + + +# For backwards compatibility (equivalent to pre-v1.9): +Retry.DEFAULT = Retry(3) diff --git a/requests3/core/http_manager/util/selectors.py b/requests3/core/http_manager/util/selectors.py new file mode 100644 index 00000000..505f8082 --- /dev/null +++ b/requests3/core/http_manager/util/selectors.py @@ -0,0 +1,604 @@ +# Backport of selectors.py from Python 3.5+ to support Python < 3.4 +# Also has the behavior specified in PEP 475 which is to retry syscalls +# in the case of an EINTR error. This module is required because selectors34 +# does not follow this behavior and instead returns that no dile descriptor +# events have occurred rather than retry the syscall. The decision to drop +# support for select.devpoll is made to maintain 100% test coverage. 
+import errno +import math +import select +import socket +import sys +import time +from collections import namedtuple +from ..packages.six import integer_types + +try: + from collections.abc import Mapping +except ImportError: + from collections import Mapping +try: + monotonic = time.monotonic +except (AttributeError, ImportError): # Python 3.3< + monotonic = time.time +EVENT_READ = (1 << 0) +EVENT_WRITE = (1 << 1) +HAS_SELECT = True # Variable that shows whether the platform has a selector. +_SYSCALL_SENTINEL = object() # Sentinel in case a system call returns None. +_DEFAULT_SELECTOR = None + + +class SelectorError(Exception): + + def __init__(self, errcode): + super(SelectorError, self).__init__() + self.errno = errcode + + def __repr__(self): + return "<SelectorError errno={0}>".format(self.errno) + + def __str__(self): + return self.__repr__() + + +def _fileobj_to_fd(fileobj): + """ Return a file descriptor from a file object. If + given an integer will simply return that integer back. """ + if isinstance(fileobj, integer_types): + fd = fileobj + else: + try: + fd = int(fileobj.fileno()) + except (AttributeError, TypeError, ValueError): + raise ValueError("Invalid file object: {0!r}".format(fileobj)) + + if fd < 0: + raise ValueError("Invalid file descriptor: {0}".format(fd)) + + return fd + + +# Determine which function to use to wrap system calls because Python 3.5+ +# already handles the case when system calls are interrupted. +if sys.version_info >= (3, 5): + + def _syscall_wrapper(func, _, *args, **kwargs): + """ This is the short-circuit version of the below logic + because in Python 3.5+ all system calls automatically restart + and recalculate their timeouts. """ + try: + return func(*args, **kwargs) + + except (OSError, IOError, select.error) as e: + errcode = None + if hasattr(e, "errno"): + errcode = e.errno + raise SelectorError(errcode) + + +else: + + def _syscall_wrapper(func, recalc_timeout, *args, **kwargs): + """ Wrapper function for syscalls that could fail due to EINTR. + All functions should be retried if there is time left in the timeout + in accordance with PEP 475. """ + timeout = kwargs.get("timeout", None) + if timeout is None: + expires = None + recalc_timeout = False + else: + timeout = float(timeout) + if timeout < 0.0: # Timeout less than 0 treated as no timeout. + expires = None + else: + expires = monotonic() + timeout + args = list(args) + if recalc_timeout and "timeout" not in kwargs: + raise ValueError( + "Timeout must be in args or kwargs to be recalculated" + ) + + result = _SYSCALL_SENTINEL + while result is _SYSCALL_SENTINEL: + try: + result = func(*args, **kwargs) + # OSError is thrown by select.select + # IOError is thrown by select.epoll.poll + # select.error is thrown by select.poll.poll + # Aren't we thankful for Python 3.x rework for exceptions? + except (OSError, IOError, select.error) as e: + # select.error wasn't a subclass of OSError in the past. + errcode = None + if hasattr(e, "errno"): + errcode = e.errno + elif hasattr(e, "args"): + errcode = e.args[0] + # Also test for the Windows equivalent of EINTR. 
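+                # (``errno.WSAEINTR`` is only defined on Windows builds of
+                # Python, hence the ``hasattr`` guard.)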
+                is_interrupt = (
+                    errcode == errno.EINTR or
+                    (hasattr(errno, "WSAEINTR") and
+                     errcode == errno.WSAEINTR)
+                )
+                if is_interrupt:
+                    if expires is not None:
+                        current_time = monotonic()
+                        if current_time > expires:
+                            raise OSError(errno.ETIMEDOUT,
+                                          'Connection timed out')
+
+                        if recalc_timeout:
+                            if "timeout" in kwargs:
+                                kwargs["timeout"] = expires - current_time
+                    continue
+
+                if errcode:
+                    raise SelectorError(errcode)
+
+                else:
+                    raise
+
+        return result
+
+
+SelectorKey = namedtuple('SelectorKey', ['fileobj', 'fd', 'events', 'data'])
+
+
+class _SelectorMapping(Mapping):
+    """ Mapping of file objects to selector keys """
+
+    def __init__(self, selector):
+        self._selector = selector
+
+    def __len__(self):
+        return len(self._selector._fd_to_key)
+
+    def __getitem__(self, fileobj):
+        try:
+            fd = self._selector._fileobj_lookup(fileobj)
+            return self._selector._fd_to_key[fd]
+
+        except KeyError:
+            raise KeyError("{0!r} is not registered.".format(fileobj))
+
+    def __iter__(self):
+        return iter(self._selector._fd_to_key)
+
+
+class BaseSelector(object):
+    """ Abstract Selector class
+
+    A selector supports registering file objects to be monitored
+    for specific I/O events.
+
+    A file object is a file descriptor or any object with a
+    `fileno()` method. An arbitrary object can be attached to the
+    file object which can be used for example to store context info,
+    a callback, etc.
+
+    A selector can use various implementations (select(), poll(), epoll(),
+    and kqueue()) depending on the platform. The 'DefaultSelector' class uses
+    the most efficient implementation for the current platform.
+    """
+
+    def __init__(self):
+        # Maps file descriptors to keys.
+        self._fd_to_key = {}
+        # Read-only mapping returned by get_map()
+        self._map = _SelectorMapping(self)
+
+    def _fileobj_lookup(self, fileobj):
+        """ Return a file descriptor from a file object.
+        This wraps _fileobj_to_fd() to do an exhaustive
+        search in case the object is invalid but we still
+        have it in our map. Used by unregister() so we can
+        unregister an object that was previously registered
+        even if it is closed. It is also used by _SelectorMapping
+        """
+        try:
+            return _fileobj_to_fd(fileobj)
+
+        except ValueError:
+            # Search through all our mapped keys.
+            for key in self._fd_to_key.values():
+                if key.fileobj is fileobj:
+                    return key.fd
+
+            # Raise ValueError after all.
+            raise
+
+    def register(self, fileobj, events, data=None):
+        """ Register a file object for a set of events to monitor. """
+        if (not events) or (events & ~(EVENT_READ | EVENT_WRITE)):
+            raise ValueError("Invalid events: {0!r}".format(events))
+
+        key = SelectorKey(fileobj, self._fileobj_lookup(fileobj), events, data)
+        if key.fd in self._fd_to_key:
+            raise KeyError(
+                "{0!r} (FD {1}) is already registered".format(fileobj, key.fd)
+            )
+
+        self._fd_to_key[key.fd] = key
+        return key
+
+    def unregister(self, fileobj):
+        """ Unregister a file object from being monitored. """
+        try:
+            key = self._fd_to_key.pop(self._fileobj_lookup(fileobj))
+        except KeyError:
+            raise KeyError("{0!r} is not registered".format(fileobj))
+
+        # Getting the fileno of a closed socket on Windows errors with EBADF.
+        except socket.error as e:  # Platform-specific: Windows.
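+            # EBADF here means the socket was already closed; fall through to
+            # the linear search below so the stale key is still removed.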
+ if e.errno != errno.EBADF: + raise + + else: + for key in self._fd_to_key.values(): + if key.fileobj is fileobj: + self._fd_to_key.pop(key.fd) + break + + else: + raise KeyError("{0!r} is not registered".format(fileobj)) + + return key + + def modify(self, fileobj, events, data=None): + """ Change a registered file object monitored events and data. """ + # NOTE: Some subclasses optimize this operation even further. + try: + key = self._fd_to_key[self._fileobj_lookup(fileobj)] + except KeyError: + raise KeyError("{0!r} is not registered".format(fileobj)) + + if events != key.events: + self.unregister(fileobj) + key = self.register(fileobj, events, data) + elif data != key.data: + # Use a shortcut to update the data. + key = key._replace(data=data) + self._fd_to_key[key.fd] = key + return key + + def select(self, timeout=None): + """ Perform the actual selection until some monitored file objects + are ready or the timeout expires. """ + raise NotImplementedError() + + def close(self): + """ Close the selector. This must be called to ensure that all + underlying resources are freed. """ + self._fd_to_key.clear() + self._map = None + + def get_key(self, fileobj): + """ Return the key associated with a registered file object. """ + mapping = self.get_map() + if mapping is None: + raise RuntimeError("Selector is closed") + + try: + return mapping[fileobj] + + except KeyError: + raise KeyError("{0!r} is not registered".format(fileobj)) + + def get_map(self): + """ Return a mapping of file objects to selector keys """ + return self._map + + def _key_from_fd(self, fd): + """ Return the key associated to a given file descriptor + Return None if it is not found. """ + try: + return self._fd_to_key[fd] + + except KeyError: + return None + + def __enter__(self): + return self + + def __exit__(self, *args): + self.close() + + +# Almost all platforms have select.select() +if hasattr(select, "select"): + + class SelectSelector(BaseSelector): + """ Select-based selector. """ + + def __init__(self): + super(SelectSelector, self).__init__() + self._readers = set() + self._writers = set() + + def register(self, fileobj, events, data=None): + key = super(SelectSelector, self).register(fileobj, events, data) + if events & EVENT_READ: + self._readers.add(key.fd) + if events & EVENT_WRITE: + self._writers.add(key.fd) + return key + + def unregister(self, fileobj): + key = super(SelectSelector, self).unregister(fileobj) + self._readers.discard(key.fd) + self._writers.discard(key.fd) + return key + + def _select(self, r, w, timeout=None): + """ Wrapper for select.select because timeout is a positional arg """ + return select.select(r, w, [], timeout) + + def select(self, timeout=None): + # Selecting on empty lists on Windows errors out. 
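+            # (POSIX select() accepts three empty sequences, but on Windows
+            # select.select([], [], []) raises OSError.)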
+ if not len(self._readers) and not len(self._writers): + return [] + + timeout = None if timeout is None else max(timeout, 0.0) + ready = [] + r, w, _ = _syscall_wrapper( + self._select, True, self._readers, self._writers, timeout + ) + r = set(r) + w = set(w) + for fd in r | w: + events = 0 + if fd in r: + events |= EVENT_READ + if fd in w: + events |= EVENT_WRITE + key = self._key_from_fd(fd) + if key: + ready.append((key, events & key.events)) + return ready + + +if hasattr(select, "poll"): + + class PollSelector(BaseSelector): + """ Poll-based selector """ + + def __init__(self): + super(PollSelector, self).__init__() + self._poll = select.poll() + + def register(self, fileobj, events, data=None): + key = super(PollSelector, self).register(fileobj, events, data) + event_mask = 0 + if events & EVENT_READ: + event_mask |= select.POLLIN + if events & EVENT_WRITE: + event_mask |= select.POLLOUT + self._poll.register(key.fd, event_mask) + return key + + def unregister(self, fileobj): + key = super(PollSelector, self).unregister(fileobj) + self._poll.unregister(key.fd) + return key + + def _wrap_poll(self, timeout=None): + """ Wrapper function for select.poll.poll() so that + _syscall_wrapper can work with only seconds. """ + if timeout is not None: + if timeout <= 0: + timeout = 0 + else: + # select.poll.poll() has a resolution of 1 millisecond, + # round away from zero to wait *at least* timeout seconds. + timeout = math.ceil(timeout * 1e3) + result = self._poll.poll(timeout) + return result + + def select(self, timeout=None): + ready = [] + fd_events = _syscall_wrapper( + self._wrap_poll, True, timeout=timeout + ) + for fd, event_mask in fd_events: + events = 0 + if event_mask & ~select.POLLIN: + events |= EVENT_WRITE + if event_mask & ~select.POLLOUT: + events |= EVENT_READ + key = self._key_from_fd(fd) + if key: + ready.append((key, events & key.events)) + return ready + + +if hasattr(select, "epoll"): + + class EpollSelector(BaseSelector): + """ Epoll-based selector """ + + def __init__(self): + super(EpollSelector, self).__init__() + self._epoll = select.epoll() + + def fileno(self): + return self._epoll.fileno() + + def register(self, fileobj, events, data=None): + key = super(EpollSelector, self).register(fileobj, events, data) + events_mask = 0 + if events & EVENT_READ: + events_mask |= select.EPOLLIN + if events & EVENT_WRITE: + events_mask |= select.EPOLLOUT + _syscall_wrapper(self._epoll.register, False, key.fd, events_mask) + return key + + def unregister(self, fileobj): + key = super(EpollSelector, self).unregister(fileobj) + try: + _syscall_wrapper(self._epoll.unregister, False, key.fd) + except SelectorError: + # This can occur when the fd was closed since registry. + pass + return key + + def select(self, timeout=None): + if timeout is not None: + if timeout <= 0: + timeout = 0.0 + else: + # select.epoll.poll() has a resolution of 1 millisecond + # but luckily takes seconds so we don't need a wrapper + # like PollSelector. Just for better rounding. + timeout = math.ceil(timeout * 1e3) * 1e-3 + timeout = float(timeout) + else: + timeout = -1.0 # epoll.poll() must have a float. + # We always want at least 1 to ensure that select can be called + # with no file descriptors registered. Otherwise will fail. 
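+            # (epoll.poll() raises ValueError if maxevents is 0.)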
+ max_events = max(len(self._fd_to_key), 1) + ready = [] + fd_events = _syscall_wrapper( + self._epoll.poll, True, timeout=timeout, maxevents=max_events + ) + for fd, event_mask in fd_events: + events = 0 + if event_mask & ~select.EPOLLIN: + events |= EVENT_WRITE + if event_mask & ~select.EPOLLOUT: + events |= EVENT_READ + key = self._key_from_fd(fd) + if key: + ready.append((key, events & key.events)) + return ready + + def close(self): + self._epoll.close() + super(EpollSelector, self).close() + + +if hasattr(select, "kqueue"): + + class KqueueSelector(BaseSelector): + """ Kqueue / Kevent-based selector """ + + def __init__(self): + super(KqueueSelector, self).__init__() + self._kqueue = select.kqueue() + + def fileno(self): + return self._kqueue.fileno() + + def register(self, fileobj, events, data=None): + key = super(KqueueSelector, self).register(fileobj, events, data) + if events & EVENT_READ: + kevent = select.kevent( + key.fd, select.KQ_FILTER_READ, select.KQ_EV_ADD + ) + _syscall_wrapper(self._kqueue.control, False, [kevent], 0, 0) + if events & EVENT_WRITE: + kevent = select.kevent( + key.fd, select.KQ_FILTER_WRITE, select.KQ_EV_ADD + ) + _syscall_wrapper(self._kqueue.control, False, [kevent], 0, 0) + return key + + def unregister(self, fileobj): + key = super(KqueueSelector, self).unregister(fileobj) + if key.events & EVENT_READ: + kevent = select.kevent( + key.fd, select.KQ_FILTER_READ, select.KQ_EV_DELETE + ) + try: + _syscall_wrapper( + self._kqueue.control, False, [kevent], 0, 0 + ) + except SelectorError: + pass + if key.events & EVENT_WRITE: + kevent = select.kevent( + key.fd, select.KQ_FILTER_WRITE, select.KQ_EV_DELETE + ) + try: + _syscall_wrapper( + self._kqueue.control, False, [kevent], 0, 0 + ) + except SelectorError: + pass + return key + + def select(self, timeout=None): + if timeout is not None: + timeout = max(timeout, 0) + max_events = len(self._fd_to_key) * 2 + ready_fds = {} + kevent_list = _syscall_wrapper( + self._kqueue.control, True, None, max_events, timeout + ) + for kevent in kevent_list: + fd = kevent.ident + event_mask = kevent.filter + events = 0 + if event_mask == select.KQ_FILTER_READ: + events |= EVENT_READ + if event_mask == select.KQ_FILTER_WRITE: + events |= EVENT_WRITE + key = self._key_from_fd(fd) + if key: + if key.fd not in ready_fds: + ready_fds[key.fd] = (key, events & key.events) + else: + old_events = ready_fds[key.fd][1] + ready_fds[key.fd] = ( + key, (events | old_events) & key.events + ) + return list(ready_fds.values()) + + def close(self): + self._kqueue.close() + super(KqueueSelector, self).close() + + +if not hasattr(select, 'select'): # Platform-specific: AppEngine + HAS_SELECT = False + + +def _can_allocate(struct): + """ Checks that select structs can be allocated by the underlying + operating system, not just advertised by the select module. We don't + check select() because we'll be hopeful that most platforms that + don't have it available will not advertise it. (ie: GAE) """ + try: + # select.poll() objects won't fail until used. + if struct == 'poll': + p = select.poll() + p.poll(0) + # All others will fail on allocation. + else: + getattr(select, struct)().close() + return True + + except (OSError, AttributeError) as e: + return False + + + + +# Choose the best implementation, roughly: +# kqueue == epoll > poll > select. Devpoll not supported. 
(See above) +# select() also can't accept a FD > FD_SETSIZE (usually around 1024) +def DefaultSelector(): + """ This function serves as a first call for DefaultSelector to + detect if the select module is being monkey-patched incorrectly + by eventlet, greenlet, and preserve proper behavior. """ + global _DEFAULT_SELECTOR + if _DEFAULT_SELECTOR is None: + if _can_allocate('kqueue'): + _DEFAULT_SELECTOR = KqueueSelector + elif _can_allocate('epoll'): + _DEFAULT_SELECTOR = EpollSelector + elif _can_allocate('poll'): + _DEFAULT_SELECTOR = PollSelector + elif hasattr(select, 'select'): + _DEFAULT_SELECTOR = SelectSelector + else: # Platform-specific: AppEngine + raise ValueError('Platform does not have a selector') + + return _DEFAULT_SELECTOR() diff --git a/requests3/core/http_manager/util/ssl_.py b/requests3/core/http_manager/util/ssl_.py new file mode 100644 index 00000000..73369f80 --- /dev/null +++ b/requests3/core/http_manager/util/ssl_.py @@ -0,0 +1,389 @@ +from __future__ import absolute_import +import errno +import logging +import warnings +import hmac + +from binascii import hexlify, unhexlify +from hashlib import md5, sha1, sha256 + +from ..exceptions import SSLError, InsecurePlatformWarning, SNIMissingWarning +from ..packages.ssl_match_hostname import ( + match_hostname as _match_hostname, CertificateError +) + +SSLContext = None +HAS_SNI = False +IS_PYOPENSSL = False +IS_SECURETRANSPORT = False +# Maps the length of a digest to a possible hash function producing this digest +HASHFUNC_MAP = {32: md5, 40: sha1, 64: sha256} +log = logging.getLogger(__name__) + + +def _const_compare_digest_backport(a, b): + """ + Compare two digests of equal length in constant time. + + The digests must be of type str/bytes. + Returns True if the digests match, and False otherwise. + """ + result = abs(len(a) - len(b)) + for l, r in zip(bytearray(a), bytearray(b)): + result |= l ^ r + return result == 0 + + +_const_compare_digest = getattr( + hmac, 'compare_digest', _const_compare_digest_backport +) +try: # Test for SSL features + import ssl + from ssl import wrap_socket, CERT_NONE, PROTOCOL_SSLv23 + from ssl import HAS_SNI # Has SNI? + from ssl import SSLError as BaseSSLError +except ImportError: + + class BaseSSLError(Exception): + pass + + +try: + from ssl import OP_NO_SSLv2, OP_NO_SSLv3, OP_NO_COMPRESSION +except ImportError: + OP_NO_SSLv2, OP_NO_SSLv3 = 0x1000000, 0x2000000 + OP_NO_COMPRESSION = 0x20000 +# A secure default. +# Sources for more information on TLS ciphers: +# +# - https://wiki.mozilla.org/Security/Server_Side_TLS +# - https://www.ssllabs.com/projects/best-practices/index.html +# - https://hynek.me/articles/hardening-your-web-servers-ssl-ciphers/ +# +# The general intent is: +# - Prefer TLS 1.3 cipher suites +# - prefer cipher suites that offer perfect forward secrecy (DHE/ECDHE), +# - prefer ECDHE over DHE for better performance, +# - prefer any AES-GCM and ChaCha20 over any AES-CBC for better performance and +# security, +# - prefer AES-GCM over ChaCha20 because hardware-accelerated AES is common, +# - disable NULL authentication, MD5 MACs and DSS for security reasons. +DEFAULT_CIPHERS = ':'.join( + [ + 'TLS13-AES-256-GCM-SHA384', + 'TLS13-CHACHA20-POLY1305-SHA256', + 'TLS13-AES-128-GCM-SHA256', + 'ECDH+AESGCM', + 'ECDH+CHACHA20', + 'DH+AESGCM', + 'DH+CHACHA20', + 'ECDH+AES256', + 'DH+AES256', + 'ECDH+AES128', + 'DH+AES', + 'RSA+AESGCM', + 'RSA+AES', + '!aNULL', + '!eNULL', + '!MD5', + ] +) +try: + from ssl import SSLContext # Modern SSL? 
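+    # (ssl.SSLContext is available on CPython 2.7.9+ and 3.2+; the fallback
+    # class in the except-branch below only mimics the small surface this
+    # module needs.)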
+except ImportError:
+
+    # TODO: Can we remove this by choosing to support only platforms with
+    # actual SSLContext objects?
+    class SSLContext(object):  # Platform-specific: Python 2 & 3.1
+
+        def __init__(self, protocol_version):
+            self.protocol = protocol_version
+            # Use default values from a real SSLContext
+            self.check_hostname = False
+            self.verify_mode = ssl.CERT_NONE
+            self.ca_certs = None
+            self.options = 0
+            self.certfile = None
+            self.keyfile = None
+            self.ciphers = None
+
+        def load_cert_chain(self, certfile, keyfile):
+            self.certfile = certfile
+            self.keyfile = keyfile
+
+        def load_verify_locations(self, cafile=None, capath=None):
+            self.ca_certs = cafile
+            if capath is not None:
+                raise SSLError("CA directories not supported in older Pythons")
+
+        def set_ciphers(self, cipher_suite):
+            self.ciphers = cipher_suite
+
+        def wrap_socket(self, socket, server_hostname=None, server_side=False):
+            warnings.warn(
+                'A true SSLContext object is not available. This prevents '
+                'urllib3 from configuring SSL appropriately and may cause '
+                'certain SSL connections to fail. You can upgrade to a newer '
+                'version of Python to solve this. For more information, see '
+                'https://urllib3.readthedocs.io/en/latest/advanced-usage.html'
+                '#ssl-warnings',
+                InsecurePlatformWarning,
+            )
+            kwargs = {
+                'keyfile': self.keyfile,
+                'certfile': self.certfile,
+                'ca_certs': self.ca_certs,
+                'cert_reqs': self.verify_mode,
+                'ssl_version': self.protocol,
+                'server_side': server_side,
+            }
+            return wrap_socket(socket, ciphers=self.ciphers, **kwargs)
+
+
+def assert_fingerprint(cert, fingerprint):
+    """
+    Checks if given fingerprint matches the supplied certificate.
+
+    :param cert:
+        Certificate as bytes object.
+    :param fingerprint:
+        Fingerprint as string of hexdigits, can be interspersed by colons.
+    """
+    fingerprint = fingerprint.replace(':', '').lower()
+    digest_length = len(fingerprint)
+    hashfunc = HASHFUNC_MAP.get(digest_length)
+    if not hashfunc:
+        raise SSLError(
+            'Fingerprint of invalid length: {0}'.format(fingerprint)
+        )
+
+    # We need encode() here for py32; works on py2 and py33.
+    fingerprint_bytes = unhexlify(fingerprint.encode())
+    cert_digest = hashfunc(cert).digest()
+    if not _const_compare_digest(cert_digest, fingerprint_bytes):
+        raise SSLError(
+            'Fingerprints did not match. Expected "{0}", got "{1}".'.format(
+                fingerprint, hexlify(cert_digest)
+            )
+        )
+
+
+def resolve_cert_reqs(candidate):
+    """
+    Resolves the argument to a numeric constant, which can be passed to
+    the wrap_socket function/method from the ssl module.
+    Defaults to :data:`ssl.CERT_NONE`.
+    If given a string it is assumed to be the name of the constant in the
+    :mod:`ssl` module or its abbreviation.
+    (So you can specify `REQUIRED` instead of `CERT_REQUIRED`.)
+    If it's neither `None` nor a string we assume it is already the numeric
+    constant which can directly be passed to wrap_socket.
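+
+    Example::
+
+        >>> import ssl
+        >>> resolve_cert_reqs(None) == ssl.CERT_NONE
+        True
+        >>> resolve_cert_reqs('REQUIRED') == ssl.CERT_REQUIRED
+        True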
+ """ + if candidate is None: + return CERT_NONE + + if isinstance(candidate, str): + res = getattr(ssl, candidate, None) + if res is None: + res = getattr(ssl, 'CERT_' + candidate) + return res + + return candidate + + +def resolve_ssl_version(candidate): + """ + like resolve_cert_reqs + """ + if candidate is None: + return PROTOCOL_SSLv23 + + if isinstance(candidate, str): + res = getattr(ssl, candidate, None) + if res is None: + res = getattr(ssl, 'PROTOCOL_' + candidate) + return res + + return candidate + + +def create_urllib3_context( + ssl_version=None, cert_reqs=None, options=None, ciphers=None +): + """All arguments have the same meaning as ``ssl_wrap_socket``. + + By default, this function does a lot of the same work that + ``ssl.create_default_context`` does on Python 3.4+. It: + + - Disables SSLv2, SSLv3, and compression + - Sets a restricted set of server ciphers + + If you wish to enable SSLv3, you can do:: + + from urllib3.util import ssl_ + context = ssl_.create_urllib3_context() + context.options &= ~ssl_.OP_NO_SSLv3 + + You can do the same to enable compression (substituting ``COMPRESSION`` + for ``SSLv3`` in the last line above). + + :param ssl_version: + The desired protocol version to use. This will default to + PROTOCOL_SSLv23 which will negotiate the highest protocol that both + the server and your installation of OpenSSL support. + :param cert_reqs: + Whether to require the certificate verification. This defaults to + ``ssl.CERT_REQUIRED``. + :param options: + Specific OpenSSL options. These default to ``ssl.OP_NO_SSLv2``, + ``ssl.OP_NO_SSLv3``, ``ssl.OP_NO_COMPRESSION``. + :param ciphers: + Which cipher suites to allow the server to select. + :returns: + Constructed SSLContext object with specified options + :rtype: SSLContext + """ + context = SSLContext(ssl_version or ssl.PROTOCOL_SSLv23) + # Setting the default here, as we may have no ssl module on import + cert_reqs = ssl.CERT_REQUIRED if cert_reqs is None else cert_reqs + if options is None: + options = 0 + # SSLv2 is easily broken and is considered harmful and dangerous + options |= OP_NO_SSLv2 + # SSLv3 has several problems and is now dangerous + options |= OP_NO_SSLv3 + # Disable compression to prevent CRIME attacks for OpenSSL 1.0+ + # (issue #309) + options |= OP_NO_COMPRESSION + context.options |= options + context.set_ciphers(ciphers or DEFAULT_CIPHERS) + context.verify_mode = cert_reqs + if getattr( + context, 'check_hostname', None + ) is not None: # Platform-specific: Python 3.2 + # We do our own verification, including fingerprints and alternative + # hostnames. So disable it here + context.check_hostname = False + return context + + +def merge_context_settings( + context, + keyfile=None, + certfile=None, + cert_reqs=None, + ca_certs=None, + ca_cert_dir=None, +): + """ + Merges provided settings into an SSL Context. 
+ """ + if cert_reqs is not None: + context.verify_mode = resolve_cert_reqs(cert_reqs) + if ca_certs or ca_cert_dir: + try: + context.load_verify_locations(ca_certs, ca_cert_dir) + except IOError as e: # Platform-specific: Python 2.6, 2.7, 3.2 + raise SSLError(e) + + # Py33 raises FileNotFoundError which subclasses OSError + # These are not equivalent unless we check the errno attribute + except OSError as e: # Platform-specific: Python 3.3 and beyond + if e.errno == errno.ENOENT: + raise SSLError(e) + + raise + + elif getattr(context, 'load_default_certs', None) is not None: + # try to load OS default certs; works well on Windows (require Python3.4+) + context.load_default_certs() + if certfile: + context.load_cert_chain(certfile, keyfile) + return context + + +def ssl_wrap_socket( + sock, + keyfile=None, + certfile=None, + cert_reqs=None, + ca_certs=None, + server_hostname=None, + ssl_version=None, + ciphers=None, + ssl_context=None, + ca_cert_dir=None, +): + """ + All arguments except for server_hostname, ssl_context, and ca_cert_dir have + the same meaning as they do when using :func:`ssl.wrap_socket`. + + :param server_hostname: + When SNI is supported, the expected hostname of the certificate + :param ssl_context: + A pre-made :class:`SSLContext` object. If none is provided, one will + be created using :func:`create_urllib3_context`. + :param ciphers: + A string of ciphers we wish the client to support. + :param ca_cert_dir: + A directory containing CA certificates in multiple separate files, as + supported by OpenSSL's -CApath flag or the capath argument to + SSLContext.load_verify_locations(). + """ + context = ssl_context + if context is None: + # Note: This branch of code and all the variables in it are no longer + # used by urllib3 itself. We should consider deprecating and removing + # this code. + context = create_urllib3_context( + ssl_version, cert_reqs, ciphers=ciphers + ) + if ca_certs or ca_cert_dir: + try: + context.load_verify_locations(ca_certs, ca_cert_dir) + except IOError as e: # Platform-specific: Python 2.6, 2.7, 3.2 + raise SSLError(e) + + # Py33 raises FileNotFoundError which subclasses OSError + # These are not equivalent unless we check the errno attribute + except OSError as e: # Platform-specific: Python 3.3 and beyond + if e.errno == errno.ENOENT: + raise SSLError(e) + + raise + + elif getattr(context, 'load_default_certs', None) is not None: + # try to load OS default certs; works well on Windows (require Python3.4+) + context.load_default_certs() + if certfile: + context.load_cert_chain(certfile, keyfile) + if HAS_SNI: # Platform-specific: OpenSSL with enabled SNI + return context.wrap_socket(sock, server_hostname=server_hostname) + + warnings.warn( + 'An HTTPS request has been made, but the SNI (Server Name ' + 'Indication) extension to TLS is not available on this platform. ' + 'This may cause the server to present an incorrect TLS ' + 'certificate, which can cause validation failures. You can upgrade to ' + 'a newer version of Python to solve this. For more information, see ' + 'https://urllib3.readthedocs.io/en/latest/advanced-usage.html' + '#ssl-warnings', + SNIMissingWarning, + ) + return context.wrap_socket(sock) + + +def match_hostname(cert, asserted_hostname): + try: + _match_hostname(cert, asserted_hostname) + except CertificateError as e: + log.error( + 'Certificate did not match expected hostname: %s. 
' 'Certificate: %s',
+            asserted_hostname,
+            cert,
+        )
+        # Add cert to exception and reraise so client code can inspect
+        # the cert when catching the exception, if they want to
+        e._peer_cert = cert
+        raise
diff --git a/requests3/core/http_manager/util/timeout.py b/requests3/core/http_manager/util/timeout.py
new file mode 100644
index 00000000..35d49520
--- /dev/null
+++ b/requests3/core/http_manager/util/timeout.py
@@ -0,0 +1,261 @@
+from __future__ import absolute_import
+
+# The default socket timeout, used by httplib to indicate that no timeout was
+# specified by the user
+from socket import _GLOBAL_DEFAULT_TIMEOUT
+import time
+
+from ..exceptions import TimeoutStateError
+
+# A sentinel value to indicate that no timeout was specified by the user in
+# urllib3
+_Default = object()
+# Use time.monotonic if available.
+current_time = getattr(time, "monotonic", time.time)
+
+
+class Timeout(object):
+    """ Timeout configuration.
+
+    Timeouts can be defined as a default for a pool::
+
+        timeout = Timeout(connect=2.0, read=7.0)
+        http = PoolManager(timeout=timeout)
+        response = http.request('GET', 'http://example.com/')
+
+    Or per-request (which overrides the default for the pool)::
+
+        response = http.request('GET', 'http://example.com/', timeout=Timeout(10))
+
+    Timeouts can be disabled by setting all the parameters to ``None``::
+
+        no_timeout = Timeout(connect=None, read=None)
+        response = http.request('GET', 'http://example.com/', timeout=no_timeout)
+
+
+    :param total:
+        This combines the connect and read timeouts into one; the read timeout
+        will be set to the time leftover from the connect attempt. In the
+        event that both a connect timeout and a total are specified, or a read
+        timeout and a total are specified, the shorter timeout will be applied.
+
+        Defaults to None.
+
+    :type total: integer, float, or None
+
+    :param connect:
+        The maximum amount of time to wait for a connection attempt to a server
+        to succeed. Omitting the parameter will default the connect timeout to
+        the system default, probably `the global default timeout in socket.py
+        <http://hg.python.org/cpython/file/603b4d593758/Lib/socket.py#l535>`_.
+        None will set an infinite timeout for connection attempts.
+
+    :type connect: integer, float, or None
+
+    :param read:
+        The maximum amount of time to wait between consecutive
+        read operations for a response from the server. Omitting
+        the parameter will default the read timeout to the system
+        default, probably `the global default timeout in socket.py
+        <http://hg.python.org/cpython/file/603b4d593758/Lib/socket.py#l535>`_.
+        None will set an infinite timeout.
+
+    :type read: integer, float, or None
+
+    .. note::
+
+        Many factors can affect the total amount of time for urllib3 to return
+        an HTTP response.
+
+        For example, Python's DNS resolver does not obey the timeout specified
+        on the socket. Other factors that can affect total request time include
+        high CPU load, high swap, the program running at a low priority level,
+        or other behaviors.
+
+        In addition, the read and total timeouts only measure the time between
+        read operations on the socket connecting the client and the server,
+        not the total amount of time for the request to return a complete
+        response. For most requests, the timeout is raised because the server
+        has not sent the first byte in the specified time. This is not always
+        the case; if a server streams one byte every fifteen seconds, a timeout
+        of 20 seconds will not trigger, even though the request will take
+        several minutes to complete.
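+        (Concretely: with such a trickle server, ``Timeout(read=20)`` would
+        never fire, because every individual read completes within the
+        window.)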
+ + If your goal is to cut off any request after a set amount of wall clock + time, consider having a second "watcher" thread to cut off a slow + request. + """ + # : A sentinel object representing the default timeout value + DEFAULT_TIMEOUT = _GLOBAL_DEFAULT_TIMEOUT + + def __init__(self, total=None, connect=_Default, read=_Default): + self._connect = self._validate_timeout(connect, 'connect') + self._read = self._validate_timeout(read, 'read') + self.total = self._validate_timeout(total, 'total') + self._start_connect = None + + def __str__(self): + return '%s(connect=%r, read=%r, total=%r)' % ( + type(self).__name__, self._connect, self._read, self.total + ) + + @classmethod + def _validate_timeout(cls, value, name): + """ Check that a timeout attribute is valid. + + :param value: The timeout value to validate + :param name: The name of the timeout attribute to validate. This is + used to specify in error messages. + :return: The validated and casted version of the given value. + :raises ValueError: If it is a numeric value less than or equal to + zero, or the type is not an integer, float, or None. + """ + if value is _Default: + return cls.DEFAULT_TIMEOUT + + if value is None or value is cls.DEFAULT_TIMEOUT: + return value + + if isinstance(value, bool): + raise ValueError( + "Timeout cannot be a boolean value. It must " + "be an int, float or None." + ) + + try: + float(value) + except (TypeError, ValueError): + raise ValueError( + "Timeout value %s was %s, but it must be an " + "int, float or None." % (name, value) + ) + + try: + if value <= 0: + raise ValueError( + "Attempted to set %s timeout to %s, but the " + "timeout cannot be set to a value less " + "than or equal to 0." % (name, value) + ) + + except TypeError: # Python 3 + raise ValueError( + "Timeout value %s was %s, but it must be an " + "int, float or None." % (name, value) + ) + + return value + + @classmethod + def from_float(cls, timeout): + """ Create a new Timeout from a legacy timeout value. + + The timeout value used by httplib.py sets the same timeout on the + connect(), and recv() socket requests. This creates a :class:`Timeout` + object that sets the individual timeouts to the ``timeout`` value + passed to this function. + + :param timeout: The legacy timeout value. + :type timeout: integer, float, sentinel default object, or None + :return: Timeout object + :rtype: :class:`Timeout` + """ + return Timeout(read=timeout, connect=timeout) + + def clone(self): + """ Create a copy of the timeout object + + Timeout properties are stored per-pool but each request needs a fresh + Timeout object to ensure each one has its own start/stop configured. + + :return: a copy of the timeout object + :rtype: :class:`Timeout` + """ + # We can't use copy.deepcopy because that will also create a new object + # for _GLOBAL_DEFAULT_TIMEOUT, which socket.py uses as a sentinel to + # detect the user default. + return Timeout( + connect=self._connect, read=self._read, total=self.total + ) + + def start_connect(self): + """ Start the timeout clock, used during a connect() attempt + + :raises urllib3.exceptions.TimeoutStateError: if you attempt + to start a timer that has been started already. + """ + if self._start_connect is not None: + raise TimeoutStateError("Timeout timer has already been started.") + + self._start_connect = current_time() + return self._start_connect + + def get_connect_duration(self): + """ Gets the time elapsed since the call to :meth:`start_connect`. + + :return: Elapsed time. 
+ :rtype: float + :raises urllib3.exceptions.TimeoutStateError: if you attempt + to get duration for a timer that hasn't been started. + """ + if self._start_connect is None: + raise TimeoutStateError( + "Can't get connect duration for timer " "that has not started." + ) + + return current_time() - self._start_connect + + @property + def connect_timeout(self): + """ Get the value to use when setting a connection timeout. + + This will be a positive float or integer, the value None + (never timeout), or the default system timeout. + + :return: Connect timeout. + :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None + """ + if self.total is None: + return self._connect + + if self._connect is None or self._connect is self.DEFAULT_TIMEOUT: + return self.total + + return min(self._connect, self.total) + + @property + def read_timeout(self): + """ Get the value for the read timeout. + + This assumes some time has elapsed in the connection timeout and + computes the read timeout appropriately. + + If self.total is set, the read timeout is dependent on the amount of + time taken by the connect timeout. If the connection time has not been + established, a :exc:`~urllib3.exceptions.TimeoutStateError` will be + raised. + + :return: Value to use for the read timeout. + :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None + :raises urllib3.exceptions.TimeoutStateError: If :meth:`start_connect` + has not yet been called on this object. + """ + if ( + self.total is not None and + self.total is not self.DEFAULT_TIMEOUT and + self._read is not None and + self._read is not self.DEFAULT_TIMEOUT + ): + # In case the connect timeout has not yet been established. + if self._start_connect is None: + return self._read + + return max( + 0, min(self.total - self.get_connect_duration(), self._read) + ) + + elif self.total is not None and self.total is not self.DEFAULT_TIMEOUT: + return max(0, self.total - self.get_connect_duration()) + + else: + return self._read diff --git a/requests3/core/http_manager/util/url.py b/requests3/core/http_manager/util/url.py new file mode 100644 index 00000000..f4c6a745 --- /dev/null +++ b/requests3/core/http_manager/util/url.py @@ -0,0 +1,221 @@ +from __future__ import absolute_import +from collections import namedtuple + +from ..exceptions import LocationParseError + +url_attrs = ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'] +# We only want to normalize urls with an HTTP(S) scheme. +# urllib3 infers URLs without a scheme (None) to be http. +NORMALIZABLE_SCHEMES = ('http', 'https', None) + + +class Url(namedtuple('Url', url_attrs)): + """ + Datastructure for representing an HTTP URL. Used as a return value for + :func:`parse_url`. Both the scheme and host are normalized as they are + both case-insensitive according to RFC 3986. + """ + __slots__ = () + + def __new__( + cls, + scheme=None, + auth=None, + host=None, + port=None, + path=None, + query=None, + fragment=None, + ): + if path and not path.startswith('/'): + path = '/' + path + if scheme: + scheme = scheme.lower() + if host and scheme in NORMALIZABLE_SCHEMES: + host = host.lower() + return super(Url, cls).__new__( + cls, scheme, auth, host, port, path, query, fragment + ) + + @property + def hostname(self): + """For backwards-compatibility with urlparse. We're nice like that.""" + return self.host + + @property + def request_uri(self): + """Absolute path including the query string.""" + uri = self.path or '/' + if self.query is not None: + uri += '?' 
+ self.query
+        return uri
+
+    @property
+    def netloc(self):
+        """Network location including host and port"""
+        if self.port:
+            return '%s:%d' % (self.host, self.port)
+
+        return self.host
+
+    @property
+    def url(self):
+        """
+        Convert self into a url
+
+        This function should more or less round-trip with :func:`.parse_url`. The
+        returned url may not be exactly the same as the url passed to
+        :func:`.parse_url`, but it should be equivalent by the RFC (e.g., urls
+        with a blank port will have : removed).
+
+        Example::
+
+            >>> U = parse_url('http://google.com/mail/')
+            >>> U.url
+            'http://google.com/mail/'
+            >>> Url('http', 'username:password', 'host.com', 80,
+            ...     '/path', 'query', 'fragment').url
+            'http://username:password@host.com:80/path?query#fragment'
+        """
+        scheme, auth, host, port, path, query, fragment = self
+        url = ''
+        # We use "is not None" because we want things to happen with empty
+        # strings (or 0 port)
+        if scheme is not None:
+            url += scheme + '://'
+        if auth is not None:
+            url += auth + '@'
+        if host is not None:
+            url += host
+        if port is not None:
+            url += ':' + str(port)
+        if path is not None:
+            url += path
+        if query is not None:
+            url += '?' + query
+        if fragment is not None:
+            url += '#' + fragment
+        return url
+
+    def __str__(self):
+        return self.url
+
+
+def split_first(s, delims):
+    """
+    Given a string and an iterable of delimiters, split on the first found
+    delimiter. Return two split parts and the matched delimiter.
+
+    If not found, then the first part is the full input string.
+
+    Example::
+
+        >>> split_first('foo/bar?baz', '?/=')
+        ('foo', 'bar?baz', '/')
+        >>> split_first('foo/bar?baz', '123')
+        ('foo/bar?baz', '', None)
+
+    Scales linearly with number of delims. Not ideal for a large number of
+    delims.
+    """
+    min_idx = None
+    min_delim = None
+    for d in delims:
+        idx = s.find(d)
+        if idx < 0:
+            continue
+
+        if min_idx is None or idx < min_idx:
+            min_idx = idx
+            min_delim = d
+    if min_idx is None or min_idx < 0:
+        return s, '', None
+
+    return s[:min_idx], s[min_idx + 1:], min_delim
+
+
+def parse_url(url):
+    """
+    Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is
+    performed to parse incomplete urls. Fields not provided will be None.
+
+    Partly backwards-compatible with :mod:`urlparse`.
+
+    Example::
+
+        >>> parse_url('http://google.com/mail/')
+        Url(scheme='http', host='google.com', port=None, path='/mail/', ...)
+        >>> parse_url('google.com:80')
+        Url(scheme=None, host='google.com', port=80, path=None, ...)
+        >>> parse_url('/foo?bar')
+        Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...)
+    """
+    # While this code has overlap with stdlib's urlparse, it is much
+    # simplified for our needs and less annoying.
+    # Additionally, this implementation does silly things to be optimal
+    # on CPython.
+    if not url:
+        # Empty
+        return Url()
+
+    scheme = None
+    auth = None
+    host = None
+    port = None
+    path = None
+    fragment = None
+    query = None
+    # Scheme
+    if '://' in url:
+        scheme, url = url.split('://', 1)
+    # Find the earliest Authority Terminator
+    # (http://tools.ietf.org/html/rfc3986#section-3.2)
+    url, path_, delim = split_first(url, ['/', '?', '#'])
+    if delim:
+        # Reassemble the path
+        path = delim + path_
+    # Auth
+    if '@' in url:
+        # Last '@' denotes end of auth part
+        auth, url = url.rsplit('@', 1)
+    # IPv6
+    if url and url[0] == '[':
+        host, url = url.split(']', 1)
+        host += ']'
+    # Port
+    if ':' in url:
+        _host, port = url.split(':', 1)
+        if not host:
+            host = _host
+        if port:
+            # If given, ports must be integers.
No whitespace, no plus or + # minus prefixes, no non-integer digits such as ^2 (superscript). + if not port.isdigit(): + raise LocationParseError(url) + + try: + port = int(port) + except ValueError: + raise LocationParseError(url) + + else: + # Blank ports are cool, too. (rfc3986#section-3.2.3) + port = None + elif not host and url: + host = url + if not path: + return Url(scheme, auth, host, port, path, query, fragment) + + # Fragment + if '#' in path: + path, fragment = path.split('#', 1) + # Query + if '?' in path: + path, query = path.split('?', 1) + return Url(scheme, auth, host, port, path, query, fragment) + + +def get_host(url): + """ + Deprecated. Use :func:`parse_url` instead. + """ + p = parse_url(url) + return p.scheme or 'http', p.hostname, p.port diff --git a/requests3/core/http_manager/util/wait.py b/requests3/core/http_manager/util/wait.py new file mode 100644 index 00000000..155bba0e --- /dev/null +++ b/requests3/core/http_manager/util/wait.py @@ -0,0 +1,39 @@ +from .selectors import (HAS_SELECT, DefaultSelector, EVENT_READ, EVENT_WRITE) + + +def _wait_for_io_events(socks, events, timeout=None): + """ Waits for IO events to be available from a list of sockets + or optionally a single socket if passed in. Returns a list of + sockets that can be interacted with immediately. """ + if not HAS_SELECT: + raise ValueError('Platform does not have a selector') + + if not isinstance(socks, list): + # Probably just a single socket. + if hasattr(socks, "fileno"): + socks = [socks] + # Otherwise it might be a non-list iterable. + else: + socks = list(socks) + with DefaultSelector() as selector: + for sock in socks: + selector.register(sock, events) + return [ + key[0].fileobj + for key in selector.select(timeout) + if key[1] & events + ] + + +def wait_for_read(socks, timeout=None): + """ Waits for reading to be available from a list of sockets + or optionally a single socket if passed in. Returns a list of + sockets that can be read from immediately. """ + return _wait_for_io_events(socks, EVENT_READ, timeout) + + +def wait_for_write(socks, timeout=None): + """ Waits for writing to be available from a list of sockets + or optionally a single socket if passed in. Returns a list of + sockets that can be written to immediately. """ + return _wait_for_io_events(socks, EVENT_WRITE, timeout) |
