author | Jannis Leidel <jannis@leidel.info> | 2012-03-13 23:42:03 -0700
---|---|---
committer | Jannis Leidel <jannis@leidel.info> | 2012-03-13 23:42:03 -0700
commit | 6d9725a0bfd4070f62c3fd148e604018ef229d5e (patch) |
tree | 75a98d04987c9f82c6100bfdead09ca4ad541bac |
parent | d8cfa877c5ddc2ff1adce76dd2fd0778943edba6 (diff) |
download | pip-feature/ssl-mirror-requests.tar.gz |
Adopted requests and added pep381 verification. (branch: feature/ssl-mirror-requests)
-rw-r--r-- | pip/backwardcompat.py | 146
-rw-r--r-- | pip/basecommand.py | 30
-rw-r--r-- | pip/baseparser.py | 6
-rw-r--r-- | pip/commands/install.py | 15
-rw-r--r-- | pip/commands/search.py | 2
-rw-r--r-- | pip/download.py | 223
-rw-r--r-- | pip/index.py | 421
-rw-r--r-- | pip/locations.py | 3
-rw-r--r-- | pip/mirrors.py | 400
-rw-r--r-- | pip/req.py | 132
-rw-r--r-- | tests/test_pip.py | 2
11 files changed, 1101 insertions, 279 deletions
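At its core the commit does three things: it fetches index pages over SSL with requests, caches PyPI's DSA server key locally, and checks mirror pages against PEP 381 server signatures. As orientation for the diff below, here is a minimal sketch of the server-key caching step added to pip/basecommand.py; requests and a writable `serverkey_file` path are assumed, and error handling is reduced to `raise_for_status`:

```python
# Sketch of the serverkey refresh added in pip/basecommand.py below.
# Assumptions: requests is importable and serverkey_file is a writable
# cache path (pip takes it from pip/locations.py).
import requests

def refresh_serverkey(serverkey_file, url='https://pypi.python.org/serverkey'):
    response = requests.get(url)
    response.raise_for_status()        # don't cache an error page
    cache = open(serverkey_file, 'wb')
    try:
        cache.write(response.content)  # PEM-encoded DSA public key
    finally:
        cache.close()
```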
diff --git a/pip/backwardcompat.py b/pip/backwardcompat.py index e33da9896..68400a4ae 100644 --- a/pip/backwardcompat.py +++ b/pip/backwardcompat.py @@ -3,6 +3,7 @@ import sys import os import shutil +import base64 __all__ = ['any', 'WindowsError', 'md5', 'copytree'] @@ -60,6 +61,9 @@ if sys.version_info >= (3,): def u(s): return s.decode('utf-8') + def _ord(x): + return x + def console_to_str(s): try: return s.decode(console_encoding) @@ -72,6 +76,13 @@ if sys.version_info >= (3,): bytes = bytes string_types = (str,) raw_input = input + + def decode_base64(source): + source = source.encode("ascii") # ensure bytes + return base64.decodebytes(source) + + _long = lambda x: x + else: from cStringIO import StringIO from urllib2 import URLError, HTTPError @@ -91,6 +102,9 @@ else: def u(s): return s + def _ord(x): + return ord(x) + def console_to_str(s): return s @@ -103,6 +117,8 @@ else: cmp = cmp raw_input = raw_input BytesIO = StringIO + decode_base64 = base64.decodestring + _long = lambda x: long(x) try: from email.parser import FeedParser @@ -133,3 +149,133 @@ def product(*args, **kwds): result = [x+[y] for x in result for y in pool] for prod in result: yield tuple(prod) + +try: + from collections import OrderedDict +except ImportError: + # Copyright (c) 2009 Raymond Hettinger + # + # Permission is hereby granted, free of charge, to any person + # obtaining a copy of this software and associated documentation files + # (the "Software"), to deal in the Software without restriction, + # including without limitation the rights to use, copy, modify, merge, + # publish, distribute, sublicense, and/or sell copies of the Software, + # and to permit persons to whom the Software is furnished to do so, + # subject to the following conditions: + # + # The above copyright notice and this permission notice shall be + # included in all copies or substantial portions of the Software. + # + # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + # OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + # OTHER DEALINGS IN THE SOFTWARE. 
+ from UserDict import DictMixin + + class OrderedDict(dict, DictMixin): + + def __init__(self, *args, **kwds): + if len(args) > 1: + raise TypeError('expected at most 1 arguments, got %d' % len(args)) + try: + self.__end + except AttributeError: + self.clear() + self.update(*args, **kwds) + + def clear(self): + self.__end = end = [] + end += [None, end, end] # sentinel node for doubly linked list + self.__map = {} # key --> [key, prev, next] + dict.clear(self) + + def __setitem__(self, key, value): + if key not in self: + end = self.__end + curr = end[1] + curr[2] = end[1] = self.__map[key] = [key, curr, end] + dict.__setitem__(self, key, value) + + def __delitem__(self, key): + dict.__delitem__(self, key) + key, prev, next = self.__map.pop(key) + prev[2] = next + next[1] = prev + + def __iter__(self): + end = self.__end + curr = end[2] + while curr is not end: + yield curr[0] + curr = curr[2] + + def __reversed__(self): + end = self.__end + curr = end[1] + while curr is not end: + yield curr[0] + curr = curr[1] + + def popitem(self, last=True): + if not self: + raise KeyError('dictionary is empty') + if last: + key = reversed(self).next() + else: + key = iter(self).next() + value = self.pop(key) + return key, value + + def __reduce__(self): + items = [[k, self[k]] for k in self] + tmp = self.__map, self.__end + del self.__map, self.__end + inst_dict = vars(self).copy() + self.__map, self.__end = tmp + if inst_dict: + return (self.__class__, (items,), inst_dict) + return self.__class__, (items,) + + def keys(self): + return list(self) + + setdefault = DictMixin.setdefault + update = DictMixin.update + pop = DictMixin.pop + values = DictMixin.values + items = DictMixin.items + iterkeys = DictMixin.iterkeys + itervalues = DictMixin.itervalues + iteritems = DictMixin.iteritems + + def __repr__(self): + if not self: + return '%s()' % (self.__class__.__name__,) + return '%s(%r)' % (self.__class__.__name__, self.items()) + + def copy(self): + return self.__class__(self) + + @classmethod + def fromkeys(cls, iterable, value=None): + d = cls() + for key in iterable: + d[key] = value + return d + + def __eq__(self, other): + if isinstance(other, OrderedDict): + if len(self) != len(other): + return False + for p, q in zip(self.items(), other.items()): + if p != q: + return False + return True + return dict.__eq__(self, other) + + def __ne__(self, other): + return not self == other diff --git a/pip/basecommand.py b/pip/basecommand.py index 12bcd6211..0d0745e96 100644 --- a/pip/basecommand.py +++ b/pip/basecommand.py @@ -12,7 +12,8 @@ from pip.baseparser import parser, ConfigOptionParser, UpdatingDefaultsHelpForma from pip.download import urlopen from pip.exceptions import (BadCommand, InstallationError, UninstallationError, CommandError) -from pip.backwardcompat import StringIO, walk_packages +from pip.locations import serverkey_file +from pip.backwardcompat import StringIO, walk_packages, u from pip.status_codes import SUCCESS, ERROR, UNKNOWN_ERROR, VIRTUALENV_NOT_FOUND @@ -56,6 +57,24 @@ class Command(object): options.quiet += initial_options.quiet options.verbose += initial_options.verbose + def refresh_serverkey(self, url='https://pypi.python.org/serverkey'): + serverkey_cache = open(serverkey_file, 'wb') + try: + try: + content = urlopen(url).content + serverkey_cache.write(content) + except Exception: + e = sys.exc_info()[1] + raise InstallationError('Could not refresh local cache (%s) ' + 'of PyPI server key (%s): %s' % + (serverkey_file, url, e)) + else: + logger.notify('Refreshed local cache 
(%s) of ' + 'PyPI server key (%s):\n\n%s' % + (serverkey_file, url, u(content))) + finally: + serverkey_cache.close() + def setup_logging(self): pass @@ -88,15 +107,18 @@ class Command(object): logger.fatal('Could not find an activated virtualenv (required).') sys.exit(VIRTUALENV_NOT_FOUND) + if not os.path.exists(serverkey_file) or options.refresh_serverkey: + self.refresh_serverkey() + if options.log: log_fp = open_logfile(options.log, 'a') logger.consumers.append((logger.DEBUG, log_fp)) else: log_fp = None - socket.setdefaulttimeout(options.timeout or None) - - urlopen.setup(proxystr=options.proxy, prompting=not options.no_input) + urlopen.setup(proxystr=options.proxy, + timeout=options.timeout or None, + prompting=not options.no_input) exit = SUCCESS store_log = False diff --git a/pip/baseparser.py b/pip/baseparser.py index b3864f3da..dd2adfeaa 100644 --- a/pip/baseparser.py +++ b/pip/baseparser.py @@ -208,6 +208,12 @@ parser.add_option( type='str', default='', help=optparse.SUPPRESS_HELP) +parser.add_option( + '--refresh-serverkey', + dest='refresh_serverkey', + action='store_true', + default=False, + help="Refresh the cached version of PyPI's server key") parser.add_option( # Option when path already exist diff --git a/pip/commands/install.py b/pip/commands/install.py index 925d57feb..011b74376 100644 --- a/pip/commands/install.py +++ b/pip/commands/install.py @@ -1,3 +1,4 @@ +import optparse import os import sys import tempfile @@ -49,7 +50,7 @@ class InstallCommand(Command): '-i', '--index-url', '--pypi-url', dest='index_url', metavar='URL', - default='http://pypi.python.org/simple/', + default='https://pypi.python.org/simple/', help='Base URL of Python Package Index (default %default)') self.parser.add_option( '--extra-index-url', @@ -65,18 +66,24 @@ class InstallCommand(Command): default=False, help='Ignore package index (only looking at --find-links URLs instead)') self.parser.add_option( + '--no-mirrors', + dest='use_mirrors', + action='store_false', + default=False, + help='Ignore the PyPI mirrors') + self.parser.add_option( '-M', '--use-mirrors', dest='use_mirrors', action='store_true', - default=False, - help='Use the PyPI mirrors as a fallback in case the main index is down.') + default=True, + help=optparse.SUPPRESS_HELP) self.parser.add_option( '--mirrors', dest='mirrors', metavar='URL', action='append', default=[], - help='Specific mirror URLs to query when --use-mirrors is used') + help='Specific mirror URLs to use instead of querying the DNS for list of mirrors') self.parser.add_option( '-b', '--build', '--build-dir', '--build-directory', diff --git a/pip/commands/search.py b/pip/commands/search.py index 9f287e594..0e72ab364 100644 --- a/pip/commands/search.py +++ b/pip/commands/search.py @@ -22,7 +22,7 @@ class SearchCommand(Command): '--index', dest='index', metavar='URL', - default='http://pypi.python.org/pypi', + default='https://pypi.python.org/pypi', help='Base URL of Python Package Index (default %default)') def run(self, options, args): diff --git a/pip/download.py b/pip/download.py index a31e5d670..abd0e87a1 100644 --- a/pip/download.py +++ b/pip/download.py @@ -3,12 +3,18 @@ import getpass import mimetypes import os import re +import pkg_resources import shutil +import socket import sys import tempfile + +import requests + from pip.backwardcompat import (md5, copytree, xmlrpclib, urllib, urllib2, - urlparse, string_types, HTTPError) + urlparse) from pip.exceptions import InstallationError +from pip.locations import cacert_crt_file from pip.util import 
(splitext, rmtree, format_size, display_path, backup_dir, ask, ask_path_exists, unpack_file, create_download_cache_folder, cache_download) @@ -21,10 +27,69 @@ __all__ = ['xmlrpclib_transport', 'get_file_content', 'urlopen', 'geturl', 'is_archive_file', 'unpack_vcs_link', 'unpack_file_url', 'is_vcs_url', 'is_file_url', 'unpack_http_url'] +try: + import ssl +except ImportError: + logger.fatal('WARNING! Could not import the ssl module needed to ' + 'verify the SSL certificate of PyPI. Try installing ' + 'it by running (requires compiler): pip install ssl') + + +try: + pip_version = pkg_resources.get_distribution('pip').version +except: + pip_version = 'unknown' + xmlrpclib_transport = xmlrpclib.Transport() +# CAcert Class 1 Root Certificate from +# https://www.cacert.org/certs/root.crt +CACERT_ROOT_CRT = """\ +-----BEGIN CERTIFICATE----- +MIIHPTCCBSWgAwIBAgIBADANBgkqhkiG9w0BAQQFADB5MRAwDgYDVQQKEwdSb290 +IENBMR4wHAYDVQQLExVodHRwOi8vd3d3LmNhY2VydC5vcmcxIjAgBgNVBAMTGUNB +IENlcnQgU2lnbmluZyBBdXRob3JpdHkxITAfBgkqhkiG9w0BCQEWEnN1cHBvcnRA +Y2FjZXJ0Lm9yZzAeFw0wMzAzMzAxMjI5NDlaFw0zMzAzMjkxMjI5NDlaMHkxEDAO +BgNVBAoTB1Jvb3QgQ0ExHjAcBgNVBAsTFWh0dHA6Ly93d3cuY2FjZXJ0Lm9yZzEi +MCAGA1UEAxMZQ0EgQ2VydCBTaWduaW5nIEF1dGhvcml0eTEhMB8GCSqGSIb3DQEJ +ARYSc3VwcG9ydEBjYWNlcnQub3JnMIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIIC +CgKCAgEAziLA4kZ97DYoB1CW8qAzQIxL8TtmPzHlawI229Z89vGIj053NgVBlfkJ +8BLPRoZzYLdufujAWGSuzbCtRRcMY/pnCujW0r8+55jE8Ez64AO7NV1sId6eINm6 +zWYyN3L69wj1x81YyY7nDl7qPv4coRQKFWyGhFtkZip6qUtTefWIonvuLwphK42y +fk1WpRPs6tqSnqxEQR5YYGUFZvjARL3LlPdCfgv3ZWiYUQXw8wWRBB0bF4LsyFe7 +w2t6iPGwcswlWyCR7BYCEo8y6RcYSNDHBS4CMEK4JZwFaz+qOqfrU0j36NK2B5jc +G8Y0f3/JHIJ6BVgrCFvzOKKrF11myZjXnhCLotLddJr3cQxyYN/Nb5gznZY0dj4k +epKwDpUeb+agRThHqtdB7Uq3EvbXG4OKDy7YCbZZ16oE/9KTfWgu3YtLq1i6L43q +laegw1SJpfvbi1EinbLDvhG+LJGGi5Z4rSDTii8aP8bQUWWHIbEZAWV/RRyH9XzQ +QUxPKZgh/TMfdQwEUfoZd9vUFBzugcMd9Zi3aQaRIt0AUMyBMawSB3s42mhb5ivU +fslfrejrckzzAeVLIL+aplfKkQABi6F1ITe1Yw1nPkZPcCBnzsXWWdsC4PDSy826 +YreQQejdIOQpvGQpQsgi3Hia/0PsmBsJUUtaWsJx8cTLc6nloQsCAwEAAaOCAc4w +ggHKMB0GA1UdDgQWBBQWtTIb1Mfz4OaO873SsDrusjkY0TCBowYDVR0jBIGbMIGY +gBQWtTIb1Mfz4OaO873SsDrusjkY0aF9pHsweTEQMA4GA1UEChMHUm9vdCBDQTEe +MBwGA1UECxMVaHR0cDovL3d3dy5jYWNlcnQub3JnMSIwIAYDVQQDExlDQSBDZXJ0 +IFNpZ25pbmcgQXV0aG9yaXR5MSEwHwYJKoZIhvcNAQkBFhJzdXBwb3J0QGNhY2Vy +dC5vcmeCAQAwDwYDVR0TAQH/BAUwAwEB/zAyBgNVHR8EKzApMCegJaAjhiFodHRw +czovL3d3dy5jYWNlcnQub3JnL3Jldm9rZS5jcmwwMAYJYIZIAYb4QgEEBCMWIWh0 +dHBzOi8vd3d3LmNhY2VydC5vcmcvcmV2b2tlLmNybDA0BglghkgBhvhCAQgEJxYl +aHR0cDovL3d3dy5jYWNlcnQub3JnL2luZGV4LnBocD9pZD0xMDBWBglghkgBhvhC +AQ0ESRZHVG8gZ2V0IHlvdXIgb3duIGNlcnRpZmljYXRlIGZvciBGUkVFIGhlYWQg +b3ZlciB0byBodHRwOi8vd3d3LmNhY2VydC5vcmcwDQYJKoZIhvcNAQEEBQADggIB +ACjH7pyCArpcgBLKNQodgW+JapnM8mgPf6fhjViVPr3yBsOQWqy1YPaZQwGjiHCc +nWKdpIevZ1gNMDY75q1I08t0AoZxPuIrA2jxNGJARjtT6ij0rPtmlVOKTV39O9lg +18p5aTuxZZKmxoGCXJzN600BiqXfEVWqFcofN8CCmHBh22p8lqOOLlQ+TyGpkO/c +gr/c6EWtTZBzCDyUZbAEmXZ/4rzCahWqlwQ3JNgelE5tDlG+1sSPypZt90Pf6DBl +Jzt7u0NDY8RD97LsaMzhGY4i+5jhe1o+ATc7iwiwovOVThrLm82asduycPAtStvY +sONvRUgzEv/+PDIqVPfE94rwiCPCR/5kenHA0R6mY7AHfqQv0wGP3J8rtsYIqQ+T +SCX8Ev2fQtzzxD72V7DX3WnRBnc0CkvSyqD/HMaMyRa+xMwyN2hzXwj7UfdJUzYF +CpUCTPJ5GhD22Dp1nPMd8aINcGeGG7MW9S/lpOt5hvk9C8JzC6WZrG/8Z7jlLwum +GCSNe9FINSkYQKyTYOGWhlC0elnYjyELn8+CkcY7v2vcB5G5l1YjqrZslMZIBjzk +zk6q5PYvCdxTby78dOs6Y5nCpqyJvKeyRKANihDjbPIky/qbn3BHLt4Ui9SyIAmW +omTxJBzcoTWcFbLUvFUufQb1nA5V9FrWk9p2rSVzTMVD +-----END CERTIFICATE-----""" + + def get_file_content(url, comes_from=None): """Gets the content of a file; it may be a filename, file: URL, or http: 
URL. Returns (location, content)"""
@@ -48,8 +113,8 @@ def get_file_content(url, comes_from=None):
         url = path
     else:
         ## FIXME: catch some errors
-        resp = urlopen(url)
-        return geturl(resp), resp.read()
+        response = urlopen(url)
+        return response.url, response.content
     try:
         f = open(url)
         content = f.read()
@@ -70,71 +135,95 @@ class URLOpener(object):
     pip's own URL helper that adds HTTP auth and proxy support
     """
     def __init__(self):
+        self.proxies = {}
+        self.timeout = None
         self.passman = urllib2.HTTPPasswordMgrWithDefaultRealm()

-    def __call__(self, url):
+    def __call__(self, url, method='get', redirect=True):
         """
         If the given url contains auth info or if a normal request gets a 401
         response, an attempt is made to fetch the resource using basic HTTP
         auth.
-        """
         url, username, password = self.extract_credentials(url)
-        if username is None:
-            try:
-                response = urllib2.urlopen(self.get_request(url))
-            except urllib2.HTTPError:
-                e = sys.exc_info()[1]
-                if e.code != 401:
-                    raise
-                response = self.get_response(url)
-        else:
-            response = self.get_response(url, username, password)
-        return response
-
-    def get_request(self, url):
-        """
-        Wraps the URL to retrieve to protects against "creative"
-        interpretation of the RFC: http://bugs.python.org/issue8732
-        """
-        if isinstance(url, string_types):
-            url = urllib2.Request(url, headers={'Accept-encoding': 'identity'})
-        return url
+        return self.get_response(url, username, password, method=method, redirect=redirect)

-    def get_response(self, url, username=None, password=None):
-        """
-        does the dirty work of actually getting the rsponse object using urllib2
-        and its HTTP auth builtins.
-        """
+    def handle_401(self, url, username, password, method):
         scheme, netloc, path, query, frag = urlparse.urlsplit(url)
-        req = self.get_request(url)
-
-        stored_username, stored_password = self.passman.find_user_password(None, netloc)
         # see if we have a password stored
+        stored_username, stored_password = self.passman.find_user_password(None, netloc)
         if stored_username is None:
             if username is None and self.prompting:
                 username = urllib.quote(raw_input('User for %s: ' % netloc))
                 password = urllib.quote(getpass.getpass('Password: '))
+        else:
+            username, password = stored_username, stored_password
+        response = self.get_response(url, username, password, retry=True, method=method)
+        if response.status_code != 401:
             if username and password:
                 self.passman.add_password(None, netloc, username, password)
-        stored_username, stored_password = self.passman.find_user_password(None, netloc)
-        authhandler = urllib2.HTTPBasicAuthHandler(self.passman)
-        opener = urllib2.build_opener(authhandler)
-        # FIXME: should catch a 401 and offer to let the user reenter credentials
-        return opener.open(req)
+        return response
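The `handle_401`/`get_response` pair implements a prompt-and-retry loop on HTTP 401. Stripped of pip's password manager, the control flow looks roughly like this; `prompt_for_credentials` is a hypothetical stand-in for the raw_input/getpass prompting above:

```python
# Shape of URLOpener's 401 retry loop, reduced to its essentials.
# prompt_for_credentials is a hypothetical stand-in for the
# raw_input/getpass prompting done in handle_401 above.
import getpass
import requests

def prompt_for_credentials(url):
    username = raw_input('User for %s: ' % url)
    return username, getpass.getpass('Password: ')

def fetch(url, auth=None, retries=1):
    response = requests.get(url, auth=auth)
    if response.status_code == 401 and retries > 0:
        return fetch(url, auth=prompt_for_credentials(url), retries=retries - 1)
    return response
```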
+ """ + + if username and password: + auth = (username, password) + else: + auth = None + + if url.startswith('https://pypi.python.org'): + verify = cacert_crt_file + else: + verify = True + + response = requests.request(method, url, + proxies=self.proxies, + timeout=self.timeout, + auth=auth, + verify=verify, + allow_redirects=redirect, + headers={ + 'User-Agent': 'pip/%s' % pip_version, + }) + + if response.status_code == 401: + if retry: + # catch a 401 and offer to let the user reenter credentials + entered = ask('Credentials were incorrect. Re-enter? (y/n) ', + ('y', 'n')) + if entered != 'y': + raise InstallationError( + "The entered credentials for %s were wrong." % url) + username = password = None + return self.handle_401(url, username, password, method) + + return response + + def setup(self, proxystr='', timeout=None, prompting=True): """ Sets the proxy handler given the option passed on the command line. If an empty string is passed it looks at the HTTP_PROXY environment variable. """ self.prompting = prompting + self.timeout = timeout proxy = self.get_proxy(proxystr) - if proxy: - proxy_support = urllib2.ProxyHandler({"http": proxy, "ftp": proxy, "https": proxy}) - opener = urllib2.build_opener(proxy_support, urllib2.CacheFTPHandler) - urllib2.install_opener(opener) + self.proxies = { + 'http': proxy, + 'https': proxy, + } + if not os.path.exists(cacert_crt_file): + # write cacert root cert to temporary file + cacert_file = open(cacert_crt_file, 'w') + try: + cacert_file.write(CACERT_ROOT_CRT) + finally: + cacert_file.close() def parse_credentials(self, netloc): if "@" in netloc: @@ -204,7 +293,7 @@ def is_url(name): if ':' not in name: return False scheme = name.split(':', 1)[0].lower() - return scheme in ['http', 'https', 'file', 'ftp'] + vcs.all_schemes + return scheme in ['http', 'https', 'file'] + vcs.all_schemes def url_to_path(url): @@ -322,10 +411,10 @@ def is_file_url(link): def _check_md5(download_hash, link): - download_hash = download_hash.hexdigest() - if download_hash != link.md5_hash: - logger.fatal("MD5 hash of the package %s (%s) doesn't match the expected hash %s!" - % (link, download_hash, link.md5_hash)) + digest = download_hash.hexdigest() + if digest != link.md5_hash: + logger.fatal("MD5 hash of the package %s (%s) doesn't match the " + "expected hash %s!" 
@@ -347,12 +436,12 @@ def _download_url(resp, link, temp_location):
     if link.md5_hash:
         download_hash = md5()
     try:
-        total_length = int(resp.info()['content-length'])
+        total_length = int(resp.headers['content-length'])
     except (ValueError, KeyError, TypeError):
         total_length = 0
     downloaded = 0
     show_progress = total_length > 40*1000 or not total_length
-    show_url = link.show_url
+    show_url = link.url
     try:
         if show_progress:
             ## FIXME: the URL can get really long in this message:
@@ -365,7 +454,7 @@
             logger.debug('Downloading from URL %s' % link)
         while True:
-            chunk = resp.read(4096)
+            chunk = resp.raw.read(4096)
             if not chunk:
                 break
             downloaded += len(chunk)
@@ -413,14 +502,13 @@ def unpack_http_url(link, location, download_cache, download_dir=None):
     target_file = None
     download_hash = None
     if download_cache:
-        target_file = os.path.join(download_cache,
-                                   urllib.quote(target_url, ''))
+        cache_filename = list(filter(None, target_url.split('/')))[-1]
+        target_file = os.path.join(download_cache, cache_filename)
         if not os.path.isdir(download_cache):
             create_download_cache_folder(download_cache)
-    if (target_file
-        and os.path.exists(target_file)
-        and os.path.exists(target_file + '.content-type')):
-        fp = open(target_file+'.content-type')
+    if (target_file and os.path.exists(target_file)
+            and os.path.exists(target_file + '.content-type')):
+        fp = open(target_file + '.content-type')
         content_type = fp.read().strip()
         fp.close()
         if link.md5_hash:
@@ -428,11 +516,11 @@
         temp_location = target_file
         logger.notify('Using download cache from %s' % target_file)
     else:
-        resp = _get_response_from_url(target_url, link)
-        content_type = resp.info()['content-type']
+        response = _get_response_from_url(target_url, link)
+        content_type = response.headers['content-type']
         filename = link.filename  # fallback
         # Have a look at the Content-Disposition header for a better guess
-        content_disposition = resp.info().get('content-disposition')
+        content_disposition = response.headers.get('content-disposition')
         if content_disposition:
             type, params = cgi.parse_header(content_disposition)
             # We use ``or`` here because we don't want to use an "empty" value
@@ -443,12 +531,12 @@
             ext = mimetypes.guess_extension(content_type)
             if ext:
                 filename += ext
-        if not ext and link.url != geturl(resp):
-            ext = os.path.splitext(geturl(resp))[1]
+        if not ext and link.url != geturl(response):
+            ext = os.path.splitext(geturl(response))[1]
             if ext:
                 filename += ext
         temp_location = os.path.join(temp_dir, filename)
-        download_hash = _download_url(resp, link, temp_location)
+        download_hash = _download_url(response, link, temp_location)
         if link.md5_hash:
             _check_md5(download_hash, link)
     if download_dir:
@@ -476,6 +564,9 @@ def _get_response_from_url(target_url, link):
     return resp

-class Urllib2HeadRequest(urllib2.Request):
-    def get_method(self):
-        return "HEAD"
+def valid_ipv6_addr(addr):
+    try:
+        addr = socket.inet_pton(socket.AF_INET6, addr)
+    except socket.error:  # not a valid address
+        return False
+    return True
diff --git a/pip/index.py b/pip/index.py
index 8e53e44b7..e9ce48630 100644
--- a/pip/index.py
+++ b/pip/index.py
@@ -1,8 +1,10 @@
-"""Routines related to PyPI, indexes"""
-
+"""
+Routines related to PyPI, indexes, PEP381 mirrors
+"""
 import sys
 import os
 import re
+import hashlib import gzip import mimetypes try: @@ -15,21 +17,31 @@ import random import socket import string import zlib + +import requests + +from pip.locations import serverkey_file from pip.log import logger -from pip.util import Inf -from pip.util import normalize_name, splitext +from pip.util import Inf, normalize_name, splitext from pip.exceptions import DistributionNotFound, BestVersionAlreadyInstalled from pip.backwardcompat import (WindowsError, BytesIO, - Queue, httplib, urlparse, - URLError, HTTPError, u, - product, url2pathname) -from pip.backwardcompat import Empty as QueueEmpty -from pip.download import urlopen, path_to_url2, url_to_path, geturl, Urllib2HeadRequest + Queue, urlparse, + URLError, HTTPError, b, u, + product, url2pathname, + OrderedDict, _ord as ord, + decode_base64, _long, + Empty as QueueEmpty) +from pip.download import urlopen, path_to_url2, url_to_path, geturl +from pip.mirrors import verify, load_key, find_mirrors __all__ = ['PackageFinder'] -DEFAULT_MIRROR_URL = "last.pypi.python.org" +_egg_fragment_re = re.compile(r'#egg=([^&]*)') +_egg_info_re = re.compile(r'([a-z0-9_.]+)-([a-z0-9_.-]+)', re.I) +_py_version_re = re.compile(r'-py([123]\.?[0-9]?)$') +_clean_re = re.compile(r'[^a-z0-9$&+,/:;=?@.#%~_\\|-]', re.I) +_md5_re = re.compile(r'md5=([a-f0-9]+)') class PackageFinder(object): @@ -38,9 +50,8 @@ class PackageFinder(object): This is meant to match easy_install's technique for looking for packages, by reading pages and looking for appropriate links """ - def __init__(self, find_links, index_urls, - use_mirrors=False, mirrors=None, main_mirror_url=None): + use_mirrors=False, mirrors=None): self.find_links = find_links self.index_urls = index_urls self.dependency_links = [] @@ -48,10 +59,16 @@ class PackageFinder(object): # These are boring links that have already been logged somehow: self.logged_links = set() if use_mirrors: - self.mirror_urls = self._get_mirror_urls(mirrors, main_mirror_url) - logger.info('Using PyPI mirrors: %s' % ', '.join(self.mirror_urls)) + self.mirror_urls = self._get_mirror_urls(mirrors) + logger.info('Using PyPI mirrors:\n* %s' % + '\n* '.join([url.url for url in self.mirror_urls])) else: - self.mirror_urls = [] + self.mirror_urls = () + serverkey_cache = open(serverkey_file, 'rb') + try: + self.serverkey = load_key(serverkey_cache.read()) + finally: + serverkey_cache.close() def add_dependency_links(self, links): ## FIXME: this shouldn't be global list this, it should only @@ -64,76 +81,119 @@ class PackageFinder(object): def _sort_locations(locations): """ Sort locations into "files" (archives) and "urls", and return - a pair of lists (files,urls) + a pair of lists (files, urls) """ files = [] urls = [] # puts the url for the given file path into the appropriate # list - def sort_path(path): - url = path_to_url2(path) - if mimetypes.guess_type(url, strict=False)[0] == 'text/html': + def sort_path(url, path): + new_url = path_to_url2(path) + mimetype = mimetypes.guess_type(new_url, strict=False)[0] + url.url = new_url + if mimetype == 'text/html': urls.append(url) else: files.append(url) for url in locations: - if url.startswith('file:'): - path = url_to_path(url) + if isinstance(url, Link): + url = url.copy() + else: + url = Link(url) + if url.url.startswith('file:'): + path = url_to_path(url.url) if os.path.isdir(path): path = os.path.realpath(path) for item in os.listdir(path): - sort_path(os.path.join(path, item)) + sort_path(url, os.path.join(path, item)) elif os.path.isfile(path): - sort_path(path) + sort_path(url, path) 
else: urls.append(url) return files, urls + def make_package_url(self, url, name): + """ + For maximum compatibility with easy_install, ensure the path + ends in a trailing slash. Although this isn't in the spec + (and PyPI can handle it without the slash) some other index + implementations might break if they relied on easy_install's + behavior. + """ + if isinstance(url, Link): + package_url = url.copy() + else: + package_url = Link(url) + new_url = posixpath.join(package_url.url, name) + if not new_url.endswith('/'): + new_url = new_url + '/' + package_url.url = new_url + return package_url + + def verify(self, requirement, url): + """ + Verifies the URL for the given requirement using the PEP381 + verification code. + """ + if url.comes_from and url.base_url: + try: + data = b(url.comes_from.content) + serversig = requirement.serversig(url.base_url) + if data and serversig: + return verify(self.serverkey, data, serversig) + except ValueError: + return False + return False + def find_requirement(self, req, upgrade): url_name = req.url_name # Only check main index if index URL is given: main_index_url = None if self.index_urls: # Check that we have the url_name correctly spelled: - main_index_url = Link(posixpath.join(self.index_urls[0], url_name)) - # This will also cache the page, so it's okay that we get it again later: + main_index_url = self.make_package_url(self.index_urls[0], + url_name) + # This will also cache the page, + # so it's okay that we get it again later: page = self._get_page(main_index_url, req) if page is None: url_name = self._find_url_name(Link(self.index_urls[0]), url_name, req) or req.url_name # Combine index URLs with mirror URLs here to allow # adding more index URLs from requirements files - all_index_urls = self.index_urls + self.mirror_urls - - def mkurl_pypi_url(url): - loc = posixpath.join(url, url_name) - # For maximum compatibility with easy_install, ensure the path - # ends in a trailing slash. Although this isn't in the spec - # (and PyPI can handle it without the slash) some other index - # implementations might break if they relied on easy_install's behavior. 
- if not loc.endswith('/'): - loc = loc + '/' - return loc + + locations = [] + indexes_package_urls = [] + mirrors_package_urls = [] if url_name is not None: - locations = [ - mkurl_pypi_url(url) - for url in all_index_urls] + self.find_links - else: - locations = list(self.find_links) - locations.extend(self.dependency_links) + indexes_package_urls = [self.make_package_url(url, url_name) + for url in self.index_urls] + locations.extend(indexes_package_urls) + mirrors_package_urls = [self.make_package_url(url, url_name) + for url in self.mirror_urls] + locations.extend(mirrors_package_urls) + + locations.extend(self.find_links + self.dependency_links) + for version in req.absolute_versions: if url_name is not None and main_index_url is not None: - locations = [ - posixpath.join(main_index_url.url, version)] + locations + version_url = posixpath.join(main_index_url.url, version) + locations = [version_url] + locations file_locations, url_locations = self._sort_locations(locations) - locations = [Link(url) for url in url_locations] + locations = [] + for url in url_locations: + if isinstance(url, Link): + locations.append(url) + else: + locations.append(Link(url)) logger.debug('URLs to search for versions for %s:' % req) for location in locations: logger.debug('* %s' % location) + found_versions = [] found_versions.extend( self._package_versions( @@ -143,60 +203,109 @@ class PackageFinder(object): logger.debug('Analyzing links from page %s' % page.url) logger.indent += 2 try: - page_versions.extend(self._package_versions(page.links, req.name.lower())) + page_versions.extend(self._package_versions( + page.links, req.name.lower())) finally: logger.indent -= 2 + dependency_versions = list(self._package_versions( [Link(url) for url in self.dependency_links], req.name.lower())) if dependency_versions: - logger.info('dependency_links found: %s' % ', '.join([link.url for parsed, link, version in dependency_versions])) + dependency_urls = [link.url for _, link, _ in dependency_versions] + logger.info('dependency_links found: %s' % + ', '.join(dependency_urls)) + file_versions = list(self._package_versions( [Link(url) for url in file_locations], req.name.lower())) - if not found_versions and not page_versions and not dependency_versions and not file_versions: - logger.fatal('Could not find any downloads that satisfy the requirement %s' % req) - raise DistributionNotFound('No distributions at all found for %s' % req) + if (not found_versions and not page_versions and + not dependency_versions and not file_versions): + logger.fatal('Could not find any downloads that satisfy ' + 'the requirement %s' % req) + raise DistributionNotFound('No distributions at all found for %s' + % req) + if req.satisfied_by is not None: - found_versions.append((req.satisfied_by.parsed_version, Inf, req.satisfied_by.version)) + found_versions.append((req.satisfied_by.parsed_version, + Inf, req.satisfied_by.version)) + if file_versions: file_versions.sort(reverse=True) - logger.info('Local files found: %s' % ', '.join([url_to_path(link.url) for parsed, link, version in file_versions])) + file_urls = [url_to_path(link.url) for _, link, _ in file_versions] + logger.info('Local files found: %s' % ', '.join(file_urls)) found_versions = file_versions + found_versions + all_versions = found_versions + page_versions + dependency_versions - applicable_versions = [] - for (parsed_version, link, version) in all_versions: + + applicable_versions = OrderedDict() + for parsed_version, link, version in all_versions: if version not in 
req.req: - logger.info("Ignoring link %s, version %s doesn't match %s" - % (link, version, ','.join([''.join(s) for s in req.req.specs]))) + req_specs = [''.join(s) for s in req.req.specs] + logger.info("Ignoring link %s, version %s doesn't match %s" % + (link, version, ','.join(req_specs))) continue - applicable_versions.append((link, version)) - applicable_versions = sorted(applicable_versions, key=lambda v: pkg_resources.parse_version(v[1]), reverse=True) - existing_applicable = bool([link for link, version in applicable_versions if link is Inf]) + link_comes_from = None + mirror_urls = [mirror.url for mirror in mirrors_package_urls] + if link is not Inf: + link_comes_from = getattr(link, 'comes_from', None) + if link_comes_from is not None: + link.is_mirror = link_comes_from.url in mirror_urls + applicable_versions.setdefault(version, []).append(link) + + for version in applicable_versions: + random.shuffle(applicable_versions[version]) + + sorted_applicable_versions = sorted(applicable_versions.items(), + key=lambda v: pkg_resources.parse_version(v[0]), reverse=True) + applicable_versions = OrderedDict(sorted_applicable_versions) + + all_links = [link for link in [links + for links in applicable_versions.values()] + if link is Inf] + existing_applicable = bool(all_links) + if not upgrade and existing_applicable: - if applicable_versions[0][1] is Inf: - logger.info('Existing installed version (%s) is most up-to-date and satisfies requirement' - % req.satisfied_by.version) + if Inf in applicable_versions[0][1]: + logger.info('Existing installed version (%s) is most ' + 'up-to-date and satisfies requirement' % + req.satisfied_by.version) raise BestVersionAlreadyInstalled else: - logger.info('Existing installed version (%s) satisfies requirement (most up-to-date version is %s)' - % (req.satisfied_by.version, applicable_versions[0][1])) + logger.info('Existing installed version (%s) satisfies ' + 'requirement (most up-to-date version is %s)' % + (req.satisfied_by.version, + applicable_versions[0][1])) return None + if not applicable_versions: - logger.fatal('Could not find a version that satisfies the requirement %s (from versions: %s)' - % (req, ', '.join([version for parsed_version, link, version in found_versions]))) - raise DistributionNotFound('No distributions matching the version for %s' % req) - if applicable_versions[0][0] is Inf: - # We have an existing version, and its the best version - logger.info('Installed version (%s) is most up-to-date (past versions: %s)' - % (req.satisfied_by.version, ', '.join([version for link, version in applicable_versions[1:]]) or 'none')) + show_versions = [version for _, _, version in found_versions] + logger.fatal('Could not find a version that satisfies ' + 'the requirement %s (from versions: %s)' % + (req, ', '.join(show_versions))) + raise DistributionNotFound('No distributions matching ' + 'the version for %s' % req) + + newest = list(applicable_versions.keys())[0] + + if Inf in applicable_versions: + # We have an existing version, and it's the best version + show_versions = [vers for vers in applicable_versions.keys()[1:]] + logger.info('Installed version (%s) is most up-to-date ' + '(past versions: %s)' % + (req.satisfied_by.version, + ', '.join(show_versions) or 'none')) raise BestVersionAlreadyInstalled + if len(applicable_versions) > 1: logger.info('Using version %s (newest of versions: %s)' % - (applicable_versions[0][1], ', '.join([version for link, version in applicable_versions]))) - return applicable_versions[0][0] + (newest, ', 
'.join(applicable_versions.keys()))) + + return applicable_versions[newest] def _find_url_name(self, index_url, url_name, req): - """Finds the true URL name of a package, when the given name isn't quite correct. - This is usually used to implement case-insensitivity.""" + """ + Finds the true URL name of a package, when the given name isn't + quite correct. This is usually used to implement case-insensitivity. + """ if not index_url.url.endswith('/'): # Vaguely part of the PyPI API... weird but true. ## FIXME: bad to modify this? @@ -249,12 +358,11 @@ class PackageFinder(object): for link in page.rel_links(): pending_queue.put(link) - _egg_fragment_re = re.compile(r'#egg=([^&]*)') - _egg_info_re = re.compile(r'([a-z0-9_.]+)-([a-z0-9_.-]+)', re.I) - _py_version_re = re.compile(r'-py([123]\.?[0-9]?)$') - def _sort_links(self, links): - "Returns elements of links in order, non-egg links first, egg links second, while eliminating duplicates" + """ + Returns elements of links in order, non-egg links first, + egg links second, while eliminating duplicates + """ eggs, no_eggs = [], [] seen = set() for link in links: @@ -306,7 +414,7 @@ class PackageFinder(object): if version is None: logger.debug('Skipping link %s; wrong project name (not %s)' % (link, search_name)) return [] - match = self._py_version_re.search(version) + match = _py_version_re.search(version) if match: version = version[:match.start()] py_version = match.group(1) @@ -319,7 +427,7 @@ class PackageFinder(object): version)] def _egg_info_matches(self, egg_info, search_name, link): - match = self._egg_info_re.search(egg_info) + match = _egg_info_re.search(egg_info) if not match: logger.debug('Could not parse version from link: %s' % link) return None @@ -334,25 +442,26 @@ class PackageFinder(object): def _get_page(self, link, req): return HTMLPage.get_page(link, req, cache=self.cache) - def _get_mirror_urls(self, mirrors=None, main_mirror_url=None): - """Retrieves a list of URLs from the main mirror DNS entry + def _get_mirror_urls(self, mirrors=None): + """ + Retrieves a list of URLs from the main mirror DNS entry unless a list of mirror URLs are passed. """ if not mirrors: - mirrors = get_mirrors(main_mirror_url) - # Should this be made "less random"? E.g. netselect like? 
- random.shuffle(mirrors) + mirrors = find_mirrors(amount=10, start_with='b', prefer_fastest=False) + mirrors = [mirror[0] for mirror in mirrors] - mirror_urls = set() + mirror_urls = [] for mirror_url in mirrors: # Make sure we have a valid URL - if not ("http://" or "https://" or "file://") in mirror_url: + if not mirror_url.startswith(("http://", "https://", "file://")): mirror_url = "http://%s" % mirror_url if not mirror_url.endswith("/simple"): mirror_url = "%s/simple/" % mirror_url - mirror_urls.add(mirror_url) + if mirror_url not in mirror_urls: + mirror_urls.append(mirror_url) - return list(mirror_urls) + return tuple(Link(url, is_mirror=True) for url in mirror_urls) class PageCache(object): @@ -415,7 +524,8 @@ class HTMLPage(object): from pip.vcs import VcsSupport for scheme in VcsSupport.schemes: if url.lower().startswith(scheme) and url[len(scheme)] in '+:': - logger.debug('Cannot look at %(scheme)s URL %(link)s' % locals()) + logger.debug('Cannot look at %s URL %s' % + (scheme, link)) return None if cache is not None: @@ -441,37 +551,30 @@ class HTMLPage(object): logger.debug('Getting page %s' % url) # Tack index.html onto file:// URLs that point to directories - (scheme, netloc, path, params, query, fragment) = urlparse.urlparse(url) + parsed_url = urlparse.urlparse(url) + scheme, netloc, path, params, query, fragment = parsed_url if scheme == 'file' and os.path.isdir(url2pathname(path)): - # add trailing slash if not present so urljoin doesn't trim final segment + # add trailing slash if not present so urljoin + # doesn't trim final segment if not url.endswith('/'): url += '/' url = urlparse.urljoin(url, 'index.html') logger.debug(' file: URL is directory, getting %s' % url) - resp = urlopen(url) - - real_url = geturl(resp) - headers = resp.info() - contents = resp.read() - encoding = headers.get('Content-Encoding', None) - #XXX need to handle exceptions and add testing for this - if encoding is not None: - if encoding == 'gzip': - contents = gzip.GzipFile(fileobj=BytesIO(contents)).read() - if encoding == 'deflate': - contents = zlib.decompress(contents) - inst = cls(u(contents), real_url, headers) - except (HTTPError, URLError, socket.timeout, socket.error, OSError, WindowsError): + response = urlopen(url) + inst = cls(u(response.content), response.url, response.headers) + except (HTTPError, URLError, socket.timeout, + socket.error, OSError, WindowsError, + requests.RequestException): e = sys.exc_info()[1] desc = str(e) - if isinstance(e, socket.timeout): + if isinstance(e, (socket.timeout, requests.Timeout)): log_meth = logger.info - level =1 + level = 1 desc = 'timed out' elif isinstance(e, URLError): log_meth = logger.info - if hasattr(e, 'reason') and isinstance(e.reason, socket.timeout): + if hasattr(e, 'reason') and isinstance(e.reason, (socket.timeout, requests.Timeout)): desc = 'timed out' level = 1 else: @@ -489,7 +592,7 @@ class HTMLPage(object): cache.add_page_failure(url, level) return None if cache is not None: - cache.add_page([url, real_url], inst) + cache.add_page([url, response.url], inst) return inst @staticmethod @@ -500,15 +603,11 @@ class HTMLPage(object): ## FIXME: some warning or something? ## assertion error? 
return '' - req = Urllib2HeadRequest(url, headers={'Host': netloc}) - resp = urlopen(req) - try: - if hasattr(resp, 'code') and resp.code != 200 and scheme not in ('ftp', 'ftps'): - ## FIXME: doesn't handle redirects - return '' - return resp.info().get('content-type', '') - finally: - resp.close() + response = urlopen(url, method='head') + if hasattr(response, 'status_code') and response.status_code != 200 and scheme not in ('ftp', 'ftps'): + ## FIXME: doesn't handle redirects + return '' + return response.headers.get('content-type', '') @property def base_url(self): @@ -522,7 +621,9 @@ class HTMLPage(object): @property def links(self): - """Yields all links in the page""" + """ + Yields all links in the page + """ for match in self._href_re.finditer(self.content): url = match.group(1) or match.group(2) or match.group(3) url = self.clean_link(urlparse.urljoin(self.base_url, url)) @@ -535,7 +636,9 @@ class HTMLPage(object): yield url def explicit_rel_links(self, rels=('homepage', 'download')): - """Yields all links with the given relations""" + """ + Yields all links with the given relations + """ for match in self._rel_re.finditer(self.content): found_rels = match.group(1).lower().split() for rel in rels: @@ -564,21 +667,30 @@ class HTMLPage(object): url = self.clean_link(urlparse.urljoin(self.base_url, url)) yield Link(url, self) - _clean_re = re.compile(r'[^a-z0-9$&+,/:;=?@.#%_\\|-]', re.I) - def clean_link(self, url): - """Makes sure a link is fully encoded. That is, if a ' ' shows up in + """ + Makes sure a link is fully encoded. That is, if a ' ' shows up in the link, it will be rewritten to %20 (while not over-quoting - % or other characters).""" - return self._clean_re.sub( - lambda match: '%%%2x' % ord(match.group(0)), url) + % or other characters). + """ + def replacer(match): + matched_group = match.group(0) + return '%%%2x' % ord(matched_group) + return _clean_re.sub(replacer, url.strip()) class Link(object): - def __init__(self, url, comes_from=None): + def __init__(self, url, comes_from=None, is_mirror=False, mirror_urls=None): self.url = url self.comes_from = comes_from + self.is_mirror = is_mirror + if mirror_urls is not None: + for mirror in mirror_urls: + if not isinstance(mirror, Link): + mirror = Link(mirror) + if self.url.startswith(mirror.base_url): + self.is_mirror = True def __str__(self): if self.comes_from: @@ -603,6 +715,14 @@ class Link(object): return name @property + def netloc(self): + return urlparse.urlsplit(self.url)[1] + + @property + def base_url(self): + return urlparse.urlunsplit((self.scheme, self.netloc, '', '', '')) + + @property def scheme(self): return urlparse.urlsplit(self.url)[0] @@ -621,20 +741,16 @@ class Link(object): url = url.rstrip('/') return url - _egg_fragment_re = re.compile(r'#egg=([^&]*)') - @property def egg_fragment(self): - match = self._egg_fragment_re.search(self.url) + match = _egg_fragment_re.search(self.url) if not match: return None return match.group(1) - _md5_re = re.compile(r'md5=([a-f0-9]+)') - @property def md5_hash(self): - match = self._md5_re.search(self.url) + match = _md5_re.search(self.url) if match: return match.group(1) return None @@ -643,6 +759,10 @@ class Link(object): def show_url(self): return posixpath.basename(self.url.split('#', 1)[0].split('?', 1)[0]) + def copy(self): + return self.__class__(self.url, comes_from=self.comes_from, + is_mirror=self.is_mirror) + def get_requirement_from_url(url): """Get a requirement from the URL, if possible. 
This looks for #egg
@@ -669,33 +789,9 @@ def package_to_requirement(package_name):
     return name

-def get_mirrors(hostname=None):
-    """Return the list of mirrors from the last record found on the DNS
-    entry::
-
-        >>> from pip.index import get_mirrors
-        >>> get_mirrors()
-        ['a.pypi.python.org', 'b.pypi.python.org', 'c.pypi.python.org',
-         'd.pypi.python.org']
-
-    Originally written for the distutils2 project by Alexis Metaireau.
-    """
-    if hostname is None:
-        hostname = DEFAULT_MIRROR_URL
-
-    # return the last mirror registered on PyPI.
-    try:
-        hostname = socket.gethostbyname_ex(hostname)[0]
-    except socket.gaierror:
-        return []
-    end_letter = hostname.split(".", 1)
-
-    # determine the list from the last one.
-    return ["%s.%s" % (s, end_letter[1]) for s in string_range(end_letter[0])]
-
-
 def string_range(last):
-    """Compute the range of string between "a" and last.
+    """
+    Compute the range of string between "a" and last.

     This works for simple "a to z" lists, but also for "a to zz" lists.
     """
@@ -705,4 +801,3 @@
         yield result
         if result == last:
             return
-
diff --git a/pip/locations.py b/pip/locations.py
index 34c6dbbe6..ae6c3df45 100644
--- a/pip/locations.py
+++ b/pip/locations.py
@@ -50,3 +50,6 @@ else:
     if sys.platform[:6] == 'darwin' and sys.prefix[:16] == '/System/Library/':
         bin_py = '/usr/local/bin'
     default_log_file = os.path.join(user_dir, 'Library/Logs/pip.log')
+
+serverkey_file = os.path.join(default_storage_dir, 'serverkey.pub')
+cacert_crt_file = os.path.join(default_storage_dir, 'cacert.root.crt')
\ No newline at end of file
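The removed `get_mirrors` documents the DNS convention that the new pip/mirrors.py (next file) builds on: last.pypi.python.org resolves to the alphabetically last mirror, so the whole list can be derived from it. A runnable restatement, assuming network access and the `string_range` helper kept above:

```python
# The DNS trick behind PEP 381 mirror discovery: the CNAME of
# last.pypi.python.org names the final mirror, and every name from
# a.pypi.python.org up to it is a mirror. Assumes network access and
# string_range from pip/index.py above.
import socket

def get_mirrors(hostname='last.pypi.python.org'):
    try:
        last = socket.gethostbyname_ex(hostname)[0]  # e.g. 'g.pypi.python.org'
    except socket.gaierror:
        return []
    first, rest = last.split('.', 1)
    return ['%s.%s' % (name, rest) for name in string_range(first)]
```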
diff --git a/pip/mirrors.py b/pip/mirrors.py
new file mode 100644
index 000000000..52992b4e9
--- /dev/null
+++ b/pip/mirrors.py
@@ -0,0 +1,400 @@
+"""
+Library to support tools that access PyPI mirrors. The following
+functional areas are covered:
+- mirror selection (find_mirror)
+- mirror verification
+- key rollover
+"""
+
+import datetime
+import hashlib
+import errno
+import random
+import select
+import socket
+import time
+from pip.backwardcompat import (b, u, _ord as ord,
+                                decode_base64, _long)
+
+
+def _mirror_list(first):
+    """
+    Generator producing all mirror names
+    """
+    ord_a = ord('a')
+    try:
+        last = socket.gethostbyname_ex('last.pypi.python.org')
+    except socket.gaierror:
+        return
+    cur_index = ord(first) - ord_a
+    cur = first + '.pypi.python.org'
+    while True:
+        for family, _, _, _, sockaddr in socket.getaddrinfo(cur, 0, 0, socket.SOCK_STREAM):
+            yield cur, family, sockaddr[0]
+        if last[0] == cur:
+            break
+        cur_index += 1
+        if cur_index < 26:
+            # a..z
+            cur = chr(ord_a + cur_index)
+        elif cur_index > 701:
+            raise ValueError('too many mirrors')
+        else:
+            # aa, ab, ... zz
+            cur = divmod(cur_index, 26)
+            cur = chr(ord_a - 1 + cur[0]) + chr(ord_a + cur[1])
+        cur += '.pypi.python.org'
+
+def _batched_mirror_list(first):
+    """
+    Generator that does DNS lookups in batches of 10, and shuffles them.
+    """
+    batch = []
+    for res in _mirror_list(first):
+        batch.append(res)
+        if len(batch) == 10:
+            random.shuffle(batch)
+            for res2 in batch:
+                yield res2
+            batch = []
+    random.shuffle(batch)
+    for res2 in batch:
+        yield res2
+
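`_mirror_list` walks mirror hostnames in the order a..z, then aa..zz. The naming arithmetic in isolation (pure computation, no DNS involved):

```python
# The naming arithmetic used by _mirror_list above: index 0..25 maps to
# a..z, 26..701 maps to aa..zz, appended to .pypi.python.org.
def mirror_name(index):
    ord_a = ord('a')
    if index < 26:
        prefix = chr(ord_a + index)                            # a .. z
    elif index <= 701:
        first, second = divmod(index, 26)
        prefix = chr(ord_a - 1 + first) + chr(ord_a + second)  # aa .. zz
    else:
        raise ValueError('too many mirrors')
    return prefix + '.pypi.python.org'

assert mirror_name(0) == 'a.pypi.python.org'
assert mirror_name(26) == 'aa.pypi.python.org'
assert mirror_name(701) == 'zz.pypi.python.org'
```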
+class _Mirror:
+    # status values:
+    # 0: wants to send
+    # 1: wants to recv
+    # 2: completed, ok
+    # 3: completed, failed
+    def __init__(self, name, family, ip):
+        self.name = name
+        self.family = family
+        self.ip = ip
+        self.socket = socket.socket(family, socket.SOCK_STREAM)
+        self.socket.setblocking(0)
+        self.started = time.time()
+        try:
+            self.socket.connect((ip, 80))
+        except socket.error, e:
+            if e.errno != errno.EINPROGRESS:
+                raise
+        # now need to select for writing
+        self.status = 0
+
+    def write(self):
+        url = 'last-modified'
+        if self.name == 'a.pypi.python.org':
+            # the master server doesn't provide last-modified,
+            # as that would be pointless. Instead, /daytime can be
+            # used as an indication of currency and responsiveness.
+            url = 'daytime'
+        self.socket.send('GET /%s HTTP/1.0\r\n'
+                         'Host: %s\r\n'
+                         '\r\n' % (url, self.name))
+        self.status = 1
+
+    def read(self):
+        data = self.socket.recv(1200)
+        self.response_time = time.time()-self.started
+        # response should be much shorter
+        assert len(data) < 1200
+        self.socket.close()
+        data = data.splitlines()
+        if data[0].split()[1] == '200':
+            # ok
+            data = data[-1]
+            try:
+                self.last_modified = datetime.datetime.strptime(data, "%Y%m%dT%H:%M:%S")
+                self.status = 2  # complete
+            except ValueError:
+                self.status = 3  # failed
+        else:
+            self.status = 3
+
+    def failed(self):
+        self.socket.close()
+        self.status = 3
+
+    def results(self):
+        return self.name, self.family, self.ip, self.response_time, self.last_modified
+
+def _select(mirrors):
+    # perform select call on mirrors dictionary
+    rlist = []
+    wlist = []
+    xlist = []
+    for m in mirrors.values():
+        if m.status == 0:
+            wlist.append(m.socket)
+            xlist.append(m.socket)
+        elif m.status == 1:
+            rlist.append(m.socket)
+            xlist.append(m.socket)
+    rlist, wlist, xlist = select.select(rlist, wlist, xlist, 0)
+    completed = []
+    for s in wlist:
+        mirrors[s].write()
+    for s in rlist:
+        m = mirrors[s]
+        del mirrors[s]
+        m.read()
+        if m.status == 2:
+            completed.append(m)
+    for s in xlist:
+        mirrors[s].failed()
+        del mirrors[s]
+    return completed
+
+def _close(mirrors):
+    for m in mirrors:
+        m.close()
+
+def _newest(mirrors, amount=1):
+    mirrors.sort(key=lambda m: m.last_modified)
+    results = [mirror.results() for mirror in mirrors[-amount:]]
+    if amount == 1:
+        return results[0]
+    return results[::-1]
+
+
+def find_mirrors(start_with='a',
+                 good_age=30*60,
+                 slow_mirrors_wait=5,
+                 prefer_fastest=True,
+                 amount=1):
+    """
+    find_mirrors(start_with, good_age, slow_mirrors_wait, prefer_fastest)
+      -> [(name, family, IP, response_time, last_modified)]
+
+    Find a PyPI mirror matching given criteria.
+    start_with indicates the first mirror that should be considered (defaults to 'a').
+    If prefer_fastest is True, it stops with the first mirror responding. Mirrors 'compete'
+    against each other in randomly-shuffled batches of 10.
+    If this procedure goes on for longer than slow_mirrors_wait (default 5s) and prefer_fastest
+    is false, return even if not all mirrors have responded yet.
+    If no matching mirror can be found, the newest one that did respond is returned.
+ If no mirror can be found at all, ValueError is raised + """ + started = time.time() + good_mirrors = [] + pending_mirrors = {} # socket:mirror + good_last_modified = datetime.datetime.utcnow() - datetime.timedelta(seconds=good_age) + for host, family, ip in _batched_mirror_list(start_with): + try: + m = _Mirror(host, family, ip) + except socket.error: + continue + pending_mirrors[m.socket] = m + for m in _select(pending_mirrors): + if prefer_fastest and m.last_modified > good_last_modified: + _close(pending_mirrors) + return m.results() + else: + good_mirrors.append(m) + + while pending_mirrors: + if time.time() > started + slow_mirrors_wait and good_mirrors: + # if we have looked for 5s for a mirror, and we already have one + # return the newest one + _close(pending_mirrors) + return _newest(good_mirrors, amount) + for m in _select(pending_mirrors): + if prefer_fastest and m.last_modified > good_last_modified: + _close(pending_mirrors) + return [m.results()] + else: + good_mirrors.append(m) + if not good_mirrors: + raise ValueError("No mirrors found") + return _newest(good_mirrors, amount) + +# Distribute and use freely; there are no restrictions on further +# dissemination and usage except those imposed by the laws of your +# country of residence. This software is provided "as is" without +# warranty of fitness for use or suitability for any purpose, express +# or implied. Use at your own risk or not at all. +""" +Verify a DSA signature, for use with PyPI mirrors. + +Originally copied from PyPI's own code: +https://svn.python.org/packages/trunk/pypi/tools/verify.py + +Verification should use the following steps: +1. Download the DSA key from http://pypi.python.org/serverkey, as key_string +2. key = load_key(key_string) +3. Download the package page, from <mirror>/simple/<package>/, as data +4. Download the package signature, from <mirror>/serversig/<package>, as sig +5. Check verify(key, data, sig) +""" + +try: + from M2Crypto import EVP, DSA, BIO + + def load_key(string): + """ + load_key(string) -> key + + Convert a PEM format public DSA key into + an internal representation. + """ + return DSA.load_pub_key_bio(BIO.MemoryBuffer(string)) + + def verify(key, data, sig): + """ + verify(key, data, sig) -> bool + + Verify autenticity of the signature created by key for + data. data is the bytes that got signed; signature is the + bytes that represent the signature, using the sha1+DSA + algorithm. key is an internal representation of the DSA key + as returned from load_key.""" + md = EVP.MessageDigest('sha1') + md.update(data) + digest = md.final() + return key.verify_asn1(digest, sig) + +except ImportError: + + # DSA signature algorithm, taken from pycrypto 2.0.1 + # The license terms are the same as the ones for this module. + def _inverse(u, v): + """ + _inverse(u:long, u:long):long + Return the inverse of u mod v. 
+ """ + u3, v3 = _long(u), _long(v) + u1, v1 = _long(1), _long(0) + while v3 > 0: + q = u3 // v3 + u1, v1 = v1, u1 - v1 * q + u3, v3 = v3, u3 - v3 * q + while u1 < 0: + u1 = u1 + v + return u1 + + def _verify(key, M, sig): + p, q, g, y = key + r, s = sig + if r <= 0 or r >= q or s <= 0 or s >= q: + return False + w = _inverse(s, q) + u1, u2 = (M * w) % q, (r * w) % q + v1 = pow(g, u1, p) + v2 = pow(y, u2, p) + v = (v1 * v2) % p + v = v % q + return v == r + + # END OF pycrypto + + def _bytes2int(b): + value = 0 + for c in b: + value = value * 256 + ord(c) + return value + + _SEQUENCE = 0x30 # cons + _INTEGER = 2 # prim + _BITSTRING = 3 # prim + _OID = 6 # prim + + def _asn1parse(string): + tag = ord(string[0]) + assert tag & 31 != 31 # only support one-byte tags + length = ord(string[1]) + assert length != 128 # indefinite length not supported + pos = 2 + if length > 128: + # multi-byte length + val = 0 + length -= 128 + val = _bytes2int(string[pos:pos + length]) + pos += length + length = val + data = string[pos:pos + length] + rest = string[pos + length:] + assert pos + length <= len(string) + if tag == _SEQUENCE: + result = [] + while data: + value, data = _asn1parse(data) + result.append(value) + elif tag == _INTEGER: + assert ord(data[0]) < 128 # negative numbers not supported + result = 0 + for c in data: + result = result * 256 + ord(c) + elif tag == _BITSTRING: + result = data + elif tag == _OID: + result = data + else: + raise ValueError("Unsupported tag %x" % tag) + return (tag, result), rest + + def load_key(string): + """ + load_key(string) -> key + + Convert a PEM format public DSA key into + an internal representation.""" + lines = [line.strip() for line in string.splitlines()] + assert lines[0] == b("-----BEGIN PUBLIC KEY-----") + assert lines[-1] == b("-----END PUBLIC KEY-----") + data = decode_base64(''.join([u(line) for line in lines[1:-1]])) + spki, rest = _asn1parse(data) + assert not rest + # SubjectPublicKeyInfo ::= SEQUENCE { + # algorithm AlgorithmIdentifier, + # subjectPublicKey BIT STRING } + assert spki[0] == _SEQUENCE + algoid, key = spki[1] + assert key[0] == _BITSTRING + key = key[1] + # AlgorithmIdentifier ::= SEQUENCE { + # algorithm OBJECT IDENTIFIER, + # parameters ANY DEFINED BY algorithm OPTIONAL } + assert algoid[0] == _SEQUENCE + algorithm, parameters = algoid[1] + # dsaEncryption + # assert algorithm[0] == _OID and algorithm[1] == '*\x86H\xce8\x04\x01' + # Dss-Parms ::= SEQUENCE { + # p INTEGER, + # q INTEGER, + # g INTEGER } + assert parameters[0] == _SEQUENCE + p, q, g = parameters[1] + assert p[0] == q[0] == g[0] == _INTEGER + p, q, g = p[1], q[1], g[1] + # Parse bit string value as integer + # assert key[0] == '\0' # number of bits multiple of 8 + y, rest = _asn1parse(key[1:]) + assert not rest + assert y[0] == _INTEGER + y = y[1] + return p, q, g, y + + def verify(key, data, sig): + """ + verify(key, data, sig) -> bool + + Verify autenticity of the signature created by key for + data. data is the bytes that got signed; signature is the + bytes that represent the signature, using the sha1+DSA + algorithm. 
+    def verify(key, data, sig):
+        """
+        verify(key, data, sig) -> bool
+
+        Verify the authenticity of the signature created by key for
+        data. data is the bytes that got signed; sig is the bytes that
+        represent the signature, using the sha1+DSA algorithm. key is
+        an internal representation of the DSA key as returned from
+        load_key.
+        """
+        sha = hashlib.sha1()
+        sha.update(data)
+        data = sha.digest()
+        data = _bytes2int(data)
+        # Dss-Sig-Value ::= SEQUENCE {
+        #   r INTEGER,
+        #   s INTEGER }
+        sig, rest = _asn1parse(sig)
+        assert not rest
+        assert sig[0] == _SEQUENCE
+        r, s = sig[1]
+        assert r[0] == s[0] == _INTEGER
+        sig = r[1], s[1]
+        return _verify(key, data, sig)
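
Put together, the five steps from the module docstring come down to a few
lines. A rough sketch (the import path and the mirror host are assumptions;
requests, which this branch adopts, does the fetching, and real code would
need error handling):

    import requests
    from pip.mirrors import load_key, verify  # assumed module path

    key = load_key(requests.get('https://pypi.python.org/serverkey').content)
    mirror = 'http://d.pypi.python.org'  # hypothetical PEP 381 mirror
    data = requests.get(mirror + '/simple/pip/').content
    sig = requests.get(mirror + '/serversig/pip').content
    if not verify(key, data, sig):
        raise ValueError('mirror page for pip failed signature verification')
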
" + "File 'setup.py' not found.", name) link = Link(path_to_url(name)) elif is_archive_file(path): if not os.path.isfile(path): @@ -198,6 +205,21 @@ class InstallRequirement(object): def setup_py(self): return os.path.join(self.source_dir, 'setup.py') + def serversig(self, base_url): + if base_url not in self._serversigs: + if self.req is not None: + sig_url = '%s/serversig/%s' % (base_url, self.url_name) + try: + response = urlopen(sig_url) + self._serversigs[base_url] = response.content + except (HTTPError, URLError, socket.timeout, + socket.error, OSError, WindowsError, + requests.RequestException): + # return empty string in case this was just a + # temporary connection failure + return '' + return self._serversigs.get(base_url, '') + def run_egg_info(self, force_root_egg_info=False): assert self.source_dir if self.name: @@ -477,11 +499,12 @@ exec(compile(open(__file__).read().replace('\\r\\n', '\\n'), __file__, 'exec')) config.readfp(FakeFile(dist.get_metadata_lines('entry_points.txt'))) if config.has_section('console_scripts'): for name, value in config.items('console_scripts'): - paths_to_remove.add(os.path.join(bin_py, name)) + this_bin = os.path.join(bin_py, name) + paths_to_remove.add(this_bin) if sys.platform == 'win32': - paths_to_remove.add(os.path.join(bin_py, name) + '.exe') - paths_to_remove.add(os.path.join(bin_py, name) + '.exe.manifest') - paths_to_remove.add(os.path.join(bin_py, name) + '-script.py') + paths_to_remove.add(this_bin + '.exe') + paths_to_remove.add(this_bin + '.exe.manifest') + paths_to_remove.add(this_bin + '-script.py') paths_to_remove.remove(auto_confirm) self.uninstalled = paths_to_remove @@ -785,6 +808,8 @@ class RequirementSet(object): self.build_dir = build_dir self.src_dir = src_dir self.download_dir = download_dir + if download_cache is not None: + download_cache = os.path.expanduser(download_cache) self.download_cache = download_cache self.upgrade = upgrade self.ignore_installed = ignore_installed @@ -900,7 +925,10 @@ class RequirementSet(object): % (req_to_install, req_to_install.source_dir)) def prepare_files(self, finder, force_root_egg_info=False, bundle=False): - """Prepare process. Create temp directories, download and/or unpack files.""" + """ + Prepare process. Create temp directories, download + and/or unpack files. + """ unnamed = list(self.unnamed_requirements) reqs = list(self.requirements.values()) while reqs or unnamed: @@ -923,7 +951,7 @@ class RequirementSet(object): install = False else: # Avoid the need to call find_requirement again - req_to_install.url = url.url + req_to_install.urls = url if not best_installed: req_to_install.conflicts_with = req_to_install.satisfied_by @@ -975,21 +1003,39 @@ class RequirementSet(object): if not os.path.exists(os.path.join(location, 'setup.py')): ## FIXME: this won't upgrade when there's an existing package unpacked in `location` if req_to_install.url is None: - url = finder.find_requirement(req_to_install, upgrade=self.upgrade) - else: + urls = finder.find_requirement(req_to_install, upgrade=self.upgrade) + elif req_to_install.urls: ## FIXME: should req_to_install.url already be a link? 
@@ -975,21 +1003,39 @@
                 if not os.path.exists(os.path.join(location, 'setup.py')):
                     ## FIXME: this won't upgrade when there's an existing package unpacked in `location`
                     if req_to_install.url is None:
-                        url = finder.find_requirement(req_to_install, upgrade=self.upgrade)
-                    else:
+                        urls = finder.find_requirement(req_to_install, upgrade=self.upgrade)
+                    elif req_to_install.urls:
                         ## FIXME: should req_to_install.url already be a link?
-                        url = Link(req_to_install.url)
-                    assert url
-                    if url:
-                        try:
-                            self.unpack_url(url, location, self.is_download)
-                        except HTTPError:
-                            e = sys.exc_info()[1]
-                            logger.fatal('Could not install requirement %s because of error %s'
-                                         % (req_to_install, e))
-                            raise InstallationError(
-                                'Could not install requirement %s because of HTTP error %s for URL %s'
-                                % (req_to_install, e, url))
+                        urls = [Link(url.url, mirror_urls=finder.mirror_urls)
+                                for url in req_to_install.urls]
+                    if urls:
+                        # Trying each of the returned URLs one by one
+                        for url in urls:
+                            if url.is_mirror:
+                                if finder.verify(req_to_install, url):
+                                    logger.notify('Verifying %s: successful' % url)
+                                else:
+                                    logger.warn('Verifying %s: failed' % url)
+                                    continue
+                            try:
+                                self.unpack_url(url, location, self.is_download)
+                            except HTTPError:
+                                e = sys.exc_info()[1]
+                                logger.fatal('Could not install '
+                                             'requirement %s because of error %s' %
+                                             (req_to_install, e))
+                                raise InstallationError('Could not '
+                                                        'install requirement %s because of '
+                                                        'HTTP error %s for URL %s' %
+                                                        (req_to_install, e, url))
+                            else:
+                                # stop trying after successful retrieval
+                                break
+                        else:
+                            raise InstallationError('Could not install '
+                                                    'requirement %s because no valid URLs '
+                                                    'were found.' % req_to_install)
                     else:
                         unpack = False
                 if unpack:
@@ -1012,7 +1058,7 @@
                     # directory is created for packing in the bundle
                     req_to_install.run_egg_info(force_root_egg_info=True)
                     req_to_install.assert_source_matches_version()
-                    #@@ sketchy way of identifying packages not grabbed from an index
+                    # @@ sketchy way of identifying packages not grabbed from an index
                     if bundle and req_to_install.url:
                         self.copy_to_build_dir(req_to_install)
                         install = False
@@ -1029,7 +1075,8 @@
                 ## FIXME: shouldn't be globally added:
                 finder.add_dependency_links(req_to_install.dependency_links)
                 if (req_to_install.extras):
-                    logger.notify("Installing extra requirements: %r" % ','.join(req_to_install.extras))
+                    logger.notify("Installing extra requirements: %r" %
+                                  ','.join(req_to_install.extras))
                 if not self.ignore_dependencies:
                     for req in req_to_install.requirements(req_to_install.extras):
                         try:
@@ -1112,7 +1159,10 @@
         return retval

     def install(self, install_options, global_options=()):
-        """Install everything in this set (after having downloaded and unpacked the packages)"""
+        """
+        Install everything in this set
+        (after having downloaded and unpacked the packages)
+        """
         to_install = [r for r in self.requirements.values()
                       if not r.satisfied_by]
diff --git a/tests/test_pip.py b/tests/test_pip.py
index 17e8f6616..8f66b4f22 100644
--- a/tests/test_pip.py
+++ b/tests/test_pip.py
@@ -23,6 +23,7 @@ download_cache = tempfile.mkdtemp(prefix='pip-test-cache')

 site_packages_suffix = site.USER_SITE[len(site.USER_BASE) + 1:]

+
 def path_to_url(path):
     """
     Convert a path to URI. The path will be made absolute and
@@ -278,6 +279,7 @@ class TestPipEnvironment(TestFileEnvironment):
         environ['PIP_NO_INPUT'] = '1'
         environ['PIP_LOG_FILE'] = str(self.root_path/'pip-log.txt')
+        environ['PIP_USE_MIRRORS'] = 'false'

         super(TestPipEnvironment, self).__init__(
             self.root_path,
             ignore_hidden=False,
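
Stripped of logging, the retry loop added to prepare_files() reduces to the
pattern below. This is a simplified standalone sketch, not pip's actual
method: unpack and verify stand in for RequirementSet.unpack_url and the
finder's verify.

    from pip.backwardcompat import HTTPError
    from pip.exceptions import InstallationError

    def fetch_first_good(req, urls, unpack, verify):
        # Try candidate URLs in order; skip mirrors whose serversig
        # does not check out, stop at the first successful fetch.
        for url in urls:
            if url.is_mirror and not verify(req, url):
                continue  # unverifiable mirror, try the next URL
            try:
                unpack(url)
            except HTTPError:
                raise InstallationError('HTTP error for URL %s' % url)
            return url
        raise InstallationError('No valid URLs were found for %s' % req)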