Diffstat (limited to 'pip/index.py')
-rw-r--r--  pip/index.py  421
1 file changed, 258 insertions(+), 163 deletions(-)
diff --git a/pip/index.py b/pip/index.py
index 8e53e44b7..e9ce48630 100644
--- a/pip/index.py
+++ b/pip/index.py
@@ -1,8 +1,10 @@
-"""Routines related to PyPI, indexes"""
-
+"""
+Routines related to PyPI, indexes, PEP381 mirrors
+"""
import sys
import os
import re
+import hashlib
import gzip
import mimetypes
try:
@@ -15,21 +17,31 @@ import random
import socket
import string
import zlib
+
+import requests
+
+from pip.locations import serverkey_file
from pip.log import logger
-from pip.util import Inf
-from pip.util import normalize_name, splitext
+from pip.util import Inf, normalize_name, splitext
from pip.exceptions import DistributionNotFound, BestVersionAlreadyInstalled
from pip.backwardcompat import (WindowsError, BytesIO,
- Queue, httplib, urlparse,
- URLError, HTTPError, u,
- product, url2pathname)
-from pip.backwardcompat import Empty as QueueEmpty
-from pip.download import urlopen, path_to_url2, url_to_path, geturl, Urllib2HeadRequest
+ Queue, urlparse,
+ URLError, HTTPError, b, u,
+ product, url2pathname,
+ OrderedDict, _ord as ord,
+ decode_base64, _long,
+ Empty as QueueEmpty)
+from pip.download import urlopen, path_to_url2, url_to_path, geturl
+from pip.mirrors import verify, load_key, find_mirrors
__all__ = ['PackageFinder']
-DEFAULT_MIRROR_URL = "last.pypi.python.org"
+_egg_fragment_re = re.compile(r'#egg=([^&]*)')
+_egg_info_re = re.compile(r'([a-z0-9_.]+)-([a-z0-9_.-]+)', re.I)
+_py_version_re = re.compile(r'-py([123]\.?[0-9]?)$')
+_clean_re = re.compile(r'[^a-z0-9$&+,/:;=?@.#%~_\\|-]', re.I)
+_md5_re = re.compile(r'md5=([a-f0-9]+)')
class PackageFinder(object):
@@ -38,9 +50,8 @@ class PackageFinder(object):
This is meant to match easy_install's technique for looking for
packages, by reading pages and looking for appropriate links
"""
-
def __init__(self, find_links, index_urls,
- use_mirrors=False, mirrors=None, main_mirror_url=None):
+ use_mirrors=False, mirrors=None):
self.find_links = find_links
self.index_urls = index_urls
self.dependency_links = []
@@ -48,10 +59,16 @@ class PackageFinder(object):
# These are boring links that have already been logged somehow:
self.logged_links = set()
if use_mirrors:
- self.mirror_urls = self._get_mirror_urls(mirrors, main_mirror_url)
- logger.info('Using PyPI mirrors: %s' % ', '.join(self.mirror_urls))
+ self.mirror_urls = self._get_mirror_urls(mirrors)
+ logger.info('Using PyPI mirrors:\n* %s' %
+ '\n* '.join([url.url for url in self.mirror_urls]))
else:
- self.mirror_urls = []
+ self.mirror_urls = ()
+ serverkey_cache = open(serverkey_file, 'rb')
+ try:
+ self.serverkey = load_key(serverkey_cache.read())
+ finally:
+ serverkey_cache.close()
def add_dependency_links(self, links):
## FIXME: this shouldn't be global list this, it should only
@@ -64,76 +81,119 @@ class PackageFinder(object):
def _sort_locations(locations):
"""
Sort locations into "files" (archives) and "urls", and return
- a pair of lists (files,urls)
+ a pair of lists (files, urls)
"""
files = []
urls = []
# puts the url for the given file path into the appropriate
# list
- def sort_path(path):
- url = path_to_url2(path)
- if mimetypes.guess_type(url, strict=False)[0] == 'text/html':
+ def sort_path(url, path):
+ new_url = path_to_url2(path)
+ mimetype = mimetypes.guess_type(new_url, strict=False)[0]
+ # work on a copy so each call appends a distinct Link, instead of
+ # mutating and re-appending the same object once per directory entry
+ url = url.copy()
+ url.url = new_url
+ if mimetype == 'text/html':
urls.append(url)
else:
files.append(url)
for url in locations:
- if url.startswith('file:'):
- path = url_to_path(url)
+ if isinstance(url, Link):
+ url = url.copy()
+ else:
+ url = Link(url)
+ if url.url.startswith('file:'):
+ path = url_to_path(url.url)
if os.path.isdir(path):
path = os.path.realpath(path)
for item in os.listdir(path):
- sort_path(os.path.join(path, item))
+ sort_path(url, os.path.join(path, item))
elif os.path.isfile(path):
- sort_path(path)
+ sort_path(url, path)
else:
urls.append(url)
return files, urls
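+ # Usage sketch (hypothetical inputs, not part of the diff): HTML pages
+ # land in `urls` (they get crawled for links), everything else
+ # (tarballs, eggs) lands in `files`:
+ #   files, urls = PackageFinder._sort_locations(
+ #       ['file:///srv/packages/', 'http://pypi.python.org/simple/pip/'])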
+ def make_package_url(self, url, name):
+ """
+ For maximum compatibility with easy_install, ensure the path
+ ends in a trailing slash. Although this isn't in the spec
+ (and PyPI can handle it without the slash) some other index
+ implementations might break if they relied on easy_install's
+ behavior.
+ """
+ if isinstance(url, Link):
+ package_url = url.copy()
+ else:
+ package_url = Link(url)
+ new_url = posixpath.join(package_url.url, name)
+ if not new_url.endswith('/'):
+ new_url = new_url + '/'
+ package_url.url = new_url
+ return package_url
+
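+ # e.g. (sketch, assuming a plain string index URL):
+ #   make_package_url('http://pypi.python.org/simple', 'pip')
+ #   -> Link('http://pypi.python.org/simple/pip/')   # note trailing slash
+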
+ def verify(self, requirement, url):
+ """
+ Verifies the URL for the given requirement using the PEP381
+ verification code.
+ """
+ if url.comes_from and url.base_url:
+ try:
+ data = b(url.comes_from.content)
+ serversig = requirement.serversig(url.base_url)
+ if data and serversig:
+ return verify(self.serverkey, data, serversig)
+ except ValueError:
+ return False
+ return False
+
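+ # A sketch of the PEP 381 check this method performs; fetch_and_verify()
+ # below is hypothetical glue, while load_key()/verify(), serverkey_file
+ # and the mirror's /serversig/<pkg> URL layout come from the mirror
+ # protocol and this diff:
+ #
+ #   import requests
+ #   from pip.locations import serverkey_file
+ #   from pip.mirrors import load_key, verify
+ #
+ #   def fetch_and_verify(mirror_root, package):
+ #       key = load_key(open(serverkey_file, 'rb').read())  # cached PyPI key
+ #       page = requests.get('%s/simple/%s/' % (mirror_root, package))
+ #       sig = requests.get('%s/serversig/%s' % (mirror_root, package))
+ #       # a PEP 381 mirror signs each /simple page with PyPI's DSA key
+ #       return verify(key, page.content, sig.content)
+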
def find_requirement(self, req, upgrade):
url_name = req.url_name
# Only check main index if index URL is given:
main_index_url = None
if self.index_urls:
# Check that we have the url_name correctly spelled:
- main_index_url = Link(posixpath.join(self.index_urls[0], url_name))
- # This will also cache the page, so it's okay that we get it again later:
+ main_index_url = self.make_package_url(self.index_urls[0],
+ url_name)
+ # This will also cache the page,
+ # so it's okay that we get it again later:
page = self._get_page(main_index_url, req)
if page is None:
url_name = self._find_url_name(Link(self.index_urls[0]), url_name, req) or req.url_name
# Combine index URLs with mirror URLs here to allow
# adding more index URLs from requirements files
- all_index_urls = self.index_urls + self.mirror_urls
-
- def mkurl_pypi_url(url):
- loc = posixpath.join(url, url_name)
- # For maximum compatibility with easy_install, ensure the path
- # ends in a trailing slash. Although this isn't in the spec
- # (and PyPI can handle it without the slash) some other index
- # implementations might break if they relied on easy_install's behavior.
- if not loc.endswith('/'):
- loc = loc + '/'
- return loc
+
+ locations = []
+ indexes_package_urls = []
+ mirrors_package_urls = []
if url_name is not None:
- locations = [
- mkurl_pypi_url(url)
- for url in all_index_urls] + self.find_links
- else:
- locations = list(self.find_links)
- locations.extend(self.dependency_links)
+ indexes_package_urls = [self.make_package_url(url, url_name)
+ for url in self.index_urls]
+ locations.extend(indexes_package_urls)
+ mirrors_package_urls = [self.make_package_url(url, url_name)
+ for url in self.mirror_urls]
+ locations.extend(mirrors_package_urls)
+
+ locations.extend(self.find_links + self.dependency_links)
+
for version in req.absolute_versions:
if url_name is not None and main_index_url is not None:
- locations = [
- posixpath.join(main_index_url.url, version)] + locations
+ version_url = posixpath.join(main_index_url.url, version)
+ locations = [version_url] + locations
file_locations, url_locations = self._sort_locations(locations)
- locations = [Link(url) for url in url_locations]
+ locations = []
+ for url in url_locations:
+ if isinstance(url, Link):
+ locations.append(url)
+ else:
+ locations.append(Link(url))
logger.debug('URLs to search for versions for %s:' % req)
for location in locations:
logger.debug('* %s' % location)
+
found_versions = []
found_versions.extend(
self._package_versions(
@@ -143,60 +203,109 @@ class PackageFinder(object):
logger.debug('Analyzing links from page %s' % page.url)
logger.indent += 2
try:
- page_versions.extend(self._package_versions(page.links, req.name.lower()))
+ page_versions.extend(self._package_versions(
+ page.links, req.name.lower()))
finally:
logger.indent -= 2
+
dependency_versions = list(self._package_versions(
[Link(url) for url in self.dependency_links], req.name.lower()))
if dependency_versions:
- logger.info('dependency_links found: %s' % ', '.join([link.url for parsed, link, version in dependency_versions]))
+ dependency_urls = [link.url for _, link, _ in dependency_versions]
+ logger.info('dependency_links found: %s' %
+ ', '.join(dependency_urls))
+
file_versions = list(self._package_versions(
[Link(url) for url in file_locations], req.name.lower()))
- if not found_versions and not page_versions and not dependency_versions and not file_versions:
- logger.fatal('Could not find any downloads that satisfy the requirement %s' % req)
- raise DistributionNotFound('No distributions at all found for %s' % req)
+ if (not found_versions and not page_versions and
+ not dependency_versions and not file_versions):
+ logger.fatal('Could not find any downloads that satisfy '
+ 'the requirement %s' % req)
+ raise DistributionNotFound('No distributions at all found for %s'
+ % req)
+
if req.satisfied_by is not None:
- found_versions.append((req.satisfied_by.parsed_version, Inf, req.satisfied_by.version))
+ found_versions.append((req.satisfied_by.parsed_version,
+ Inf, req.satisfied_by.version))
+
if file_versions:
file_versions.sort(reverse=True)
- logger.info('Local files found: %s' % ', '.join([url_to_path(link.url) for parsed, link, version in file_versions]))
+ file_urls = [url_to_path(link.url) for _, link, _ in file_versions]
+ logger.info('Local files found: %s' % ', '.join(file_urls))
found_versions = file_versions + found_versions
+
all_versions = found_versions + page_versions + dependency_versions
- applicable_versions = []
- for (parsed_version, link, version) in all_versions:
+
+ applicable_versions = OrderedDict()
+ for parsed_version, link, version in all_versions:
if version not in req.req:
- logger.info("Ignoring link %s, version %s doesn't match %s"
- % (link, version, ','.join([''.join(s) for s in req.req.specs])))
+ req_specs = [''.join(s) for s in req.req.specs]
+ logger.info("Ignoring link %s, version %s doesn't match %s" %
+ (link, version, ','.join(req_specs)))
continue
- applicable_versions.append((link, version))
- applicable_versions = sorted(applicable_versions, key=lambda v: pkg_resources.parse_version(v[1]), reverse=True)
- existing_applicable = bool([link for link, version in applicable_versions if link is Inf])
+ link_comes_from = None
+ mirror_urls = [mirror.url for mirror in mirrors_package_urls]
+ if link is not Inf:
+ link_comes_from = getattr(link, 'comes_from', None)
+ if link_comes_from is not None:
+ link.is_mirror = link_comes_from.url in mirror_urls
+ applicable_versions.setdefault(version, []).append(link)
+
+ for version in applicable_versions:
+ random.shuffle(applicable_versions[version])
+
+ sorted_applicable_versions = sorted(applicable_versions.items(),
+ key=lambda v: pkg_resources.parse_version(v[0]), reverse=True)
+ applicable_versions = OrderedDict(sorted_applicable_versions)
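+ # Shape sketch (hypothetical data): applicable_versions now maps each
+ # version to a shuffled list of candidate links, newest version first:
+ #   OrderedDict([('1.1', [<mirror link>, <pypi link>]),
+ #                ('1.0', [<pypi link>])])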
+
+ all_links = [link for links in applicable_versions.values()
+ for link in links if link is Inf]
+ existing_applicable = bool(all_links)
+
if not upgrade and existing_applicable:
- if applicable_versions[0][1] is Inf:
- logger.info('Existing installed version (%s) is most up-to-date and satisfies requirement'
- % req.satisfied_by.version)
+ if Inf in next(iter(applicable_versions.values())):
+ logger.info('Existing installed version (%s) is most '
+ 'up-to-date and satisfies requirement' %
+ req.satisfied_by.version)
raise BestVersionAlreadyInstalled
else:
- logger.info('Existing installed version (%s) satisfies requirement (most up-to-date version is %s)'
- % (req.satisfied_by.version, applicable_versions[0][1]))
+ logger.info('Existing installed version (%s) satisfies '
+ 'requirement (most up-to-date version is %s)' %
+ (req.satisfied_by.version,
+ next(iter(applicable_versions))))
return None
+
if not applicable_versions:
- logger.fatal('Could not find a version that satisfies the requirement %s (from versions: %s)'
- % (req, ', '.join([version for parsed_version, link, version in found_versions])))
- raise DistributionNotFound('No distributions matching the version for %s' % req)
- if applicable_versions[0][0] is Inf:
- # We have an existing version, and its the best version
- logger.info('Installed version (%s) is most up-to-date (past versions: %s)'
- % (req.satisfied_by.version, ', '.join([version for link, version in applicable_versions[1:]]) or 'none'))
+ show_versions = [version for _, _, version in found_versions]
+ logger.fatal('Could not find a version that satisfies '
+ 'the requirement %s (from versions: %s)' %
+ (req, ', '.join(show_versions)))
+ raise DistributionNotFound('No distributions matching '
+ 'the version for %s' % req)
+
+ newest = list(applicable_versions.keys())[0]
+
+ if Inf in applicable_versions[newest]:
+ # We have an existing version, and it's the best version
+ show_versions = list(applicable_versions.keys())[1:]
+ logger.info('Installed version (%s) is most up-to-date '
+ '(past versions: %s)' %
+ (req.satisfied_by.version,
+ ', '.join(show_versions) or 'none'))
raise BestVersionAlreadyInstalled
+
if len(applicable_versions) > 1:
logger.info('Using version %s (newest of versions: %s)' %
- (applicable_versions[0][1], ', '.join([version for link, version in applicable_versions])))
- return applicable_versions[0][0]
+ (newest, ', '.join(applicable_versions.keys())))
+
+ return applicable_versions[newest]
def _find_url_name(self, index_url, url_name, req):
- """Finds the true URL name of a package, when the given name isn't quite correct.
- This is usually used to implement case-insensitivity."""
+ """
+ Finds the true URL name of a package, when the given name isn't
+ quite correct. This is usually used to implement case-insensitivity.
+ """
if not index_url.url.endswith('/'):
# Vaguely part of the PyPI API... weird but true.
## FIXME: bad to modify this?
@@ -249,12 +358,11 @@ class PackageFinder(object):
for link in page.rel_links():
pending_queue.put(link)
- _egg_fragment_re = re.compile(r'#egg=([^&]*)')
- _egg_info_re = re.compile(r'([a-z0-9_.]+)-([a-z0-9_.-]+)', re.I)
- _py_version_re = re.compile(r'-py([123]\.?[0-9]?)$')
-
def _sort_links(self, links):
- "Returns elements of links in order, non-egg links first, egg links second, while eliminating duplicates"
+ """
+ Returns elements of links in order, non-egg links first,
+ egg links second, while eliminating duplicates
+ """
eggs, no_eggs = [], []
seen = set()
for link in links:
@@ -306,7 +414,7 @@ class PackageFinder(object):
if version is None:
logger.debug('Skipping link %s; wrong project name (not %s)' % (link, search_name))
return []
- match = self._py_version_re.search(version)
+ match = _py_version_re.search(version)
if match:
version = version[:match.start()]
py_version = match.group(1)
@@ -319,7 +427,7 @@ class PackageFinder(object):
version)]
def _egg_info_matches(self, egg_info, search_name, link):
- match = self._egg_info_re.search(egg_info)
+ match = _egg_info_re.search(egg_info)
if not match:
logger.debug('Could not parse version from link: %s' % link)
return None
@@ -334,25 +442,26 @@ class PackageFinder(object):
def _get_page(self, link, req):
return HTMLPage.get_page(link, req, cache=self.cache)
- def _get_mirror_urls(self, mirrors=None, main_mirror_url=None):
- """Retrieves a list of URLs from the main mirror DNS entry
+ def _get_mirror_urls(self, mirrors=None):
+ """
+ Retrieves a list of URLs from the main mirror DNS entry
unless a list of mirror URLs are passed.
"""
if not mirrors:
- mirrors = get_mirrors(main_mirror_url)
- # Should this be made "less random"? E.g. netselect like?
- random.shuffle(mirrors)
+ mirrors = find_mirrors(amount=10, start_with='b', prefer_fastest=False)
+ mirrors = [mirror[0] for mirror in mirrors]
- mirror_urls = set()
+ mirror_urls = []
for mirror_url in mirrors:
# Make sure we have a valid URL
- if not ("http://" or "https://" or "file://") in mirror_url:
+ if not mirror_url.startswith(("http://", "https://", "file://")):
mirror_url = "http://%s" % mirror_url
if not mirror_url.endswith("/simple"):
mirror_url = "%s/simple/" % mirror_url
- mirror_urls.add(mirror_url)
+ if mirror_url not in mirror_urls:
+ mirror_urls.append(mirror_url)
- return list(mirror_urls)
+ return tuple(Link(url, is_mirror=True) for url in mirror_urls)
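+ # Normalization sketch: a bare mirror hostname is expanded before use,
+ #   'b.pypi.python.org' -> 'http://b.pypi.python.org/simple/'
+ # and each entry comes back as Link(url, is_mirror=True).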
class PageCache(object):
@@ -415,7 +524,8 @@ class HTMLPage(object):
from pip.vcs import VcsSupport
for scheme in VcsSupport.schemes:
if url.lower().startswith(scheme) and url[len(scheme)] in '+:':
- logger.debug('Cannot look at %(scheme)s URL %(link)s' % locals())
+ logger.debug('Cannot look at %s URL %s' %
+ (scheme, link))
return None
if cache is not None:
@@ -441,37 +551,30 @@ class HTMLPage(object):
logger.debug('Getting page %s' % url)
# Tack index.html onto file:// URLs that point to directories
- (scheme, netloc, path, params, query, fragment) = urlparse.urlparse(url)
+ parsed_url = urlparse.urlparse(url)
+ scheme, netloc, path, params, query, fragment = parsed_url
if scheme == 'file' and os.path.isdir(url2pathname(path)):
- # add trailing slash if not present so urljoin doesn't trim final segment
+ # add trailing slash if not present so urljoin
+ # doesn't trim final segment
if not url.endswith('/'):
url += '/'
url = urlparse.urljoin(url, 'index.html')
logger.debug(' file: URL is directory, getting %s' % url)
- resp = urlopen(url)
-
- real_url = geturl(resp)
- headers = resp.info()
- contents = resp.read()
- encoding = headers.get('Content-Encoding', None)
- #XXX need to handle exceptions and add testing for this
- if encoding is not None:
- if encoding == 'gzip':
- contents = gzip.GzipFile(fileobj=BytesIO(contents)).read()
- if encoding == 'deflate':
- contents = zlib.decompress(contents)
- inst = cls(u(contents), real_url, headers)
- except (HTTPError, URLError, socket.timeout, socket.error, OSError, WindowsError):
+ response = urlopen(url)
+ inst = cls(u(response.content), response.url, response.headers)
+ except (HTTPError, URLError, socket.timeout,
+ socket.error, OSError, WindowsError,
+ requests.RequestException):
e = sys.exc_info()[1]
desc = str(e)
- if isinstance(e, socket.timeout):
+ if isinstance(e, (socket.timeout, requests.Timeout)):
log_meth = logger.info
- level =1
+ level = 1
desc = 'timed out'
elif isinstance(e, URLError):
log_meth = logger.info
- if hasattr(e, 'reason') and isinstance(e.reason, socket.timeout):
+ if hasattr(e, 'reason') and isinstance(
+ e.reason, (socket.timeout, requests.Timeout)):
desc = 'timed out'
level = 1
else:
@@ -489,7 +592,7 @@ class HTMLPage(object):
cache.add_page_failure(url, level)
return None
if cache is not None:
- cache.add_page([url, real_url], inst)
+ cache.add_page([url, response.url], inst)
return inst
@staticmethod
@@ -500,15 +603,11 @@ class HTMLPage(object):
## FIXME: some warning or something?
## assertion error?
return ''
- req = Urllib2HeadRequest(url, headers={'Host': netloc})
- resp = urlopen(req)
- try:
- if hasattr(resp, 'code') and resp.code != 200 and scheme not in ('ftp', 'ftps'):
- ## FIXME: doesn't handle redirects
- return ''
- return resp.info().get('content-type', '')
- finally:
- resp.close()
+ response = urlopen(url, method='head')
+ if (hasattr(response, 'status_code') and
+ response.status_code != 200 and
+ scheme not in ('ftp', 'ftps')):
+ ## FIXME: doesn't handle redirects
+ return ''
+ return response.headers.get('content-type', '')
@property
def base_url(self):
@@ -522,7 +621,9 @@ class HTMLPage(object):
@property
def links(self):
- """Yields all links in the page"""
+ """
+ Yields all links in the page
+ """
for match in self._href_re.finditer(self.content):
url = match.group(1) or match.group(2) or match.group(3)
url = self.clean_link(urlparse.urljoin(self.base_url, url))
@@ -535,7 +636,9 @@ class HTMLPage(object):
yield url
def explicit_rel_links(self, rels=('homepage', 'download')):
- """Yields all links with the given relations"""
+ """
+ Yields all links with the given relations
+ """
for match in self._rel_re.finditer(self.content):
found_rels = match.group(1).lower().split()
for rel in rels:
@@ -564,21 +667,30 @@ class HTMLPage(object):
url = self.clean_link(urlparse.urljoin(self.base_url, url))
yield Link(url, self)
- _clean_re = re.compile(r'[^a-z0-9$&+,/:;=?@.#%_\\|-]', re.I)
-
def clean_link(self, url):
- """Makes sure a link is fully encoded. That is, if a ' ' shows up in
+ """
+ Makes sure a link is fully encoded. That is, if a ' ' shows up in
the link, it will be rewritten to %20 (while not over-quoting
- % or other characters)."""
- return self._clean_re.sub(
- lambda match: '%%%2x' % ord(match.group(0)), url)
+ % or other characters).
+ """
+ def replacer(match):
+ matched_group = match.group(0)
+ # %02x zero-pads, so code points below 0x10 encode as e.g. %0a
+ return '%%%02x' % ord(matched_group)
+ return _clean_re.sub(replacer, url.strip())
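+ # e.g. (sketch): characters outside _clean_re's allowed set are
+ # percent-encoded, everything else is left alone:
+ #   clean_link('http://host/some file.tar.gz')
+ #   -> 'http://host/some%20file.tar.gz'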
class Link(object):
- def __init__(self, url, comes_from=None):
+ def __init__(self, url, comes_from=None, is_mirror=False, mirror_urls=None):
self.url = url
self.comes_from = comes_from
+ self.is_mirror = is_mirror
+ if mirror_urls is not None:
+ for mirror in mirror_urls:
+ if not isinstance(mirror, Link):
+ mirror = Link(mirror)
+ if self.url.startswith(mirror.base_url):
+ self.is_mirror = True
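+ # e.g. (sketch with a hypothetical mirror list): the link below shares
+ # scheme+netloc with a known mirror, so is_mirror becomes True:
+ #   Link('http://b.pypi.python.org/packages/pip-1.0.tar.gz',
+ #        mirror_urls=['http://b.pypi.python.org/simple/']).is_mirror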
def __str__(self):
if self.comes_from:
@@ -603,6 +715,14 @@ class Link(object):
return name
@property
+ def netloc(self):
+ return urlparse.urlsplit(self.url)[1]
+
+ @property
+ def base_url(self):
+ return urlparse.urlunsplit((self.scheme, self.netloc, '', '', ''))
+
+ @property
def scheme(self):
return urlparse.urlsplit(self.url)[0]
@@ -621,20 +741,16 @@ class Link(object):
url = url.rstrip('/')
return url
- _egg_fragment_re = re.compile(r'#egg=([^&]*)')
-
@property
def egg_fragment(self):
- match = self._egg_fragment_re.search(self.url)
+ match = _egg_fragment_re.search(self.url)
if not match:
return None
return match.group(1)
- _md5_re = re.compile(r'md5=([a-f0-9]+)')
-
@property
def md5_hash(self):
- match = self._md5_re.search(self.url)
+ match = _md5_re.search(self.url)
if match:
return match.group(1)
return None
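+ # e.g. (sketch):
+ #   Link('http://host/pip-1.0.tar.gz'
+ #        '#md5=0123456789abcdef0123456789abcdef').md5_hash
+ #   -> '0123456789abcdef0123456789abcdef'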
@@ -643,6 +759,10 @@ class Link(object):
def show_url(self):
return posixpath.basename(self.url.split('#', 1)[0].split('?', 1)[0])
+ def copy(self):
+ return self.__class__(self.url, comes_from=self.comes_from,
+ is_mirror=self.is_mirror)
+
def get_requirement_from_url(url):
"""Get a requirement from the URL, if possible. This looks for #egg
@@ -669,33 +789,9 @@ def package_to_requirement(package_name):
return name
-def get_mirrors(hostname=None):
- """Return the list of mirrors from the last record found on the DNS
- entry::
-
- >>> from pip.index import get_mirrors
- >>> get_mirrors()
- ['a.pypi.python.org', 'b.pypi.python.org', 'c.pypi.python.org',
- 'd.pypi.python.org']
-
- Originally written for the distutils2 project by Alexis Metaireau.
- """
- if hostname is None:
- hostname = DEFAULT_MIRROR_URL
-
- # return the last mirror registered on PyPI.
- try:
- hostname = socket.gethostbyname_ex(hostname)[0]
- except socket.gaierror:
- return []
- end_letter = hostname.split(".", 1)
-
- # determine the list from the last one.
- return ["%s.%s" % (s, end_letter[1]) for s in string_range(end_letter[0])]
-
-
def string_range(last):
- """Compute the range of string between "a" and last.
+ """
+ Compute the range of strings between "a" and last.
This works for simple "a to z" lists, but also for "a to zz" lists.
"""
@@ -705,4 +801,3 @@ def string_range(last):
yield result
if result == last:
return
-
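+ # Usage sketch for string_range (assumed behavior per the docstring):
+ #   list(string_range('c'))  -> ['a', 'b', 'c']
+ # two-letter names follow 'z', so string_range('ab') yields
+ # 'a'..'z', then 'aa', 'ab'.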