Diffstat (limited to 'pip/download.py')
-rw-r--r--  pip/download.py | 223
1 file changed, 157 insertions(+), 66 deletions(-)
diff --git a/pip/download.py b/pip/download.py
index a31e5d670..abd0e87a1 100644
--- a/pip/download.py
+++ b/pip/download.py
@@ -3,12 +3,18 @@ import getpass
import mimetypes
import os
import re
+import pkg_resources
import shutil
+import socket
import sys
import tempfile
+
+import requests
+
from pip.backwardcompat import (md5, copytree, xmlrpclib, urllib, urllib2,
- urlparse, string_types, HTTPError)
+ urlparse)
from pip.exceptions import InstallationError
+from pip.locations import cacert_crt_file
from pip.util import (splitext, rmtree, format_size, display_path,
backup_dir, ask, ask_path_exists, unpack_file,
create_download_cache_folder, cache_download)
@@ -21,10 +27,69 @@ __all__ = ['xmlrpclib_transport', 'get_file_content', 'urlopen',
'geturl', 'is_archive_file', 'unpack_vcs_link',
'unpack_file_url', 'is_vcs_url', 'is_file_url', 'unpack_http_url']
+try:
+ import ssl
+except ImportError:
+ logger.fatal('WARNING! Could not import the ssl module needed to '
+ 'verify the SSL certificate of PyPI. Try installing '
+ 'it by running (requires compiler): pip install ssl')
+
+
+try:
+ pip_version = pkg_resources.get_distribution('pip').version
+except Exception:  # fall back when pip isn't installed as a distribution
+ pip_version = 'unknown'
+
xmlrpclib_transport = xmlrpclib.Transport()
+# CAcert Class 1 Root Certificate from
+# https://www.cacert.org/certs/root.crt
+CACERT_ROOT_CRT = """\
+-----BEGIN CERTIFICATE-----
+MIIHPTCCBSWgAwIBAgIBADANBgkqhkiG9w0BAQQFADB5MRAwDgYDVQQKEwdSb290
+IENBMR4wHAYDVQQLExVodHRwOi8vd3d3LmNhY2VydC5vcmcxIjAgBgNVBAMTGUNB
+IENlcnQgU2lnbmluZyBBdXRob3JpdHkxITAfBgkqhkiG9w0BCQEWEnN1cHBvcnRA
+Y2FjZXJ0Lm9yZzAeFw0wMzAzMzAxMjI5NDlaFw0zMzAzMjkxMjI5NDlaMHkxEDAO
+BgNVBAoTB1Jvb3QgQ0ExHjAcBgNVBAsTFWh0dHA6Ly93d3cuY2FjZXJ0Lm9yZzEi
+MCAGA1UEAxMZQ0EgQ2VydCBTaWduaW5nIEF1dGhvcml0eTEhMB8GCSqGSIb3DQEJ
+ARYSc3VwcG9ydEBjYWNlcnQub3JnMIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIIC
+CgKCAgEAziLA4kZ97DYoB1CW8qAzQIxL8TtmPzHlawI229Z89vGIj053NgVBlfkJ
+8BLPRoZzYLdufujAWGSuzbCtRRcMY/pnCujW0r8+55jE8Ez64AO7NV1sId6eINm6
+zWYyN3L69wj1x81YyY7nDl7qPv4coRQKFWyGhFtkZip6qUtTefWIonvuLwphK42y
+fk1WpRPs6tqSnqxEQR5YYGUFZvjARL3LlPdCfgv3ZWiYUQXw8wWRBB0bF4LsyFe7
+w2t6iPGwcswlWyCR7BYCEo8y6RcYSNDHBS4CMEK4JZwFaz+qOqfrU0j36NK2B5jc
+G8Y0f3/JHIJ6BVgrCFvzOKKrF11myZjXnhCLotLddJr3cQxyYN/Nb5gznZY0dj4k
+epKwDpUeb+agRThHqtdB7Uq3EvbXG4OKDy7YCbZZ16oE/9KTfWgu3YtLq1i6L43q
+laegw1SJpfvbi1EinbLDvhG+LJGGi5Z4rSDTii8aP8bQUWWHIbEZAWV/RRyH9XzQ
+QUxPKZgh/TMfdQwEUfoZd9vUFBzugcMd9Zi3aQaRIt0AUMyBMawSB3s42mhb5ivU
+fslfrejrckzzAeVLIL+aplfKkQABi6F1ITe1Yw1nPkZPcCBnzsXWWdsC4PDSy826
+YreQQejdIOQpvGQpQsgi3Hia/0PsmBsJUUtaWsJx8cTLc6nloQsCAwEAAaOCAc4w
+ggHKMB0GA1UdDgQWBBQWtTIb1Mfz4OaO873SsDrusjkY0TCBowYDVR0jBIGbMIGY
+gBQWtTIb1Mfz4OaO873SsDrusjkY0aF9pHsweTEQMA4GA1UEChMHUm9vdCBDQTEe
+MBwGA1UECxMVaHR0cDovL3d3dy5jYWNlcnQub3JnMSIwIAYDVQQDExlDQSBDZXJ0
+IFNpZ25pbmcgQXV0aG9yaXR5MSEwHwYJKoZIhvcNAQkBFhJzdXBwb3J0QGNhY2Vy
+dC5vcmeCAQAwDwYDVR0TAQH/BAUwAwEB/zAyBgNVHR8EKzApMCegJaAjhiFodHRw
+czovL3d3dy5jYWNlcnQub3JnL3Jldm9rZS5jcmwwMAYJYIZIAYb4QgEEBCMWIWh0
+dHBzOi8vd3d3LmNhY2VydC5vcmcvcmV2b2tlLmNybDA0BglghkgBhvhCAQgEJxYl
+aHR0cDovL3d3dy5jYWNlcnQub3JnL2luZGV4LnBocD9pZD0xMDBWBglghkgBhvhC
+AQ0ESRZHVG8gZ2V0IHlvdXIgb3duIGNlcnRpZmljYXRlIGZvciBGUkVFIGhlYWQg
+b3ZlciB0byBodHRwOi8vd3d3LmNhY2VydC5vcmcwDQYJKoZIhvcNAQEEBQADggIB
+ACjH7pyCArpcgBLKNQodgW+JapnM8mgPf6fhjViVPr3yBsOQWqy1YPaZQwGjiHCc
+nWKdpIevZ1gNMDY75q1I08t0AoZxPuIrA2jxNGJARjtT6ij0rPtmlVOKTV39O9lg
+18p5aTuxZZKmxoGCXJzN600BiqXfEVWqFcofN8CCmHBh22p8lqOOLlQ+TyGpkO/c
+gr/c6EWtTZBzCDyUZbAEmXZ/4rzCahWqlwQ3JNgelE5tDlG+1sSPypZt90Pf6DBl
+Jzt7u0NDY8RD97LsaMzhGY4i+5jhe1o+ATc7iwiwovOVThrLm82asduycPAtStvY
+sONvRUgzEv/+PDIqVPfE94rwiCPCR/5kenHA0R6mY7AHfqQv0wGP3J8rtsYIqQ+T
+SCX8Ev2fQtzzxD72V7DX3WnRBnc0CkvSyqD/HMaMyRa+xMwyN2hzXwj7UfdJUzYF
+CpUCTPJ5GhD22Dp1nPMd8aINcGeGG7MW9S/lpOt5hvk9C8JzC6WZrG/8Z7jlLwum
+GCSNe9FINSkYQKyTYOGWhlC0elnYjyELn8+CkcY7v2vcB5G5l1YjqrZslMZIBjzk
+zk6q5PYvCdxTby78dOs6Y5nCpqyJvKeyRKANihDjbPIky/qbn3BHLt4Ui9SyIAmW
+omTxJBzcoTWcFbLUvFUufQb1nA5V9FrWk9p2rSVzTMVD
+-----END CERTIFICATE-----"""
+
+
def get_file_content(url, comes_from=None):
"""Gets the content of a file; it may be a filename, file: URL, or
http: URL. Returns (location, content)"""
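
The embedded PEM above is handed to requests through its verify parameter,
which accepts a path to a CA bundle file. A minimal sketch of that pattern
(the /tmp path and URL are illustrative, not pip's actual cacert_crt_file):

    import requests

    pem_path = '/tmp/cacert-root.crt'  # illustrative; pip uses pip.locations.cacert_crt_file
    f = open(pem_path, 'w')
    try:
        f.write(CACERT_ROOT_CRT)
    finally:
        f.close()

    # verify= may be a CA bundle path; the request raises an SSLError if
    # the server certificate does not chain up to a root in that bundle.
    resp = requests.get('https://pypi.python.org/simple/', verify=pem_path)
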
@@ -48,8 +113,8 @@ def get_file_content(url, comes_from=None):
url = path
else:
## FIXME: catch some errors
- resp = urlopen(url)
- return geturl(resp), resp.read()
+ response = urlopen(url)
+ return response.url, response.content
try:
f = open(url)
content = f.read()
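
With requests, the redirect-resolved URL and the body are plain attributes
rather than method calls; a small sketch of the replacements for
geturl(resp) and resp.read() (URL illustrative):

    import requests

    response = requests.get('https://pypi.python.org/simple/')
    final_url = response.url    # URL after redirects, replaces geturl(resp)
    content = response.content  # body as bytes, replaces resp.read()
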
@@ -70,71 +135,95 @@ class URLOpener(object):
pip's own URL helper that adds HTTP auth and proxy support
"""
def __init__(self):
+ self.proxies = {}
+ self.timeout = None
self.passman = urllib2.HTTPPasswordMgrWithDefaultRealm()
- def __call__(self, url):
+ def __call__(self, url, method='get', redirect=True):
"""
If the given url contains auth info or if a normal request gets a 401
response, an attempt is made to fetch the resource using basic HTTP
auth.
-
"""
url, username, password = self.extract_credentials(url)
- if username is None:
- try:
- response = urllib2.urlopen(self.get_request(url))
- except urllib2.HTTPError:
- e = sys.exc_info()[1]
- if e.code != 401:
- raise
- response = self.get_response(url)
- else:
- response = self.get_response(url, username, password)
- return response
-
- def get_request(self, url):
- """
- Wraps the URL to retrieve to protects against "creative"
- interpretation of the RFC: http://bugs.python.org/issue8732
- """
- if isinstance(url, string_types):
- url = urllib2.Request(url, headers={'Accept-encoding': 'identity'})
- return url
+ return self.get_response(url, username, password, method=method, redirect=redirect)
- def get_response(self, url, username=None, password=None):
- """
- does the dirty work of actually getting the rsponse object using urllib2
- and its HTTP auth builtins.
- """
+ def handle_401(self, url, username, password, method):
scheme, netloc, path, query, frag = urlparse.urlsplit(url)
- req = self.get_request(url)
-
- stored_username, stored_password = self.passman.find_user_password(None, netloc)
# see if we have a password stored
+ stored_username, stored_password = self.passman.find_user_password(None, netloc)
if stored_username is None:
if username is None and self.prompting:
username = urllib.quote(raw_input('User for %s: ' % netloc))
password = urllib.quote(getpass.getpass('Password: '))
+ else:
+ username, password = stored_username, stored_password
+ response = self.get_response(url, username, password, retry=True, method=method)
+ if response.status_code != 401:
if username and password:
self.passman.add_password(None, netloc, username, password)
- stored_username, stored_password = self.passman.find_user_password(None, netloc)
- authhandler = urllib2.HTTPBasicAuthHandler(self.passman)
- opener = urllib2.build_opener(authhandler)
- # FIXME: should catch a 401 and offer to let the user reenter credentials
- return opener.open(req)
+ return response
- def setup(self, proxystr='', prompting=True):
+ def get_response(self, url, username=None, password=None,
+ retry=False, method='get', redirect=True):
+ """
+ does the dirty work of actually getting the response object using
+ requests and its HTTP auth support.
+ """
+
+ if username and password:
+ auth = (username, password)
+ else:
+ auth = None
+
+ if url.startswith('https://pypi.python.org'):
+ verify = cacert_crt_file
+ else:
+ verify = True
+
+ response = requests.request(method, url,
+ proxies=self.proxies,
+ timeout=self.timeout,
+ auth=auth,
+ verify=verify,
+ allow_redirects=redirect,
+ headers={
+ 'User-Agent': 'pip/%s' % pip_version,
+ })
+
+ if response.status_code == 401:
+ if retry:
+ # catch a 401 and offer to let the user reenter credentials
+ entered = ask('Credentials were incorrect. Re-enter? (y/n) ',
+ ('y', 'n'))
+ if entered != 'y':
+ raise InstallationError(
+ "The entered credentials for %s were wrong." % url)
+ username = password = None
+ return self.handle_401(url, username, password, method)
+
+ return response
+
+ def setup(self, proxystr='', timeout=None, prompting=True):
"""
Sets the proxy handler given the option passed on the command
line. If an empty string is passed it looks at the HTTP_PROXY
environment variable.
"""
self.prompting = prompting
+ self.timeout = timeout
proxy = self.get_proxy(proxystr)
- if proxy:
- proxy_support = urllib2.ProxyHandler({"http": proxy, "ftp": proxy, "https": proxy})
- opener = urllib2.build_opener(proxy_support, urllib2.CacheFTPHandler)
- urllib2.install_opener(opener)
+ self.proxies = {
+ 'http': proxy,
+ 'https': proxy,
+ }
+ if not os.path.exists(cacert_crt_file):
+ # write the bundled CAcert root certificate to cacert_crt_file
+ cacert_file = open(cacert_crt_file, 'w')
+ try:
+ cacert_file.write(CACERT_ROOT_CRT)
+ finally:
+ cacert_file.close()
def parse_credentials(self, netloc):
if "@" in netloc:
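
The 401 flow above bounces between get_response() and handle_401() until the
credentials work or the user gives up. A simplified, self-contained sketch of
the same retry pattern (fetch_with_auth is a hypothetical helper, not part of
this patch):

    import getpass
    import requests

    def fetch_with_auth(url, auth=None, attempts=2):
        # Try the request; on a 401, prompt once for credentials and retry.
        response = None
        for _ in range(attempts):
            response = requests.get(url, auth=auth)
            if response.status_code != 401:
                break
            auth = (raw_input('User: '), getpass.getpass('Password: '))
        return response
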
@@ -204,7 +293,7 @@ def is_url(name):
if ':' not in name:
return False
scheme = name.split(':', 1)[0].lower()
- return scheme in ['http', 'https', 'file', 'ftp'] + vcs.all_schemes
+ return scheme in ['http', 'https', 'file'] + vcs.all_schemes
def url_to_path(url):
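
For reference, the scheme test that this hunk narrows (ftp links are no
longer treated as URLs):

    >>> 'ftp://ftp.example.com/pip-1.1.tar.gz'.split(':', 1)[0].lower()
    'ftp'    # no longer in ['http', 'https', 'file'] + vcs.all_schemes
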
@@ -322,10 +411,10 @@ def is_file_url(link):
def _check_md5(download_hash, link):
- download_hash = download_hash.hexdigest()
- if download_hash != link.md5_hash:
- logger.fatal("MD5 hash of the package %s (%s) doesn't match the expected hash %s!"
- % (link, download_hash, link.md5_hash))
+ digest = download_hash.hexdigest()
+ if digest != link.md5_hash:
+ logger.fatal("MD5 hash of the package %s (%s) doesn't match the "
+ "expected hash %s!" % (link, digest, link.md5_hash))
raise InstallationError('Bad MD5 hash for package %s' % link)
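
A quick illustration of the digest comparison, assuming pip.backwardcompat's
md5 is hashlib.md5 (the sample data is illustrative):

    from hashlib import md5  # what pip.backwardcompat exposes on recent Pythons

    download_hash = md5()
    download_hash.update('chunk of downloaded data')
    digest = download_hash.hexdigest()  # hex string compared against link.md5_hash
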
@@ -347,12 +436,12 @@ def _download_url(resp, link, temp_location):
if link.md5_hash:
download_hash = md5()
try:
- total_length = int(resp.info()['content-length'])
+ total_length = int(resp.headers['content-length'])
except (ValueError, KeyError, TypeError):
total_length = 0
downloaded = 0
show_progress = total_length > 40*1000 or not total_length
- show_url = link.show_url
+ show_url = link.url
try:
if show_progress:
## FIXME: the URL can get really long in this message:
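
resp.headers is requests' case-insensitive header dict, replacing urllib2's
resp.info(); a sketch of the guarded length lookup (URL illustrative):

    import requests

    resp = requests.get('https://pypi.python.org/simple/')
    try:
        total_length = int(resp.headers['content-length'])
    except (ValueError, KeyError, TypeError):
        total_length = 0  # e.g. chunked responses carry no content-length
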
@@ -365,7 +454,7 @@ def _download_url(resp, link, temp_location):
logger.debug('Downloading from URL %s' % link)
while True:
- chunk = resp.read(4096)
+ chunk = resp.raw.read(4096)
if not chunk:
break
downloaded += len(chunk)
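
resp.raw is the underlying file-like response object; a self-contained
version of the download loop (note that with current requests the call needs
stream=True, otherwise resp.raw is already consumed; URL and path are
illustrative):

    import requests

    resp = requests.get('https://pypi.python.org/simple/', stream=True)
    fp = open('/tmp/index.html', 'wb')
    try:
        while True:
            chunk = resp.raw.read(4096)
            if not chunk:
                break
            fp.write(chunk)
    finally:
        fp.close()
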
@@ -413,14 +502,13 @@ def unpack_http_url(link, location, download_cache, download_dir=None):
target_file = None
download_hash = None
if download_cache:
- target_file = os.path.join(download_cache,
- urllib.quote(target_url, ''))
+ cache_filename = list(filter(None, target_url.split('/')))[-1]
+ target_file = os.path.join(download_cache, cache_filename)
if not os.path.isdir(download_cache):
create_download_cache_folder(download_cache)
- if (target_file
- and os.path.exists(target_file)
- and os.path.exists(target_file + '.content-type')):
- fp = open(target_file+'.content-type')
+ if (target_file and os.path.exists(target_file)
+ and os.path.exists(target_file + '.content-type')):
+ fp = open(target_file + '.content-type')
content_type = fp.read().strip()
fp.close()
if link.md5_hash:
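
The new cache naming keeps only the last non-empty path segment instead of
URL-quoting the whole link; for example (URL illustrative):

    target_url = 'https://pypi.python.org/packages/source/p/pip/pip-1.1.tar.gz'
    cache_filename = list(filter(None, target_url.split('/')))[-1]
    assert cache_filename == 'pip-1.1.tar.gz'

One consequence of this choice is that two distinct URLs ending in the same
file name now share a cache entry.
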
@@ -428,11 +516,11 @@ def unpack_http_url(link, location, download_cache, download_dir=None):
temp_location = target_file
logger.notify('Using download cache from %s' % target_file)
else:
- resp = _get_response_from_url(target_url, link)
- content_type = resp.info()['content-type']
+ response = _get_response_from_url(target_url, link)
+ content_type = response.headers['content-type']
filename = link.filename # fallback
# Have a look at the Content-Disposition header for a better guess
- content_disposition = resp.info().get('content-disposition')
+ content_disposition = response.headers.get('content-disposition')
if content_disposition:
type, params = cgi.parse_header(content_disposition)
# We use ``or`` here because we don't want to use an "empty" value
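
cgi.parse_header splits a header into its value and a parameter dict, which
is where the filename hint comes from; a quick example with an illustrative
header value:

    import cgi

    cd = 'attachment; filename="pip-1.1.tar.gz"'
    value, params = cgi.parse_header(cd)
    # value == 'attachment', params == {'filename': 'pip-1.1.tar.gz'}
    filename = params.get('filename') or 'fallback'
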
@@ -443,12 +531,12 @@ def unpack_http_url(link, location, download_cache, download_dir=None):
ext = mimetypes.guess_extension(content_type)
if ext:
filename += ext
- if not ext and link.url != geturl(resp):
- ext = os.path.splitext(geturl(resp))[1]
+ if not ext and link.url != geturl(response):
+ ext = os.path.splitext(geturl(response))[1]
if ext:
filename += ext
temp_location = os.path.join(temp_dir, filename)
- download_hash = _download_url(resp, link, temp_location)
+ download_hash = _download_url(response, link, temp_location)
if link.md5_hash:
_check_md5(download_hash, link)
if download_dir:
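
mimetypes.guess_extension maps a content type to a file extension and
returns None for unknown types, which is why the code re-checks ext:

    import mimetypes

    mimetypes.guess_extension('application/zip')           # '.zip'
    mimetypes.guess_extension('application/x-nonexistent') # None
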
@@ -476,6 +564,9 @@ def _get_response_from_url(target_url, link):
return resp
-class Urllib2HeadRequest(urllib2.Request):
- def get_method(self):
- return "HEAD"
+def valid_ipv6_addr(addr):
+ try:
+ addr = socket.inet_pton(socket.AF_INET6, addr)
+ except socket.error: # not a valid address
+ return False
+ return True
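
Example behaviour of the new helper (socket.inet_pton is unavailable on some
platforms, notably Windows under Python 2, where this would raise
AttributeError):

    >>> valid_ipv6_addr('::1')
    True
    >>> valid_ipv6_addr('127.0.0.1')
    False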