summaryrefslogtreecommitdiff
path: root/pip/mirrors.py
diff options
context:
space:
mode:
Diffstat (limited to 'pip/mirrors.py')
-rw-r--r--pip/mirrors.py400
1 files changed, 400 insertions, 0 deletions
diff --git a/pip/mirrors.py b/pip/mirrors.py
new file mode 100644
index 000000000..52992b4e9
--- /dev/null
+++ b/pip/mirrors.py
@@ -0,0 +1,400 @@
+"""
+Library to support tools that access PyPI mirrors. The following
+functional areas are covered:
+- mirror selection (find_mirror)
+- mirror verification
+- key rollover
+"""
+
+import datetime
+import hashlib
+import errno
+import random
+import select
+import socket
+import time
+from pip.backwardcompat import (b, u, _ord as ord,
+ decode_base64, _long)
+
+
+def _mirror_list(first):
+ """
+ Generator producing all mirror names
+ """
+ ord_a = ord('a')
+ try:
+ last = socket.gethostbyname_ex('last.pypi.python.org')
+ except socket.gaierror:
+ return
+ cur_index = ord(first) - ord_a
+ cur = first + '.pypi.python.org'
+ while True:
+ for family, _, _, _, sockaddr in socket.getaddrinfo(cur, 0, 0, socket.SOCK_STREAM):
+ yield cur, family, sockaddr[0]
+ if last[0] == cur:
+ break
+ cur_index += 1
+ if cur_index < 26:
+ # a..z
+ cur = chr(ord_a + cur_index)
+ elif cur_index > 701:
+ raise ValueError('too many mirrors')
+ else:
+ # aa, ab, ... zz
+ cur = divmod(cur_index, 26)
+ cur = chr(ord_a - 1 + cur[0]) + chr(ord_a + cur[1])
+ cur += '.pypi.python.org'
+
+def _batched_mirror_list(first):
+ """
+ Generator that does DNS lookups in batches of 10, and shuffles them.
+ """
+ batch = []
+ for res in _mirror_list(first):
+ batch.append(res)
+ if len(batch) == 10:
+ random.shuffle(batch)
+ for res2 in batch:
+ yield res2
+ batch = []
+ random.shuffle(batch)
+ for res2 in batch:
+ yield res2
+
+class _Mirror:
+ # status values:
+ # 0: wants to send
+ # 1: wants to recv
+ # 2: completed, ok
+ # 3: completed, failed
+ def __init__(self, name, family, ip):
+ self.name = name
+ self.family = family
+ self.ip = ip
+ self.socket = socket.socket(family, socket.SOCK_STREAM)
+ self.socket.setblocking(0)
+ self.started = time.time()
+ try:
+ self.socket.connect((ip, 80))
+ except socket.error, e:
+ if e.errno != errno.EINPROGRESS:
+ raise
+ # now need to select for writing
+ self.status = 0
+
+ def write(self):
+ url = 'last-modified'
+ if self.name == 'a.pypi.python.org':
+ # the master server doesn't provide last-modified,
+ # as that would be pointless. Instead, /daytime can be
+ # used as an indication of currency and responsiveness.
+ url = 'daytime'
+ self.socket.send('GET /%s HTTP/1.0\r\n'
+ 'Host: %s\r\n'
+ '\r\n' % (url, self.name))
+ self.status = 1
+
+ def read(self):
+ data = self.socket.recv(1200)
+ self.response_time = time.time()-self.started
+ # response should be much shorter
+ assert len(data) < 1200
+ self.socket.close()
+ data = data.splitlines()
+ if data[0].split()[1] == '200':
+ # ok
+ data = data[-1]
+ try:
+ self.last_modified = datetime.datetime.strptime(data, "%Y%m%dT%H:%M:%S")
+ self.status = 2 # complete
+ except ValueError:
+ self.status = 3 # failed
+ else:
+ self.status = 3
+
+ def failed(self):
+ self.socket.close()
+ self.status = 3
+
+ def results(self):
+ return self.name, self.family, self.ip, self.response_time, self.last_modified
+
+def _select(mirrors):
+ # perform select call on mirrors dictionary
+ rlist = []
+ wlist = []
+ xlist = []
+ for m in mirrors.values():
+ if m.status == 0:
+ wlist.append(m.socket)
+ xlist.append(m.socket)
+ elif m.status == 1:
+ rlist.append(m.socket)
+ xlist.append(m.socket)
+ rlist, wlist, xlist = select.select(rlist, wlist, xlist, 0)
+ completed = []
+ for s in wlist:
+ mirrors[s].write()
+ for s in rlist:
+ m = mirrors[s]
+ del mirrors[s]
+ m.read()
+ if m.status == 2:
+ completed.append(m)
+ for s in xlist:
+ mirrors[s].failed()
+ del mirrors[s]
+ return completed
+
+def _close(mirrors):
+ for m in mirrors:
+ m.close()
+
+def _newest(mirrors, amount=1):
+ mirrors.sort(key=lambda m: m.last_modified)
+ results = [mirror.results() for mirror in mirrors[-amount:]]
+ if amount == 1:
+ return results[0]
+ return results[::-1]
+
+
+def find_mirrors(start_with='a',
+ good_age=30*60,
+ slow_mirrors_wait=5,
+ prefer_fastest=True,
+ amount=1):
+ """
+ find_mirrors(start_with, good_age, slow_mirrors_wait, prefer_fastest)
+ -> [(name, family, IP, response_time, last_modified)]
+
+ Find a PyPI mirror matching given criteria.
+ start_with indicates the first mirror that should be considered (defaults to 'a').
+ If prefer_fastest is True, it stops with the first mirror responding. Mirrors 'compete'
+ against each other in randomly-shuffled batches of 10.
+ If this procedure goes on for longer than slow_mirrors_wait (default 5s) and prefer_fastest
+ is false, return even if not all mirrors have been responding.
+ If no matching mirror can be found, the newest one that did response is returned.
+ If no mirror can be found at all, ValueError is raised
+ """
+ started = time.time()
+ good_mirrors = []
+ pending_mirrors = {} # socket:mirror
+ good_last_modified = datetime.datetime.utcnow() - datetime.timedelta(seconds=good_age)
+ for host, family, ip in _batched_mirror_list(start_with):
+ try:
+ m = _Mirror(host, family, ip)
+ except socket.error:
+ continue
+ pending_mirrors[m.socket] = m
+ for m in _select(pending_mirrors):
+ if prefer_fastest and m.last_modified > good_last_modified:
+ _close(pending_mirrors)
+ return m.results()
+ else:
+ good_mirrors.append(m)
+
+ while pending_mirrors:
+ if time.time() > started + slow_mirrors_wait and good_mirrors:
+ # if we have looked for 5s for a mirror, and we already have one
+ # return the newest one
+ _close(pending_mirrors)
+ return _newest(good_mirrors, amount)
+ for m in _select(pending_mirrors):
+ if prefer_fastest and m.last_modified > good_last_modified:
+ _close(pending_mirrors)
+ return [m.results()]
+ else:
+ good_mirrors.append(m)
+ if not good_mirrors:
+ raise ValueError("No mirrors found")
+ return _newest(good_mirrors, amount)
+
+# Distribute and use freely; there are no restrictions on further
+# dissemination and usage except those imposed by the laws of your
+# country of residence. This software is provided "as is" without
+# warranty of fitness for use or suitability for any purpose, express
+# or implied. Use at your own risk or not at all.
+"""
+Verify a DSA signature, for use with PyPI mirrors.
+
+Originally copied from PyPI's own code:
+https://svn.python.org/packages/trunk/pypi/tools/verify.py
+
+Verification should use the following steps:
+1. Download the DSA key from http://pypi.python.org/serverkey, as key_string
+2. key = load_key(key_string)
+3. Download the package page, from <mirror>/simple/<package>/, as data
+4. Download the package signature, from <mirror>/serversig/<package>, as sig
+5. Check verify(key, data, sig)
+"""
+
+try:
+ from M2Crypto import EVP, DSA, BIO
+
+ def load_key(string):
+ """
+ load_key(string) -> key
+
+ Convert a PEM format public DSA key into
+ an internal representation.
+ """
+ return DSA.load_pub_key_bio(BIO.MemoryBuffer(string))
+
+ def verify(key, data, sig):
+ """
+ verify(key, data, sig) -> bool
+
+ Verify autenticity of the signature created by key for
+ data. data is the bytes that got signed; signature is the
+ bytes that represent the signature, using the sha1+DSA
+ algorithm. key is an internal representation of the DSA key
+ as returned from load_key."""
+ md = EVP.MessageDigest('sha1')
+ md.update(data)
+ digest = md.final()
+ return key.verify_asn1(digest, sig)
+
+except ImportError:
+
+ # DSA signature algorithm, taken from pycrypto 2.0.1
+ # The license terms are the same as the ones for this module.
+ def _inverse(u, v):
+ """
+ _inverse(u:long, u:long):long
+ Return the inverse of u mod v.
+ """
+ u3, v3 = _long(u), _long(v)
+ u1, v1 = _long(1), _long(0)
+ while v3 > 0:
+ q = u3 // v3
+ u1, v1 = v1, u1 - v1 * q
+ u3, v3 = v3, u3 - v3 * q
+ while u1 < 0:
+ u1 = u1 + v
+ return u1
+
+ def _verify(key, M, sig):
+ p, q, g, y = key
+ r, s = sig
+ if r <= 0 or r >= q or s <= 0 or s >= q:
+ return False
+ w = _inverse(s, q)
+ u1, u2 = (M * w) % q, (r * w) % q
+ v1 = pow(g, u1, p)
+ v2 = pow(y, u2, p)
+ v = (v1 * v2) % p
+ v = v % q
+ return v == r
+
+ # END OF pycrypto
+
+ def _bytes2int(b):
+ value = 0
+ for c in b:
+ value = value * 256 + ord(c)
+ return value
+
+ _SEQUENCE = 0x30 # cons
+ _INTEGER = 2 # prim
+ _BITSTRING = 3 # prim
+ _OID = 6 # prim
+
+ def _asn1parse(string):
+ tag = ord(string[0])
+ assert tag & 31 != 31 # only support one-byte tags
+ length = ord(string[1])
+ assert length != 128 # indefinite length not supported
+ pos = 2
+ if length > 128:
+ # multi-byte length
+ val = 0
+ length -= 128
+ val = _bytes2int(string[pos:pos + length])
+ pos += length
+ length = val
+ data = string[pos:pos + length]
+ rest = string[pos + length:]
+ assert pos + length <= len(string)
+ if tag == _SEQUENCE:
+ result = []
+ while data:
+ value, data = _asn1parse(data)
+ result.append(value)
+ elif tag == _INTEGER:
+ assert ord(data[0]) < 128 # negative numbers not supported
+ result = 0
+ for c in data:
+ result = result * 256 + ord(c)
+ elif tag == _BITSTRING:
+ result = data
+ elif tag == _OID:
+ result = data
+ else:
+ raise ValueError("Unsupported tag %x" % tag)
+ return (tag, result), rest
+
+ def load_key(string):
+ """
+ load_key(string) -> key
+
+ Convert a PEM format public DSA key into
+ an internal representation."""
+ lines = [line.strip() for line in string.splitlines()]
+ assert lines[0] == b("-----BEGIN PUBLIC KEY-----")
+ assert lines[-1] == b("-----END PUBLIC KEY-----")
+ data = decode_base64(''.join([u(line) for line in lines[1:-1]]))
+ spki, rest = _asn1parse(data)
+ assert not rest
+ # SubjectPublicKeyInfo ::= SEQUENCE {
+ # algorithm AlgorithmIdentifier,
+ # subjectPublicKey BIT STRING }
+ assert spki[0] == _SEQUENCE
+ algoid, key = spki[1]
+ assert key[0] == _BITSTRING
+ key = key[1]
+ # AlgorithmIdentifier ::= SEQUENCE {
+ # algorithm OBJECT IDENTIFIER,
+ # parameters ANY DEFINED BY algorithm OPTIONAL }
+ assert algoid[0] == _SEQUENCE
+ algorithm, parameters = algoid[1]
+ # dsaEncryption
+ # assert algorithm[0] == _OID and algorithm[1] == '*\x86H\xce8\x04\x01'
+ # Dss-Parms ::= SEQUENCE {
+ # p INTEGER,
+ # q INTEGER,
+ # g INTEGER }
+ assert parameters[0] == _SEQUENCE
+ p, q, g = parameters[1]
+ assert p[0] == q[0] == g[0] == _INTEGER
+ p, q, g = p[1], q[1], g[1]
+ # Parse bit string value as integer
+ # assert key[0] == '\0' # number of bits multiple of 8
+ y, rest = _asn1parse(key[1:])
+ assert not rest
+ assert y[0] == _INTEGER
+ y = y[1]
+ return p, q, g, y
+
+ def verify(key, data, sig):
+ """
+ verify(key, data, sig) -> bool
+
+ Verify autenticity of the signature created by key for
+ data. data is the bytes that got signed; signature is the
+ bytes that represent the signature, using the sha1+DSA
+ algorithm. key is an internal representation of the DSA key
+ as returned from load_key."""
+ sha = hashlib.sha1()
+ sha.update(data)
+ data = sha.digest()
+ data = _bytes2int(data)
+ # Dss-Sig-Value ::= SEQUENCE {
+ # r INTEGER,
+ # s INTEGER }
+ sig, rest = _asn1parse(sig)
+ assert not rest
+ assert sig[0] == _SEQUENCE
+ r, s = sig[1]
+ assert r[0] == s[0] == _INTEGER
+ sig = r[1], s[1]
+ return _verify(key, data, sig)