summaryrefslogtreecommitdiff
path: root/pip/mirrors.py
blob: 52992b4e91ebca20086acfc1925d7c2c4a44c011 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
"""
Library to support tools that access PyPI mirrors. The following
functional areas are covered:
- mirror selection (find_mirror)
- mirror verification
- key rollover
"""

import datetime
import hashlib
import errno
import random
import select
import socket
import time
from pip.backwardcompat import (b, u, _ord as ord,
                                decode_base64, _long)


def _mirror_list(first):
    """
    Generator producing all mirror names
    """
    ord_a = ord('a')
    try:
        last = socket.gethostbyname_ex('last.pypi.python.org')
    except socket.gaierror:
        return
    cur_index = ord(first) - ord_a
    cur = first + '.pypi.python.org'
    while True:
        for family, _, _, _, sockaddr in socket.getaddrinfo(cur, 0, 0, socket.SOCK_STREAM):
            yield cur, family, sockaddr[0]
        if last[0] == cur:
            break
        cur_index += 1
        if cur_index < 26:
            # a..z
            cur = chr(ord_a + cur_index)
        elif cur_index > 701:
            raise ValueError('too many mirrors')
        else:
            # aa, ab, ... zz
            cur = divmod(cur_index, 26)
            cur = chr(ord_a - 1 + cur[0]) + chr(ord_a + cur[1])
        cur += '.pypi.python.org'

def _batched_mirror_list(first):
    """
    Generator that does DNS lookups in batches of 10, and shuffles them.
    """
    batch = []
    for res in _mirror_list(first):
        batch.append(res)
        if len(batch) == 10:
            random.shuffle(batch)
            for res2 in batch:
                yield res2
            batch = []
    random.shuffle(batch)
    for res2 in batch:
        yield res2

class _Mirror:
    # status values:
    # 0: wants to send
    # 1: wants to recv
    # 2: completed, ok
    # 3: completed, failed
    def __init__(self, name, family, ip):
        self.name = name
        self.family = family
        self.ip = ip
        self.socket = socket.socket(family, socket.SOCK_STREAM)
        self.socket.setblocking(0)
        self.started = time.time()
        try:
            self.socket.connect((ip, 80))
        except socket.error, e:
            if e.errno != errno.EINPROGRESS:
                raise
        # now need to select for writing
        self.status = 0

    def write(self):
        url = 'last-modified'
        if self.name == 'a.pypi.python.org':
            # the master server doesn't provide last-modified,
            # as that would be pointless. Instead, /daytime can be
            # used as an indication of currency and responsiveness.
            url = 'daytime'
        self.socket.send('GET /%s HTTP/1.0\r\n'
                         'Host: %s\r\n'
                         '\r\n' % (url, self.name))
        self.status = 1

    def read(self):
        data = self.socket.recv(1200)
        self.response_time = time.time()-self.started
        # response should be much shorter
        assert len(data) < 1200
        self.socket.close()
        data = data.splitlines()
        if data[0].split()[1] == '200':
            # ok
            data = data[-1]
            try:
                self.last_modified = datetime.datetime.strptime(data, "%Y%m%dT%H:%M:%S")
                self.status = 2 # complete
            except ValueError:
                self.status = 3 # failed
        else:
            self.status = 3

    def failed(self):
        self.socket.close()
        self.status = 3

    def results(self):
        return self.name, self.family, self.ip, self.response_time, self.last_modified

def _select(mirrors):
    # perform select call on mirrors dictionary
    rlist = []
    wlist = []
    xlist = []
    for m in mirrors.values():
        if m.status == 0:
            wlist.append(m.socket)
            xlist.append(m.socket)
        elif m.status == 1:
            rlist.append(m.socket)
            xlist.append(m.socket)
    rlist, wlist, xlist = select.select(rlist, wlist, xlist, 0)
    completed = []
    for s in wlist:
        mirrors[s].write()
    for s in rlist:
        m = mirrors[s]
        del mirrors[s]
        m.read()
        if m.status == 2:
            completed.append(m)
    for s in xlist:
        mirrors[s].failed()
        del mirrors[s]
    return completed

def _close(mirrors):
    for m in mirrors:
        m.close()

def _newest(mirrors, amount=1):
    mirrors.sort(key=lambda m: m.last_modified)
    results = [mirror.results() for mirror in mirrors[-amount:]]
    if amount == 1:
        return results[0]
    return results[::-1]


def find_mirrors(start_with='a',
                 good_age=30*60,
                 slow_mirrors_wait=5,
                 prefer_fastest=True,
                 amount=1):
    """
    find_mirrors(start_with, good_age, slow_mirrors_wait, prefer_fastest)
       -> [(name, family, IP, response_time, last_modified)]

    Find a PyPI mirror matching given criteria.
    start_with indicates the first mirror that should be considered (defaults to 'a').
    If prefer_fastest is True, it stops with the first mirror responding. Mirrors 'compete'
    against each other in randomly-shuffled batches of 10.
    If this procedure goes on for longer than slow_mirrors_wait (default 5s) and prefer_fastest
    is false, return even if not all mirrors have been responding.
    If no matching mirror can be found, the newest one that did response is returned.
    If no mirror can be found at all, ValueError is raised
    """
    started = time.time()
    good_mirrors = []
    pending_mirrors = {} # socket:mirror
    good_last_modified = datetime.datetime.utcnow() - datetime.timedelta(seconds=good_age)
    for host, family, ip in _batched_mirror_list(start_with):
        try:
            m = _Mirror(host, family, ip)
        except socket.error:
            continue
        pending_mirrors[m.socket] = m
        for m in _select(pending_mirrors):
            if prefer_fastest and m.last_modified > good_last_modified:
                _close(pending_mirrors)
                return m.results()
            else:
                good_mirrors.append(m)

    while pending_mirrors:
        if time.time() > started + slow_mirrors_wait and good_mirrors:
            # if we have looked for 5s for a mirror, and we already have one
            # return the newest one
            _close(pending_mirrors)
            return _newest(good_mirrors, amount)
        for m in _select(pending_mirrors):
            if prefer_fastest and m.last_modified > good_last_modified:
                _close(pending_mirrors)
                return [m.results()]
            else:
                good_mirrors.append(m)
    if not good_mirrors:
        raise ValueError("No mirrors found")
    return _newest(good_mirrors, amount)

# Distribute and use freely; there are no restrictions on further
# dissemination and usage except those imposed by the laws of your
# country of residence.  This software is provided "as is" without
# warranty of fitness for use or suitability for any purpose, express
# or implied. Use at your own risk or not at all.
"""
Verify a DSA signature, for use with PyPI mirrors.

Originally copied from PyPI's own code:
https://svn.python.org/packages/trunk/pypi/tools/verify.py

Verification should use the following steps:
1. Download the DSA key from http://pypi.python.org/serverkey, as key_string
2. key = load_key(key_string)
3. Download the package page, from <mirror>/simple/<package>/, as data
4. Download the package signature, from <mirror>/serversig/<package>, as sig
5. Check verify(key, data, sig)
"""

try:
    from M2Crypto import EVP, DSA, BIO

    def load_key(string):
        """
        load_key(string) -> key

        Convert a PEM format public DSA key into
        an internal representation.
        """
        return DSA.load_pub_key_bio(BIO.MemoryBuffer(string))

    def verify(key, data, sig):
        """
        verify(key, data, sig) -> bool

        Verify autenticity of the signature created by key for
        data. data is the bytes that got signed; signature is the
        bytes that represent the signature, using the sha1+DSA
        algorithm. key is an internal representation of the DSA key
        as returned from load_key."""
        md = EVP.MessageDigest('sha1')
        md.update(data)
        digest = md.final()
        return key.verify_asn1(digest, sig)

except ImportError:

    # DSA signature algorithm, taken from pycrypto 2.0.1
    # The license terms are the same as the ones for this module.
    def _inverse(u, v):
        """
        _inverse(u:long, u:long):long
        Return the inverse of u mod v.
        """
        u3, v3 = _long(u), _long(v)
        u1, v1 = _long(1), _long(0)
        while v3 > 0:
            q = u3 // v3
            u1, v1 = v1, u1 - v1 * q
            u3, v3 = v3, u3 - v3 * q
        while u1 < 0:
            u1 = u1 + v
        return u1

    def _verify(key, M, sig):
        p, q, g, y = key
        r, s = sig
        if r <= 0 or r >= q or s <= 0 or s >= q:
            return False
        w = _inverse(s, q)
        u1, u2 = (M * w) % q, (r * w) % q
        v1 = pow(g, u1, p)
        v2 = pow(y, u2, p)
        v = (v1 * v2) % p
        v = v % q
        return v == r

    # END OF pycrypto

    def _bytes2int(b):
        value = 0
        for c in b:
            value = value * 256 + ord(c)
        return value

    _SEQUENCE = 0x30  # cons
    _INTEGER = 2      # prim
    _BITSTRING = 3    # prim
    _OID = 6          # prim

    def _asn1parse(string):
        tag = ord(string[0])
        assert tag & 31 != 31  # only support one-byte tags
        length = ord(string[1])
        assert length != 128  # indefinite length not supported
        pos = 2
        if length > 128:
            # multi-byte length
            val = 0
            length -= 128
            val = _bytes2int(string[pos:pos + length])
            pos += length
            length = val
        data = string[pos:pos + length]
        rest = string[pos + length:]
        assert pos + length <= len(string)
        if tag == _SEQUENCE:
            result = []
            while data:
                value, data = _asn1parse(data)
                result.append(value)
        elif tag == _INTEGER:
            assert ord(data[0]) < 128  # negative numbers not supported
            result = 0
            for c in data:
                result = result * 256 + ord(c)
        elif tag == _BITSTRING:
            result = data
        elif tag == _OID:
            result = data
        else:
            raise ValueError("Unsupported tag %x" % tag)
        return (tag, result), rest

    def load_key(string):
        """
        load_key(string) -> key

        Convert a PEM format public DSA key into
        an internal representation."""
        lines = [line.strip() for line in string.splitlines()]
        assert lines[0] == b("-----BEGIN PUBLIC KEY-----")
        assert lines[-1] == b("-----END PUBLIC KEY-----")
        data = decode_base64(''.join([u(line) for line in lines[1:-1]]))
        spki, rest = _asn1parse(data)
        assert not rest
        # SubjectPublicKeyInfo  ::=  SEQUENCE  {
        #  algorithm            AlgorithmIdentifier,
        #  subjectPublicKey     BIT STRING  }
        assert spki[0] == _SEQUENCE
        algoid, key = spki[1]
        assert key[0] == _BITSTRING
        key = key[1]
        # AlgorithmIdentifier  ::=  SEQUENCE  {
        #  algorithm               OBJECT IDENTIFIER,
        #  parameters              ANY DEFINED BY algorithm OPTIONAL  }
        assert algoid[0] == _SEQUENCE
        algorithm, parameters = algoid[1]
        # dsaEncryption
        # assert algorithm[0] == _OID and algorithm[1] == '*\x86H\xce8\x04\x01'
        # Dss-Parms  ::=  SEQUENCE  {
        #  p             INTEGER,
        #  q             INTEGER,
        #  g             INTEGER  }
        assert parameters[0] == _SEQUENCE
        p, q, g = parameters[1]
        assert p[0] == q[0] == g[0] == _INTEGER
        p, q, g = p[1], q[1], g[1]
        # Parse bit string value as integer
        # assert key[0] == '\0'  # number of bits multiple of 8
        y, rest = _asn1parse(key[1:])
        assert not rest
        assert y[0] == _INTEGER
        y = y[1]
        return p, q, g, y

    def verify(key, data, sig):
        """
        verify(key, data, sig) -> bool

        Verify autenticity of the signature created by key for
        data. data is the bytes that got signed; signature is the
        bytes that represent the signature, using the sha1+DSA
        algorithm. key is an internal representation of the DSA key
        as returned from load_key."""
        sha = hashlib.sha1()
        sha.update(data)
        data = sha.digest()
        data = _bytes2int(data)
        # Dss-Sig-Value  ::=  SEQUENCE  {
        #      r       INTEGER,
        #      s       INTEGER  }
        sig, rest = _asn1parse(sig)
        assert not rest
        assert sig[0] == _SEQUENCE
        r, s = sig[1]
        assert r[0] == s[0] == _INTEGER
        sig = r[1], s[1]
        return _verify(key, data, sig)