diff options
| author | martin.von.loewis <devnull@localhost> | 2010-09-20 08:46:00 +0000 |
|---|---|---|
| committer | martin.von.loewis <devnull@localhost> | 2010-09-20 08:46:00 +0000 |
| commit | ef2669dfb1f55f90d1ec00be3bdfb265992bccb5 (patch) | |
| tree | a28da194279d6e92569990d4bd21e5716ddef036 /tools/mirrorlib.py | |
| parent | 4ab1500d9662d08d9d192a131e2b65ab7e87a71c (diff) | |
| download | decorator-ef2669dfb1f55f90d1ec00be3bdfb265992bccb5.tar.gz | |
Create library to deal with PyPI mirrors.
Diffstat (limited to 'tools/mirrorlib.py')
| -rw-r--r-- | tools/mirrorlib.py | 158 |
1 files changed, 158 insertions, 0 deletions
'''Library to support tools that access PyPI mirrors. The following
functional areas are covered:
- mirror selection (find_mirror)
- mirror verification
- key rollover
'''

################## Mirror Selection ##############################
import datetime
import errno
import select
import socket
import time

# Life-cycle states of a _Mirror probe (values unchanged from the
# original numeric codes).
_WANT_SEND = 0   # connected (or connecting); request not yet sent
_WANT_RECV = 1   # request sent; waiting for the response
_COMPLETED = 2   # response parsed successfully
_FAILED = 3      # probe failed

_MIRROR_DOMAIN = '.pypi.python.org'
# The /last-modified response is expected to be much shorter than this.
_MAX_RESPONSE = 1200
# How long a single select() may block while find_mirror is only waiting.
_POLL_INTERVAL = 0.05


def _mirror_name(index):
    '''Return the mirror label for a zero-based index.

    0..25 map to 'a'..'z' and 26..701 map to 'aa'..'zz'.
    Raises ValueError for an index above 701.'''
    ord_a = ord('a')
    if index < 26:
        # a..z
        return chr(ord_a + index)
    if index > 701:
        raise ValueError('too many mirrors')
    # aa, ab, ... zz
    high, low = divmod(index, 26)
    return chr(ord_a - 1 + high) + chr(ord_a + low)


def _mirror_list(first):
    '''Generator producing (host name, IP address) for all mirrors.

    first is the single-letter label of the first mirror to produce.
    Mirrors are produced in label order up to and including the host
    that last.pypi.python.org resolves to.  Name-resolution errors from
    the socket module are propagated to the caller.'''
    last = socket.gethostbyname_ex('last.pypi.python.org')
    cur_index = ord(first) - ord('a')
    cur = first + _MIRROR_DOMAIN
    while last[0] != cur:
        yield cur, socket.gethostbyname(cur)
        cur_index += 1
        cur = _mirror_name(cur_index) + _MIRROR_DOMAIN
    # last[2] is the address list of the canonical "last" host
    yield last[0], last[2][0]


class _Mirror(object):
    '''A single non-blocking probe of one mirror's /last-modified page.

    status values:
    0: wants to send
    1: wants to recv
    2: completed, ok
    3: completed, failed
    '''

    def __init__(self, name, ip):
        '''Start a non-blocking connection to port 80 of the mirror.

        name is used for the Host header, ip is the address connected to.
        Connection errors other than "in progress" are re-raised.'''
        self.name = name
        self.ip = ip
        # Filled in by read(); defined here so results() never hits a
        # missing attribute.
        self.response_time = None
        self.last_modified = None
        self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.socket.setblocking(0)
        self.started = time.time()
        try:
            # Connect to the already-resolved address: this avoids a second
            # (blocking) DNS lookup and guarantees that the IP reported by
            # results() is the one that was actually measured.
            self.socket.connect((ip, 80))
        except socket.error as e:
            # A non-blocking connect reports "in progress" as an error;
            # some platforms use EWOULDBLOCK for the same condition.
            if e.errno not in (errno.EINPROGRESS, errno.EWOULDBLOCK):
                self.socket.close()
                raise
        # now need to select for writing
        self.status = _WANT_SEND

    def write(self):
        '''Send the HTTP request; on a socket error mark the probe failed.'''
        request = ('GET /last-modified HTTP/1.0\r\n'
                   'Host: %s\r\n'
                   '\r\n' % self.name)
        try:
            # Sockets carry bytes; the request is pure ASCII.
            self.socket.send(request.encode('ascii'))
        except socket.error:
            # e.g. the asynchronous connect did not succeed
            self.failed()
            return
        self.status = _WANT_RECV

    def read(self):
        '''Receive and parse the response, then close the socket.

        Sets status to 2 and last_modified on success; any malformed,
        oversized, empty or non-200 response sets status to 3.'''
        try:
            data = self.socket.recv(_MAX_RESPONSE)
        except socket.error:
            self.failed()
            return
        self.response_time = time.time() - self.started
        self.socket.close()
        self.status = _FAILED  # until proven otherwise
        if len(data) >= _MAX_RESPONSE:
            # response should be much shorter
            return
        lines = data.splitlines()
        fields = lines[0].split() if lines else []
        if len(fields) < 2 or fields[1] != b'200':
            return
        try:
            # UnicodeDecodeError is a ValueError, so a non-ASCII body is
            # handled by the same clause as an unparsable timestamp.
            stamp = lines[-1].decode('ascii').strip()
            self.last_modified = datetime.datetime.strptime(
                stamp, "%Y%m%dT%H:%M:%S")
        except ValueError:
            return
        self.status = _COMPLETED

    def failed(self):
        '''Close the socket and mark the probe as failed.'''
        self.socket.close()
        self.status = _FAILED

    def results(self):
        '''Return (name, ip, response_time, last_modified).'''
        return self.name, self.ip, self.response_time, self.last_modified


def _select(mirrors, timeout=0):
    '''Perform one select call on the mirrors dictionary (socket -> _Mirror).

    Mirrors that become writable get their request sent; readable ones are
    read and removed from the dictionary; ones in an exceptional state are
    failed and removed.  timeout is passed to select.select (0 polls
    without blocking).  Returns the list of mirrors that completed
    successfully during this call.'''
    rlist = []
    wlist = []
    for m in mirrors.values():
        if m.status == _WANT_SEND:
            wlist.append(m.socket)
        elif m.status == _WANT_RECV:
            rlist.append(m.socket)
    if not rlist and not wlist:
        # Nothing to wait for; select() with empty sets would merely sleep
        # (and is an error on some platforms).
        return []
    xlist = rlist + wlist
    rlist, wlist, xlist = select.select(rlist, wlist, xlist, timeout)
    completed = []
    for s in wlist:
        m = mirrors[s]
        m.write()
        if m.status == _FAILED:
            del mirrors[s]
    for s in rlist:
        m = mirrors.pop(s)
        m.read()
        if m.status == _COMPLETED:
            completed.append(m)
    for s in xlist:
        # The socket may already have been handled (and removed) above.
        m = mirrors.pop(s, None)
        if m is not None:
            m.failed()
    return completed


def _close(mirrors):
    '''Close the sockets of all mirrors in a socket -> _Mirror dictionary.'''
    for m in list(mirrors.values()):
        m.socket.close()


def _newest(mirrors):
    '''Return results() of the mirror with the latest last_modified.

    Raises ValueError if mirrors is empty.  The list is not modified.'''
    if not mirrors:
        raise ValueError("no mirrors found")
    # sorted()[-1] keeps the original tie-breaking (last of equal entries).
    return sorted(mirrors, key=lambda m: m.last_modified)[-1].results()


def find_mirror(start_with='a',
                good_response_time=1,
                good_age=30*60,
                max_wait=5):
    '''find_mirror(start_with, good_response_time, good_age, max_wait) -> name, IP, response_time, last_modified
    Find a PyPI mirror matching given criteria.
    start_with indicates the first mirror that should be considered (defaults to 'a').
    good_response_time is the maximum response time which lets this algorithm look no further;
    likewise, good_age is the maximum age acceptable to the caller.
    If this procedure goes on for longer than max_wait (default 5s), return even if
    not all mirrors have been responding.
    If no matching mirror can be found, the newest one that did respond is returned.
    Raises ValueError if no mirror produced a usable response.'''
    started = time.time()
    good_mirrors = []
    pending_mirrors = {}  # socket:mirror
    good_last_modified = (datetime.datetime.utcnow()
                          - datetime.timedelta(seconds=good_age))

    def poll(timeout):
        # Process one round of socket events.  Return the first mirror
        # meeting both criteria, or None; other completed mirrors are
        # remembered as fallback candidates.
        for m in _select(pending_mirrors, timeout):
            if (m.response_time < good_response_time
                    and m.last_modified > good_last_modified):
                return m
            good_mirrors.append(m)
        return None

    try:
        # Start the probes one by one, polling (without blocking) in
        # between so that early responders can finish the search early.
        for host, ip in _mirror_list(start_with):
            m = _Mirror(host, ip)
            pending_mirrors[m.socket] = m
            match = poll(0)
            if match is not None:
                return match.results()

        while pending_mirrors:
            if time.time() > started + max_wait and good_mirrors:
                # if we have looked for max_wait seconds for a mirror, and
                # we already have one, return the newest one
                return _newest(good_mirrors)
            # Block briefly instead of spinning; select() still returns as
            # soon as any socket is ready, so timings are not distorted.
            match = poll(_POLL_INTERVAL)
            if match is not None:
                return match.results()
        return _newest(good_mirrors)
    finally:
        # Never leak sockets of probes that are still outstanding,
        # whichever way the function is left.
        _close(pending_mirrors)
