diff options
author | mstenner <mstenner> | 2005-02-14 21:55:06 +0000 |
---|---|---|
committer | mstenner <mstenner> | 2005-02-14 21:55:06 +0000 |
commit | c3b8f50636637fb6ede6187d2428202ba123e2d4 (patch) | |
tree | 7243eef9195dd55e76e7f82f6f4b599d6457500e | |
parent | 844d48d85e54ac6352f1eb17c5b523f0d0fc7187 (diff) | |
download | urlgrabber-c3b8f50636637fb6ede6187d2428202ba123e2d4.tar.gz |
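Fixed a Python 2.4 bug: added a .code attribute to returned file objects.
Changed the keepalive.HANDLE_ERRORS behavior (and its default) to account for the way Python 2.4 handles HTTP errors, where error processing is done by the new HTTPErrorProcessor handler rather than by the HTTP handler itself.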
-rw-r--r-- | test/test_keepalive.py | 32 | ||||
-rw-r--r-- | test/test_mirror.py | 9 | ||||
-rw-r--r-- | urlgrabber/byterange.py | 8 | ||||
-rw-r--r-- | urlgrabber/grabber.py | 5 | ||||
-rw-r--r-- | urlgrabber/keepalive.py | 39 |
5 files changed, 68 insertions, 25 deletions
diff --git a/test/test_keepalive.py b/test/test_keepalive.py index 9d4cfb9..f0d18d2 100644 --- a/test/test_keepalive.py +++ b/test/test_keepalive.py @@ -21,7 +21,7 @@ """keepalive.py tests""" -# $Id: test_keepalive.py,v 1.8 2004/03/31 17:02:00 mstenner Exp $ +# $Id: test_keepalive.py,v 1.9 2005/02/14 21:55:06 mstenner Exp $ import sys import os @@ -30,7 +30,7 @@ import urllib2 import threading import re -from urllib2 import URLError +from urllib2 import URLError, HTTPError from base_test_code import * @@ -92,6 +92,8 @@ class HTTPErrorTests(TestCase): def setUp(self): self.kh = keepalive.HTTPHandler() self.opener = urllib2.build_opener(self.kh) + import sys + self.python_version = map(int, sys.version.split()[0].split('.')) def tearDown(self): self.kh.close_all() @@ -121,10 +123,15 @@ class HTTPErrorTests(TestCase): def test_404_handler_off(self): "test that 404 works without fancy handler" keepalive.HANDLE_ERRORS = 0 - fo = self.opener.open(ref_404) - data = fo.read() - fo.close() - self.assertEqual((fo.status, fo.reason), (404, 'Not Found')) + ## see the HANDLE_ERRORS note in keepalive.py for discussion of + ## the changes in python 2.4 + if self.python_version >= [2, 4]: + self.assertRaises(URLError, self.opener.open, ref_404) + else: + fo = self.opener.open(ref_404) + data = fo.read() + fo.close() + self.assertEqual((fo.status, fo.reason), (404, 'Not Found')) def test_403_handler_on(self): "test that 403 works with fancy handler" @@ -134,10 +141,15 @@ class HTTPErrorTests(TestCase): def test_403_handler_off(self): "test that 403 works without fancy handler" keepalive.HANDLE_ERRORS = 0 - fo = self.opener.open(ref_403) - data = fo.read() - fo.close() - self.assertEqual((fo.status, fo.reason), (403, 'Forbidden')) + ## see the HANDLE_ERRORS note in keepalive.py for discussion of + ## the changes in python 2.4 + if self.python_version >= [2, 4]: + self.assertRaises(URLError, self.opener.open, ref_403) + else: + fo = self.opener.open(ref_403) + data = fo.read() + 
fo.close() + self.assertEqual((fo.status, fo.reason), (403, 'Forbidden')) class DroppedConnectionTests(TestCase): def setUp(self): diff --git a/test/test_mirror.py b/test/test_mirror.py index 0bb266f..fc905ef 100644 --- a/test/test_mirror.py +++ b/test/test_mirror.py @@ -21,7 +21,7 @@ """mirror.py tests""" -# $Id: test_mirror.py,v 1.9 2004/08/20 19:30:24 mstenner Exp $ +# $Id: test_mirror.py,v 1.10 2005/02/14 21:55:06 mstenner Exp $ import sys import os @@ -104,12 +104,13 @@ class CallbackTests(TestCase): def test_failure_callback(self): "test that MG executes the failure callback correctly" tricky_list = [] - def failure_callback(cb_obj, tl): tl.append(str(cb_obj.exception)) + def failure_callback(cb_obj, tl): + tl.append(str(cb_obj.exception)) self.mg.failure_callback = failure_callback, (tricky_list, ), {} data = self.mg.urlread('reference') self.assert_(data == reference_data) - self.assertEquals(tricky_list, - ['[Errno 4] IOError: HTTP Error 403: Forbidden']) + self.assertEquals(tricky_list[0][:33], + '[Errno 4] IOError: HTTP Error 403') def test_callback_reraise(self): "test that the callback can correctly re-raise the exception" diff --git a/urlgrabber/byterange.py b/urlgrabber/byterange.py index 8f1ebe3..94f42a0 100644 --- a/urlgrabber/byterange.py +++ b/urlgrabber/byterange.py @@ -17,7 +17,7 @@ # This file is part of urlgrabber, a high-level cross-protocol url-grabber # Copyright 2002-2004 Michael D. 
Stenner, Ryan Tomayko -# $Id: byterange.py,v 1.8 2005/02/04 16:27:44 rtomayko Exp $ +# $Id: byterange.py,v 1.9 2005/02/14 21:55:07 mstenner Exp $ import os import stat @@ -60,7 +60,10 @@ class HTTPRangeHandler(urllib2.BaseHandler): def http_error_206(self, req, fp, code, msg, hdrs): # 206 Partial Content Response - return urllib.addinfourl(fp, hdrs, req.get_full_url()) + r = urllib.addinfourl(fp, hdrs, req.get_full_url()) + r.code = code + r.msg = msg + return r def http_error_416(self, req, fp, code, msg, hdrs): # HTTP's Range Not Satisfiable error @@ -233,6 +236,7 @@ class FileRangeHandler(urllib2.FileHandler): (mtype or 'text/plain', size, modified))) return urllib.addinfourl(fo, headers, 'file:'+file) + # FTP Range Support # Unfortunately, a large amount of base FTP code had to be copied # from urllib and urllib2 in order to insert the FTP REST command. diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py index 4484940..ecf9596 100644 --- a/urlgrabber/grabber.py +++ b/urlgrabber/grabber.py @@ -279,7 +279,7 @@ BANDWIDTH THROTTLING """ -# $Id: grabber.py,v 1.33 2005/01/14 18:21:41 rtomayko Exp $ +# $Id: grabber.py,v 1.34 2005/02/14 21:55:07 mstenner Exp $ import os import os.path @@ -834,7 +834,8 @@ class URLGrabberFileObject: except RangeError, e: raise URLGrabError(9, _('%s') % (e, )) except IOError, e: - if hasattr(e, 'reason') and isinstance(e.reason, TimeoutError): + if hasattr(e, 'reason') and have_socket_timeout and \ + isinstance(e.reason, TimeoutError): raise URLGrabError(12, _('Timeout: %s') % (e, )) else: raise URLGrabError(4, _('IOError: %s') % (e, )) diff --git a/urlgrabber/keepalive.py b/urlgrabber/keepalive.py index 7fc006c..5c6b426 100644 --- a/urlgrabber/keepalive.py +++ b/urlgrabber/keepalive.py @@ -73,14 +73,33 @@ EXTRA ATTRIBUTES AND METHODS urllib2 tries to do clever things with error codes 301, 302, 401, and 407, and it wraps the object upon return. 
- You can optionally set the module-level global HANDLE_ERRORS to 0, - in which case the handler will always return the object directly. - If you like the fancy handling of errors, don't do this. If you - prefer to see your error codes, then do. + For python versions earlier than 2.4, you can avoid this fancy error + handling by setting the module-level global HANDLE_ERRORS to zero. + You see, prior to 2.4, it's the HTTP Handler's job to determine what + to handle specially, and what to just pass up. HANDLE_ERRORS == 0 + means "pass everything up". In python 2.4, however, this job no + longer belongs to the HTTP Handler and is now done by a NEW handler, + HTTPErrorProcessor. Here's the bottom line: + + python version < 2.4 + HANDLE_ERRORS == 1 (default) pass up 200, treat the rest as + errors + HANDLE_ERRORS == 0 pass everything up, error processing is + left to the calling code + python version >= 2.4 + HANDLE_ERRORS == 1 pass up 200, treat the rest as errors + HANDLE_ERRORS == 0 (default) pass everything up, let the + other handlers (specifically, + HTTPErrorProcessor) decide what to do + + In practice, setting the variable either way makes little difference + in python 2.4, so for the most consistent behavior across versions, + you probably just want to use the defaults, which will give you + exceptions on errors. 
""" -# $Id: keepalive.py,v 1.8 2004/03/31 17:02:01 mstenner Exp $ +# $Id: keepalive.py,v 1.9 2005/02/14 21:55:07 mstenner Exp $ import urllib2 import httplib @@ -89,8 +108,12 @@ import thread DEBUG = 0 def DBPRINT(*args): print ' '.join(args) -HANDLE_ERRORS = 1 +import sys +_python_version = map(int, sys.version.split()[0].split('.')) +if _python_version < [2, 4]: HANDLE_ERRORS = 1 +else: HANDLE_ERRORS = 0 + class ConnectionManager: """ The connection manager must be able to: @@ -227,7 +250,8 @@ class HTTPHandler(urllib2.HTTPHandler): r._host = host r._url = req.get_full_url() r._connection = h - + r.code = r.status + if r.status == 200 or not HANDLE_ERRORS: return r else: @@ -326,6 +350,7 @@ class HTTPResponse(httplib.HTTPResponse): else: # 2.2 doesn't httplib.HTTPResponse.__init__(self, sock, debuglevel) self.fileno = sock.fileno + self.code = None self._rbuf = '' self._rbufsize = 8096 self._handler = None # inserted by the handler later |