summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: mstenner <mstenner> 2005-02-14 21:55:06 +0000
committer: mstenner <mstenner> 2005-02-14 21:55:06 +0000
commit: c3b8f50636637fb6ede6187d2428202ba123e2d4 (patch)
tree: 7243eef9195dd55e76e7f82f6f4b599d6457500e
parent: 844d48d85e54ac6352f1eb17c5b523f0d0fc7187 (diff)
download: urlgrabber-c3b8f50636637fb6ede6187d2428202ba123e2d4.tar.gz
Fixed python 2.4 bug - added .code attribute to returned file objects.
Changed keepalive.HANDLE_ERRORS behavior for the way 2.4 does things.
-rw-r--r-- test/test_keepalive.py 32
-rw-r--r-- test/test_mirror.py 9
-rw-r--r-- urlgrabber/byterange.py 8
-rw-r--r-- urlgrabber/grabber.py 5
-rw-r--r-- urlgrabber/keepalive.py 39
5 files changed, 68 insertions, 25 deletions
diff --git a/test/test_keepalive.py b/test/test_keepalive.py
index 9d4cfb9..f0d18d2 100644
--- a/test/test_keepalive.py
+++ b/test/test_keepalive.py
@@ -21,7 +21,7 @@
"""keepalive.py tests"""
-# $Id: test_keepalive.py,v 1.8 2004/03/31 17:02:00 mstenner Exp $
+# $Id: test_keepalive.py,v 1.9 2005/02/14 21:55:06 mstenner Exp $
import sys
import os
@@ -30,7 +30,7 @@ import urllib2
import threading
import re
-from urllib2 import URLError
+from urllib2 import URLError, HTTPError
from base_test_code import *
@@ -92,6 +92,8 @@ class HTTPErrorTests(TestCase):
def setUp(self):
self.kh = keepalive.HTTPHandler()
self.opener = urllib2.build_opener(self.kh)
+ import sys
+ self.python_version = map(int, sys.version.split()[0].split('.'))
def tearDown(self):
self.kh.close_all()
@@ -121,10 +123,15 @@ class HTTPErrorTests(TestCase):
def test_404_handler_off(self):
"test that 404 works without fancy handler"
keepalive.HANDLE_ERRORS = 0
- fo = self.opener.open(ref_404)
- data = fo.read()
- fo.close()
- self.assertEqual((fo.status, fo.reason), (404, 'Not Found'))
+ ## see the HANDLE_ERRORS note in keepalive.py for discussion of
+ ## the changes in python 2.4
+ if self.python_version >= [2, 4]:
+ self.assertRaises(URLError, self.opener.open, ref_404)
+ else:
+ fo = self.opener.open(ref_404)
+ data = fo.read()
+ fo.close()
+ self.assertEqual((fo.status, fo.reason), (404, 'Not Found'))
def test_403_handler_on(self):
"test that 403 works with fancy handler"
@@ -134,10 +141,15 @@ class HTTPErrorTests(TestCase):
def test_403_handler_off(self):
"test that 403 works without fancy handler"
keepalive.HANDLE_ERRORS = 0
- fo = self.opener.open(ref_403)
- data = fo.read()
- fo.close()
- self.assertEqual((fo.status, fo.reason), (403, 'Forbidden'))
+ ## see the HANDLE_ERRORS note in keepalive.py for discussion of
+ ## the changes in python 2.4
+ if self.python_version >= [2, 4]:
+ self.assertRaises(URLError, self.opener.open, ref_403)
+ else:
+ fo = self.opener.open(ref_403)
+ data = fo.read()
+ fo.close()
+ self.assertEqual((fo.status, fo.reason), (403, 'Forbidden'))
class DroppedConnectionTests(TestCase):
def setUp(self):
diff --git a/test/test_mirror.py b/test/test_mirror.py
index 0bb266f..fc905ef 100644
--- a/test/test_mirror.py
+++ b/test/test_mirror.py
@@ -21,7 +21,7 @@
"""mirror.py tests"""
-# $Id: test_mirror.py,v 1.9 2004/08/20 19:30:24 mstenner Exp $
+# $Id: test_mirror.py,v 1.10 2005/02/14 21:55:06 mstenner Exp $
import sys
import os
@@ -104,12 +104,13 @@ class CallbackTests(TestCase):
def test_failure_callback(self):
"test that MG executes the failure callback correctly"
tricky_list = []
- def failure_callback(cb_obj, tl): tl.append(str(cb_obj.exception))
+ def failure_callback(cb_obj, tl):
+ tl.append(str(cb_obj.exception))
self.mg.failure_callback = failure_callback, (tricky_list, ), {}
data = self.mg.urlread('reference')
self.assert_(data == reference_data)
- self.assertEquals(tricky_list,
- ['[Errno 4] IOError: HTTP Error 403: Forbidden'])
+ self.assertEquals(tricky_list[0][:33],
+ '[Errno 4] IOError: HTTP Error 403')
def test_callback_reraise(self):
"test that the callback can correctly re-raise the exception"
diff --git a/urlgrabber/byterange.py b/urlgrabber/byterange.py
index 8f1ebe3..94f42a0 100644
--- a/urlgrabber/byterange.py
+++ b/urlgrabber/byterange.py
@@ -17,7 +17,7 @@
# This file is part of urlgrabber, a high-level cross-protocol url-grabber
# Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
-# $Id: byterange.py,v 1.8 2005/02/04 16:27:44 rtomayko Exp $
+# $Id: byterange.py,v 1.9 2005/02/14 21:55:07 mstenner Exp $
import os
import stat
@@ -60,7 +60,10 @@ class HTTPRangeHandler(urllib2.BaseHandler):
def http_error_206(self, req, fp, code, msg, hdrs):
# 206 Partial Content Response
- return urllib.addinfourl(fp, hdrs, req.get_full_url())
+ r = urllib.addinfourl(fp, hdrs, req.get_full_url())
+ r.code = code
+ r.msg = msg
+ return r
def http_error_416(self, req, fp, code, msg, hdrs):
# HTTP's Range Not Satisfiable error
@@ -233,6 +236,7 @@ class FileRangeHandler(urllib2.FileHandler):
(mtype or 'text/plain', size, modified)))
return urllib.addinfourl(fo, headers, 'file:'+file)
+
# FTP Range Support
# Unfortunately, a large amount of base FTP code had to be copied
# from urllib and urllib2 in order to insert the FTP REST command.
diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py
index 4484940..ecf9596 100644
--- a/urlgrabber/grabber.py
+++ b/urlgrabber/grabber.py
@@ -279,7 +279,7 @@ BANDWIDTH THROTTLING
"""
-# $Id: grabber.py,v 1.33 2005/01/14 18:21:41 rtomayko Exp $
+# $Id: grabber.py,v 1.34 2005/02/14 21:55:07 mstenner Exp $
import os
import os.path
@@ -834,7 +834,8 @@ class URLGrabberFileObject:
except RangeError, e:
raise URLGrabError(9, _('%s') % (e, ))
except IOError, e:
- if hasattr(e, 'reason') and isinstance(e.reason, TimeoutError):
+ if hasattr(e, 'reason') and have_socket_timeout and \
+ isinstance(e.reason, TimeoutError):
raise URLGrabError(12, _('Timeout: %s') % (e, ))
else:
raise URLGrabError(4, _('IOError: %s') % (e, ))
diff --git a/urlgrabber/keepalive.py b/urlgrabber/keepalive.py
index 7fc006c..5c6b426 100644
--- a/urlgrabber/keepalive.py
+++ b/urlgrabber/keepalive.py
@@ -73,14 +73,33 @@ EXTRA ATTRIBUTES AND METHODS
urllib2 tries to do clever things with error codes 301, 302, 401,
and 407, and it wraps the object upon return.
- You can optionally set the module-level global HANDLE_ERRORS to 0,
- in which case the handler will always return the object directly.
- If you like the fancy handling of errors, don't do this. If you
- prefer to see your error codes, then do.
+ For python versions earlier than 2.4, you can avoid this fancy error
+ handling by setting the module-level global HANDLE_ERRORS to zero.
+ You see, prior to 2.4, it's the HTTP Handler's job to determine what
+ to handle specially, and what to just pass up. HANDLE_ERRORS == 0
+ means "pass everything up". In python 2.4, however, this job no
+ longer belongs to the HTTP Handler and is now done by a NEW handler,
+ HTTPErrorProcessor. Here's the bottom line:
+
+ python version < 2.4
+ HANDLE_ERRORS == 1 (default) pass up 200, treat the rest as
+ errors
+ HANDLE_ERRORS == 0 pass everything up, error processing is
+ left to the calling code
+ python version >= 2.4
+ HANDLE_ERRORS == 1 pass up 200, treat the rest as errors
+ HANDLE_ERRORS == 0 (default) pass everything up, let the
+ other handlers (specifically,
+ HTTPErrorProcessor) decide what to do
+
+ In practice, setting the variable either way makes little difference
+ in python 2.4, so for the most consistent behavior across versions,
+ you probably just want to use the defaults, which will give you
+ exceptions on errors.
"""
-# $Id: keepalive.py,v 1.8 2004/03/31 17:02:01 mstenner Exp $
+# $Id: keepalive.py,v 1.9 2005/02/14 21:55:07 mstenner Exp $
import urllib2
import httplib
@@ -89,8 +108,12 @@ import thread
DEBUG = 0
def DBPRINT(*args): print ' '.join(args)
-HANDLE_ERRORS = 1
+import sys
+_python_version = map(int, sys.version.split()[0].split('.'))
+if _python_version < [2, 4]: HANDLE_ERRORS = 1
+else: HANDLE_ERRORS = 0
+
class ConnectionManager:
"""
The connection manager must be able to:
@@ -227,7 +250,8 @@ class HTTPHandler(urllib2.HTTPHandler):
r._host = host
r._url = req.get_full_url()
r._connection = h
-
+ r.code = r.status
+
if r.status == 200 or not HANDLE_ERRORS:
return r
else:
@@ -326,6 +350,7 @@ class HTTPResponse(httplib.HTTPResponse):
else: # 2.2 doesn't
httplib.HTTPResponse.__init__(self, sock, debuglevel)
self.fileno = sock.fileno
+ self.code = None
self._rbuf = ''
self._rbufsize = 8096
self._handler = None # inserted by the handler later