summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: rtomayko <rtomayko> 2004-07-21 02:45:01 +0000
committer: rtomayko <rtomayko> 2004-07-21 02:45:01 +0000
commit: ff9e21d416dc5939b4d44c8c750100b1086d4d4e (patch)
tree: b0b096a2d7286d93d1870230a8e39c49cb0b0274
parent: 289ac1bebcbc34bbd9ae05c2768bde1899df7d0a (diff)
download: urlgrabber-ff9e21d416dc5939b4d44c8c750100b1086d4d4e.tar.gz
Fixed bug with urllib2.OpenerDirector caching.
Added cache_openers option (pass to any urlXXX function/method) to control OpenerDirector caching. The default is to cache openers.
-rw-r--r-- urlgrabber/grabber.py 18
1 files changed, 9 insertions, 9 deletions
diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py
index 4d1785c..fb574e7 100644
--- a/urlgrabber/grabber.py
+++ b/urlgrabber/grabber.py
@@ -255,7 +255,7 @@ BANDWIDTH THROTTLING
"""
-# $Id: grabber.py,v 1.23 2004/03/31 17:02:00 mstenner Exp $
+# $Id: grabber.py,v 1.24 2004/07/21 02:45:01 rtomayko Exp $
import os
import os.path
@@ -268,8 +268,6 @@ from stat import * # S_* and ST_*
from urlgrabber import __version__
-# XXX: leaving this global may cause problems with
-# multiple threads. -rtomayko
auth_handler = urllib2.HTTPBasicAuthHandler( \
urllib2.HTTPPasswordMgrWithDefaultRealm())
@@ -460,7 +458,8 @@ class URLGrabberOptions:
self.failure_callback = None
self.prefix = None
self.opener = None
-
+ self.cache_openers = True
+
class URLGrabber:
"""Provides easy opening of URLs with a variety of options.
@@ -666,11 +665,10 @@ class URLGrabberFileObject:
if range_handlers and (self.opts.range or self.opts.reget):
handlers.extend( range_handlers )
handlers.append( auth_handler )
- # Temporarily disabling this because it doesn't yet work
- # correctly. Some reget tests fail. I really don't understand
- # why, but some of the error handlers aren't set correctly.
- #self._opener = CachedOpenerDirector(*handlers)
- self._opener = urllib2.build_opener(*handlers)
+ if self.opts.cache_openers:
+ self._opener = CachedOpenerDirector(*handlers)
+ else:
+ self._opener = urllib2.build_opener(*handlers)
# OK, I don't like to do this, but otherwise, we end up with
# TWO user-agent headers.
self._opener.addheaders = []
@@ -871,6 +869,8 @@ _handler_cache = []
def CachedOpenerDirector(*handlers):
for (cached_handlers, opener) in _handler_cache:
if cached_handlers == handlers:
+ for handler in opener.handlers:
+ handler.add_parent(opener)
return opener
opener = urllib2.build_opener(*handlers)
_handler_cache.append( (handlers, opener) )