diff options
author | rtomayko <rtomayko> | 2004-07-21 02:45:01 +0000 |
---|---|---|
committer | rtomayko <rtomayko> | 2004-07-21 02:45:01 +0000 |
commit | ff9e21d416dc5939b4d44c8c750100b1086d4d4e (patch) | |
tree | b0b096a2d7286d93d1870230a8e39c49cb0b0274 | |
parent | 289ac1bebcbc34bbd9ae05c2768bde1899df7d0a (diff) | |
download | urlgrabber-ff9e21d416dc5939b4d44c8c750100b1086d4d4e.tar.gz |
Fixed bug with urllib2.OpenerDirector caching.
Added cache_openers option (pass to any urlXXX function/method) to control OpenerDirector caching. The default is to cache openers.
-rw-r--r-- | urlgrabber/grabber.py | 18 |
1 files changed, 9 insertions, 9 deletions
```diff
diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py
index 4d1785c..fb574e7 100644
--- a/urlgrabber/grabber.py
+++ b/urlgrabber/grabber.py
@@ -255,7 +255,7 @@ BANDWIDTH THROTTLING
 
 """
 
-# $Id: grabber.py,v 1.23 2004/03/31 17:02:00 mstenner Exp $
+# $Id: grabber.py,v 1.24 2004/07/21 02:45:01 rtomayko Exp $
 
 import os
 import os.path
@@ -268,8 +268,6 @@ from stat import * # S_* and ST_*
 
 from urlgrabber import __version__
 
-# XXX: leaving this global may cause problems with
-# multiple threads. -rtomayko
 auth_handler = urllib2.HTTPBasicAuthHandler( \
      urllib2.HTTPPasswordMgrWithDefaultRealm())
 
@@ -460,7 +458,8 @@ class URLGrabberOptions:
         self.failure_callback = None
         self.prefix = None
         self.opener = None
-        
+        self.cache_openers = True
+
 
 class URLGrabber:
     """Provides easy opening of URLs with a variety of options.
@@ -666,11 +665,10 @@ class URLGrabberFileObject:
             if range_handlers and (self.opts.range or self.opts.reget):
                 handlers.extend( range_handlers )
             handlers.append( auth_handler )
-            # Temporarily disabling this because it doesn't yet work
-            # correctly. Some reget tests fail. I really don't understand
-            # why, but some of the error handlers aren't set correctly.
-            #self._opener = CachedOpenerDirector(*handlers)
-            self._opener = urllib2.build_opener(*handlers)
+            if self.opts.cache_openers:
+                self._opener = CachedOpenerDirector(*handlers)
+            else:
+                self._opener = urllib2.build_opener(*handlers)
             # OK, I don't like to do this, but otherwise, we end up with
             # TWO user-agent headers.
             self._opener.addheaders = []
@@ -871,6 +869,8 @@ _handler_cache = []
 def CachedOpenerDirector(*handlers):
     for (cached_handlers, opener) in _handler_cache:
         if cached_handlers == handlers:
+            for handler in opener.handlers:
+                handler.add_parent(opener)
             return opener
     opener = urllib2.build_opener(*handlers)
     _handler_cache.append( (handlers, opener) )
```