diff options
author | Neal Gompa (ニール・ゴンパ) <ngompa13@gmail.com> | 2019-02-25 07:01:54 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-02-25 07:01:54 -0500 |
commit | 506b1012c67fd2f1de1f9c2d126bd3be07aa3664 (patch) | |
tree | a84ea4c2ae0a377d047748a6cbbe29ad3cdb2604 /urlgrabber/mirror.py | |
parent | ade507d18786ad6a148b71088245e154256bc9cb (diff) | |
parent | 8ca6fba2db8d481e440032610b2a7eba775046ff (diff) | |
download | urlgrabber-506b1012c67fd2f1de1f9c2d126bd3be07aa3664.tar.gz |
Merge pull request #9 from keszybz/py3k
Python3 compatibility
Diffstat (limited to 'urlgrabber/mirror.py')
-rw-r--r-- | urlgrabber/mirror.py | 86 |
1 files changed, 50 insertions, 36 deletions
diff --git a/urlgrabber/mirror.py b/urlgrabber/mirror.py index 988a309..75f0bcb 100644 --- a/urlgrabber/mirror.py +++ b/urlgrabber/mirror.py @@ -9,9 +9,9 @@ # Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the -# Free Software Foundation, Inc., -# 59 Temple Place, Suite 330, +# License along with this library; if not, write to the +# Free Software Foundation, Inc., +# 59 Temple Place, Suite 330, # Boston, MA 02111-1307 USA # This file is part of urlgrabber, a high-level cross-protocol url-grabber @@ -93,14 +93,27 @@ CUSTOMIZATION import sys import random -import thread # needed for locking to make this threadsafe -from grabber import URLGrabError, CallbackObject, DEBUG, _to_utf8 -from grabber import _run_callback, _do_raise -from grabber import exception2msg -from grabber import _TH +if sys.version_info >= (3,): + # We use a version check because python2 also has _thread + import _thread as thread +else: + import thread -def _(st): +try: + import urllib.parse as urlparse +except ImportError: + import urlparse + +from six import string_types + +from .grabber import URLGrabError, CallbackObject, DEBUG, _to_utf8 +from .grabber import _run_callback, _do_raise +from .grabber import exception2msg +from .grabber import _TH +from .grabber import _bytes_repr + +def _(st): return st class GrabRequest: @@ -142,7 +155,7 @@ class MirrorGroup: In addition to the required arguments "grabber" and "mirrors", MirrorGroup also takes the following optional arguments: - + default_action A dict that describes the actions to be taken upon failure @@ -173,7 +186,7 @@ class MirrorGroup: or by returning an action dict from the failure_callback return {'fail':0} in increasing precedence. - + If all three of these were done, the net result would be: {'increment': 0, # set in method 'increment_master': 1, # class default @@ -278,19 +291,14 @@ class MirrorGroup: # methods, they will be stripped before getting passed on to the # grabber options = ['default_action', 'failure_callback'] - + def _process_kwargs(self, kwargs): self.failure_callback = kwargs.get('failure_callback') self.default_action = kwargs.get('default_action') - + def _parse_mirrors(self, mirrors): - parsed_mirrors = [] - for m in mirrors: - if isinstance(m, basestring): - m = {'mirror': _to_utf8(m)} - parsed_mirrors.append(m) - return parsed_mirrors - + return [{'mirror':_to_utf8(m)} for m in mirrors] + def _load_gr(self, gr): # OVERRIDE IDEAS: # shuffle gr list @@ -316,7 +324,7 @@ class MirrorGroup: # the callback) cb = gr.kw.get('failure_callback') or self.failure_callback if cb: - if type(cb) == type( () ): + if isinstance(cb, tuple): cb, args, kwargs = cb else: args, kwargs = (), {} @@ -351,7 +359,7 @@ class MirrorGroup: urlopen, there's no good way for the mirror group to know that an error occurs mid-download (it's already returned and given you the file object). - + remove --- can have several values 0 do not remove the mirror from the list 1 remove the mirror for this download only @@ -373,7 +381,7 @@ class MirrorGroup: self._next += 1 if self._next >= len(self.mirrors): self._next = 0 self._lock.release() - + if action.get('remove', 1): del gr.mirrors[gr._next] elif action.get('increment', 1): @@ -381,9 +389,9 @@ class MirrorGroup: if gr._next >= len(gr.mirrors): gr._next = 0 if DEBUG: - grm = [m['mirror'] for m in gr.mirrors] + grm = [m['mirror'].decode() for m in gr.mirrors] DEBUG.info('GR mirrors: [%s] %i', ' '.join(grm), gr._next) - selfm = [m['mirror'] for m in self.mirrors] + selfm = [m['mirror'].decode() for m in self.mirrors] DEBUG.info('MAIN mirrors: [%s] %i', ' '.join(selfm), self._next) ##################################################################### @@ -394,11 +402,17 @@ class MirrorGroup: # by overriding the configuration methods :) def _join_url(self, base_url, rel_url): - if base_url.endswith('/') or rel_url.startswith('/'): - return base_url + rel_url + (scheme, netloc, path, query, fragid) = urlparse.urlsplit(base_url) + + if isinstance(base_url, bytes): + if not isinstance(rel_url, bytes): + rel_url = rel_url.encode('utf8') + sep = b'' if path.endswith(b'/') or rel_url.startswith(b'/') else b'/' else: - return base_url + '/' + rel_url - + sep = '' if path.endswith('/') or rel_url.startswith('/') else '/' + + return urlparse.urlunsplit((scheme, netloc, path + sep + rel_url, query, fragid)) + def _mirror_try(self, func, url, kw): gr = GrabRequest() gr.func = func @@ -412,7 +426,7 @@ class MirrorGroup: except KeyError: pass tries = 0 - while 1: + while True: tries += 1 mirrorchoice = self._get_mirror(gr) fullurl = self._join_url(mirrorchoice['mirror'], gr.url) @@ -420,10 +434,10 @@ class MirrorGroup: # apply mirrorchoice kwargs on top of grabber.opts opts = grabber.opts.derive(**mirrorchoice.get('kwargs', {})) func_ref = getattr(grabber, func) - if DEBUG: DEBUG.info('MIRROR: trying %s -> %s', url, fullurl) + if DEBUG: DEBUG.info('MIRROR: trying %s -> %s', _bytes_repr(url), _bytes_repr(fullurl)) try: return func_ref( *(fullurl,), opts=opts, **kw ) - except URLGrabError, e: + except URLGrabError as e: if DEBUG: DEBUG.info('MIRROR: failed') gr.errors.append((fullurl, exception2msg(e))) obj = CallbackObject() @@ -437,7 +451,7 @@ class MirrorGroup: def urlgrab(self, url, filename=None, **kwargs): kw = dict(kwargs) kw['filename'] = filename - if kw.get('async'): + if kw.get('async_') or kw.get('async'): # enable mirror failovers in async path kw['mirror_group'] = self, [], {}, set() kw['relative_url'] = url @@ -446,10 +460,10 @@ class MirrorGroup: func = 'urlgrab' try: return self._mirror_try(func, url, kw) - except URLGrabError, e: + except URLGrabError as e: obj = CallbackObject(url=url, filename=filename, exception=e, **kwargs) return _run_callback(kwargs.get('failfunc', _do_raise), obj) - + def urlopen(self, url, **kwargs): kw = dict(kwargs) func = 'urlopen' @@ -460,7 +474,7 @@ class MirrorGroup: kw['limit'] = limit func = 'urlread' return self._mirror_try(func, url, kw) - + class MGRandomStart(MirrorGroup): """A mirror group that starts at a random mirror in the list. |