summaryrefslogtreecommitdiff
path: root/urlgrabber/mirror.py
diff options
context:
space:
mode:
authorNeal Gompa (ニール・ゴンパ) <ngompa13@gmail.com>2019-02-25 07:01:54 -0500
committerGitHub <noreply@github.com>2019-02-25 07:01:54 -0500
commit506b1012c67fd2f1de1f9c2d126bd3be07aa3664 (patch)
treea84ea4c2ae0a377d047748a6cbbe29ad3cdb2604 /urlgrabber/mirror.py
parentade507d18786ad6a148b71088245e154256bc9cb (diff)
parent8ca6fba2db8d481e440032610b2a7eba775046ff (diff)
downloadurlgrabber-506b1012c67fd2f1de1f9c2d126bd3be07aa3664.tar.gz
Merge pull request #9 from keszybz/py3k
Python3 compatibility
Diffstat (limited to 'urlgrabber/mirror.py')
-rw-r--r--urlgrabber/mirror.py86
1 files changed, 50 insertions, 36 deletions
diff --git a/urlgrabber/mirror.py b/urlgrabber/mirror.py
index 988a309..75f0bcb 100644
--- a/urlgrabber/mirror.py
+++ b/urlgrabber/mirror.py
@@ -9,9 +9,9 @@
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the
-# Free Software Foundation, Inc.,
-# 59 Temple Place, Suite 330,
+# License along with this library; if not, write to the
+# Free Software Foundation, Inc.,
+# 59 Temple Place, Suite 330,
# Boston, MA 02111-1307 USA
# This file is part of urlgrabber, a high-level cross-protocol url-grabber
@@ -93,14 +93,27 @@ CUSTOMIZATION
import sys
import random
-import thread # needed for locking to make this threadsafe
-from grabber import URLGrabError, CallbackObject, DEBUG, _to_utf8
-from grabber import _run_callback, _do_raise
-from grabber import exception2msg
-from grabber import _TH
+if sys.version_info >= (3,):
+ # We use a version check because python2 also has _thread
+ import _thread as thread
+else:
+ import thread
-def _(st):
+try:
+ import urllib.parse as urlparse
+except ImportError:
+ import urlparse
+
+from six import string_types
+
+from .grabber import URLGrabError, CallbackObject, DEBUG, _to_utf8
+from .grabber import _run_callback, _do_raise
+from .grabber import exception2msg
+from .grabber import _TH
+from .grabber import _bytes_repr
+
+def _(st):
return st
class GrabRequest:
@@ -142,7 +155,7 @@ class MirrorGroup:
In addition to the required arguments "grabber" and "mirrors",
MirrorGroup also takes the following optional arguments:
-
+
default_action
A dict that describes the actions to be taken upon failure
@@ -173,7 +186,7 @@ class MirrorGroup:
or by returning an action dict from the failure_callback
return {'fail':0}
in increasing precedence.
-
+
If all three of these were done, the net result would be:
{'increment': 0, # set in method
'increment_master': 1, # class default
@@ -278,19 +291,14 @@ class MirrorGroup:
# methods, they will be stripped before getting passed on to the
# grabber
options = ['default_action', 'failure_callback']
-
+
def _process_kwargs(self, kwargs):
self.failure_callback = kwargs.get('failure_callback')
self.default_action = kwargs.get('default_action')
-
+
def _parse_mirrors(self, mirrors):
- parsed_mirrors = []
- for m in mirrors:
- if isinstance(m, basestring):
- m = {'mirror': _to_utf8(m)}
- parsed_mirrors.append(m)
- return parsed_mirrors
-
+ return [{'mirror':_to_utf8(m)} for m in mirrors]
+
def _load_gr(self, gr):
# OVERRIDE IDEAS:
# shuffle gr list
@@ -316,7 +324,7 @@ class MirrorGroup:
# the callback)
cb = gr.kw.get('failure_callback') or self.failure_callback
if cb:
- if type(cb) == type( () ):
+ if isinstance(cb, tuple):
cb, args, kwargs = cb
else:
args, kwargs = (), {}
@@ -351,7 +359,7 @@ class MirrorGroup:
urlopen, there's no good way for the mirror group to know that
an error occurs mid-download (it's already returned and given
you the file object).
-
+
remove --- can have several values
0 do not remove the mirror from the list
1 remove the mirror for this download only
@@ -373,7 +381,7 @@ class MirrorGroup:
self._next += 1
if self._next >= len(self.mirrors): self._next = 0
self._lock.release()
-
+
if action.get('remove', 1):
del gr.mirrors[gr._next]
elif action.get('increment', 1):
@@ -381,9 +389,9 @@ class MirrorGroup:
if gr._next >= len(gr.mirrors): gr._next = 0
if DEBUG:
- grm = [m['mirror'] for m in gr.mirrors]
+ grm = [m['mirror'].decode() for m in gr.mirrors]
DEBUG.info('GR mirrors: [%s] %i', ' '.join(grm), gr._next)
- selfm = [m['mirror'] for m in self.mirrors]
+ selfm = [m['mirror'].decode() for m in self.mirrors]
DEBUG.info('MAIN mirrors: [%s] %i', ' '.join(selfm), self._next)
#####################################################################
@@ -394,11 +402,17 @@ class MirrorGroup:
# by overriding the configuration methods :)
def _join_url(self, base_url, rel_url):
- if base_url.endswith('/') or rel_url.startswith('/'):
- return base_url + rel_url
+ (scheme, netloc, path, query, fragid) = urlparse.urlsplit(base_url)
+
+ if isinstance(base_url, bytes):
+ if not isinstance(rel_url, bytes):
+ rel_url = rel_url.encode('utf8')
+ sep = b'' if path.endswith(b'/') or rel_url.startswith(b'/') else b'/'
else:
- return base_url + '/' + rel_url
-
+ sep = '' if path.endswith('/') or rel_url.startswith('/') else '/'
+
+ return urlparse.urlunsplit((scheme, netloc, path + sep + rel_url, query, fragid))
+
def _mirror_try(self, func, url, kw):
gr = GrabRequest()
gr.func = func
@@ -412,7 +426,7 @@ class MirrorGroup:
except KeyError: pass
tries = 0
- while 1:
+ while True:
tries += 1
mirrorchoice = self._get_mirror(gr)
fullurl = self._join_url(mirrorchoice['mirror'], gr.url)
@@ -420,10 +434,10 @@ class MirrorGroup:
# apply mirrorchoice kwargs on top of grabber.opts
opts = grabber.opts.derive(**mirrorchoice.get('kwargs', {}))
func_ref = getattr(grabber, func)
- if DEBUG: DEBUG.info('MIRROR: trying %s -> %s', url, fullurl)
+ if DEBUG: DEBUG.info('MIRROR: trying %s -> %s', _bytes_repr(url), _bytes_repr(fullurl))
try:
return func_ref( *(fullurl,), opts=opts, **kw )
- except URLGrabError, e:
+ except URLGrabError as e:
if DEBUG: DEBUG.info('MIRROR: failed')
gr.errors.append((fullurl, exception2msg(e)))
obj = CallbackObject()
@@ -437,7 +451,7 @@ class MirrorGroup:
def urlgrab(self, url, filename=None, **kwargs):
kw = dict(kwargs)
kw['filename'] = filename
- if kw.get('async'):
+ if kw.get('async_') or kw.get('async'):
# enable mirror failovers in async path
kw['mirror_group'] = self, [], {}, set()
kw['relative_url'] = url
@@ -446,10 +460,10 @@ class MirrorGroup:
func = 'urlgrab'
try:
return self._mirror_try(func, url, kw)
- except URLGrabError, e:
+ except URLGrabError as e:
obj = CallbackObject(url=url, filename=filename, exception=e, **kwargs)
return _run_callback(kwargs.get('failfunc', _do_raise), obj)
-
+
def urlopen(self, url, **kwargs):
kw = dict(kwargs)
func = 'urlopen'
@@ -460,7 +474,7 @@ class MirrorGroup:
kw['limit'] = limit
func = 'urlread'
return self._mirror_try(func, url, kw)
-
+
class MGRandomStart(MirrorGroup):
"""A mirror group that starts at a random mirror in the list.