summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorZbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>2019-02-11 20:46:45 +0100
committerZbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>2019-02-11 20:59:29 +0100
commit4d97612f94e9d0799ad7786b6e7c6865ac21cd46 (patch)
tree8cc467b20f321ac22de13d2b79f7067076985ef1
parent0772e448446ca1252a4b1012ad2f9d2c96e29d4c (diff)
downloadurlgrabber-4d97612f94e9d0799ad7786b6e7c6865ac21cd46.tar.gz
Remove trailing whitespace
-rw-r--r--test/grabberperf.py18
-rw-r--r--test/munittest.py10
-rw-r--r--test/runtests.py16
-rw-r--r--test/test_byterange.py44
-rw-r--r--test/test_grabber.py78
-rw-r--r--test/test_mirror.py32
-rw-r--r--test/threading/batchgrabber.py24
-rw-r--r--urlgrabber/byterange.py108
-rw-r--r--urlgrabber/grabber.py254
-rw-r--r--urlgrabber/mirror.py28
-rw-r--r--urlgrabber/progress.py54
11 files changed, 333 insertions, 333 deletions
diff --git a/test/grabberperf.py b/test/grabberperf.py
index 6eeaf71..4cf26f6 100644
--- a/test/grabberperf.py
+++ b/test/grabberperf.py
@@ -11,9 +11,9 @@
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the
-# Free Software Foundation, Inc.,
-# 59 Temple Place, Suite 330,
+# License along with this library; if not, write to the
+# Free Software Foundation, Inc.,
+# 59 Temple Place, Suite 330,
# Boston, MA 02111-1307 USA
# This file is part of urlgrabber, a high-level cross-protocol url-grabber
@@ -46,7 +46,7 @@ def main():
# remove temp files
os.unlink(tempsrc)
os.unlink(tempdst)
-
+
def setuptemp(size):
if DEBUG: print 'writing %d KB to temporary file (%s).' % (size / 1024, tempsrc)
file = open(tempsrc, 'w', 1024)
@@ -55,7 +55,7 @@ def setuptemp(size):
file.write(chars[i % 10])
file.flush()
file.close()
-
+
def speedtest(size):
setuptemp(size)
full_times = []
@@ -70,7 +70,7 @@ def speedtest(size):
print 'not using progress meter'
else:
tpm = text_progress_meter(fo=open('/dev/null', 'w'))
-
+
# to address concerns that the overhead from the progress meter
# and throttling slow things down, we do this little test.
#
@@ -81,16 +81,16 @@ def speedtest(size):
# note: it _is_ even slower to direct the progress meter to a real
# tty or file, but I'm just interested in the overhead from _this_
# module.
-
+
# get it nicely cached before we start comparing
if DEBUG: print 'pre-caching'
for i in range(100):
urlgrab(tempsrc, tempdst, copy_local=1, throttle=None, proxies=proxies)
-
+
if DEBUG: print 'running speed test.'
reps = 500
for i in range(reps):
- if DEBUG:
+ if DEBUG:
print '\r%4i/%-4i' % (i+1, reps),
sys.stdout.flush()
t = time.time()
diff --git a/test/munittest.py b/test/munittest.py
index 437248d..f79a7cb 100644
--- a/test/munittest.py
+++ b/test/munittest.py
@@ -463,12 +463,12 @@ class TestSuite:
self._tests = []
self.addTests(tests)
self.description = description or '(no description)'
-
+
def __repr__(self):
return "<%s tests=%s>" % (_strclass(self.__class__), self._tests)
__str__ = __repr__
-
+
def shortDescription(self):
return self.description
@@ -494,7 +494,7 @@ class TestSuite:
def __call__(self, result):
try: result.startSuite(self)
except AttributeError: pass
-
+
for test in self._tests:
if result.shouldStop:
break
@@ -575,7 +575,7 @@ class TestLoader:
description = (description.splitlines()[0]).strip()
suite = self.suiteClass(instance_list, description)
return suite
-
+
def loadTestsFromModule(self, module):
"""Return a suite of all tests cases contained in the given module"""
tests = []
@@ -735,7 +735,7 @@ class _TextTestResult(TestResult):
except AttributeError: desc = '(no description)'
self.stream.writeln(desc)
self.depth += 1
-
+
def startTest(self, test):
TestResult.startTest(self, test)
if self.showAll:
diff --git a/test/runtests.py b/test/runtests.py
index c48bd1d..562edc0 100644
--- a/test/runtests.py
+++ b/test/runtests.py
@@ -1,19 +1,19 @@
#!/usr/bin/python
"""Usage: python runtests.py [OPTIONS]
-Quick script to run all unit tests from source directory
+Quick script to run all unit tests from source directory
(e.g. without having to install.)
OPTIONS:
-
- -d, --descriptions=NUM Set to 0 to turn off printing
+
+ -d, --descriptions=NUM Set to 0 to turn off printing
test doc strings as descriptions.
-v, --verbosity=NUM Output verbosity level. Defaults to
- 2 which is one line of info per test. Set
+ 2 which is one line of info per test. Set
to 1 to get one char of info per test
or 0 to disable status output completely.
"""
-
+
# $Id: runtests.py,v 1.7 2004/03/31 17:02:00 mstenner Exp $
import sys
@@ -31,7 +31,7 @@ def main():
# it's okay to import now that sys.path is setup.
import test_grabber, test_byterange, test_mirror
suite = TestSuite( (test_grabber.suite(),
- test_byterange.suite(),
+ test_byterange.suite(),
test_mirror.suite()) )
suite.description = 'urlgrabber tests'
runner = TextTestRunner(stream=sys.stdout,
@@ -52,9 +52,9 @@ def parse_args():
elif o in ('-v', '--verbosity'):
verbosity = int(a)
return (descriptions,verbosity)
-
+
def usage():
print __doc__
-
+
if __name__ == '__main__':
main()
diff --git a/test/test_byterange.py b/test/test_byterange.py
index 0f75807..3322b1b 100644
--- a/test/test_byterange.py
+++ b/test/test_byterange.py
@@ -11,9 +11,9 @@
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the
-# Free Software Foundation, Inc.,
-# 59 Temple Place, Suite 330,
+# License along with this library; if not, write to the
+# Free Software Foundation, Inc.,
+# 59 Temple Place, Suite 330,
# Boston, MA 02111-1307 USA
# This file is part of urlgrabber, a high-level cross-protocol url-grabber
@@ -32,17 +32,17 @@ from base_test_code import *
class RangeableFileObjectTestCase(TestCase):
"""Test range.RangeableFileObject class"""
-
+
def setUp(self):
# 0 1 2 3 4 5 6 7 8 9
# 0123456789012345678901234567890123456789012345678901234567 890123456789012345678901234567890
self.test = 'Why cannot we write the entire 24 volumes of Encyclopaedia\nBrittanica on the head of a pin?\n'
self.fo = StringIO(self.test)
self.rfo = RangeableFileObject(self.fo, (20,69))
-
+
def tearDown(self):
pass
-
+
def test_seek(self):
"""RangeableFileObject.seek()"""
self.rfo.seek(11)
@@ -51,25 +51,25 @@ class RangeableFileObjectTestCase(TestCase):
self.assertEquals('volumes', self.rfo.read(7))
self.rfo.seek(1,1)
self.assertEquals('of', self.rfo.read(2))
-
+
def test_read(self):
"""RangeableFileObject.read()"""
self.assertEquals('the', self.rfo.read(3))
self.assertEquals(' entire 24 volumes of ', self.rfo.read(22))
self.assertEquals('Encyclopaedia\nBrittanica', self.rfo.read(50))
self.assertEquals('', self.rfo.read())
-
+
def test_readall(self):
"""RangeableFileObject.read(): to end of file."""
rfo = RangeableFileObject(StringIO(self.test),(11,))
self.assertEquals(self.test[11:],rfo.read())
-
+
def test_readline(self):
"""RangeableFileObject.readline()"""
self.assertEquals('the entire 24 volumes of Encyclopaedia\n', self.rfo.readline())
self.assertEquals('Brittanica', self.rfo.readline())
self.assertEquals('', self.rfo.readline())
-
+
def test_tell(self):
"""RangeableFileObject.tell()"""
self.assertEquals(0,self.rfo.tell())
@@ -77,20 +77,20 @@ class RangeableFileObjectTestCase(TestCase):
self.assertEquals(5,self.rfo.tell())
self.rfo.readline()
self.assertEquals(39,self.rfo.tell())
-
+
class RangeModuleTestCase(TestCase):
"""Test module level functions defined in range.py"""
def setUp(self):
pass
-
+
def tearDown(self):
pass
-
+
def test_range_tuple_normalize(self):
"""byterange.range_tuple_normalize()"""
from urlgrabber.byterange import range_tuple_normalize
from urlgrabber.byterange import RangeError
- tests = (
+ tests = (
((None,50), (0,50)),
((500,600), (500,600)),
((500,), (500,'')),
@@ -101,15 +101,15 @@ class RangeModuleTestCase(TestCase):
)
for test, ex in tests:
self.assertEquals( range_tuple_normalize(test), ex )
-
+
try: range_tuple_normalize( (10,8) )
except RangeError: pass
else: self.fail("range_tuple_normalize( (10,8) ) should have raised RangeError")
-
+
def test_range_header_to_tuple(self):
"""byterange.range_header_to_tuple()"""
from urlgrabber.byterange import range_header_to_tuple
- tests = (
+ tests = (
('bytes=500-600', (500,601)),
('bytes=500-', (500,'')),
('bla bla', ()),
@@ -117,11 +117,11 @@ class RangeModuleTestCase(TestCase):
)
for test, ex in tests:
self.assertEquals( range_header_to_tuple(test), ex )
-
+
def test_range_tuple_to_header(self):
"""byterange.range_tuple_to_header()"""
from urlgrabber.byterange import range_tuple_to_header
- tests = (
+ tests = (
((500,600), 'bytes=500-599'),
((500,''), 'bytes=500-'),
((500,), 'bytes=500-'),
@@ -131,15 +131,15 @@ class RangeModuleTestCase(TestCase):
)
for test, ex in tests:
self.assertEquals( range_tuple_to_header(test), ex )
-
+
try: range_tuple_to_header( ('not an int',500) )
except ValueError: pass
else: self.fail("range_tuple_to_header( ('not an int',500) ) should have raised ValueError")
-
+
try: range_tuple_to_header( (0,'not an int') )
except ValueError: pass
else: self.fail("range_tuple_to_header( (0, 'not an int') ) should have raised ValueError")
-
+
def suite():
tl = TestLoader()
return tl.loadTestsFromModule(sys.modules[__name__])
diff --git a/test/test_grabber.py b/test/test_grabber.py
index 8e45d25..2bfb1b8 100644
--- a/test/test_grabber.py
+++ b/test/test_grabber.py
@@ -11,9 +11,9 @@
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the
-# Free Software Foundation, Inc.,
-# 59 Temple Place, Suite 330,
+# License along with this library; if not, write to the
+# Free Software Foundation, Inc.,
+# 59 Temple Place, Suite 330,
# Boston, MA 02111-1307 USA
# This file is part of urlgrabber, a high-level cross-protocol url-grabber
@@ -38,7 +38,7 @@ from urlgrabber.grabber import URLGrabber, URLGrabError, CallbackObject, \
from urlgrabber.progress import text_progress_meter
class FileObjectTests(TestCase):
-
+
def setUp(self):
self.filename = tempfile.mktemp()
fo = file(self.filename, 'wb')
@@ -83,7 +83,7 @@ class FileObjectTests(TestCase):
self.fo_output.write(s)
if not s: break
self.assert_(reference_data == self.fo_output.getvalue())
-
+
class HTTPTests(TestCase):
def test_reference_file(self):
"download reference file via HTTP"
@@ -110,41 +110,41 @@ class URLGrabberModuleTestCase(TestCase):
"""Test module level functions defined in grabber.py"""
def setUp(self):
pass
-
+
def tearDown(self):
pass
-
+
def test_urlopen(self):
"module-level urlopen() function"
fo = urlgrabber.urlopen('http://www.python.org')
fo.close()
-
+
def test_urlgrab(self):
"module-level urlgrab() function"
outfile = tempfile.mktemp()
- filename = urlgrabber.urlgrab('http://www.python.org',
+ filename = urlgrabber.urlgrab('http://www.python.org',
filename=outfile)
os.unlink(outfile)
-
+
def test_urlread(self):
"module-level urlread() function"
s = urlgrabber.urlread('http://www.python.org')
-
+
class URLGrabberTestCase(TestCase):
"""Test grabber.URLGrabber class"""
-
+
def setUp(self):
-
+
self.meter = text_progress_meter( fo=cStringIO.StringIO() )
pass
-
+
def tearDown(self):
pass
-
+
def testKeywordArgs(self):
"""grabber.URLGrabber.__init__() **kwargs handling.
-
+
This is a simple test that just passes some arbitrary
values into the URLGrabber constructor and checks that
they've been set properly.
@@ -171,8 +171,8 @@ class URLGrabberTestCase(TestCase):
self.assertEquals( opts.user_agent, 'test ua/1.0' )
self.assertEquals( opts.proxies, {'http' : 'http://www.proxy.com:9090'} )
self.assertEquals( opts.opener, opener )
-
- nopts = grabber.URLGrabberOptions(delegate=opts, throttle=0.5,
+
+ nopts = grabber.URLGrabberOptions(delegate=opts, throttle=0.5,
copy_local=0)
self.assertEquals( nopts.progress_obj, self.meter )
self.assertEquals( nopts.throttle, 0.5 )
@@ -185,7 +185,7 @@ class URLGrabberTestCase(TestCase):
self.assertEquals( nopts.proxies, {'http' : 'http://www.proxy.com:9090'} )
nopts.opener = None
self.assertEquals( nopts.opener, None )
-
+
def test_make_callback(self):
"""grabber.URLGrabber._make_callback() tests"""
def cb(e): pass
@@ -197,7 +197,7 @@ class URLGrabberTestCase(TestCase):
class URLParserTestCase(TestCase):
def setUp(self):
pass
-
+
def tearDown(self):
pass
@@ -207,7 +207,7 @@ class URLParserTestCase(TestCase):
bases = [base, base+'/']
filename = 'bar/baz'
target = base + '/' + filename
-
+
for b in bases:
g = URLGrabber(prefix=b)
(url, parts) = g.opts.urlparser.parse(filename, g.opts)
@@ -219,7 +219,7 @@ class URLParserTestCase(TestCase):
except IndexError: quote = None
g.opts.quote = quote
(url, parts) = g.opts.urlparser.parse(urllist[0], g.opts)
-
+
if 1:
self.assertEquals(url, urllist[1])
self.assertEquals(parts, urllist[2])
@@ -232,7 +232,7 @@ class URLParserTestCase(TestCase):
print ' ' + url
print ' ' + urllist[2]
print ' ' + parts
-
+
url_tests_all = (
['http://host.com/path/basename.ext?arg1=val1&arg2=val2#hash',
@@ -252,13 +252,13 @@ class URLParserTestCase(TestCase):
'http://host.com/Should%2520Not',
('http', 'host.com', '/Should%2520Not', '', '', ''), 1],
)
-
+
url_tests_posix = (
['/etc/passwd',
'file:///etc/passwd',
('file', '', '/etc/passwd', '', '', '')],
)
-
+
url_tests_nt = (
[r'\\foo.com\path\file.ext',
'file://foo.com/path/file.ext',
@@ -296,7 +296,7 @@ class FailureTestCase(TestCase):
self.obj = obj
self.args = args
self.kwargs = kwargs
-
+
def test_failure_callback_called(self):
"failure callback is called on retry"
self.failure_callback_called = 0
@@ -340,7 +340,7 @@ class InterruptTestCase(TestCase):
self.kwargs = kwargs
if kwargs.get('exception', None):
raise kwargs['exception']
-
+
def test_interrupt_callback_called(self):
"interrupt callback is called on retry"
self.interrupt_callback_called = 0
@@ -367,12 +367,12 @@ class CheckfuncTestCase(TestCase):
self.g = grabber.URLGrabber(checkfunc=cf)
self.filename = tempfile.mktemp()
self.data = short_reference_data
-
+
def tearDown(self):
try: os.unlink(self.filename)
except: pass
if hasattr(self, 'obj'): del self.obj
-
+
def _checkfunc(self, obj, *args, **kwargs):
self.obj = obj
self.args = args
@@ -389,7 +389,7 @@ class CheckfuncTestCase(TestCase):
if data == self.data: return
else: raise URLGrabError(-2, "data doesn't match")
-
+
def _check_common_args(self):
"check the args that are common to both urlgrab and urlread"
self.assert_(hasattr(self, 'obj'))
@@ -456,7 +456,7 @@ class RegetTestBase:
data = fo.read()
fo.close()
return data
-
+
class CommonRegetTests(RegetTestBase, TestCase):
def test_bad_reget_type(self):
"exception raised for illegal reget mode"
@@ -487,14 +487,14 @@ class HTTPRegetTests(FTPRegetTests):
def setUp(self):
RegetTestBase.setUp(self)
self.url = short_ref_http
-
+
def test_older_check_timestamp(self):
try:
# define this here rather than in the FTP tests because currently,
# we get no timestamp information back from ftp servers.
self._make_half_zero_file()
ts = 1600000000 # set local timestamp to 2020
- os.utime(self.filename, (ts, ts))
+ os.utime(self.filename, (ts, ts))
self.grabber.urlgrab(self.url, self.filename, reget='check_timestamp')
data = self._read_file()
@@ -502,21 +502,21 @@ class HTTPRegetTests(FTPRegetTests):
self.assertEquals(data[self.hl:], self.ref[self.hl:])
except NotImplementedError:
self.skip()
-
+
def test_newer_check_timestamp(self):
try:
# define this here rather than in the FTP tests because currently,
# we get no timestamp information back from ftp servers.
self._make_half_zero_file()
ts = 1 # set local timestamp to 1969
- os.utime(self.filename, (ts, ts))
+ os.utime(self.filename, (ts, ts))
self.grabber.urlgrab(self.url, self.filename, reget='check_timestamp')
data = self._read_file()
self.assertEquals(data, self.ref)
except:
self.skip()
-
+
class FileRegetTests(HTTPRegetTests):
def setUp(self):
self.ref = short_reference_data
@@ -525,7 +525,7 @@ class FileRegetTests(HTTPRegetTests):
tmpfo.write(self.ref)
tmpfo.close()
self.tmp = tmp
-
+
(url, parts) = grabber.default_grabber.opts.urlparser.parse(
tmp, grabber.default_grabber.opts)
self.url = url
@@ -552,7 +552,7 @@ class ProFTPDSucksTests(TestCase):
def test_restart_workaround(self):
inst = grabber.URLGrabber()
rslt = inst.urlread(self.url, range=(500, 1000))
-
+
class BaseProxyTests(TestCase):
good_p = '%s://%s:%s@%s:%i' % (proxy_proto, proxy_user,
good_proxy_pass, proxy_host, proxy_port)
@@ -605,4 +605,4 @@ if __name__ == '__main__':
grabber.DEBUG = 0
runner = TextTestRunner(stream=sys.stdout,descriptions=1,verbosity=2)
runner.run(suite())
-
+
diff --git a/test/test_mirror.py b/test/test_mirror.py
index a6bb6cb..921c6f3 100644
--- a/test/test_mirror.py
+++ b/test/test_mirror.py
@@ -11,9 +11,9 @@
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the
-# Free Software Foundation, Inc.,
-# 59 Temple Place, Suite 330,
+# License along with this library; if not, write to the
+# Free Software Foundation, Inc.,
+# 59 Temple Place, Suite 330,
# Boston, MA 02111-1307 USA
# This file is part of urlgrabber, a high-level cross-protocol url-grabber
@@ -110,7 +110,7 @@ class CallbackTests(TestCase):
# test assumes mirrors are not re-ordered
urlgrabber.grabber._TH.hosts.clear()
self.mg = MirrorGroup(self.g, fullmirrors)
-
+
def test_failure_callback(self):
"test that MG executes the failure callback correctly"
tricky_list = []
@@ -158,7 +158,7 @@ class FailoverTests(TestCase):
fo = open(filename)
contents = fo.read()
fo.close()
-
+
# first be sure that the first mirror failed and that the
# callback was called
self.assertEqual(len(elist), 1)
@@ -172,7 +172,7 @@ class FakeGrabber:
self.index = 0
self.calls = []
self.opts = URLGrabberOptions()
-
+
def urlgrab(self, url, filename=None, **kwargs):
self.calls.append( (url, filename) )
res = self.resultlist[self.index]
@@ -191,7 +191,7 @@ class ActionTests(TestCase):
def tearDown(self):
urlgrabber.mirror.DEBUG = self.db
-
+
def test_defaults(self):
'test default action policy'
self.mg.urlgrab('somefile')
@@ -207,10 +207,10 @@ class ActionTests(TestCase):
'GR mirrors: [c d e f] 0',
'MAIN mirrors: [a b c d e f] 2',
'MIRROR: trying somefile -> c/somefile']
-
+
self.assertEquals(self.g.calls, expected_calls)
self.assertEquals(urlgrabber.mirror.DEBUG.logs, expected_logs)
-
+
def test_instance_action(self):
'test the effects of passed-in default_action'
self.mg.default_action = {'remove_master': 1}
@@ -227,10 +227,10 @@ class ActionTests(TestCase):
'GR mirrors: [c d e f] 0',
'MAIN mirrors: [c d e f] 0',
'MIRROR: trying somefile -> c/somefile']
-
+
self.assertEquals(self.g.calls, expected_calls)
self.assertEquals(urlgrabber.mirror.DEBUG.logs, expected_logs)
-
+
def test_method_action(self):
'test the effects of method-level default_action'
self.mg.urlgrab('somefile', default_action={'remove_master': 1})
@@ -246,13 +246,13 @@ class ActionTests(TestCase):
'GR mirrors: [c d e f] 0',
'MAIN mirrors: [c d e f] 0',
'MIRROR: trying somefile -> c/somefile']
-
+
self.assertEquals(self.g.calls, expected_calls)
self.assertEquals(urlgrabber.mirror.DEBUG.logs, expected_logs)
-
+
def callback(self, e): return {'fail': 1}
-
+
def test_callback_action(self):
'test the effects of a callback-returned action'
self.assertRaises(URLGrabError, self.mg.urlgrab, 'somefile',
@@ -267,7 +267,7 @@ class ActionTests(TestCase):
self.assertEquals(self.g.calls, expected_calls)
self.assertEquals(urlgrabber.mirror.DEBUG.logs, expected_logs)
-
+
import threading, socket
LOCALPORT = 'localhost', 2000
@@ -387,4 +387,4 @@ def suite():
if __name__ == '__main__':
runner = TextTestRunner(stream=sys.stdout,descriptions=1,verbosity=2)
runner.run(suite())
-
+
diff --git a/test/threading/batchgrabber.py b/test/threading/batchgrabber.py
index ce2b34a..9fab7fc 100644
--- a/test/threading/batchgrabber.py
+++ b/test/threading/batchgrabber.py
@@ -9,9 +9,9 @@
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the
-# Free Software Foundation, Inc.,
-# 59 Temple Place, Suite 330,
+# License along with this library; if not, write to the
+# Free Software Foundation, Inc.,
+# 59 Temple Place, Suite 330,
# Boston, MA 02111-1307 USA
# This file is part of urlgrabber, a high-level cross-protocol url-grabber
@@ -19,10 +19,10 @@
"""Module for testing urlgrabber under multiple threads.
-This module can be used from the command line. Each argument is
+This module can be used from the command line. Each argument is
a URL to grab.
-The BatchURLGrabber class has an interface similar to URLGrabber
+The BatchURLGrabber class has an interface similar to URLGrabber
but instead of pulling files when urlgrab is called, the request
is queued. Calling BatchURLGrabber.batchgrab causes all files to
be pulled in multiple threads.
@@ -48,10 +48,10 @@ class BatchURLGrabber:
self.queue = []
self.threads = []
self.sem = Semaphore()
-
+
def urlgrab(self, url, filename=None, **kwargs):
self.queue.append( (url, filename, kwargs) )
-
+
def batchgrab(self):
if hasattr(self.grabber.opts.progress_obj, 'start'):
self.grabber.opts.progress_obj.start(len(self.queue))
@@ -71,7 +71,7 @@ class BatchURLGrabber:
#if len(self.threads) == self.maxthreads:
# sleep(0.2)
sleep(0.2)
-
+
class Worker(Thread):
def __init__(self, parent, url, filename, kwargs):
Thread.__init__(self)
@@ -79,7 +79,7 @@ class Worker(Thread):
self.url = url
self.filename = filename
self.kwargs = kwargs
-
+
def run(self):
if DEBUG: print "worker thread started."
grabber = self.parent.grabber
@@ -90,7 +90,7 @@ class Worker(Thread):
rslt = self.parent.grabber.urlgrab(self.url, self.filename, **self.kwargs)
except URLGrabError as e:
print '%s, %s' % (e, self.url)
-
+
def main():
progress_obj = None
# uncomment to play with BatchProgressMeter (doesn't work right now)
@@ -103,8 +103,8 @@ def main():
g.batchgrab()
except KeyboardInterrupt:
sys.exit(1)
-
+
if DEBUG: print "after batchgrab"
-
+
if __name__ == '__main__':
main()
diff --git a/urlgrabber/byterange.py b/urlgrabber/byterange.py
index d2d26ea..fadc9c0 100644
--- a/urlgrabber/byterange.py
+++ b/urlgrabber/byterange.py
@@ -9,9 +9,9 @@
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the
-# Free Software Foundation, Inc.,
-# 59 Temple Place, Suite 330,
+# License along with this library; if not, write to the
+# Free Software Foundation, Inc.,
+# 59 Temple Place, Suite 330,
# Boston, MA 02111-1307 USA
# This file is part of urlgrabber, a high-level cross-protocol url-grabber
@@ -26,46 +26,46 @@ import rfc822
DEBUG = None
-try:
+try:
from cStringIO import StringIO
-except ImportError as msg:
+except ImportError as msg:
from StringIO import StringIO
class RangeError(IOError):
"""Error raised when an unsatisfiable range is requested."""
pass
-
+
class HTTPRangeHandler(urllib2.BaseHandler):
"""Handler that enables HTTP Range headers.
-
+
This was extremely simple. The Range header is a HTTP feature to
- begin with so all this class does is tell urllib2 that the
- "206 Partial Content" response from the HTTP server is what we
+ begin with so all this class does is tell urllib2 that the
+ "206 Partial Content" response from the HTTP server is what we
expected.
-
+
Example:
import urllib2
import byterange
-
+
range_handler = range.HTTPRangeHandler()
opener = urllib2.build_opener(range_handler)
-
+
# install it
urllib2.install_opener(opener)
-
+
# create Request and set Range header
req = urllib2.Request('http://www.python.org/')
req.header['Range'] = 'bytes=30-50'
f = urllib2.urlopen(req)
"""
-
+
def http_error_206(self, req, fp, code, msg, hdrs):
# 206 Partial Content Response
r = urllib.addinfourl(fp, hdrs, req.get_full_url())
r.code = code
r.msg = msg
return r
-
+
def http_error_416(self, req, fp, code, msg, hdrs):
# HTTP's Range Not Satisfiable error
raise RangeError(9, 'Requested Range Not Satisfiable')
@@ -81,13 +81,13 @@ class HTTPSRangeHandler(HTTPRangeHandler):
class RangeableFileObject:
"""File object wrapper to enable raw range handling.
- This was implemented primarilary for handling range
- specifications for file:// urls. This object effectively makes
- a file object look like it consists only of a range of bytes in
+ This was implemented primarilary for handling range
+ specifications for file:// urls. This object effectively makes
+ a file object look like it consists only of a range of bytes in
the stream.
-
+
Examples:
- # expose 10 bytes, starting at byte position 20, from
+ # expose 10 bytes, starting at byte position 20, from
# /etc/aliases.
>>> fo = RangeableFileObject(file('/etc/passwd', 'r'), (20,30))
# seek seeks within the range (to position 23 in this case)
@@ -99,11 +99,11 @@ class RangeableFileObject:
# byte in the range. the following will return only 7 bytes.
>>> fo.read(30)
"""
-
+
def __init__(self, fo, rangetup):
"""Create a RangeableFileObject.
- fo -- a file like object. only the read() method need be
- supported but supporting an optimized seek() is
+ fo -- a file like object. only the read() method need be
+ supported but supporting an optimized seek() is
preferable.
rangetup -- a (firstbyte,lastbyte) tuple specifying the range
to work over.
@@ -113,7 +113,7 @@ class RangeableFileObject:
(self.firstbyte, self.lastbyte) = range_tuple_normalize(rangetup)
self.realpos = 0
self._do_seek(self.firstbyte)
-
+
def __getattr__(self, name):
"""This effectively allows us to wrap at the instance level.
Any attribute not found in _this_ object will be searched for
@@ -121,16 +121,16 @@ class RangeableFileObject:
if hasattr(self.fo, name):
return getattr(self.fo, name)
raise AttributeError(name)
-
+
def tell(self):
"""Return the position within the range.
- This is different from fo.seek in that position 0 is the
+ This is different from fo.seek in that position 0 is the
first byte position of the range tuple. For example, if
this object was created with a range tuple of (500,899),
tell() will return 0 when at byte position 500 of the file.
"""
return (self.realpos - self.firstbyte)
-
+
def seek(self,offset,whence=0):
"""Seek within the byte range.
Positioning is identical to that described under tell().
@@ -143,13 +143,13 @@ class RangeableFileObject:
elif whence == 2: # absolute from end of file
# XXX: are we raising the right Error here?
raise IOError('seek from end of file not supported.')
-
+
# do not allow seek past lastbyte in range
if self.lastbyte and (realoffset >= self.lastbyte):
realoffset = self.lastbyte
-
+
self._do_seek(realoffset - self.realpos)
-
+
def read(self, size=-1):
"""Read within the range.
This method will limit the size read based on the range.
@@ -158,7 +158,7 @@ class RangeableFileObject:
rslt = self.fo.read(size)
self.realpos += len(rslt)
return rslt
-
+
def readline(self, size=-1):
"""Read lines within the range.
This method will limit the size read based on the range.
@@ -167,7 +167,7 @@ class RangeableFileObject:
rslt = self.fo.readline(size)
self.realpos += len(rslt)
return rslt
-
+
def _calc_read_size(self, size):
"""Handles calculating the amount of data to read based on
the range.
@@ -179,7 +179,7 @@ class RangeableFileObject:
else:
size = (self.lastbyte - self.realpos)
return size
-
+
def _do_seek(self,offset):
"""Seek based on whether wrapped object supports seek().
offset is relative to the current position (self.realpos).
@@ -190,7 +190,7 @@ class RangeableFileObject:
else:
self.fo.seek(self.realpos + offset)
self.realpos+= offset
-
+
def _poor_mans_seek(self,offset):
"""Seek by calling the wrapped file objects read() method.
This is used for file like objects that do not have native
@@ -198,7 +198,7 @@ class RangeableFileObject:
to manually seek to the desired position.
offset -- read this number of bytes from the wrapped
file object.
- raise RangeError if we encounter EOF before reaching the
+ raise RangeError if we encounter EOF before reaching the
specified offset.
"""
pos = 0
@@ -247,10 +247,10 @@ class FileRangeHandler(urllib2.FileHandler):
return urllib.addinfourl(fo, headers, 'file:'+file)
-# FTP Range Support
+# FTP Range Support
# Unfortunately, a large amount of base FTP code had to be copied
# from urllib and urllib2 in order to insert the FTP REST command.
-# Code modifications for range support have been commented as
+# Code modifications for range support have been commented as
# follows:
# -- range support modifications start/end here
@@ -282,7 +282,7 @@ class FTPRangeHandler(urllib2.FTPHandler):
host = unquote(host)
user = unquote(user or '')
passwd = unquote(passwd or '')
-
+
try:
host = socket.gethostbyname(host)
except socket.error as msg:
@@ -301,22 +301,22 @@ class FTPRangeHandler(urllib2.FTPHandler):
if attr.lower() == 'type' and \
value in ('a', 'A', 'i', 'I', 'd', 'D'):
type = value.upper()
-
+
# -- range support modifications start here
rest = None
- range_tup = range_header_to_tuple(req.headers.get('Range',None))
+ range_tup = range_header_to_tuple(req.headers.get('Range',None))
assert range_tup != ()
if range_tup:
(fb,lb) = range_tup
if fb > 0: rest = fb
# -- range support modifications end here
-
+
fp, retrlen = fw.retrfile(file, type, rest)
-
+
# -- range support modifications start here
if range_tup:
(fb,lb) = range_tup
- if lb == '':
+ if lb == '':
if retrlen is None or retrlen == 0:
raise RangeError(9, 'Requested Range Not Satisfiable due to unobtainable file length.')
lb = retrlen
@@ -328,7 +328,7 @@ class FTPRangeHandler(urllib2.FTPHandler):
retrlen = lb - fb
fp = RangeableFileObject(fp, (0,retrlen))
# -- range support modifications end here
-
+
headers = ""
mtype = mimetypes.guess_type(req.get_full_url())[0]
if mtype:
@@ -400,17 +400,17 @@ class ftpwrapper(urllib.ftpwrapper):
_rangere = None
def range_header_to_tuple(range_header):
"""Get a (firstbyte,lastbyte) tuple from a Range header value.
-
+
Range headers have the form "bytes=<firstbyte>-<lastbyte>". This
function pulls the firstbyte and lastbyte values and returns
a (firstbyte,lastbyte) tuple. If lastbyte is not specified in
the header value, it is returned as an empty string in the
tuple.
-
+
Return None if range_header is None
- Return () if range_header does not conform to the range spec
+ Return () if range_header does not conform to the range spec
pattern.
-
+
"""
global _rangere
if range_header is None: return None
@@ -418,9 +418,9 @@ def range_header_to_tuple(range_header):
import re
_rangere = re.compile(r'^bytes=(\d{1,})-(\d*)')
match = _rangere.match(range_header)
- if match:
+ if match:
tup = range_tuple_normalize(match.group(1,2))
- if tup and tup[1]:
+ if tup and tup[1]:
tup = (tup[0],tup[1]+1)
return tup
return ()
@@ -433,14 +433,14 @@ def range_tuple_to_header(range_tup):
if range_tup is None: return None
range_tup = range_tuple_normalize(range_tup)
if range_tup:
- if range_tup[1]:
+ if range_tup[1]:
range_tup = (range_tup[0],range_tup[1] - 1)
return 'bytes=%s-%s' % range_tup
-
+
def range_tuple_normalize(range_tup):
"""Normalize a (first_byte,last_byte) range tuple.
Return a tuple whose first element is guaranteed to be an int
- and whose second element will be '' (meaning: the last byte) or
+ and whose second element will be '' (meaning: the last byte) or
an int. Finally, return None if the normalized tuple == (0,'')
as that is equivalent to retrieving the entire file.
"""
@@ -452,7 +452,7 @@ def range_tuple_normalize(range_tup):
# handle last byte
try: lb = range_tup[1]
except IndexError: lb = ''
- else:
+ else:
if lb is None: lb = ''
elif lb != '': lb = int(lb)
# check if range is over the entire file
diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py
index 1e09e5a..64da884 100644
--- a/urlgrabber/grabber.py
+++ b/urlgrabber/grabber.py
@@ -9,9 +9,9 @@
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the
-# Free Software Foundation, Inc.,
-# 59 Temple Place, Suite 330,
+# License along with this library; if not, write to the
+# Free Software Foundation, Inc.,
+# 59 Temple Place, Suite 330,
# Boston, MA 02111-1307 USA
# This file is part of urlgrabber, a high-level cross-protocol url-grabber
@@ -70,7 +70,7 @@ GENERAL ARGUMENTS (kwargs)
are shown but there's no in-progress display.
text = None
-
+
specifies alternative text to be passed to the progress meter
object. If not given, the default progress meter will use the
basename of the file.
@@ -116,7 +116,7 @@ GENERAL ARGUMENTS (kwargs)
the first 10 bytes of the file.
If set to None, no range will be used.
-
+
reget = None [None|'simple'|'check_timestamp']
whether to attempt to reget a partially-downloaded file. Reget
@@ -206,7 +206,7 @@ GENERAL ARGUMENTS (kwargs)
option. Note that python 2.2 handles the case of these
badly and if you do not use the proper case (shown here), your
values will be overridden with the defaults.
-
+
urlparser = URLParser()
The URLParser class handles pre-processing of URLs, including
@@ -246,12 +246,12 @@ GENERAL ARGUMENTS (kwargs)
ssl_context = None
No-op when using the curl backend (default)
-
+
ssl_verify_peer = True
Check the server's certificate to make sure it is valid with what our CA validates
-
+
ssl_verify_host = True
Check the server's hostname to make sure it matches the certificate DN
@@ -263,7 +263,7 @@ GENERAL ARGUMENTS (kwargs)
ssl_key_type = 'PEM'
PEM or DER - format of key
-
+
ssl_cert = None
Path to the ssl certificate the client should use to to authenticate with
@@ -271,20 +271,20 @@ GENERAL ARGUMENTS (kwargs)
ssl_cert_type = 'PEM'
PEM or DER - format of certificate
-
+
ssl_key_pass = None
password to access the ssl_key
-
+
size = None
- size (in bytes) or Maximum size of the thing being downloaded.
+ size (in bytes) or Maximum size of the thing being downloaded.
This is mostly to keep us from exploding with an endless datastream
-
+
max_header_size = 2097152
Maximum size (in bytes) of the headers.
-
+
ip_resolve = 'whatever'
What type of name to IP resolving to use, default is to do both IPV4 and
@@ -347,7 +347,7 @@ RETRY RELATED ARGUMENTS
retrycodes = urlgrabber.grabber.URLGrabberOptions().retrycodes
if 12 not in retrycodes:
retrycodes.append(12)
-
+
checkfunc = None
a function to do additional checks. This defaults to None, which
@@ -378,7 +378,7 @@ RETRY RELATED ARGUMENTS
function(obj, 'arg1', 2, kwarg=3)
# obj.filename = '/tmp/stuff'
# obj.url = 'http://foo.com/stuff'
-
+
NOTE: both the "args" tuple and "kwargs" dict must be present if
you use this syntax, but either (or both) can be empty.
@@ -437,7 +437,7 @@ RETRY RELATED ARGUMENTS
This callback is very similar to failure_callback. They are
passed the same arguments, so you could use the same function for
both.
-
+
retry_no_cache = False
When True, automatically enable no_cache for future retries if
@@ -557,7 +557,7 @@ try:
from i18n import _
except ImportError as msg:
def _(st): return st
-
+
########################################################################
# functions for debugging output. These functions are here because they
# are also part of the module initialization.
@@ -587,7 +587,7 @@ def _init_default_logger(logspec=None):
the form
URLGRABBER_DEBUG=level,filename
-
+
where "level" can be either an integer or a log level from the
logging module (DEBUG, INFO, etc). If the integer is zero or
less, logging will be disabled. Filename is the filename where
@@ -600,7 +600,7 @@ def _init_default_logger(logspec=None):
URLGRABBER_DEBUG=1,debug.txt # log everything to debug.txt
URLGRABBER_DEBUG=WARNING,- # log warning and higher to stdout
URLGRABBER_DEBUG=INFO # log info and higher to stderr
-
+
This function is called during module initialization. It is not
intended to be called from outside. The only reason it is a
function at all is to keep the module-level namespace tidy and to
@@ -634,7 +634,7 @@ def _log_package_state():
if not DEBUG: return
DEBUG.debug('urlgrabber version = %s' % __version__)
DEBUG.debug('trans function "_" = %s' % _)
-
+
_init_default_logger()
_log_package_state()
@@ -694,7 +694,7 @@ class URLGrabError(IOError):
14 - HTTPError (includes .code and .exception attributes)
15 - user abort
16 - error writing to local file
-
+
MirrorGroup error codes (256 -- 511)
256 - No more mirrors left to try
@@ -749,7 +749,7 @@ def urlgrab(url, filename=None, **kwargs):
If filename is none, the basename of the url is used.
urlgrab returns the filename of the local file, which may be different
from the passed-in filename if the copy_local kwarg == 0.
-
+
See module documentation for a description of possible kwargs.
"""
return default_grabber.urlgrab(url, filename, **kwargs)
@@ -759,7 +759,7 @@ def urlopen(url, **kwargs):
If a progress object or throttle specifications exist, then
a special file object will be returned that supports them.
The file object can be treated like any other file object.
-
+
See module documentation for a description of possible kwargs.
"""
return default_grabber.urlopen(url, **kwargs)
@@ -769,7 +769,7 @@ def urlread(url, limit=None, **kwargs):
If the limit is exceeded, an exception will be thrown. Note that urlread
is NOT intended to be used as a way of saying "I want the first N bytes"
but rather 'read the whole file into memory, but don't use too much'
-
+
See module documentation for a description of possible kwargs.
"""
return default_grabber.urlread(url, limit, **kwargs)
@@ -807,10 +807,10 @@ class URLParser:
"""
url = _to_utf8(url)
quote = opts.quote
-
+
if opts.prefix:
url = self.add_prefix(url, opts.prefix)
-
+
parts = urlparse.urlparse(url)
(scheme, host, path, parm, query, frag) = parts
@@ -820,15 +820,15 @@ class URLParser:
url = 'file:' + urllib.pathname2url(url)
parts = urlparse.urlparse(url)
quote = 0 # pathname2url quotes, so we won't do it again
-
+
if scheme in ['http', 'https']:
parts = self.process_http(parts, url)
-
+
if quote is None:
quote = self.guess_should_quote(parts)
if quote:
parts = self.quote(parts)
-
+
url = urlparse.urlunparse(parts)
return url, parts
@@ -882,7 +882,7 @@ class URLParser:
ind = string.find(path, '%', ind+1)
return 0
return 1
-
+
class URLGrabberOptions:
"""Class to ease kwargs handling."""
@@ -895,23 +895,23 @@ class URLGrabberOptions:
if delegate is None:
self._set_defaults()
self._set_attributes(**kwargs)
-
+
def __getattr__(self, name):
if self.delegate and hasattr(self.delegate, name):
return getattr(self.delegate, name)
raise AttributeError(name)
-
+
def raw_throttle(self):
- """Calculate raw throttle value from throttle and bandwidth
+ """Calculate raw throttle value from throttle and bandwidth
values.
"""
- if self.throttle <= 0:
+ if self.throttle <= 0:
return 0
- elif type(self.throttle) == type(0):
+ elif type(self.throttle) == type(0):
return float(self.throttle)
else: # throttle is a float
return self.bandwidth * self.throttle
-
+
def find_proxy(self, url, scheme):
"""Find the proxy to use for this URL.
Use the proxies dictionary first, then libproxy.
@@ -953,7 +953,7 @@ class URLGrabberOptions:
options specified in kwargs.
"""
return URLGrabberOptions(delegate=self, **kwargs)
-
+
def _set_attributes(self, **kwargs):
"""Update object attributes with those provided in kwargs."""
self.__dict__.update(kwargs)
@@ -965,7 +965,7 @@ class URLGrabberOptions:
% (self.reget, ))
def _set_defaults(self):
- """Set all options to their default values.
+ """Set all options to their default values.
When adding new options, make sure a default is
provided here.
"""
@@ -1023,10 +1023,10 @@ class URLGrabberOptions:
self.ftp_disable_epsv = False
self.no_cache = False
self.retry_no_cache = False
-
+
def __repr__(self):
return self.format()
-
+
def format(self, indent=' '):
keys = self.__dict__.keys()
if self.delegate is not None:
@@ -1055,16 +1055,16 @@ def _run_callback(cb, obj):
class URLGrabber(object):
"""Provides easy opening of URLs with a variety of options.
-
+
All options are specified as kwargs. Options may be specified when
the class is created and may be overridden on a per request basis.
-
+
New objects inherit default values from default_grabber.
"""
-
+
def __init__(self, **kwargs):
self.opts = URLGrabberOptions(**kwargs)
-
+
def _retry(self, opts, func, *args):
tries = 0
while 1:
@@ -1114,33 +1114,33 @@ class URLGrabber(object):
raise
if retrycode is not None and retrycode < 0 and opts.retry_no_cache:
opts.no_cache = True
-
+
def urlopen(self, url, opts=None, **kwargs):
"""open the url and return a file object
- If a progress object or throttle value specified when this
- object was created, then a special file object will be
- returned that supports them. The file object can be treated
+ If a progress object or throttle value specified when this
+ object was created, then a special file object will be
+ returned that supports them. The file object can be treated
like any other file object.
"""
url = _to_utf8(url)
opts = (opts or self.opts).derive(**kwargs)
if DEBUG: DEBUG.debug('combined options: %s' % repr(opts))
- (url,parts) = opts.urlparser.parse(url, opts)
+ (url,parts) = opts.urlparser.parse(url, opts)
opts.find_proxy(url, parts[0])
def retryfunc(opts, url):
return PyCurlFileObject(url, filename=None, opts=opts)
return self._retry(opts, retryfunc, url)
-
+
def urlgrab(self, url, filename=None, opts=None, **kwargs):
"""grab the file at <url> and make a local copy at <filename>
If filename is none, the basename of the url is used.
- urlgrab returns the filename of the local file, which may be
+ urlgrab returns the filename of the local file, which may be
different from the passed-in filename if copy_local == 0.
"""
url = _to_utf8(url)
opts = (opts or self.opts).derive(**kwargs)
if DEBUG: DEBUG.debug('combined options: %s' % repr(opts))
- (url,parts) = opts.urlparser.parse(url, opts)
+ (url,parts) = opts.urlparser.parse(url, opts)
(scheme, host, path, parm, query, frag) = parts
opts.find_proxy(url, scheme)
if filename is None:
@@ -1149,18 +1149,18 @@ class URLGrabber(object):
# This is better than nothing.
filename = 'index.html'
if scheme == 'file' and not opts.copy_local:
- # just return the name of the local file - don't make a
+ # just return the name of the local file - don't make a
# copy currently
path = urllib.url2pathname(path)
if host:
path = os.path.normpath('//' + host + path)
if not os.path.exists(path):
- err = URLGrabError(2,
+ err = URLGrabError(2,
_('Local file does not exist: %s') % (path, ))
err.url = url
raise err
elif not os.path.isfile(path):
- err = URLGrabError(3,
+ err = URLGrabError(3,
_('Not a normal file: %s') % (path, ))
err.url = url
raise err
@@ -1170,7 +1170,7 @@ class URLGrabber(object):
obj = CallbackObject(filename=path, url=url)
_run_callback(opts.checkfunc, obj)
return path
-
+
if opts.async:
opts.url = url
opts.filename = filename
@@ -1192,29 +1192,29 @@ class URLGrabber(object):
finally:
fo.close()
return filename
-
+
try:
return self._retry(opts, retryfunc, url, filename)
except URLGrabError as e:
_TH.update(url, 0, 0, e)
opts.exception = e
return _run_callback(opts.failfunc, opts)
-
+
def urlread(self, url, limit=None, opts=None, **kwargs):
"""read the url into a string, up to 'limit' bytes
If the limit is exceeded, an exception will be thrown. Note
- that urlread is NOT intended to be used as a way of saying
- "I want the first N bytes" but rather 'read the whole file
+ that urlread is NOT intended to be used as a way of saying
+ "I want the first N bytes" but rather 'read the whole file
into memory, but don't use too much'
"""
url = _to_utf8(url)
opts = (opts or self.opts).derive(**kwargs)
if DEBUG: DEBUG.debug('combined options: %s' % repr(opts))
- (url,parts) = opts.urlparser.parse(url, opts)
+ (url,parts) = opts.urlparser.parse(url, opts)
opts.find_proxy(url, parts[0])
if limit is not None:
limit = limit + 1
-
+
def retryfunc(opts, url, limit):
fo = PyCurlFileObject(url, filename=None, opts=opts)
s = ''
@@ -1232,16 +1232,16 @@ class URLGrabber(object):
finally:
fo.close()
return s
-
+
s = self._retry(opts, retryfunc, url, limit)
if limit and len(s) > limit:
- err = URLGrabError(8,
+ err = URLGrabError(8,
_('Exceeded limit (%i): %s') % (limit, url))
err.url = url
raise err
return s
-
+
def _make_callback(self, callback_obj):
# not used, left for compatibility
if callable(callback_obj):
@@ -1282,7 +1282,7 @@ class PyCurlFileObject(object):
self._tm_first = None
self._tm_last = None
self._do_open()
-
+
def __getattr__(self, name):
"""This effectively allows us to wrap at the instance level.
@@ -1304,9 +1304,9 @@ class PyCurlFileObject(object):
if not self._prog_running:
if self.opts.progress_obj:
size = self.size + self._reget_length
- self.opts.progress_obj.start(self._prog_reportname,
- urllib.unquote(self.url),
- self._prog_basename,
+ self.opts.progress_obj.start(self._prog_reportname,
+ urllib.unquote(self.url),
+ self._prog_basename,
size=size,
text=self.opts.text)
self._prog_running = True
@@ -1329,14 +1329,14 @@ class PyCurlFileObject(object):
return len(buf)
except KeyboardInterrupt:
return -1
-
+
def _hdr_retrieve(self, buf):
if self._hdr_ended:
self._hdr_dump = ''
self.size = 0
self._hdr_ended = False
- if self._over_max_size(cur=len(self._hdr_dump),
+ if self._over_max_size(cur=len(self._hdr_dump),
max_size=self.opts.max_header_size):
return -1
try:
@@ -1366,18 +1366,18 @@ class PyCurlFileObject(object):
s = parse150(buf)
if s:
self.size = int(s)
-
+
if buf.lower().find('location') != -1:
location = ':'.join(buf.split(':')[1:])
location = location.strip()
self.scheme = urlparse.urlsplit(location)[0]
self.url = location
-
+
self._hdr_dump += buf
if len(self._hdr_dump) != 0 and buf == '\r\n':
self._hdr_ended = True
if DEBUG: DEBUG.debug('header ended:')
-
+
return len(buf)
except KeyboardInterrupt:
return pycurl.READFUNC_ABORT
@@ -1392,7 +1392,7 @@ class PyCurlFileObject(object):
hdrfp.seek(0)
self._parsed_hdr = mimetools.Message(hdrfp)
return self._parsed_hdr
-
+
hdr = property(_return_hdr_obj)
http_code = property(fget=
lambda self: self.curl_obj.getinfo(pycurl.RESPONSE_CODE))
@@ -1415,7 +1415,7 @@ class PyCurlFileObject(object):
self.curl_obj.setopt(pycurl.FAILONERROR, True)
self.curl_obj.setopt(pycurl.OPT_FILETIME, True)
self.curl_obj.setopt(pycurl.FOLLOWLOCATION, True)
-
+
if DEBUG and DEBUG.level <= 10:
self.curl_obj.setopt(pycurl.VERBOSE, True)
if opts.user_agent:
@@ -1429,11 +1429,11 @@ class PyCurlFileObject(object):
self.curl_obj.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_V4)
if ipr == 'ipv6':
self.curl_obj.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_V6)
-
+
# maybe to be options later
self.curl_obj.setopt(pycurl.FOLLOWLOCATION, True)
self.curl_obj.setopt(pycurl.MAXREDIRS, 5)
-
+
# timeouts
timeout = 300
if hasattr(opts, 'timeout'):
@@ -1458,7 +1458,7 @@ class PyCurlFileObject(object):
self.curl_obj.setopt(pycurl.SSLCERT, opts.ssl_cert)
# if we have a client side cert - turn off reuse b/c nss is odd
self.curl_obj.setopt(pycurl.FORBID_REUSE, 1)
- if opts.ssl_cert_type:
+ if opts.ssl_cert_type:
self.curl_obj.setopt(pycurl.SSLCERTTYPE, opts.ssl_cert_type)
if opts.ssl_key_pass:
self.curl_obj.setopt(pycurl.SSLKEYPASSWD, opts.ssl_key_pass)
@@ -1479,11 +1479,11 @@ class PyCurlFileObject(object):
range_str = self._build_range()
if range_str:
self.curl_obj.setopt(pycurl.RANGE, range_str)
-
+
# throttle/bandwidth
if hasattr(opts, 'raw_throttle') and opts.raw_throttle():
self.curl_obj.setopt(pycurl.MAX_RECV_SPEED_LARGE, int(opts.raw_throttle()))
-
+
# proxy
if opts.proxy is not None:
self.curl_obj.setopt(pycurl.PROXY, opts.proxy)
@@ -1511,50 +1511,50 @@ class PyCurlFileObject(object):
# our url
self.curl_obj.setopt(pycurl.URL, self.url)
-
-
+
+
def _do_perform(self):
if self._complete:
return
-
+
try:
self.curl_obj.perform()
except pycurl.error as e:
# XXX - break some of these out a bit more clearly
- # to other URLGrabErrors from
+ # to other URLGrabErrors from
# http://curl.haxx.se/libcurl/c/libcurl-errors.html
# this covers e.args[0] == 22 pretty well - which will be common
-
+
code = self.http_code
errcode = e.args[0]
errurl = urllib.unquote(self.url)
-
+
if self._error[0]:
errcode = self._error[0]
-
+
if errcode == 23 and 200 <= code <= 299:
# this is probably wrong but ultimately this is what happens
# we have a legit http code and a pycurl 'writer failed' code
# which almost always means something aborted it from outside
# since we cannot know what it is -I'm banking on it being
- # a ctrl-c. XXXX - if there's a way of going back two raises to
+ # a ctrl-c. XXXX - if there's a way of going back two raises to
# figure out what aborted the pycurl process FIXME
raise getattr(self, '_cb_error', KeyboardInterrupt)
-
+
elif errcode == 28:
err = URLGrabError(12, _('Timeout on %s: %s') % (errurl, e))
err.url = errurl
raise err
-
+
elif errcode == 42:
# this is probably wrong but ultimately this is what happens
# we have a legit http code and a pycurl 'writer failed' code
# which almost always means something aborted it from outside
# since we cannot know what it is -I'm banking on it being
- # a ctrl-c. XXXX - if there's a way of going back two raises to
+ # a ctrl-c. XXXX - if there's a way of going back two raises to
# figure out what aborted the pycurl process FIXME
raise KeyboardInterrupt
-
+
else:
pyerr2str = { 5 : _("Couldn't resolve proxy"),
6 : _("Couldn't resolve host"),
@@ -1631,7 +1631,7 @@ class PyCurlFileObject(object):
def _add_headers(self):
pass
-
+
def _build_range(self):
reget_length = 0
rt = None
@@ -1646,19 +1646,19 @@ class PyCurlFileObject(object):
reget_length = s[stat.ST_SIZE]
# Set initial length when regetting
- self._amount_read = reget_length
+ self._amount_read = reget_length
self._reget_length = reget_length # set where we started from, too
rt = reget_length, ''
self.append = 1
-
+
if self.opts.range:
rt = self.opts.range
-
+
if rt[0] is None:
rt = (0, rt[1])
rt = (rt[0] + reget_length, rt[1])
-
+
if rt:
header = range_tuple_to_header(rt)
@@ -1670,10 +1670,10 @@ class PyCurlFileObject(object):
def _make_request(self, req, opener):
#XXXX
# This doesn't do anything really, but we could use this
- # instead of do_open() to catch a lot of crap errors as
+ # instead of do_open() to catch a lot of crap errors as
# mstenner did before here
return (self.fo, self.hdr)
-
+
try:
if self.opts.timeout:
old_to = socket.getdefaulttimeout()
@@ -1723,7 +1723,7 @@ class PyCurlFileObject(object):
else:
return (fo, hdr)
-
+
def _do_grab(self):
"""dump the file to a filename or StringIO buffer"""
@@ -1734,7 +1734,7 @@ class PyCurlFileObject(object):
_was_filename = True
self._prog_reportname = str(self.filename)
self._prog_basename = os.path.basename(self.filename)
-
+
if self.append: mode = 'ab'
else: mode = 'wb'
@@ -1752,20 +1752,20 @@ class PyCurlFileObject(object):
self._prog_reportname = 'MEMORY'
self._prog_basename = 'MEMORY'
-
+
self.fo = StringIO()
# if this is to be a tempfile instead....
# it just makes crap in the tempdir
#fh, self._temp_name = mkstemp()
#self.fo = open(self._temp_name, 'wb')
- try:
+ try:
self._do_perform()
except URLGrabError as e:
self.fo.flush()
self.fo.close()
raise e
-
+
if _was_filename:
# close it up
self.fo.flush()
@@ -1786,7 +1786,7 @@ class PyCurlFileObject(object):
os.utime(self.filename, (mod_time, mod_time))
except OSError as e:
err = URLGrabError(16, _(\
- 'error setting timestamp on file %s from %s, OSError: %s')
+ 'error setting timestamp on file %s from %s, OSError: %s')
% (self.filename, self.url, e))
err.url = self.url
raise err
@@ -1798,13 +1798,13 @@ class PyCurlFileObject(object):
'error opening file from %s, IOError: %s') % (self.url, e))
err.url = self.url
raise err
-
+
else:
#self.fo = open(self._temp_name, 'r')
self.fo.seek(0)
self._complete = True
-
+
def _fill_buffer(self, amt=None):
"""fill the buffer to contain at least 'amt' bytes by reading
from the underlying file object. If amt is None, then it will
@@ -1821,9 +1821,9 @@ class PyCurlFileObject(object):
# if we've made it here, then we don't have enough in the buffer
# and we need to read more.
-
+
if not self._complete: self._do_grab() #XXX cheater - change on ranges
-
+
buf = [self._rbuf]
bufsize = len(self._rbuf)
while amt is None or amt:
@@ -1833,7 +1833,7 @@ class PyCurlFileObject(object):
(time.time() - self._ttime)
if diff > 0: time.sleep(diff)
self._ttime = time.time()
-
+
# now read some data, up to self._rbufsize
if amt is None: readamount = self._rbufsize
else: readamount = min(amt, self._rbufsize)
@@ -1878,7 +1878,7 @@ class PyCurlFileObject(object):
self.opts.progress_obj.update(downloaded)
except (KeyboardInterrupt, IOError):
return -1
-
+
def _over_max_size(self, cur, max_size=None):
if not max_size:
@@ -1896,7 +1896,7 @@ class PyCurlFileObject(object):
self._error = (pycurl.E_FILESIZE_EXCEEDED, msg)
return True
return False
-
+
def read(self, amt=None):
self._fill_buffer(amt)
if amt is None:
@@ -1908,7 +1908,7 @@ class PyCurlFileObject(object):
def readline(self, limit=-1):
if not self._complete: self._do_grab()
return self.fo.readline()
-
+
i = string.find(self._rbuf, '\n')
while i < 0 and not (0 < limit <= len(self._rbuf)):
L = len(self._rbuf)
@@ -1927,12 +1927,12 @@ class PyCurlFileObject(object):
if self._prog_running:
self.opts.progress_obj.end(self._amount_read)
self.fo.close()
-
+
def geturl(self):
""" Provide the geturl() method, used to be got from
urllib.addinfourl, via. urllib.URLopener.* """
return self.url
-
+
if hasattr(pycurl, 'GLOBAL_ACK_EINTR'):
# fail immediately on ctrl-c
pycurl.global_init(pycurl.GLOBAL_DEFAULT | pycurl.GLOBAL_ACK_EINTR)
@@ -1945,7 +1945,7 @@ def reset_curl_obj():
_curl_cache = pycurl.Curl()
_libproxy_cache = None
-
+
#####################################################################
# DEPRECATED FUNCTIONS
@@ -1964,23 +1964,23 @@ def set_progress_obj(new_progress_obj):
def set_user_agent(new_user_agent):
"""Deprecated. Use: default_grabber.user_agent = new_user_agent"""
default_grabber.user_agent = new_user_agent
-
+
def retrygrab(url, filename=None, copy_local=0, close_connection=0,
progress_obj=None, throttle=None, bandwidth=None,
numtries=3, retrycodes=[-1,2,4,5,6,7], checkfunc=None):
"""Deprecated. Use: urlgrab() with the retry arg instead"""
- kwargs = {'copy_local' : copy_local,
+ kwargs = {'copy_local' : copy_local,
'close_connection' : close_connection,
- 'progress_obj' : progress_obj,
- 'throttle' : throttle,
+ 'progress_obj' : progress_obj,
+ 'throttle' : throttle,
'bandwidth' : bandwidth,
'retry' : numtries,
'retrycodes' : retrycodes,
- 'checkfunc' : checkfunc
+ 'checkfunc' : checkfunc
}
return urlgrab(url, filename, **kwargs)
-
+
#####################################################################
# Serializer + parser: A replacement of the rather bulky Json code.
#
@@ -2494,7 +2494,7 @@ def _main_test():
set_throttle(1.0)
set_bandwidth(32 * 1024)
- print "throttle: %s, throttle bandwidth: %s B/s" % (default_grabber.throttle,
+ print "throttle: %s, throttle bandwidth: %s B/s" % (default_grabber.throttle,
default_grabber.bandwidth)
try: from progress import text_progress_meter
@@ -2534,7 +2534,7 @@ def _retry_test():
raise URLGrabError(-2, 'forcing immediate failure')
print 'success'
return
-
+
kwargs['checkfunc'] = (cfunc, ('hello',), {'there':'there'})
try: name = apply(retrygrab, (url, filename), kwargs)
except URLGrabError as e: print e
@@ -2561,7 +2561,7 @@ def _file_object_test(filename=None):
s_output = fo_output.getvalue()
if s_output == s_input: print 'passed'
else: print 'FAILED'
-
+
def _test_file_object_smallread(wrapper, fo_output):
while 1:
s = wrapper.read(23)
diff --git a/urlgrabber/mirror.py b/urlgrabber/mirror.py
index a2202fe..e4aac7e 100644
--- a/urlgrabber/mirror.py
+++ b/urlgrabber/mirror.py
@@ -9,9 +9,9 @@
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the
-# Free Software Foundation, Inc.,
-# 59 Temple Place, Suite 330,
+# License along with this library; if not, write to the
+# Free Software Foundation, Inc.,
+# 59 Temple Place, Suite 330,
# Boston, MA 02111-1307 USA
# This file is part of urlgrabber, a high-level cross-protocol url-grabber
@@ -100,7 +100,7 @@ from grabber import _run_callback, _do_raise
from grabber import exception2msg
from grabber import _TH
-def _(st):
+def _(st):
return st
class GrabRequest:
@@ -142,7 +142,7 @@ class MirrorGroup:
In addition to the required arguments "grabber" and "mirrors",
MirrorGroup also takes the following optional arguments:
-
+
default_action
A dict that describes the actions to be taken upon failure
@@ -173,7 +173,7 @@ class MirrorGroup:
or by returning an action dict from the failure_callback
return {'fail':0}
in increasing precedence.
-
+
If all three of these were done, the net result would be:
{'increment': 0, # set in method
'increment_master': 1, # class default
@@ -278,11 +278,11 @@ class MirrorGroup:
# methods, they will be stripped before getting passed on to the
# grabber
options = ['default_action', 'failure_callback']
-
+
def _process_kwargs(self, kwargs):
self.failure_callback = kwargs.get('failure_callback')
self.default_action = kwargs.get('default_action')
-
+
def _parse_mirrors(self, mirrors):
parsed_mirrors = []
for m in mirrors:
@@ -290,7 +290,7 @@ class MirrorGroup:
m = {'mirror': _to_utf8(m)}
parsed_mirrors.append(m)
return parsed_mirrors
-
+
def _load_gr(self, gr):
# OVERRIDE IDEAS:
# shuffle gr list
@@ -351,7 +351,7 @@ class MirrorGroup:
urlopen, there's no good way for the mirror group to know that
an error occurs mid-download (it's already returned and given
you the file object).
-
+
remove --- can have several values
0 do not remove the mirror from the list
1 remove the mirror for this download only
@@ -373,7 +373,7 @@ class MirrorGroup:
self._next += 1
if self._next >= len(self.mirrors): self._next = 0
self._lock.release()
-
+
if action.get('remove', 1):
del gr.mirrors[gr._next]
elif action.get('increment', 1):
@@ -398,7 +398,7 @@ class MirrorGroup:
return base_url + rel_url
else:
return base_url + '/' + rel_url
-
+
def _mirror_try(self, func, url, kw):
gr = GrabRequest()
gr.func = func
@@ -449,7 +449,7 @@ class MirrorGroup:
except URLGrabError as e:
obj = CallbackObject(url=url, filename=filename, exception=e, **kwargs)
return _run_callback(kwargs.get('failfunc', _do_raise), obj)
-
+
def urlopen(self, url, **kwargs):
kw = dict(kwargs)
func = 'urlopen'
@@ -460,7 +460,7 @@ class MirrorGroup:
kw['limit'] = limit
func = 'urlread'
return self._mirror_try(func, url, kw)
-
+
class MGRandomStart(MirrorGroup):
"""A mirror group that starts at a random mirror in the list.
diff --git a/urlgrabber/progress.py b/urlgrabber/progress.py
index 9b77c54..9359f16 100644
--- a/urlgrabber/progress.py
+++ b/urlgrabber/progress.py
@@ -9,9 +9,9 @@
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the
-# Free Software Foundation, Inc.,
-# 59 Temple Place, Suite 330,
+# License along with this library; if not, write to the
+# Free Software Foundation, Inc.,
+# 59 Temple Place, Suite 330,
# Boston, MA 02111-1307 USA
# This file is part of urlgrabber, a high-level cross-protocol url-grabber
@@ -107,7 +107,7 @@ class BaseMeter:
self.last_amount_read = 0
self.last_update_time = None
self.re = RateEstimator()
-
+
def start(self, filename=None, url=None, basename=None,
size=None, now=None, text=None):
self.filename = filename
@@ -125,7 +125,7 @@ class BaseMeter:
self.last_amount_read = 0
self.last_update_time = now
self._do_start(now)
-
+
def _do_start(self, now=None):
pass
@@ -152,7 +152,7 @@ class BaseMeter:
def _do_end(self, amount_read, now=None):
pass
-
+
# This is kind of a hack, but progress is gotten from grabber which doesn't
# know about the total size to download. So we do this so we can get the data
# out of band here. This will be "fixed" one way or anther soon.
@@ -167,7 +167,7 @@ def text_meter_total_size(size, downloaded=0):
#
# update: No size (minimal: 17 chars)
# -----------------------------------
-# <text> <rate> | <current size> <elapsed time>
+# <text> <rate> | <current size> <elapsed time>
# 8-48 1 8 3 6 1 9 5
#
# Order: 1. <text>+<current size> (17)
@@ -202,7 +202,7 @@ def text_meter_total_size(size, downloaded=0):
#
# end
# ---
-# <text> | <current size> <elapsed time>
+# <text> | <current size> <elapsed time>
# 8-56 3 6 1 9 5
#
# Order: 1. <text> ( 8)
@@ -360,7 +360,7 @@ class MultiFileMeter:
else:
self._lock = _FakeLock()
self.update_period = 0.3 # seconds
-
+
self.numfiles = None
self.finished_files = 0
self.failed_files = 0
@@ -393,7 +393,7 @@ class MultiFileMeter:
if now is None: now = time.time()
self.re.update(self._amount_read(), now)
self._do_end(now)
-
+
def _do_end(self, now):
pass
@@ -406,10 +406,10 @@ class MultiFileMeter:
newmeter = self.helperclass(self)
self.meters.append(newmeter)
return newmeter
-
+
def removeMeter(self, meter):
self.meters.remove(meter)
-
+
###########################################################
# child functions - these should only be called by helpers
def start_meter(self, meter, now):
@@ -423,10 +423,10 @@ class MultiFileMeter:
finally:
self._lock.release()
self._do_start_meter(meter, now)
-
+
def _do_start_meter(self, meter, now):
pass
-
+
def update_meter(self, meter, now):
if not meter in self.meters:
raise ValueError('attempt to use orphaned meter')
@@ -507,7 +507,7 @@ class TextMultiFileMeter(MultiFileMeter):
# 8-22 1 3-4 1 6-12 1 8 3 6 1 7-9 1 3 1
# end
# ---
-# <text> | <file size> <file elapsed time>
+# <text> | <file size> <file elapsed time>
# 8-56 3 6 1 9 5
def _do_update_meter(self, meter, now):
self._lock.acquire()
@@ -622,7 +622,7 @@ class TextMultiFileMeter(MultiFileMeter):
pass
finally:
self._lock.release()
-
+
######################################################################
# support classes and functions
@@ -637,7 +637,7 @@ class RateEstimator:
self.last_update_time = now
self.last_amount_read = 0
self.ave_rate = None
-
+
def update(self, amount_read, now=None):
if now is None: now = time.time()
# libcurl calls the progress callback when fetching headers
@@ -661,7 +661,7 @@ class RateEstimator:
time_diff, read_diff, self.ave_rate, self.timescale)
self.last_amount_read = amount_read
#print 'results', time_diff, read_diff, self.ave_rate
-
+
#####################################################################
# result methods
def average_rate(self):
@@ -697,14 +697,14 @@ class RateEstimator:
epsilon = time_diff / timescale
if epsilon > 1: epsilon = 1.0
return self._rolling_ave(time_diff, read_diff, last_ave, epsilon)
-
+
def _rolling_ave(self, time_diff, read_diff, last_ave, epsilon):
"""perform a "rolling average" iteration
a rolling average "folds" new data into an existing average with
some weight, epsilon. epsilon must be between 0.0 and 1.0 (inclusive)
a value of 0.0 means only the old value (initial value) counts,
and a value of 1.0 means only the newest value is considered."""
-
+
try:
recent_rate = read_diff / time_diff
except ZeroDivisionError:
@@ -733,7 +733,7 @@ class RateEstimator:
rt = int(rt)
if shift <= 0: return rt
return float(int(rt) >> shift << shift)
-
+
def format_time(seconds, use_hours=0):
if seconds is None or seconds < 0:
@@ -751,7 +751,7 @@ def format_time(seconds, use_hours=0):
return '%02i:%02i:%02i' % (hours, minutes, seconds)
else:
return '%02i:%02i' % (minutes, seconds)
-
+
def format_number(number, SI=0, space=' '):
"""Turn numbers into human-readable metric-like numbers"""
symbols = ['', # (none)
@@ -763,14 +763,14 @@ def format_number(number, SI=0, space=' '):
'E', # exa
'Z', # zetta
'Y'] # yotta
-
+
if SI: step = 1000.0
else: step = 1024.0
thresh = 999
depth = 0
max_depth = len(symbols) - 1
-
+
# we want numbers between 0 and thresh, but don't exceed the length
# of our list. In that event, the formatting will be screwed up,
# but it'll still show the right number.
@@ -788,7 +788,7 @@ def format_number(number, SI=0, space=' '):
format = '%.1f%s%s'
else:
format = '%.0f%s%s'
-
+
return(format % (float(number or 0), space, symbols[depth]))
def _tst(fn, cur, tot, beg, size, *args):
@@ -850,8 +850,8 @@ def _mtst(datas, *args):
assert not tm.meters
if __name__ == "__main__":
- # (1/2): subversion-1.4.4-7.x86_64.rpm 2.4 MB / 85 kB/s 00:28
- # (2/2): mercurial-0.9.5-6.fc8.x86_64.rpm 924 kB / 106 kB/s 00:08
+ # (1/2): subversion-1.4.4-7.x86_64.rpm 2.4 MB / 85 kB/s 00:28
+ # (2/2): mercurial-0.9.5-6.fc8.x86_64.rpm 924 kB / 106 kB/s 00:08
if len(sys.argv) >= 2 and sys.argv[1] == 'multi':
_mtst((("sm-1.0.0-1.fc8.i386.rpm", 1000),
("s-1.0.1-1.fc8.i386.rpm", 5000),