summary | refs | log | tree | commit | diff
path: root/python3/crawl.py
diff options
context:
space:
mode:
Diffstat (limited to 'python3/crawl.py')
-rw-r--r-- python3/crawl.py 18
1 files changed, 11 insertions, 7 deletions
diff --git a/python3/crawl.py b/python3/crawl.py
index 10e35c3..46b8f7f 100644
--- a/python3/crawl.py
+++ b/python3/crawl.py
@@ -1,3 +1,5 @@
+"""Compare the speed of downloading URLs sequentially vs. using futures."""
+
import datetime
import functools
import futures.thread
@@ -11,7 +13,11 @@ URLS = ['http://www.google.com/',
'http://www.thisurlprobablydoesnotexist.com',
'http://www.slashdot.org/',
'http://www.python.org/',
- 'http://www.sweetapp.com/'] * 5
+ 'http://www.bing.com/',
+ 'http://www.facebook.com/',
+ 'http://www.yahoo.com/',
+ 'http://www.youtube.com/',
+ 'http://www.blogger.com/']
def load_url(url, timeout):
return urllib.request.urlopen(url, timeout=timeout).read()
@@ -41,17 +47,15 @@ def download_urls_with_executor(urls, executor, timeout=60):
def main():
for name, fn in [('sequential',
functools.partial(download_urls_sequential, URLS)),
- ('processes',
- functools.partial(download_urls_with_executor,
- URLS,
- futures.ProcessPoolExecutor(10))),
('threads',
functools.partial(download_urls_with_executor,
URLS,
futures.ThreadPoolExecutor(10)))]:
print('%s: ' % name.ljust(12), end='')
start = time.time()
- fn()
- print('%.2f seconds' % (time.time() - start))
+ url_map = fn()
+ print('%.2f seconds (%d of %d downloaded)' % (time.time() - start,
+ len(url_map),
+ len(URLS)))
main()