diff options
Diffstat (limited to 'python3/crawl.py')
-rw-r--r--  python3/crawl.py | 18 +++++++++++-------
1 file changed, 11 insertions(+), 7 deletions(-)
diff --git a/python3/crawl.py b/python3/crawl.py
index 10e35c3..46b8f7f 100644
--- a/python3/crawl.py
+++ b/python3/crawl.py
@@ -1,3 +1,5 @@
+"""Compare the speed of downloading URLs sequentially vs. using futures."""
+
 import datetime
 import functools
 import futures.thread
@@ -11,7 +13,11 @@ URLS = ['http://www.google.com/',
         'http://www.thisurlprobablydoesnotexist.com',
         'http://www.slashdot.org/',
         'http://www.python.org/',
-        'http://www.sweetapp.com/'] * 5
+        'http://www.bing.com/',
+        'http://www.facebook.com/',
+        'http://www.yahoo.com/',
+        'http://www.youtube.com/',
+        'http://www.blogger.com/']
 
 def load_url(url, timeout):
     return urllib.request.urlopen(url, timeout=timeout).read()
@@ -41,17 +47,15 @@ def download_urls_with_executor(urls, executor, timeout=60):
 def main():
     for name, fn in [('sequential',
                       functools.partial(download_urls_sequential, URLS)),
-                     ('processes',
-                      functools.partial(download_urls_with_executor,
-                                        URLS,
-                                        futures.ProcessPoolExecutor(10))),
                      ('threads',
                       functools.partial(download_urls_with_executor,
                                         URLS,
                                         futures.ThreadPoolExecutor(10)))]:
         print('%s: ' % name.ljust(12), end='')
         start = time.time()
-        fn()
-        print('%.2f seconds' % (time.time() - start))
+        url_map = fn()
+        print('%.2f seconds (%d of %d downloaded)' % (time.time() - start,
+                                                      len(url_map),
+                                                      len(URLS)))
 
 main()