summaryrefslogtreecommitdiff
path: root/python3/crawl.py
diff options
context:
space:
mode:
Diffstat (limited to 'python3/crawl.py')
-rw-r--r--python3/crawl.py68
1 files changed, 0 insertions, 68 deletions
diff --git a/python3/crawl.py b/python3/crawl.py
deleted file mode 100644
index 7135682..0000000
--- a/python3/crawl.py
+++ /dev/null
@@ -1,68 +0,0 @@
-"""Compare the speed of downloading URLs sequentially vs. using futures."""
-
-import datetime
-import functools
-import futures.thread
-import time
-import timeit
-import urllib.request
-
-URLS = ['http://www.google.com/',
- 'http://www.apple.com/',
- 'http://www.ibm.com',
- 'http://www.thisurlprobablydoesnotexist.com',
- 'http://www.slashdot.org/',
- 'http://www.python.org/',
- 'http://www.bing.com/',
- 'http://www.facebook.com/',
- 'http://www.yahoo.com/',
- 'http://www.youtube.com/',
- 'http://www.blogger.com/']
-
-def load_url(url, timeout):
- return urllib.request.urlopen(url, timeout=timeout).read()
-
-def download_urls_sequential(urls, timeout=60):
- url_to_content = {}
- for url in urls:
- try:
- url_to_content[url] = load_url(url, timeout=timeout)
- except:
- pass
- return url_to_content
-
-def download_urls_with_executor(urls, executor, timeout=60):
- try:
- url_to_content = {}
- future_to_url = dict((executor.submit(load_url, url, timeout), url)
- for url in urls)
-
- for future in futures.as_completed(future_to_url):
- try:
- url_to_content[future_to_url[future]] = future.result()
- except:
- pass
- return url_to_content
- finally:
- executor.shutdown()
-
-def main():
- for name, fn in [('sequential',
- functools.partial(download_urls_sequential, URLS)),
- ('processes',
- functools.partial(download_urls_with_executor,
- URLS,
- futures.ProcessPoolExecutor(10))),
- ('threads',
- functools.partial(download_urls_with_executor,
- URLS,
- futures.ThreadPoolExecutor(10)))]:
- print('%s: ' % name.ljust(12), end='')
- start = time.time()
- url_map = fn()
- print('%.2f seconds (%d of %d downloaded)' % (time.time() - start,
- len(url_map),
- len(URLS)))
-
-if __name__ == '__main__':
- main()