"""Compare the speed of downloading URLs sequentially vs. using futures.""" import functools import time import timeit import sys try: from urllib2 import urlopen except ImportError: from urllib.request import urlopen from concurrent.futures import (as_completed, ThreadPoolExecutor, ProcessPoolExecutor) URLS = ['http://www.google.com/', 'http://www.apple.com/', 'http://www.ibm.com', 'http://www.thisurlprobablydoesnotexist.com', 'http://www.slashdot.org/', 'http://www.python.org/', 'http://www.bing.com/', 'http://www.facebook.com/', 'http://www.yahoo.com/', 'http://www.youtube.com/', 'http://www.blogger.com/'] def load_url(url, timeout): kwargs = {'timeout': timeout} if sys.version_info >= (2, 6) else {} return urlopen(url, **kwargs).read() def download_urls_sequential(urls, timeout=60): url_to_content = {} for url in urls: try: url_to_content[url] = load_url(url, timeout=timeout) except: pass return url_to_content def download_urls_with_executor(urls, executor, timeout=60): try: url_to_content = {} future_to_url = dict((executor.submit(load_url, url, timeout), url) for url in urls) for future in as_completed(future_to_url): try: url_to_content[future_to_url[future]] = future.result() except: pass return url_to_content finally: executor.shutdown() def main(): for name, fn in [('sequential', functools.partial(download_urls_sequential, URLS)), ('processes', functools.partial(download_urls_with_executor, URLS, ProcessPoolExecutor(10))), ('threads', functools.partial(download_urls_with_executor, URLS, ThreadPoolExecutor(10)))]: sys.stdout.write('%s: ' % name.ljust(12)) start = time.time() url_map = fn() sys.stdout.write('%.2f seconds (%d of %d downloaded)\n' % (time.time() - start, len(url_map), len(URLS))) if __name__ == '__main__': main()