From 35a66ee20837957a6247602fbc653e0857c0b531 Mon Sep 17 00:00:00 2001
From: "brian.quinlan"
Date: Fri, 2 Oct 2009 05:29:54 +0000
Subject: Remove use of multiprocess executor in crawl example. Also generate a
 warning if a ProcessPoolExecutor is used.

---
 python2/crawl.py           | 31 +++++++++++++++++--------------
 python2/futures/process.py |  3 +++
 python3/crawl.py           | 18 +++++++++++-------
 python3/futures/process.py |  3 +++
 4 files changed, 34 insertions(+), 21 deletions(-)

diff --git a/python2/crawl.py b/python2/crawl.py
index a597c76..88d7c9e 100644
--- a/python2/crawl.py
+++ b/python2/crawl.py
@@ -1,3 +1,5 @@
+"""Compare the speed of downloading URLs sequentially vs. using futures."""
+
 import datetime
 import functools
 import futures.thread
@@ -11,26 +13,29 @@ URLS = ['http://www.google.com/',
         'http://www.thisurlprobablydoesnotexist.com',
         'http://www.slashdot.org/',
         'http://www.python.org/',
-        'http://www.sweetapp.com/'] * 5
+        'http://www.bing.com/',
+        'http://www.facebook.com/',
+        'http://www.yahoo.com/',
+        'http://www.youtube.com/',
+        'http://www.blogger.com/']
 
-def load_url(url, timeout):
-    return urllib2.urlopen(url, timeout=timeout).read()
+def load_url(url):
+    return urllib2.urlopen(url).read()
 
-def download_urls_sequential(urls, timeout=60):
+def download_urls_sequential(urls):
     url_to_content = {}
     for url in urls:
         try:
-            url_to_content[url] = load_url(url, timeout=timeout)
+            url_to_content[url] = load_url(url)
         except:
             pass
     return url_to_content
 
-def download_urls_with_executor(urls, executor, timeout=60):
+def download_urls_with_executor(urls, executor):
     try:
         url_to_content = {}
         fs = executor.run_to_futures(
-                (functools.partial(load_url, url, timeout) for url in urls),
-                timeout=timeout)
+                (functools.partial(load_url, url) for url in urls))
         for future in fs.successful_futures():
             url = urls[future.index]
             url_to_content[url] = future.result()
@@ -41,17 +46,15 @@
 def main():
     for name, fn in [('sequential',
                       functools.partial(download_urls_sequential, URLS)),
-                     ('processes',
-                      functools.partial(download_urls_with_executor,
-                                        URLS,
-                                        futures.ProcessPoolExecutor(10))),
                      ('threads',
                       functools.partial(download_urls_with_executor,
                                         URLS,
                                         futures.ThreadPoolExecutor(10)))]:
         print '%s: ' % name.ljust(12),
         start = time.time()
-        fn()
-        print '%.2f seconds' % (time.time() - start)
+        url_map = fn()
+        print '%.2f seconds (%d of %d downloaded)' % (time.time() - start,
+                                                      len(url_map),
+                                                      len(URLS))
 
 main()
diff --git a/python2/futures/process.py b/python2/futures/process.py
index 03deb60..b96203b 100644
--- a/python2/futures/process.py
+++ b/python2/futures/process.py
@@ -119,6 +119,9 @@ def _result(executor_reference,
 
 class ProcessPoolExecutor(Executor):
     def __init__(self, max_processes=None):
+        import warnings
+        warnings.warn('ProcessPoolExecutor has known deadlocking behavior')
+
         if max_processes is None:
             max_processes = multiprocessing.cpu_count()
 
diff --git a/python3/crawl.py b/python3/crawl.py
index 10e35c3..46b8f7f 100644
--- a/python3/crawl.py
+++ b/python3/crawl.py
@@ -1,3 +1,5 @@
+"""Compare the speed of downloading URLs sequentially vs. using futures."""
+
 import datetime
 import functools
 import futures.thread
@@ -11,7 +13,11 @@ URLS = ['http://www.google.com/',
         'http://www.thisurlprobablydoesnotexist.com',
         'http://www.slashdot.org/',
         'http://www.python.org/',
-        'http://www.sweetapp.com/'] * 5
+        'http://www.bing.com/',
+        'http://www.facebook.com/',
+        'http://www.yahoo.com/',
+        'http://www.youtube.com/',
+        'http://www.blogger.com/']
 
 def load_url(url, timeout):
     return urllib.request.urlopen(url, timeout=timeout).read()
@@ -41,17 +47,15 @@
 def main():
     for name, fn in [('sequential',
                       functools.partial(download_urls_sequential, URLS)),
-                     ('processes',
-                      functools.partial(download_urls_with_executor,
-                                        URLS,
-                                        futures.ProcessPoolExecutor(10))),
                      ('threads',
                       functools.partial(download_urls_with_executor,
                                         URLS,
                                         futures.ThreadPoolExecutor(10)))]:
         print('%s: ' % name.ljust(12), end='')
         start = time.time()
-        fn()
-        print('%.2f seconds' % (time.time() - start))
+        url_map = fn()
+        print('%.2f seconds (%d of %d downloaded)' % (time.time() - start,
+                                                      len(url_map),
+                                                      len(URLS)))
 
 main()
diff --git a/python3/futures/process.py b/python3/futures/process.py
index 94d7988..71dd602 100644
--- a/python3/futures/process.py
+++ b/python3/futures/process.py
@@ -13,6 +13,7 @@ import atexit
 import queue
 import multiprocessing
 import threading
+import warnings
 import weakref
 
 _thread_references = set()
@@ -119,6 +120,8 @@ def _result(executor_reference,
 
 class ProcessPoolExecutor(Executor):
     def __init__(self, max_processes=None):
+        warnings.warn('ProcessPoolExecutor has known deadlocking behavior')
+
        if max_processes is None:
            max_processes = multiprocessing.cpu_count()
 
--
cgit v1.2.1
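
The new warning can be checked by simply constructing an executor. Below is a
minimal sketch, not part of the patch: it assumes the patched `futures`
package from this repository is importable, and relies only on the
`warnings.warn` call shown in the hunks above (which uses the default
UserWarning category).

    import warnings

    import futures  # the package patched by this commit

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter('always')  # ensure the warning is not filtered out
        # max_processes is the keyword shown in the patched __init__ signature.
        executor = futures.ProcessPoolExecutor(max_processes=2)

    # The constructor should have emitted the warning added by this commit.
    assert any('deadlocking' in str(w.message) for w in caught)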