author    brian.quinlan <devnull@localhost>    2009-10-02 05:29:54 +0000
committer brian.quinlan <devnull@localhost>    2009-10-02 05:29:54 +0000
commit    35a66ee20837957a6247602fbc653e0857c0b531 (patch)
tree      1d3d0d07c45c6a4c35fd589fc90a6124f60cc56a
parent    302fdf3ce110469902de4d8e84557f20356ef312 (diff)
download  futures-35a66ee20837957a6247602fbc653e0857c0b531.tar.gz
Remove use of multiprocess executor in crawl example. Also generate a warning if a ProcessPoolExecutor is used.
-rw-r--r--  python2/crawl.py            | 31
-rw-r--r--  python2/futures/process.py  |  3
-rw-r--r--  python3/crawl.py            | 18
-rw-r--r--  python3/futures/process.py  |  3
4 files changed, 34 insertions(+), 21 deletions(-)
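The second half of this change adds a constructor warning to ProcessPoolExecutor (see the process.py hunks below). A minimal sketch of verifying that the warning fires, assuming the backported futures package is importable under that name; everything except the executor class and the message text is illustrative:

    import warnings
    import futures

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter('always')
        executor = futures.ProcessPoolExecutor(2)
    # __init__ should have emitted the deadlock warning added in this commit.
    assert any('deadlocking' in str(w.message) for w in caught)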
diff --git a/python2/crawl.py b/python2/crawl.py
index a597c76..88d7c9e 100644
--- a/python2/crawl.py
+++ b/python2/crawl.py
@@ -1,3 +1,5 @@
+"""Compare the speed of downloading URLs sequentially vs. using futures."""
+
 import datetime
 import functools
 import futures.thread
@@ -11,26 +13,29 @@ URLS = ['http://www.google.com/',
         'http://www.thisurlprobablydoesnotexist.com',
         'http://www.slashdot.org/',
         'http://www.python.org/',
-        'http://www.sweetapp.com/'] * 5
+        'http://www.bing.com/',
+        'http://www.facebook.com/',
+        'http://www.yahoo.com/',
+        'http://www.youtube.com/',
+        'http://www.blogger.com/']
 
-def load_url(url, timeout):
-    return urllib2.urlopen(url, timeout=timeout).read()
+def load_url(url):
+    return urllib2.urlopen(url).read()
 
-def download_urls_sequential(urls, timeout=60):
+def download_urls_sequential(urls):
     url_to_content = {}
     for url in urls:
         try:
-            url_to_content[url] = load_url(url, timeout=timeout)
+            url_to_content[url] = load_url(url)
         except:
             pass
     return url_to_content
 
-def download_urls_with_executor(urls, executor, timeout=60):
+def download_urls_with_executor(urls, executor):
     try:
         url_to_content = {}
         fs = executor.run_to_futures(
-            (functools.partial(load_url, url, timeout) for url in urls),
-            timeout=timeout)
+            (functools.partial(load_url, url) for url in urls))
         for future in fs.successful_futures():
             url = urls[future.index]
             url_to_content[url] = future.result()
@@ -41,17 +46,15 @@ def download_urls_with_executor(urls, executor, timeout=60):
 def main():
     for name, fn in [('sequential',
                       functools.partial(download_urls_sequential, URLS)),
-                     ('processes',
-                      functools.partial(download_urls_with_executor,
-                                        URLS,
-                                        futures.ProcessPoolExecutor(10))),
                      ('threads',
                       functools.partial(download_urls_with_executor,
                                         URLS,
                                         futures.ThreadPoolExecutor(10)))]:
         print '%s: ' % name.ljust(12),
         start = time.time()
-        fn()
-        print '%.2f seconds' % (time.time() - start)
+        url_map = fn()
+        print '%.2f seconds (%d of %d downloaded)' % (time.time() - start,
+                                                      len(url_map),
+                                                      len(URLS))
 
 main()
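For readers who know the later concurrent.futures interface: the revised example uses this library's early API, in which run_to_futures() takes an iterable of zero-argument callables and returns a future list whose members record their position in the input. A minimal sketch of that pattern, using only names that appear in the diff above (run_to_futures, successful_futures, index, result); the square() helper is hypothetical:

    import functools
    import futures

    def square(x):
        return x * x

    executor = futures.ThreadPoolExecutor(4)
    # run_to_futures() consumes an iterable of callables taking no arguments.
    fs = executor.run_to_futures(functools.partial(square, i) for i in range(5))
    for future in fs.successful_futures():
        # future.index is the callable's position in the input iterable.
        print('%d -> %d' % (future.index, future.result()))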
diff --git a/python2/futures/process.py b/python2/futures/process.py
index 03deb60..b96203b 100644
--- a/python2/futures/process.py
+++ b/python2/futures/process.py
@@ -119,6 +119,9 @@ def _result(executor_reference,
 
 class ProcessPoolExecutor(Executor):
     def __init__(self, max_processes=None):
+        import warnings
+        warnings.warn('ProcessPoolExecutor has known deadlocking behavior')
+
         if max_processes is None:
             max_processes = multiprocessing.cpu_count()
 
diff --git a/python3/crawl.py b/python3/crawl.py
index 10e35c3..46b8f7f 100644
--- a/python3/crawl.py
+++ b/python3/crawl.py
@@ -1,3 +1,5 @@
+"""Compare the speed of downloading URLs sequentially vs. using futures."""
+
 import datetime
 import functools
 import futures.thread
@@ -11,7 +13,11 @@ URLS = ['http://www.google.com/',
         'http://www.thisurlprobablydoesnotexist.com',
         'http://www.slashdot.org/',
         'http://www.python.org/',
-        'http://www.sweetapp.com/'] * 5
+        'http://www.bing.com/',
+        'http://www.facebook.com/',
+        'http://www.yahoo.com/',
+        'http://www.youtube.com/',
+        'http://www.blogger.com/']
 
 def load_url(url, timeout):
     return urllib.request.urlopen(url, timeout=timeout).read()
@@ -41,17 +47,15 @@ def download_urls_with_executor(urls, executor, timeout=60):
 def main():
     for name, fn in [('sequential',
                       functools.partial(download_urls_sequential, URLS)),
-                     ('processes',
-                      functools.partial(download_urls_with_executor,
-                                        URLS,
-                                        futures.ProcessPoolExecutor(10))),
                      ('threads',
                       functools.partial(download_urls_with_executor,
                                         URLS,
                                         futures.ThreadPoolExecutor(10)))]:
         print('%s: ' % name.ljust(12), end='')
         start = time.time()
-        fn()
-        print('%.2f seconds' % (time.time() - start))
+        url_map = fn()
+        print('%.2f seconds (%d of %d downloaded)' % (time.time() - start,
+                                                      len(url_map),
+                                                      len(URLS)))
 
 main()
diff --git a/python3/futures/process.py b/python3/futures/process.py
index 94d7988..71dd602 100644
--- a/python3/futures/process.py
+++ b/python3/futures/process.py
@@ -13,6 +13,7 @@ import atexit
 import queue
 import multiprocessing
 import threading
+import warnings
 import weakref
 
 _thread_references = set()
@@ -119,6 +120,8 @@ def _result(executor_reference,
 
 class ProcessPoolExecutor(Executor):
     def __init__(self, max_processes=None):
+        warnings.warn('ProcessPoolExecutor has known deadlocking behavior')
+
         if max_processes is None:
             max_processes = multiprocessing.cpu_count()
 
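Callers who accept the deadlock risk and want to keep using ProcessPoolExecutor can silence the new warning through the standard warnings machinery. A sketch, assuming the exact message string added above:

    import warnings
    import futures

    with warnings.catch_warnings():
        # Match the message text introduced by this commit.
        warnings.filterwarnings(
            'ignore',
            message='ProcessPoolExecutor has known deadlocking behavior')
        executor = futures.ProcessPoolExecutor(2)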