author    brian.quinlan <devnull@localhost>    2009-10-02 05:29:54 +0000
committer brian.quinlan <devnull@localhost>    2009-10-02 05:29:54 +0000
commit    35a66ee20837957a6247602fbc653e0857c0b531 (patch)
tree      1d3d0d07c45c6a4c35fd589fc90a6124f60cc56a
parent    302fdf3ce110469902de4d8e84557f20356ef312 (diff)
download  futures-35a66ee20837957a6247602fbc653e0857c0b531.tar.gz
Remove use of multiprocess executor in crawl example. Also generate a warning if a ProcessPoolExecutor is used.
-rw-r--r--  python2/crawl.py            | 31
-rw-r--r--  python2/futures/process.py  |  3
-rw-r--r--  python3/crawl.py            | 18
-rw-r--r--  python3/futures/process.py  |  3
4 files changed, 34 insertions(+), 21 deletions(-)
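The second half of this change adds a constructor warning to ProcessPoolExecutor (see the process.py hunks below). A minimal sketch of verifying that the warning fires, assuming the backported futures package is importable under that name; everything except the executor class and the message text is illustrative:

    import warnings
    import futures

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter('always')
        executor = futures.ProcessPoolExecutor(2)
    # __init__ should have emitted the deadlock warning added in this commit.
    assert any('deadlocking' in str(w.message) for w in caught)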
diff --git a/python2/crawl.py b/python2/crawl.py
index a597c76..88d7c9e 100644
--- a/python2/crawl.py
+++ b/python2/crawl.py
@@ -1,3 +1,5 @@
+"""Compare the speed of downloading URLs sequentially vs. using futures."""
+
 import datetime
 import functools
 import futures.thread
@@ -11,26 +13,29 @@ URLS = ['http://www.google.com/',
         'http://www.thisurlprobablydoesnotexist.com',
         'http://www.slashdot.org/',
         'http://www.python.org/',
-        'http://www.sweetapp.com/'] * 5
+        'http://www.bing.com/',
+        'http://www.facebook.com/',
+        'http://www.yahoo.com/',
+        'http://www.youtube.com/',
+        'http://www.blogger.com/']
 
-def load_url(url, timeout):
-    return urllib2.urlopen(url, timeout=timeout).read()
+def load_url(url):
+    return urllib2.urlopen(url).read()
 
-def download_urls_sequential(urls, timeout=60):
+def download_urls_sequential(urls):
     url_to_content = {}
     for url in urls:
         try:
-            url_to_content[url] = load_url(url, timeout=timeout)
+            url_to_content[url] = load_url(url)
         except:
             pass
     return url_to_content
 
-def download_urls_with_executor(urls, executor, timeout=60):
+def download_urls_with_executor(urls, executor):
     try:
         url_to_content = {}
         fs = executor.run_to_futures(
-            (functools.partial(load_url, url, timeout) for url in urls),
-            timeout=timeout)
+            (functools.partial(load_url, url) for url in urls))
         for future in fs.successful_futures():
             url = urls[future.index]
             url_to_content[url] = future.result()
@@ -41,17 +46,15 @@ def download_urls_with_executor(urls, executor, timeout=60):
 def main():
     for name, fn in [('sequential',
                       functools.partial(download_urls_sequential, URLS)),
-                     ('processes',
-                      functools.partial(download_urls_with_executor,
-                                        URLS,
-                                        futures.ProcessPoolExecutor(10))),
                      ('threads',
                       functools.partial(download_urls_with_executor,
                                         URLS,
                                         futures.ThreadPoolExecutor(10)))]:
         print '%s: ' % name.ljust(12),
         start = time.time()
-        fn()
-        print '%.2f seconds' % (time.time() - start)
+        url_map = fn()
+        print '%.2f seconds (%d of %d downloaded)' % (time.time() - start,
+                                                      len(url_map),
+                                                      len(URLS))
 
 main()
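For readers who know the later concurrent.futures interface: the revised example uses this library's early API, in which run_to_futures() takes an iterable of zero-argument callables and returns a future list whose members record their position in the input. A minimal sketch of that pattern, using only names that appear in the diff above (run_to_futures, successful_futures, index, result); the square() helper is hypothetical:

    import functools
    import futures

    def square(x):
        return x * x

    executor = futures.ThreadPoolExecutor(4)
    # run_to_futures() consumes an iterable of callables taking no arguments.
    fs = executor.run_to_futures(functools.partial(square, i) for i in range(5))
    for future in fs.successful_futures():
        # future.index is the callable's position in the input iterable.
        print('%d -> %d' % (future.index, future.result()))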
diff --git a/python2/futures/process.py b/python2/futures/process.py
index 03deb60..b96203b 100644
--- a/python2/futures/process.py
+++ b/python2/futures/process.py
@@ -119,6 +119,9 @@ def _result(executor_reference,
 
 class ProcessPoolExecutor(Executor):
     def __init__(self, max_processes=None):
+        import warnings
+        warnings.warn('ProcessPoolExecutor has known deadlocking behavior')
+
         if max_processes is None:
             max_processes = multiprocessing.cpu_count()
 
diff --git a/python3/crawl.py b/python3/crawl.py
index 10e35c3..46b8f7f 100644
--- a/python3/crawl.py
+++ b/python3/crawl.py
@@ -1,3 +1,5 @@
+"""Compare the speed of downloading URLs sequentially vs. using futures."""
+
 import datetime
 import functools
 import futures.thread
@@ -11,7 +13,11 @@ URLS = ['http://www.google.com/',
         'http://www.thisurlprobablydoesnotexist.com',
         'http://www.slashdot.org/',
         'http://www.python.org/',
-        'http://www.sweetapp.com/'] * 5
+        'http://www.bing.com/',
+        'http://www.facebook.com/',
+        'http://www.yahoo.com/',
+        'http://www.youtube.com/',
+        'http://www.blogger.com/']
 
 def load_url(url, timeout):
     return urllib.request.urlopen(url, timeout=timeout).read()
@@ -41,17 +47,15 @@ def download_urls_with_executor(urls, executor, timeout=60):
 def main():
     for name, fn in [('sequential',
                       functools.partial(download_urls_sequential, URLS)),
-                     ('processes',
-                      functools.partial(download_urls_with_executor,
-                                        URLS,
-                                        futures.ProcessPoolExecutor(10))),
                      ('threads',
                       functools.partial(download_urls_with_executor,
                                         URLS,
                                         futures.ThreadPoolExecutor(10)))]:
         print('%s: ' % name.ljust(12), end='')
         start = time.time()
-        fn()
-        print('%.2f seconds' % (time.time() - start))
+        url_map = fn()
+        print('%.2f seconds (%d of %d downloaded)' % (time.time() - start,
+                                                      len(url_map),
+                                                      len(URLS)))
 
 main()
diff --git a/python3/futures/process.py b/python3/futures/process.py
index 94d7988..71dd602 100644
--- a/python3/futures/process.py
+++ b/python3/futures/process.py
@@ -13,6 +13,7 @@ import atexit
 import queue
 import multiprocessing
 import threading
+import warnings
 import weakref
 
 _thread_references = set()
@@ -119,6 +120,8 @@ def _result(executor_reference,
 
 class ProcessPoolExecutor(Executor):
     def __init__(self, max_processes=None):
+        warnings.warn('ProcessPoolExecutor has known deadlocking behavior')
+
         if max_processes is None:
             max_processes = multiprocessing.cpu_count()
 
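Callers who accept the deadlock risk and want to keep using ProcessPoolExecutor can silence the new warning through the standard warnings machinery. A sketch, assuming the exact message string added above:

    import warnings
    import futures

    with warnings.catch_warnings():
        # Match the message text introduced by this commit.
        warnings.filterwarnings(
            'ignore',
            message='ProcessPoolExecutor has known deadlocking behavior')
        executor = futures.ProcessPoolExecutor(2)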