| author | Markus F.X.J. Oberhumer <markus@oberhumer.com> | 2005-02-13 08:28:01 +0000 |
|---|---|---|
| committer | Markus F.X.J. Oberhumer <markus@oberhumer.com> | 2005-02-13 08:28:01 +0000 |
| commit | 8dba3b225c22379c82612a0bbf378eb123526482 (patch) | |
| tree | 3e753920da8252477ecd39baf9dd1e3ed5901855 /examples | |
| parent | 940ab47b86e96833f56e8bab5454bf10f4f4e8bf (diff) | |
| download | pycurl-8dba3b225c22379c82612a0bbf378eb123526482.tar.gz | |
Cleaned up examples.
Diffstat (limited to 'examples')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | examples/file_upload.py | 29 |
| -rw-r--r-- | examples/retriever-multi.py | 20 |
| -rw-r--r-- | examples/retriever.py | 71 |
3 files changed, 66 insertions, 54 deletions
```diff
diff --git a/examples/file_upload.py b/examples/file_upload.py
index 1ea9c9f..286b47a 100644
--- a/examples/file_upload.py
+++ b/examples/file_upload.py
@@ -3,26 +3,22 @@
 # vi:ts=4:et
 # $Id$
 
+import os, sys
 import pycurl
-import sys
-import os.path
 
 # Class which holds a file reference and the read callback
-class filereader:
-
-    def __init__(self, f):
-        self.f = f
-
+class FileReader:
+    def __init__(self, fp):
+        self.fp = fp
     def read_callback(self, size):
-        return self.f.read(size)
+        return self.fp.read(size)
 
 # Check commandline arguments
 if len(sys.argv) < 3:
     print "Usage: %s <url> <file to upload>" % sys.argv[0]
     raise SystemExit
-else:
-    url = sys.argv[1]
-    filename = sys.argv[2]
+url = sys.argv[1]
+filename = sys.argv[2]
 
 if not os.path.exists(filename):
     print "Error: the file '%s' does not exist" % filename
@@ -36,17 +32,14 @@ c.setopt(pycurl.UPLOAD, 1)
 # Two versions with the same semantics here, but the filereader version
 # is useful when you have to process the data which is read before returning
 if 1:
-    c.setopt(pycurl.READFUNCTION, filereader(open(filename, 'rb')).read_callback)
+    c.setopt(pycurl.READFUNCTION, FileReader(open(filename, 'rb')).read_callback)
 else:
     c.setopt(pycurl.READFUNCTION, open(filename, 'rb').read)
 
-# Set size of file to be uploaded, use LARGE option if file size is
-# greater than 2GB
+# Set size of file to be uploaded, we use xxx_LARGE option in case that
+# file size is greater than 2GB
 filesize = os.path.getsize(filename)
-if filesize > 2**31:
-    c.setopt(pycurl.INFILESIZE_LARGE, filesize)
-else:
-    c.setopt(pycurl.INFILESIZE, filesize)
+c.setopt(pycurl.INFILESIZE_LARGE, filesize)
 
 # Start transfer
 print 'Uploading file %s to url %s' % (filename, url)
```
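Taken together, the hunks above leave file_upload.py with a single READFUNCTION-plus-INFILESIZE_LARGE upload path. A condensed sketch of that final shape, in the same Python 2 dialect as the example (the URL and filename below are placeholders, not taken from the diff):

```python
# Sketch of the upload pattern after this commit (Python 2, as in the
# example). url and filename are placeholders.
import os, sys
import pycurl

class FileReader:
    def __init__(self, fp):
        self.fp = fp
    def read_callback(self, size):
        # libcurl asks for at most 'size' bytes per call
        return self.fp.read(size)

url = "http://example.com/upload"   # placeholder
filename = "data.bin"               # placeholder

c = pycurl.Curl()
c.setopt(pycurl.URL, url)
c.setopt(pycurl.UPLOAD, 1)
c.setopt(pycurl.READFUNCTION, FileReader(open(filename, 'rb')).read_callback)
# INFILESIZE_LARGE also covers files above 2GB, so the old
# size-dependent INFILESIZE/INFILESIZE_LARGE branch is gone
c.setopt(pycurl.INFILESIZE_LARGE, os.path.getsize(filename))
c.perform()
c.close()
```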
```diff
diff --git a/examples/retriever-multi.py b/examples/retriever-multi.py
index a30c41c..2ec3f9a 100644
--- a/examples/retriever-multi.py
+++ b/examples/retriever-multi.py
@@ -8,8 +8,9 @@
 #    concurrent connections>]
 #
 
-import string, sys
+import sys
 import pycurl
+
 # We should ignore SIGPIPE when using pycurl.NOSIGNAL - see
 # the libcurl tutorial for more info.
 try:
@@ -23,7 +24,10 @@ except ImportError:
 # Get args
 num_conn = 10
 try:
-    urls = open(sys.argv[1]).readlines()
+    if sys.argv[1] == "-":
+        urls = sys.stdin.readlines()
+    else:
+        urls = open(sys.argv[1]).readlines()
     if len(sys.argv) >= 3:
         num_conn = int(sys.argv[2])
 except:
@@ -33,15 +37,12 @@ except:
 
 # Make a queue with (url, filename) tuples
 queue = []
-fileno = 1
 for url in urls:
-    url = string.strip(url)
+    url = url.strip()
     if not url or url[0] == "#":
         continue
-    filename = "doc_%d" % (fileno)
+    filename = "doc_%03d.dat" % (len(queue) + 1)
     queue.append((url, filename))
-    fileno = fileno + 1
-del fileno, url, urls
 
 
 # Check args
@@ -53,7 +54,7 @@ print "PycURL %s (compiled against 0x%x)" % (pycurl.version, pycurl.COMPILE_LIBC
 print "----- Getting", num_urls, "URLs using", num_conn, "connections -----"
 
 
-# Preallocate a list of curl objects
+# Pre-allocate a list of curl objects
 m = pycurl.CurlMulti()
 m.handles = []
 for i in range(num_conn):
@@ -119,6 +120,3 @@ for c in m.handles:
     c.close()
 m.close()
 
-# Delete objects (just for testing the refcounts)
-del c, m, freelist, queue
-
```
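The hunks above only touch argument handling and file naming in retriever-multi.py; the multi-interface loop that actually drives the transfers is unchanged and not shown in this diff. For context, a minimal sketch of the standard pycurl CurlMulti idiom such a script is built around, reconstructed from the public pycurl API rather than from this commit (the URL and output filename are placeholders):

```python
# Minimal CurlMulti driving loop (sketch; not part of this diff).
import pycurl

m = pycurl.CurlMulti()
c = pycurl.Curl()
c.setopt(pycurl.URL, "http://example.com/")   # placeholder URL
fp = open("doc_001.dat", "wb")
c.setopt(pycurl.WRITEDATA, fp)
m.add_handle(c)

# Call perform() until it stops asking to be called again,
# sleeping in select() while libcurl has nothing to do.
num_handles = 1
while num_handles:
    while 1:
        ret, num_handles = m.perform()
        if ret != pycurl.E_CALL_MULTI_PERFORM:
            break
    if num_handles:
        m.select(1.0)

m.remove_handle(c)
c.close()
fp.close()
```

The real example multiplexes num_conn handles through this loop, re-adding a URL from the queue whenever a handle finishes.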
```diff
diff --git a/examples/retriever.py b/examples/retriever.py
index a53e559..2c91d07 100644
--- a/examples/retriever.py
+++ b/examples/retriever.py
@@ -3,6 +3,11 @@
 # vi:ts=4:et
 # $Id$
 
+#
+# Usage: python retriever.py <file with URLs to fetch> [<# of
+#    concurrent connections>]
+#
+
 import sys, threading, Queue
 import pycurl
 
@@ -16,6 +21,39 @@ except ImportError:
     pass
 
 
+# Get args
+num_conn = 10
+try:
+    if sys.argv[1] == "-":
+        urls = sys.stdin.readlines()
+    else:
+        urls = open(sys.argv[1]).readlines()
+    if len(sys.argv) >= 3:
+        num_conn = int(sys.argv[2])
+except:
+    print "Usage: %s <file with URLs to fetch> [<# of concurrent connections>]" % sys.argv[0]
+    raise SystemExit
+
+
+# Make a queue with (url, filename) tuples
+queue = Queue.Queue()
+for url in urls:
+    url = url.strip()
+    if not url or url[0] == "#":
+        continue
+    filename = "doc_%03d.dat" % (len(queue.queue) + 1)
+    queue.put((url, filename))
+
+
+# Check args
+assert queue.queue, "no URLs given"
+num_urls = len(queue.queue)
+num_conn = min(num_conn, num_urls)
+assert 1 <= num_conn <= 10000, "invalid number of concurrent connections"
+print "PycURL %s (compiled against 0x%x)" % (pycurl.version, pycurl.COMPILE_LIBCURL_VERSION_NUM)
+print "----- Getting", num_urls, "URLs using", num_conn, "connections -----"
+
+
 class WorkerThread(threading.Thread):
     def __init__(self, queue):
         threading.Thread.__init__(self)
@@ -27,15 +65,15 @@ class WorkerThread(threading.Thread):
             url, filename = self.queue.get_nowait()
         except Queue.Empty:
             raise SystemExit
-        f = open(filename, "wb")
+        fp = open(filename, "wb")
         curl = pycurl.Curl()
+        curl.setopt(pycurl.URL, url)
         curl.setopt(pycurl.FOLLOWLOCATION, 1)
         curl.setopt(pycurl.MAXREDIRS, 5)
-        curl.setopt(pycurl.URL, url)
-        curl.setopt(pycurl.WRITEDATA, f)
-        curl.setopt(pycurl.NOSIGNAL, 1)
         curl.setopt(pycurl.CONNECTTIMEOUT, 30)
         curl.setopt(pycurl.TIMEOUT, 300)
+        curl.setopt(pycurl.NOSIGNAL, 1)
+        curl.setopt(pycurl.WRITEDATA, fp)
         try:
             curl.perform()
         except:
@@ -43,36 +81,19 @@ class WorkerThread(threading.Thread):
             traceback.print_exc(file=sys.stderr)
             sys.stderr.flush()
         curl.close()
-        f.close()
+        fp.close()
         sys.stdout.write(".")
         sys.stdout.flush()
 
 
-# Read list of URLs from file specified on commandline
-try:
-    urls = open(sys.argv[1]).readlines()
-    num_workers = int(sys.argv[2])
-except:
-    # File or number of workers was not specified, show usage string
-    print "Usage: %s <file with URLs to fetch> <number of worker threads>" % sys.argv[0]
-    raise SystemExit
-
-# Initialize thread array and the file number used to store documents
-threads = []
-fileno = 0
-queue = Queue.Queue()
-
-# Fill the work input queue with URLs
-for url in urls:
-    fileno = fileno + 1
-    filename = "doc_%d" % (fileno,)
-    queue.put((url, filename))
 
 # Start a bunch of threads
-for num_threads in range(num_workers):
+threads = []
+for dummy in range(num_conn):
     t = WorkerThread(queue)
     t.start()
     threads.append(t)
 
+
 # Wait for all threads to finish
 for thread in threads:
     thread.join()
```
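The net effect of this commit on retriever.py is that argument parsing and queue building move to module level, mirroring retriever-multi.py, while the worker threads do nothing but pull (url, filename) pairs and fetch them. A condensed sketch of that final worker-thread pattern, again in the example's Python 2 dialect (the URL list and thread count below are placeholders):

```python
# Sketch of the thread-pool retrieval pattern after this commit
# (Python 2). The URL list and thread count are placeholders.
import threading, Queue
import pycurl

queue = Queue.Queue()
for i, url in enumerate(["http://example.com/a", "http://example.com/b"]):
    queue.put((url, "doc_%03d.dat" % (i + 1)))

class WorkerThread(threading.Thread):
    def __init__(self, queue):
        threading.Thread.__init__(self)
        self.queue = queue
    def run(self):
        while 1:
            try:
                url, filename = self.queue.get_nowait()
            except Queue.Empty:
                raise SystemExit    # queue drained, thread exits
            fp = open(filename, "wb")
            curl = pycurl.Curl()
            curl.setopt(pycurl.URL, url)
            curl.setopt(pycurl.NOSIGNAL, 1)   # needed in threaded use
            curl.setopt(pycurl.WRITEDATA, fp)
            curl.perform()
            curl.close()
            fp.close()

threads = [WorkerThread(queue) for dummy in range(2)]   # placeholder count
for t in threads:
    t.start()
for t in threads:
    t.join()
```

Because each thread simply exits on Queue.Empty, the pool drains the queue and terminates without any explicit shutdown signal, which is why the reworked example needs no bookkeeping beyond the threads list.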