| author | Markus F.X.J. Oberhumer <markus@oberhumer.com> | 2005-02-13 08:28:01 +0000 |
|---|---|---|
| committer | Markus F.X.J. Oberhumer <markus@oberhumer.com> | 2005-02-13 08:28:01 +0000 |
| commit | 8dba3b225c22379c82612a0bbf378eb123526482 (patch) | |
| tree | 3e753920da8252477ecd39baf9dd1e3ed5901855 /examples | |
| parent | 940ab47b86e96833f56e8bab5454bf10f4f4e8bf (diff) | |
| download | pycurl-8dba3b225c22379c82612a0bbf378eb123526482.tar.gz | |
Cleaned up examples.
Diffstat (limited to 'examples')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | examples/file_upload.py | 29 |
| -rw-r--r-- | examples/retriever-multi.py | 20 |
| -rw-r--r-- | examples/retriever.py | 71 |
3 files changed, 66 insertions, 54 deletions
```diff
diff --git a/examples/file_upload.py b/examples/file_upload.py
index 1ea9c9f..286b47a 100644
--- a/examples/file_upload.py
+++ b/examples/file_upload.py
@@ -3,26 +3,22 @@
 # vi:ts=4:et
 # $Id$
 
+import os, sys
 import pycurl
-import sys
-import os.path
 
 # Class which holds a file reference and the read callback
-class filereader:
-
-    def __init__(self, f):
-        self.f = f
-
+class FileReader:
+    def __init__(self, fp):
+        self.fp = fp
     def read_callback(self, size):
-        return self.f.read(size)
+        return self.fp.read(size)
 
 # Check commandline arguments
 if len(sys.argv) < 3:
     print "Usage: %s <url> <file to upload>" % sys.argv[0]
     raise SystemExit
-else:
-    url = sys.argv[1]
-    filename = sys.argv[2]
+url = sys.argv[1]
+filename = sys.argv[2]
 
 if not os.path.exists(filename):
     print "Error: the file '%s' does not exist" % filename
@@ -36,17 +32,14 @@ c.setopt(pycurl.UPLOAD, 1)
 # Two versions with the same semantics here, but the filereader version
 # is useful when you have to process the data which is read before returning
 if 1:
-    c.setopt(pycurl.READFUNCTION, filereader(open(filename, 'rb')).read_callback)
+    c.setopt(pycurl.READFUNCTION, FileReader(open(filename, 'rb')).read_callback)
 else:
     c.setopt(pycurl.READFUNCTION, open(filename, 'rb').read)
 
-# Set size of file to be uploaded, use LARGE option if file size is
-# greater than 2GB
+# Set size of file to be uploaded, we use xxx_LARGE option in case that
+# file size is greater than 2GB
 filesize = os.path.getsize(filename)
-if filesize > 2**31:
-    c.setopt(pycurl.INFILESIZE_LARGE, filesize)
-else:
-    c.setopt(pycurl.INFILESIZE, filesize)
+c.setopt(pycurl.INFILESIZE_LARGE, filesize)
 
 # Start transfer
 print 'Uploading file %s to url %s' % (filename, url)
```
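Taken together, the hunks above leave file_upload.py with a single READFUNCTION-plus-INFILESIZE_LARGE upload path. A condensed sketch of that final shape, in the same Python 2 dialect as the example (the URL and filename below are placeholders, not taken from the diff):

```python
# Sketch of the upload pattern after this commit (Python 2, as in the
# example). url and filename are placeholders.
import os, sys
import pycurl

class FileReader:
    def __init__(self, fp):
        self.fp = fp
    def read_callback(self, size):
        # libcurl asks for at most 'size' bytes per call
        return self.fp.read(size)

url = "http://example.com/upload"   # placeholder
filename = "data.bin"               # placeholder

c = pycurl.Curl()
c.setopt(pycurl.URL, url)
c.setopt(pycurl.UPLOAD, 1)
c.setopt(pycurl.READFUNCTION, FileReader(open(filename, 'rb')).read_callback)
# INFILESIZE_LARGE also covers files above 2GB, so the old
# size-dependent INFILESIZE/INFILESIZE_LARGE branch is gone
c.setopt(pycurl.INFILESIZE_LARGE, os.path.getsize(filename))
c.perform()
c.close()
```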
```diff
diff --git a/examples/retriever-multi.py b/examples/retriever-multi.py
index a30c41c..2ec3f9a 100644
--- a/examples/retriever-multi.py
+++ b/examples/retriever-multi.py
@@ -8,8 +8,9 @@
 #    concurrent connections>]
 #
 
-import string, sys
+import sys
 import pycurl
+
 # We should ignore SIGPIPE when using pycurl.NOSIGNAL - see
 # the libcurl tutorial for more info.
 try:
@@ -23,7 +24,10 @@ except ImportError:
 # Get args
 num_conn = 10
 try:
-    urls = open(sys.argv[1]).readlines()
+    if sys.argv[1] == "-":
+        urls = sys.stdin.readlines()
+    else:
+        urls = open(sys.argv[1]).readlines()
     if len(sys.argv) >= 3:
         num_conn = int(sys.argv[2])
 except:
@@ -33,15 +37,12 @@ except:
 
 # Make a queue with (url, filename) tuples
 queue = []
-fileno = 1
 for url in urls:
-    url = string.strip(url)
+    url = url.strip()
     if not url or url[0] == "#":
         continue
-    filename = "doc_%d" % (fileno)
+    filename = "doc_%03d.dat" % (len(queue) + 1)
     queue.append((url, filename))
-    fileno = fileno + 1
-del fileno, url, urls
 
 
 # Check args
@@ -53,7 +54,7 @@ print "PycURL %s (compiled against 0x%x)" % (pycurl.version, pycurl.COMPILE_LIBC
 print "----- Getting", num_urls, "URLs using", num_conn, "connections -----"
 
 
-# Preallocate a list of curl objects
+# Pre-allocate a list of curl objects
 m = pycurl.CurlMulti()
 m.handles = []
 for i in range(num_conn):
@@ -119,6 +120,3 @@ for c in m.handles:
     c.close()
 m.close()
 
-# Delete objects (just for testing the refcounts)
-del c, m, freelist, queue
-
```
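The hunks above only touch argument handling and file naming in retriever-multi.py; the multi-interface loop that actually drives the transfers is unchanged and not shown in this diff. For context, a minimal sketch of the standard pycurl CurlMulti idiom such a script is built around, reconstructed from the public pycurl API rather than from this commit (the URL and output filename are placeholders):

```python
# Minimal CurlMulti driving loop (sketch; not part of this diff).
import pycurl

m = pycurl.CurlMulti()
c = pycurl.Curl()
c.setopt(pycurl.URL, "http://example.com/")   # placeholder URL
fp = open("doc_001.dat", "wb")
c.setopt(pycurl.WRITEDATA, fp)
m.add_handle(c)

# Call perform() until it stops asking to be called again,
# sleeping in select() while libcurl has nothing to do.
num_handles = 1
while num_handles:
    while 1:
        ret, num_handles = m.perform()
        if ret != pycurl.E_CALL_MULTI_PERFORM:
            break
    if num_handles:
        m.select(1.0)

m.remove_handle(c)
c.close()
fp.close()
```

The real example multiplexes num_conn handles through this loop, re-adding a URL from the queue whenever a handle finishes.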
```diff
diff --git a/examples/retriever.py b/examples/retriever.py
index a53e559..2c91d07 100644
--- a/examples/retriever.py
+++ b/examples/retriever.py
@@ -3,6 +3,11 @@
 # vi:ts=4:et
 # $Id$
 
+#
+# Usage: python retriever.py <file with URLs to fetch> [<# of
+#    concurrent connections>]
+#
+
 import sys, threading, Queue
 import pycurl
 
@@ -16,6 +21,39 @@ except ImportError:
     pass
 
 
+# Get args
+num_conn = 10
+try:
+    if sys.argv[1] == "-":
+        urls = sys.stdin.readlines()
+    else:
+        urls = open(sys.argv[1]).readlines()
+    if len(sys.argv) >= 3:
+        num_conn = int(sys.argv[2])
+except:
+    print "Usage: %s <file with URLs to fetch> [<# of concurrent connections>]" % sys.argv[0]
+    raise SystemExit
+
+
+# Make a queue with (url, filename) tuples
+queue = Queue.Queue()
+for url in urls:
+    url = url.strip()
+    if not url or url[0] == "#":
+        continue
+    filename = "doc_%03d.dat" % (len(queue.queue) + 1)
+    queue.put((url, filename))
+
+
+# Check args
+assert queue.queue, "no URLs given"
+num_urls = len(queue.queue)
+num_conn = min(num_conn, num_urls)
+assert 1 <= num_conn <= 10000, "invalid number of concurrent connections"
+print "PycURL %s (compiled against 0x%x)" % (pycurl.version, pycurl.COMPILE_LIBCURL_VERSION_NUM)
+print "----- Getting", num_urls, "URLs using", num_conn, "connections -----"
+
+
 class WorkerThread(threading.Thread):
     def __init__(self, queue):
         threading.Thread.__init__(self)
@@ -27,15 +65,15 @@ class WorkerThread(threading.Thread):
             url, filename = self.queue.get_nowait()
         except Queue.Empty:
             raise SystemExit
-        f = open(filename, "wb")
+        fp = open(filename, "wb")
         curl = pycurl.Curl()
+        curl.setopt(pycurl.URL, url)
         curl.setopt(pycurl.FOLLOWLOCATION, 1)
         curl.setopt(pycurl.MAXREDIRS, 5)
-        curl.setopt(pycurl.URL, url)
-        curl.setopt(pycurl.WRITEDATA, f)
-        curl.setopt(pycurl.NOSIGNAL, 1)
         curl.setopt(pycurl.CONNECTTIMEOUT, 30)
         curl.setopt(pycurl.TIMEOUT, 300)
+        curl.setopt(pycurl.NOSIGNAL, 1)
+        curl.setopt(pycurl.WRITEDATA, fp)
         try:
             curl.perform()
         except:
@@ -43,36 +81,19 @@ class WorkerThread(threading.Thread):
             traceback.print_exc(file=sys.stderr)
             sys.stderr.flush()
         curl.close()
-        f.close()
+        fp.close()
         sys.stdout.write(".")
         sys.stdout.flush()
 
 
-# Read list of URLs from file specified on commandline
-try:
-    urls = open(sys.argv[1]).readlines()
-    num_workers = int(sys.argv[2])
-except:
-    # File or number of workers was not specified, show usage string
-    print "Usage: %s <file with URLs to fetch> <number of worker threads>" % sys.argv[0]
-    raise SystemExit
-
-# Initialize thread array and the file number used to store documents
-threads = []
-fileno = 0
-queue = Queue.Queue()
-
-# Fill the work input queue with URLs
-for url in urls:
-    fileno = fileno + 1
-    filename = "doc_%d" % (fileno,)
-    queue.put((url, filename))
 
 # Start a bunch of threads
-for num_threads in range(num_workers):
+threads = []
+for dummy in range(num_conn):
     t = WorkerThread(queue)
     t.start()
     threads.append(t)
 
+
 # Wait for all threads to finish
 for thread in threads:
     thread.join()
```
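The net effect of this commit on retriever.py is that argument parsing and queue building move to module level, mirroring retriever-multi.py, while the worker threads do nothing but pull (url, filename) pairs and fetch them. A condensed sketch of that final worker-thread pattern, again in the example's Python 2 dialect (the URL list and thread count below are placeholders):

```python
# Sketch of the thread-pool retrieval pattern after this commit
# (Python 2). The URL list and thread count are placeholders.
import threading, Queue
import pycurl

queue = Queue.Queue()
for i, url in enumerate(["http://example.com/a", "http://example.com/b"]):
    queue.put((url, "doc_%03d.dat" % (i + 1)))

class WorkerThread(threading.Thread):
    def __init__(self, queue):
        threading.Thread.__init__(self)
        self.queue = queue
    def run(self):
        while 1:
            try:
                url, filename = self.queue.get_nowait()
            except Queue.Empty:
                raise SystemExit    # queue drained, thread exits
            fp = open(filename, "wb")
            curl = pycurl.Curl()
            curl.setopt(pycurl.URL, url)
            curl.setopt(pycurl.NOSIGNAL, 1)   # needed in threaded use
            curl.setopt(pycurl.WRITEDATA, fp)
            curl.perform()
            curl.close()
            fp.close()

threads = [WorkerThread(queue) for dummy in range(2)]   # placeholder count
for t in threads:
    t.start()
for t in threads:
    t.join()
```

Because each thread simply exits on Queue.Empty, the pool drains the queue and terminates without any explicit shutdown signal, which is why the reworked example needs no bookkeeping beyond the threads list.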