author     Markus F.X.J. Oberhumer <markus@oberhumer.com>  2005-02-13 08:28:01 +0000
committer  Markus F.X.J. Oberhumer <markus@oberhumer.com>  2005-02-13 08:28:01 +0000
commit     8dba3b225c22379c82612a0bbf378eb123526482 (patch)
tree       3e753920da8252477ecd39baf9dd1e3ed5901855 /examples
parent     940ab47b86e96833f56e8bab5454bf10f4f4e8bf (diff)
download   pycurl-8dba3b225c22379c82612a0bbf378eb123526482.tar.gz
Cleaned up examples.
Diffstat (limited to 'examples')
-rw-r--r--   examples/file_upload.py       29
-rw-r--r--   examples/retriever-multi.py   20
-rw-r--r--   examples/retriever.py         71
3 files changed, 66 insertions, 54 deletions
diff --git a/examples/file_upload.py b/examples/file_upload.py
index 1ea9c9f..286b47a 100644
--- a/examples/file_upload.py
+++ b/examples/file_upload.py
@@ -3,26 +3,22 @@
# vi:ts=4:et
# $Id$
+import os, sys
import pycurl
-import sys
-import os.path
# Class which holds a file reference and the read callback
-class filereader:
-
- def __init__(self, f):
- self.f = f
-
+class FileReader:
+ def __init__(self, fp):
+ self.fp = fp
def read_callback(self, size):
- return self.f.read(size)
+ return self.fp.read(size)
# Check commandline arguments
if len(sys.argv) < 3:
print "Usage: %s <url> <file to upload>" % sys.argv[0]
raise SystemExit
-else:
- url = sys.argv[1]
- filename = sys.argv[2]
+url = sys.argv[1]
+filename = sys.argv[2]
if not os.path.exists(filename):
print "Error: the file '%s' does not exist" % filename
@@ -36,17 +32,14 @@ c.setopt(pycurl.UPLOAD, 1)
# Two versions with the same semantics here, but the filereader version
# is useful when you have to process the data which is read before returning
if 1:
- c.setopt(pycurl.READFUNCTION, filereader(open(filename, 'rb')).read_callback)
+ c.setopt(pycurl.READFUNCTION, FileReader(open(filename, 'rb')).read_callback)
else:
c.setopt(pycurl.READFUNCTION, open(filename, 'rb').read)
-# Set size of file to be uploaded, use LARGE option if file size is
-# greater than 2GB
+# Set the size of the file to be uploaded; we use the xxx_LARGE option so
+# that files larger than 2GB are handled as well
filesize = os.path.getsize(filename)
-if filesize > 2**31:
- c.setopt(pycurl.INFILESIZE_LARGE, filesize)
-else:
- c.setopt(pycurl.INFILESIZE, filesize)
+c.setopt(pycurl.INFILESIZE_LARGE, filesize)
# Start transfer
print 'Uploading file %s to url %s' % (filename, url)
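
For quick reference, the upload path in file_upload.py after this change boils down to the sketch below. This is only a minimal outline in the same Python 2 style as the example; the URL and filename are placeholders and error handling is omitted:

import os, pycurl

class FileReader:
    def __init__(self, fp):
        self.fp = fp
    def read_callback(self, size):
        return self.fp.read(size)

url = "http://example.com/upload"   # placeholder upload target
filename = "upload.dat"             # placeholder local file

c = pycurl.Curl()
c.setopt(pycurl.URL, url)
c.setopt(pycurl.UPLOAD, 1)
# The read callback feeds the request body from the open file.
c.setopt(pycurl.READFUNCTION, FileReader(open(filename, 'rb')).read_callback)
# INFILESIZE_LARGE handles any size, including files above 2GB,
# so the old 2**31 branch is no longer needed.
c.setopt(pycurl.INFILESIZE_LARGE, os.path.getsize(filename))
c.perform()
c.close()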
diff --git a/examples/retriever-multi.py b/examples/retriever-multi.py
index a30c41c..2ec3f9a 100644
--- a/examples/retriever-multi.py
+++ b/examples/retriever-multi.py
@@ -8,8 +8,9 @@
# concurrent connections>]
#
-import string, sys
+import sys
import pycurl
+
# We should ignore SIGPIPE when using pycurl.NOSIGNAL - see
# the libcurl tutorial for more info.
try:
@@ -23,7 +24,10 @@ except ImportError:
# Get args
num_conn = 10
try:
- urls = open(sys.argv[1]).readlines()
+ if sys.argv[1] == "-":
+ urls = sys.stdin.readlines()
+ else:
+ urls = open(sys.argv[1]).readlines()
if len(sys.argv) >= 3:
num_conn = int(sys.argv[2])
except:
@@ -33,15 +37,12 @@ except:
# Make a queue with (url, filename) tuples
queue = []
-fileno = 1
for url in urls:
- url = string.strip(url)
+ url = url.strip()
if not url or url[0] == "#":
continue
- filename = "doc_%d" % (fileno)
+ filename = "doc_%03d.dat" % (len(queue) + 1)
queue.append((url, filename))
- fileno = fileno + 1
-del fileno, url, urls
# Check args
@@ -53,7 +54,7 @@ print "PycURL %s (compiled against 0x%x)" % (pycurl.version, pycurl.COMPILE_LIBC
print "----- Getting", num_urls, "URLs using", num_conn, "connections -----"
-# Preallocate a list of curl objects
+# Pre-allocate a list of curl objects
m = pycurl.CurlMulti()
m.handles = []
for i in range(num_conn):
@@ -119,6 +120,3 @@ for c in m.handles:
c.close()
m.close()
-# Delete objects (just for testing the refcounts)
-del c, m, freelist, queue
-
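
The hunks above only touch argument parsing and output file naming; the download loop of retriever-multi.py itself is unchanged and not shown here. As orientation for readers of the patch, the pycurl multi interface that loop is built on follows this general pattern (a sketch for a single handle, not the exact code from the example):

import pycurl

m = pycurl.CurlMulti()
c = pycurl.Curl()
c.setopt(pycurl.URL, "http://example.com/")   # placeholder URL
fp = open("doc_001.dat", "wb")
c.setopt(pycurl.WRITEDATA, fp)
m.add_handle(c)

# Run the multi state machine until libcurl no longer asks to be called again.
while 1:
    ret, num_handles = m.perform()
    if ret != pycurl.E_CALL_MULTI_PERFORM:
        break
while num_handles:
    m.select(1.0)          # wait for socket activity
    while 1:
        ret, num_handles = m.perform()
        if ret != pycurl.E_CALL_MULTI_PERFORM:
            break

m.remove_handle(c)
c.close()
fp.close()
m.close()

With the new "-" handling in the argument parsing, the URL list can also be piped in, e.g. cat urls.txt | python retriever-multi.py - 10.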
diff --git a/examples/retriever.py b/examples/retriever.py
index a53e559..2c91d07 100644
--- a/examples/retriever.py
+++ b/examples/retriever.py
@@ -3,6 +3,11 @@
# vi:ts=4:et
# $Id$
+#
+# Usage: python retriever.py <file with URLs to fetch> [<# of
+# concurrent connections>]
+#
+
import sys, threading, Queue
import pycurl
@@ -16,6 +21,39 @@ except ImportError:
pass
+# Get args
+num_conn = 10
+try:
+ if sys.argv[1] == "-":
+ urls = sys.stdin.readlines()
+ else:
+ urls = open(sys.argv[1]).readlines()
+ if len(sys.argv) >= 3:
+ num_conn = int(sys.argv[2])
+except:
+ print "Usage: %s <file with URLs to fetch> [<# of concurrent connections>]" % sys.argv[0]
+ raise SystemExit
+
+
+# Make a queue with (url, filename) tuples
+queue = Queue.Queue()
+for url in urls:
+ url = url.strip()
+ if not url or url[0] == "#":
+ continue
+ filename = "doc_%03d.dat" % (len(queue.queue) + 1)
+ queue.put((url, filename))
+
+
+# Check args
+assert queue.queue, "no URLs given"
+num_urls = len(queue.queue)
+num_conn = min(num_conn, num_urls)
+assert 1 <= num_conn <= 10000, "invalid number of concurrent connections"
+print "PycURL %s (compiled against 0x%x)" % (pycurl.version, pycurl.COMPILE_LIBCURL_VERSION_NUM)
+print "----- Getting", num_urls, "URLs using", num_conn, "connections -----"
+
+
class WorkerThread(threading.Thread):
def __init__(self, queue):
threading.Thread.__init__(self)
@@ -27,15 +65,15 @@ class WorkerThread(threading.Thread):
url, filename = self.queue.get_nowait()
except Queue.Empty:
raise SystemExit
- f = open(filename, "wb")
+ fp = open(filename, "wb")
curl = pycurl.Curl()
+ curl.setopt(pycurl.URL, url)
curl.setopt(pycurl.FOLLOWLOCATION, 1)
curl.setopt(pycurl.MAXREDIRS, 5)
- curl.setopt(pycurl.URL, url)
- curl.setopt(pycurl.WRITEDATA, f)
- curl.setopt(pycurl.NOSIGNAL, 1)
curl.setopt(pycurl.CONNECTTIMEOUT, 30)
curl.setopt(pycurl.TIMEOUT, 300)
+ curl.setopt(pycurl.NOSIGNAL, 1)
+ curl.setopt(pycurl.WRITEDATA, fp)
try:
curl.perform()
except:
@@ -43,36 +81,19 @@ class WorkerThread(threading.Thread):
traceback.print_exc(file=sys.stderr)
sys.stderr.flush()
curl.close()
- f.close()
+ fp.close()
sys.stdout.write(".")
sys.stdout.flush()
-# Read list of URLs from file specified on commandline
-try:
- urls = open(sys.argv[1]).readlines()
- num_workers = int(sys.argv[2])
-except:
- # File or number of workers was not specified, show usage string
- print "Usage: %s <file with URLs to fetch> <number of worker threads>" % sys.argv[0]
- raise SystemExit
-
-# Initialize thread array and the file number used to store documents
-threads = []
-fileno = 0
-queue = Queue.Queue()
-
-# Fill the work input queue with URLs
-for url in urls:
- fileno = fileno + 1
- filename = "doc_%d" % (fileno,)
- queue.put((url, filename))
# Start a bunch of threads
-for num_threads in range(num_workers):
+threads = []
+for dummy in range(num_conn):
t = WorkerThread(queue)
t.start()
threads.append(t)
+
# Wait for all threads to finish
for thread in threads:
thread.join()
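
Both retriever scripts keep the signal handling mentioned in the context lines above ("We should ignore SIGPIPE when using pycurl.NOSIGNAL"). For reference, that idiom is roughly the following; the exact wording in the example files may differ slightly:

# Ignore SIGPIPE where the platform supports it; together with
# curl.setopt(pycurl.NOSIGNAL, 1) this keeps a dropped connection from
# killing the process when several transfers or threads run at once.
try:
    import signal
    from signal import SIGPIPE, SIG_IGN
    signal.signal(SIGPIPE, SIG_IGN)
except ImportError:
    pass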