summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Andrey Petrov <andrey.petrov@shazow.net> 2013-11-29 17:11:45 -0500
committer: Andrey Petrov <andrey.petrov@shazow.net> 2013-11-29 17:11:45 -0500
commit: eefde5ca3c2d07764b02475d84a0b8b41dc15b31 (patch)
tree: ceab1b3cabbdaaac989981b4c9dfa03a121869ec
parent: 9796a765ceab81aa15b4ad3c61e70e4f71748cf9 (diff)
download: urllib3-filepost-stream.tar.gz
Importing old branch for posterity. (branch: filepost-stream)
Original message: My very ambitious attempt at allowing streamable iterator files for POST uploading. Not too happy with it yet.
-rw-r--r-- test/dummy_server.py 3
-rw-r--r-- test/test_filepost.py 31
-rw-r--r-- test/test_withdummy.py 20
-rw-r--r-- urllib3/connectionpool.py 30
-rw-r--r-- urllib3/filepost.py 181
5 files changed, 232 insertions, 33 deletions
diff --git a/test/dummy_server.py b/test/dummy_server.py
index 676ba5f8..8e79b0bd 100644
--- a/test/dummy_server.py
+++ b/test/dummy_server.py
@@ -39,6 +39,9 @@ class TestingApp(object):
size = int(request.params.get('upload_size', '0'))
file = request.params.get(param)
+ if file == None:
+ return Response("Missing parameter: %s" % param, status='400')
+
if not isinstance(file, FieldStorage):
return Response("Not a file: %s" % param, status='400')
diff --git a/test/test_filepost.py b/test/test_filepost.py
new file mode 100644
index 00000000..599cd037
--- /dev/null
+++ b/test/test_filepost.py
@@ -0,0 +1,31 @@
+import unittest
+
+import sys
+sys.path.append('../')
+
+from urllib3 import filepost
+from StringIO import StringIO
+
+class TestFilePost(unittest.TestCase):
+ def test_generator(self):
+ fields = {
+ 'foo': 'bar',
+ 'somefile': ('name.txt', StringIO('trolololol')),
+ }
+
+ stream = filepost.MultipartEncoderGenerator(fields, boundary="boundary")
+ body = ''.join(chunk for chunk in filepost.IterStreamer(stream))
+ self.assertEqual(body, u'--boundary\r\nContent-Disposition: form-data; name="somefile"; filename="name.txt"\r\nContent-Type: text/plain\r\n\r\ntrolololol\r\n--boundary\r\nContent-Disposition: form-data; name="foo"\r\nContent-Type: text/plain\r\n\r\nbar\r\n--boundary--\r\n')
+
+ def test_len(self):
+ fields = {
+ 'foo': 'bar',
+ 'somefile': ('name.txt', StringIO('trolololol')),
+ }
+
+ iterdata = filepost.MultipartEncoderGenerator(fields, boundary="boundary")
+ predicted_size = len(iterdata)
+
+ body = ''.join(chunk for chunk in filepost.IterStreamer(iterdata))
+
+ self.assertEqual(len(body), predicted_size)
diff --git a/test/test_withdummy.py b/test/test_withdummy.py
index a5f79f33..69e4efb7 100644
--- a/test/test_withdummy.py
+++ b/test/test_withdummy.py
@@ -21,7 +21,7 @@ class TestConnectionPool(unittest.TestCase):
r = self.http_pool.get_url('/', retries=1)
if r.data != "Dummy server!":
raise Exception("Got unexpected response: %s" % r.data)
- except Exception, e:
+ except MaxRetryError, e:
raise Exception("Dummy server not running, make sure HOST and PORT correspond to the dummy server: %s" % e.message)
return super(TestConnectionPool, self).__init__(*args, **kw)
@@ -57,11 +57,23 @@ class TestConnectionPool(unittest.TestCase):
r = self.http_pool.post_url('/upload', fields=fields)
self.assertEquals(r.status, 200, r.data)
- def test_unicode_upload(self):
- fields = {
- u'\xe2\x99\xa5': (u'\xe2\x99\xa5.txt', u'\xe2\x99\xa5'),
+ def _make_fields(self, s):
+ return {
+ u'upload_param': s,
+ u'upload_filename': '%s.txt' % s,
+ u'upload_size': u'3',
+ s: ('%s.txt' % s, s),
}
+ def test_unicode_decoded(self):
+ fields = self._make_fields('\xe2\x99\xa5')
+
+ r = self.http_pool.post_url('/upload', fields=fields)
+ self.assertEquals(r.status, 200, r.data)
+
+ def test_unicode_encoded(self):
+ fields = self._make_fields(u'\u2665')
+
r = self.http_pool.post_url('/upload', fields=fields)
self.assertEquals(r.status, 200, r.data)
diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py
index 3fc073ae..35cc6c4d 100644
--- a/urllib3/connectionpool.py
+++ b/urllib3/connectionpool.py
@@ -10,7 +10,8 @@ except ImportError, e:
try:
import ssl
except ImportError, e:
- ssl = None
+ class ssl(object):
+ SSLError = None
from urllib import urlencode
from httplib import HTTPConnection, HTTPSConnection, HTTPException
@@ -106,7 +107,28 @@ class HTTPResponse(object):
## Connection objects
-class VerifiedHTTPSConnection(HTTPSConnection):
+class StreamableMixin(object):
+ def send(self, data):
+ if isinstance(data, str):
+ HTTPConnection.send(self, data)
+ elif hasattr(data, '__iter__'):
+ for chunk in data:
+ HTTPConnection.send(self, chunk)
+ else:
+ raise TypeError("data object is not an iterable", data)
+
+class StreamableHTTPConnection(HTTPConnection, StreamableMixin):
+ # FIXME: Hack for old-style Python classes with broken inheritance
+ def send(self, data):
+ StreamableMixin.send(self, data)
+
+class StreamableHTTPSConnection(HTTPSConnection, StreamableMixin):
+ # FIXME: Hack for old-style Python classes with broken inheritance
+ def send(self, data):
+ StreamableMixin.send(self, data)
+
+
+class VerifiedHTTPSConnection(StreamableHTTPSConnection):
"""
Based on httplib.HTTPSConnection but wraps the socket with SSL certification.
"""
@@ -189,7 +211,7 @@ class HTTPConnectionPool(object):
"""
self.num_connections += 1
log.info("Starting new HTTP connection (%d): %s" % (self.num_connections, self.host))
- return HTTPConnection(host=self.host, port=self.port)
+ return StreamableHTTPConnection(host=self.host, port=self.port)
def _get_conn(self, timeout=None):
"""
@@ -369,7 +391,7 @@ class HTTPSConnectionPool(HTTPConnectionPool):
log.info("Starting new HTTPS connection (%d): %s" % (self.num_connections, self.host))
if not ssl:
- return HTTPSConnection(host=self.host, port=self.port)
+ return StreamableHTTPSConnection(host=self.host, port=self.port)
connection = VerifiedHTTPSConnection(host=self.host, port=self.port)
connection.set_cert(key_file=self.key_file, cert_file=self.cert_file, cert_reqs=self.cert_reqs, ca_certs=self.ca_certs)
diff --git a/urllib3/filepost.py b/urllib3/filepost.py
index a4f54645..ebc8214a 100644
--- a/urllib3/filepost.py
+++ b/urllib3/filepost.py
@@ -5,40 +5,171 @@ try:
except:
from StringIO import StringIO
-import codecs
-writer = codecs.lookup('utf-8')[3]
def get_content_type(filename):
return mimetypes.guess_type(filename)[0] or 'application/octet-stream'
-def encode_multipart_formdata(fields):
- body = StringIO()
- BOUNDARY = mimetools.choose_boundary()
- for fieldname, value in fields.iteritems():
- body.write("--%s\r\n" % (BOUNDARY))
+def encode(str):
+ return str.encode('utf-8')
- if isinstance(value, tuple):
- filename, data = value
- body.write('Content-Disposition: form-data; name="%s"; filename="%s"\r\n' % (fieldname, filename))
- body.write('Content-Type: %s\r\n\r\n' % (get_content_type(filename)))
- else:
- data = value
- body.write('Content-Disposition: form-data; name="%s"\r\n' % (fieldname))
- body.write('Content-Type: text/plain\r\n\r\n')
+def file_size(fp):
+ pos = fp.tell()
+ fp.seek(0, 2)
+ size = fp.tell()
+ fp.seek(pos)
+ return size-pos
- if isinstance(data, int):
- data = str(data) # Backwards compatibility
+class IterStreamer(object):
+ """
+ File-like streaming iterator.
+ """
+ def __init__(self, generator):
+ self.generator = generator
+ self.iterator = iter(generator)
+ self.leftover = ''
- if isinstance(data, unicode):
- writer(body).write(data)
- else:
- body.write(data)
+ def __len__(self):
+ return self.generator.__len__()
- body.write('\r\n')
+ def __iter__(self):
+ return self.iterator
- body.write('--%s--\r\n' % (BOUNDARY))
+ def next(self):
+ return self.iterator.next()
- content_type = 'multipart/form-data; boundary=%s' % BOUNDARY
+ def read(self, size):
+ data = self.leftover
+ count = len(self.leftover)
+ try:
+ while count < size:
+ chunk = self.next()
+ data += chunk
+ count += len(chunk)
+ except StopIteration, e:
+ pass
- return body.getvalue(), content_type
+ if count > size:
+ self.leftover = data[size:]
+
+ return data[:size]
+
+
+class MultipartEncoderGenerator(object):
+ """
+ Generator yielding chunk-by-chunk streaming data from fields, with proper
+ headers and boundary separators along the way. This is useful for streaming
+ large files as iterators without loading the entire data body into memory.
+
+ ``fields`` is a dictionary where the parameter name is the key and the value
+ is either a (filename, data) tuple or just data.
+
+ The data can be a unicode string, an iterator producing strings, or a file-like
+ object. File-like objects are read ``chunk_size`` bytes at a time.
+
+ If no ``boundary`` is specified then a random one is used.
+ """
+ def __init__(self, fields, boundary=None, chunk_size=8192):
+ self.fields = fields
+ self.chunk_size = chunk_size
+ self.boundary = boundary or mimetools.choose_boundary()
+
+ def get_content_type(self):
+ return 'multipart/form-data; boundary=%s' % self.boundary
+
+ def __len__(self):
+ """
+ Figure out the expected body size by iterating over the fields as if they
+ contained empty files, while accumulating the value file sizes as
+ efficiently as we can.
+ """
+ empty_fields = {}
+ size = 0
+ for fieldname, value in self.fields.iteritems():
+ if isinstance(value, tuple):
+ filename, data = value
+ empty_fields[fieldname] = (filename, '')
+ else:
+ data = value
+ empty_fields[fieldname] = ''
+
+ if hasattr(data, '__len__'):
+ size += len(data)
+ elif isinstance(data, int):
+ size += len(str(data))
+ elif hasattr(data, 'seek'):
+ size += file_size(data)
+ elif hasattr(data, 'read'):
+ size += len(data.read()) # This is undesired
+ elif hasattr(data, '__iter__'):
+ size += sum(len(chunk) for chunk in data) # This is also undesired
+ else:
+ size += len(unicode(data)) # Hope for the best
+
+ return size + sum(len(chunk) for chunk in iter(MultipartEncoderGenerator(empty_fields, boundary=self.boundary)))
+
+ def __iter__(self):
+
+ for fieldname, value in self.fields.iteritems():
+ yield encode(u'--%s\r\n' % (self.boundary))
+
+ if isinstance(value, tuple):
+ filename, data = value
+ yield encode(u'Content-Disposition: form-data; name="%s"; filename="%s"\r\n' % (fieldname, filename))
+ yield encode(u'Content-Type: %s\r\n\r\n' % (get_content_type(filename)))
+ else:
+ data = value
+ yield encode(u'Content-Disposition: form-data; name="%s"\r\n' % fieldname)
+ yield encode(u'Content-Type: text/plain\r\n\r\n')
+
+ if isinstance(data, unicode):
+ yield encode(data)
+
+ elif isinstance(data, int):
+ # Handle integers for backwards compatibility
+ yield str(data)
+
+ elif hasattr(data, 'read'):
+ # Stream from a file-like object
+ while True:
+ chunk = data.read(self.chunk_size)
+ if not chunk:
+ break
+ yield encode(chunk)
+
+ elif hasattr(data, '__iter__'):
+ # Stream from an iterator
+ for chunk in data:
+ yield encode(chunk)
+
+ else:
+ # Hope for the best
+ yield unicode(data)
+
+ yield encode(u'\r\n')
+
+ yield encode(u'--%s--\r\n' % (self.boundary))
+
+
+def encode_multipart_formdata(fields, boundary=None, chunk_size=8192):
+ """
+ ``fields`` is a dictionary where the parameter name is the key and the value
+ is either a (filename, data) tuple or just data. Data can be a string, file-like
+ object, or iterator.
+
+ Example:
+ fields = {
+ 'foo': 'bar',
+ 'upload_file': ('file.txt', 'data'),
+ 'huge_huge': ('video.mpg', fp),
+ 'hihihi_42_times': ('hi' for i in xrange(42)),
+ }
+
+ File-like objects are read ``chunk_size`` bytes at a time.
+
+ If no ``boundary`` is given, a random one is chosen.
+
+ See MultipartEncoderGenerator for more details.
+ """
+ stream = MultipartEncoderGenerator(fields, boundary=boundary, chunk_size=chunk_size)
+ return IterStreamer(stream), stream.get_content_type()