diff options
-rw-r--r-- | paste/fileapp.py | 163 | ||||
-rw-r--r-- | paste/fixture.py | 20 | ||||
-rw-r--r-- | paste/httpexceptions.py | 2 | ||||
-rwxr-xr-x | paste/util/httpserver.py | 16 | ||||
-rw-r--r-- | tests/test_fileapp.py | 93 |
5 files changed, 233 insertions, 61 deletions
diff --git a/paste/fileapp.py b/paste/fileapp.py index 118ba50..4a4df60 100644 --- a/paste/fileapp.py +++ b/paste/fileapp.py @@ -7,17 +7,16 @@ files. At this time it has cache helpers and understands the if-modified-since request header. """ -#@@: this still needs Range support for large files - import os, time import mimetypes import httpexceptions -from response import has_header, replace_header +from response import has_header, replace_header, header_value from rfc822 import formatdate, parsedate_tz, mktime_tz from httpexceptions import HTTPBadRequest CACHE_SIZE = 4096 -BLOCK_SIZE = 4096 +BLOCK_SIZE = 4096 * 16 +U_MIMETYPE = 'application/octet-stream' __all__ = ['DataApp','FileApp'] @@ -43,7 +42,7 @@ class DataApp(object): ``content_encoding`` ``content_location`` - ``cache()`` + ``cache_control()`` This method provides validated construction of the ``Cache-Control`` header as well as providing for automated filling out of the @@ -60,20 +59,22 @@ class DataApp(object): assert isinstance(headers,(type(None),list)) self.expires = None self.content = None + self.content_length = None self.last_modified = 0 self.headers = headers or [] for (k,v) in kwargs.items(): hk = k.replace("_","-") if not headers or not has_header(self.headers,hk): self.headers.append((hk,v)) + replace_header(self.headers,'accept-ranges','bytes') if not has_header(self.headers,'content-type'): - self.headers.append(('content-type','application/octet-stream')) + self.headers.append(('content-type',U_MIMETYPE)) if content: self.set_content(content) - def cache(self, public=None, private=None, no_cache=None, - no_store=False, max_age=None, s_maxage=None, - no_transform=False, **extensions): + def cache_control(self, public=None, private=None, no_cache=None, + no_store=False, max_age=None, s_maxage=None, + no_transform=False, **extensions): """ Sets the ``Cache-Control`` according to the arguments provided. See RFC 2616 section 14.9 for more details. @@ -153,36 +154,91 @@ class DataApp(object): if s_maxage is not None: result.append('s-maxage=%d' % s_maxage) for (k,v) in extensions.items(): + assert 'age' not in k assert '"' not in v result.append('%s="%s"' % (k.replace("_","-"),v)) replace_header(self.headers,'cache-control',", ".join(result)) return self def set_content(self, content): + assert content is not None self.last_modified = time.time() - self.content = [content] - replace_header(self.headers,'content-length', str(len(content))) + self.content = content + self.content_length = len(content) replace_header(self.headers,'last-modified', formatdate(self.last_modified)) return self + def content_disposition(self, attachment=None, inline=None, + filename=None): + """ + Sets the ``Content-Disposition`` header according to RFC 1806, + as specified in 19.5.1 of RFC 2616. Note that this is not an + approved HTTP/1.1 header, but it is very common and useful. + + ``attachment`` if True, this specifies that the content + should not be shown in the browser and + should be handled externally, even if the + browser could render the content + + ``inline`` exclusive with attachment; indicates that the + content should be rendered in the browser if + possible, but otherwise it should be handled + externally + + Only one of the above 2 may be True. If both are None, then + the disposition is assumed to be an ``attachment``. These are + distinct fields since support for field enumeration may be + added in the future. + + ``filename`` the filename parameter, if any, to be reported; + if this is None, then the current object's + 'filename' attribute is used + + If filename is provided, and Content-Type is not set or is + 'application/octet-stream', then the mimetypes.guess is used + to upgrade the Content-Type setting. + """ + assert not (attachment and inline) + if filename is None: + filename = getattr(self,'filename',None) + else: + if header_value(self.headers,'content-type') == U_MIMETYPE: + content_type, _ = mimetypes.guess_type(filename) + replace_header(self.headers,'content-type',content_type) + result = [] + if inline is True: + assert not attachment + result.append('inline') + else: + assert not inline + result.append('attachment') + if filename: + assert '"' not in filename + filename = filename.split("/")[-1] + filename = filename.split("\\")[-1] + result.append('filename="%s"' % filename) + replace_header(self.headers,'content-disposition',"; ".join(result)) + return self + def __call__(self, environ, start_response): + headers = self.headers[:] if self.expires is not None: - replace_header(self.headers,'expires', + replace_header(headers,'expires', formatdate(time.time()+self.expires)) checkmod = environ.get('HTTP_IF_MODIFIED_SINCE') if checkmod: try: - client_clock = mktime_tz(parsedate_tz(checkmod)) + client_clock = mktime_tz(parsedate_tz(checkmod.strip())) except TypeError: - return HTTPBadRequest(detail=( + return HTTPBadRequest(( "Client program provided an ill-formed timestamp for\r\n" "its If-Modified-Since header:\r\n" " %s\r\n") % checkmod ).wsgi_application(environ, start_response) if client_clock > time.time(): - return HTTPBadRequest(detail=( + return HTTPBadRequest(( "Please check your system clock.\r\n" "According to this server, the time provided in the\r\n" "If-Modified-Since header is in the future:\r\n" @@ -190,24 +246,55 @@ class DataApp(object): ).wsgi_application(environ, start_response) elif client_clock <= self.last_modified: # the client has a recent copy - start_response('304 Not Modified',[]) + headers = [] + for head in ('etag','content-location','vary', + 'expires','cache-control'): + value = header_value(self.headers,head) + if value: + headers.apppend((head, value)) + start_response('304 Not Modified',headers) return [''] # empty body - start_response('200 OK',self.headers) - return self.content + (lower,upper) = (0, self.content_length - 1) + if 'HTTP_RANGE' in environ: + print environ['HTTP_RANGE'] + range = environ['HTTP_RANGE'].split(",")[0] + range = range.strip().lower().replace(" ","") + if not range.startswith("bytes=") or 1 != range.count("-"): + return HTTPBadRequest(( + "A malformed range request was given.\r\n" + " Range: %s\r\n") % range + ).wsgi_application(environ, start_response) + (lower,upper) = range[len("bytes="):].split("-") + upper = upper and int(upper) or (self.content_length - 1) + lower = lower and int(lower) or 0 + if upper >= self.content_length or lower >= self.content_length: + return HTTPBadRequest(( + "Range request was made beyond the end of the content,\r\n" + "which is %s long.\r\n Range: %s\r\n") % ( + self.content_length, range) + ).wsgi_application(environ, start_response) + + content_length = 1 + (upper - lower) + replace_header(headers,'content-length', str(content_length)) + replace_header(headers,'content-range', + "%d-%d/%d" % (lower, upper, self.content_length)) + + start_response('200 OK',headers) + if self.content is not None: + return [self.content[lower:upper+1]] + assert self.__class__ != DataApp, "DataApp must call set_content" + return (lower, content_length) class FileApp(DataApp): """ Returns an application that will send the file at the given filename. Adds a mime type based on ``mimetypes.guess_type()``. See DataApp for the arguments beyond ``filename``. - - """ def __init__(self, filename, headers=None, **kwargs): self.filename = filename - self.last_size = None content_type, content_encoding = mimetypes.guess_type(self.filename) if content_type and 'content_type' not in kwargs: kwargs['content_type'] = content_type @@ -215,24 +302,24 @@ class FileApp(DataApp): kwargs['content_encoding'] = content_encoding DataApp.__init__(self, None, headers, **kwargs) - def update(self): + def update(self, force=False): stat = os.stat(self.filename) - if (stat.st_mtime == self.last_modified and - stat.st_size == self.last_size): + if not force and stat.st_mtime == self.last_modified: return - self.last_size = stat.st_size if stat.st_size < CACHE_SIZE: fh = open(self.filename,"rb") self.set_content(fh.read()) fh.close() else: self.content = None - replace_header(self.headers, 'content-length', - str(stat.st_size)) + self.content_length = stat.st_size self.last_modified = stat.st_mtime def __call__(self, environ, start_response): - self.update() + if 'max-age=0' in environ.get("HTTP_CACHE_CONTROL",''): + self.update(force=True) # RFC 2616 13.2.6 + else: + self.update() if not self.content: try: file = open(self.filename, 'rb') @@ -242,22 +329,30 @@ class FileApp(DataApp): return exc.wsgi_application( environ, start_response) retval = DataApp.__call__(self, environ, start_response) - if retval is not None: + if isinstance(retval,list): # cached content, exception, or not-modified return retval - return _FileIter(file) + (lower, content_length) = retval + file.seek(lower) + return _FileIter(file, size=content_length) class _FileIter: - def __init__(self, fp, blocksize=BLOCK_SIZE): - self.file = fp - self.blocksize = blocksize + def __init__(self, file, block_size=None, size=None): + self.file = file + self.size = size + self.block_size = block_size or BLOCK_SIZE def __iter__(self): return self def next(self): - data = self.file.read(self.blocksize) + chunk_size = self.block_size + if self.size is not None: + if chunk_size > self.size: + chunk_size = self.size + self.size -= chunk_size + data = self.file.read(chunk_size) if not data: raise StopIteration return data diff --git a/paste/fixture.py b/paste/fixture.py index 95b4710..949503d 100644 --- a/paste/fixture.py +++ b/paste/fixture.py @@ -54,7 +54,7 @@ class DummyMethod(object): def __call__(self, *args, **kw): return self.return_value - + def capture_stdout(func, *args, **kw): newstdout = StringIO() oldstdout = sys.stdout @@ -127,7 +127,7 @@ class Dummy_smtplib(object): assert not self.open, ( "SMTP connection not quit") self.__class__.existing = None - + class FakeFilesystem(object): def __init__(self): @@ -187,9 +187,7 @@ class WriterFile(object): assert self.open, ( "Closing an open file") self.open = False - - - + class AppError(Exception): pass @@ -261,7 +259,7 @@ class TestApp(object): environ.update(extra_environ) req = TestRequest(url, environ, expect_errors) return self.do_request(req, status=status) - + def encode_multipart(self, params, files): """ Encodes a set of parameters (typically a name/value list) and @@ -368,7 +366,7 @@ class TestApp(object): if res.errors: raise AppError( "Application had errors logged:\n%s" % res.errors) - + def make_response(self, (status, headers, body, errors), total_time): return TestResponse(self, status, headers, body, errors, total_time) @@ -433,7 +431,7 @@ class TestResponse(object): forms[i] = form if form.id: forms[form.id] = form - + def header(self, name, default=NoDefault): """ Returns the named header; an error if there is not exactly one @@ -707,7 +705,7 @@ class TestResponse(object): f.close() url = 'file:' + fn.replace(os.sep, '/') webbrowser.open_new(url) - + class TestRequest(object): # for py.test @@ -941,7 +939,7 @@ class Form(object): continue submit.append((name, value)) return submit - + _attr_re = re.compile(r'([^= \n\r\t]*)[ \n\r\t]*=[ \n\r\t]*(?:"([^"]*)"|([^"][^ \n\r\t>]*))', re.S) @@ -1270,7 +1268,7 @@ class ProcResult(object): ``stdout``, ``stderr``: What is produced - + ``files_created``, ``files_deleted``, ``files_updated``: Dictionaries mapping filenames (relative to the ``base_dir``) to ``FoundFile`` or ``FoundDir`` objects. diff --git a/paste/httpexceptions.py b/paste/httpexceptions.py index 8e46224..0ccd83d 100644 --- a/paste/httpexceptions.py +++ b/paste/httpexceptions.py @@ -379,7 +379,7 @@ class HTTPNotFound(HTTPClientError): explanation = ('The resource could not be found.') class HTTPMethodNotAllowed(HTTPClientError): - required_headers = ('allowed',) + required_headers = ('allow',) code = 405 title = 'Method Not Allowed' # override template since we need an environment variable diff --git a/paste/util/httpserver.py b/paste/util/httpserver.py index c054ce0..05c29f6 100755 --- a/paste/util/httpserver.py +++ b/paste/util/httpserver.py @@ -189,6 +189,13 @@ class WSGIHandlerMixin: self.wsgi_curr_headers = None self.wsgi_headers_sent = False + def wsgi_connection_drop(self, environ, exce): + """ + Override this if you're interested in socket exceptions, such + as when the user clicks 'Cancel' during a file download. + """ + pass + def wsgi_execute(self, environ=None): """ Invoke the server's ``wsgi_application``. @@ -205,12 +212,9 @@ class WSGIHandlerMixin: finally: if hasattr(result,'close'): result.close() - except socket.error: - # @@: what do we do with this exception? Sending a 500 - # isn't smart in this case, which is why it is being singled - # out here. Yet, SocketServer@218 just ignores this sort of - # error... is this acceptable? Let's just punt. - raise + except socket.error, exce: + self.wsgi_connection_drop(environ, exce) + return except: if not self.wsgi_headers_sent: self.wsgi_curr_headers = ('500 Internal Server Error', diff --git a/tests/test_fileapp.py b/tests/test_fileapp.py index 8d8c1b3..90b0e5c 100644 --- a/tests/test_fileapp.py +++ b/tests/test_fileapp.py @@ -4,7 +4,7 @@ from paste.fileapp import * from paste.fixture import * from rfc822 import parsedate_tz, mktime_tz -import time +import time, string def test_data(): harness = TestApp(DataApp('mycontent')) @@ -18,7 +18,7 @@ def test_data(): def test_cache(): def build(*args,**kwargs): app = DataApp("SomeContent") - app.cache(*args,**kwargs) + app.cache_control(*args,**kwargs) return TestApp(app).get("/") res = build() assert 'public' == res.header('cache-control') @@ -34,11 +34,37 @@ def test_cache(): expires = mktime_tz(parsedate_tz(res.header('expires'))) assert expires > time.time()+58 and expires < time.time()+61 res = build(private=True, max_age=60, no_transform=True, no_store=True) - reshead = res.header('cache-control') - assert 'private, no-store, no-transform, max-age=60' == reshead + assert 'private, no-store, no-transform, max-age=60' == \ + res.header('cache-control') expires = mktime_tz(parsedate_tz(res.header('expires'))) assert mktime_tz(parsedate_tz(res.header('expires'))) < time.time() +def test_disposition(): + def build(*args,**kwargs): + app = DataApp("SomeContent") + app.content_disposition(*args,**kwargs) + return TestApp(app).get("/") + res = build() + assert 'attachment' == res.header('content-disposition') + assert 'application/octet-stream' == res.header('content-type') + res = build(filename="bing.txt") + assert 'attachment; filename="bing.txt"' == \ + res.header('content-disposition') + assert 'text/plain' == res.header('content-type') + res = build(inline=True) + assert 'inline' == res.header('content-disposition') + assert 'application/octet-stream' == res.header('content-type') + res = build(inline=True, filename="/some/path/bing.txt") + assert 'inline; filename="bing.txt"' == \ + res.header('content-disposition') + assert 'text/plain' == res.header('content-type') + try: + res = build(inline=True,attachment=True) + except AssertionError: + pass + else: + assert False, "should be an exception" + def test_modified(): harness = TestApp(DataApp('mycontent')) res = harness.get("/") @@ -68,27 +94,76 @@ def test_file(): assert len(content) == int(res.header('content-length')) assert 'text/plain' == res.header('content-type') assert content == res.body - assert [content] == app.content # this is cashed + assert content == app.content # this is cashed lastmod = res.header('last-modified') print "updating", tempfile file = open(tempfile,"a+") file.write("0123456789") file.close() - res = TestApp(app).get("/") + res = TestApp(app).get("/",headers={'Cache-Control': 'max-age=0'}) assert len(content)+10 == int(res.header('content-length')) assert 'text/plain' == res.header('content-type') assert content + "0123456789" == res.body assert app.content # we are still cached file = open(tempfile,"a+") file.write("X" * fileapp.CACHE_SIZE) # exceed the cashe size + file.write("YZ") file.close() - res = TestApp(app).get("/") - newsize = fileapp.CACHE_SIZE + len(content)+10 + res = TestApp(app).get("/",headers={'Cache-Control': 'max-age=0'}) + newsize = fileapp.CACHE_SIZE + len(content)+12 assert newsize == int(res.header('content-length')) assert newsize == len(res.body) - assert res.body.startswith(content) and res.body.endswith('X') + assert res.body.startswith(content) and res.body.endswith('XYZ') assert not app.content # we are no longer cached finally: import os os.unlink(tempfile) +def _excercize_range(build,content): + res = build("bytes=0-%d" % (len(content)-1)) + assert res.header('accept-ranges') == 'bytes' + assert res.body == content + assert res.header('content-length') == str(len(content)) + res = build("bytes=-%d" % (len(content)-1)) + assert res.body == content + assert res.header('content-length') == str(len(content)) + res = build("bytes=0-") + assert res.body == content + assert res.header('content-length') == str(len(content)) + res = build("bytes=0-9") + assert res.body == content[:10] + assert res.header('content-length') == '10' + res = build("bytes=%d-" % (len(content)-1)) + assert res.body == 'Z' + assert res.header('content-length') == '1' + res = build("bytes=%d-%d" % (3,17)) + assert res.body == content[3:18] + assert res.header('content-length') == '15' + +def test_range(): + content = string.letters * 5 + def build(range): + app = DataApp(content) + return TestApp(app).get("/",headers={'Range': range}) + _excercize_range(build,content) + +def test_file_range(): + from paste import fileapp + import random, string, os + tempfile = "test_fileapp.%s.txt" % (random.random()) + content = string.letters * (1+(fileapp.CACHE_SIZE / len(string.letters))) + assert len(content) > fileapp.CACHE_SIZE + file = open(tempfile,"w") + file.write(content) + file.close() + try: + def build(range): + app = fileapp.FileApp(tempfile) + return TestApp(app).get("/",headers={'Range': range}) + _excercize_range(build,content) + for size in (13,len(string.letters),len(string.letters)-1): + fileapp.BLOCK_SIZE = size + _excercize_range(build,content) + finally: + import os + os.unlink(tempfile) |