diff options
author | cce <devnull@localhost> | 2005-12-27 20:25:07 +0000 |
---|---|---|
committer | cce <devnull@localhost> | 2005-12-27 20:25:07 +0000 |
commit | c8be1aa79910ff3d8a45e0744eb1f42371b5eb82 (patch) | |
tree | 660b211fed154ae79943bb81cb96a8025b10dd03 /paste/httpheaders.py | |
parent | c03aa906ac6bf43cdb818cb60497eed8ac207fb0 (diff) | |
download | paste-c8be1aa79910ff3d8a45e0744eb1f42371b5eb82.tar.gz |
- overhauled description of httpheaders module
- added delete and update methods
- added regression tests
Diffstat (limited to 'paste/httpheaders.py')
-rw-r--r-- | paste/httpheaders.py | 337 |
1 files changed, 232 insertions, 105 deletions
diff --git a/paste/httpheaders.py b/paste/httpheaders.py index 7c23ec1..aa5e432 100644 --- a/paste/httpheaders.py +++ b/paste/httpheaders.py @@ -5,91 +5,129 @@ """ HTTP Message Headers -This contains general support for message headers as defined by HTTP/1.1 -specification, RFC 2616 (in particular section 4.2). This module -defines the ``HTTPHeader`` class, and corresponding instances for common -headers. Here are some snippets of how you'd use it: - - environ.get('HTTP_ACCEPT_LANGUAGE') - -> AcceptLanguage(environ) - - In this usage, the header is passed the ``environ``, and extracts - the appropriate field-value. The primary advantage is that a typo - in the header is a NameError; environ.get('HTTP_ACCEPT_LANGUAGES'), - by contrast, might be a rather hard bug to track down. - - header_value(response_headers, 'content-type') # from paste.response - -> ContentType(response_headers) - - This usage is similar in that typos are easily noticed; but also - the syntax is the same -- the HTTPHeader can hide the technical - difference between ``environ`` and ``response_headers`` so that - your code remains focused on the task. - - response_headers.append(('content-type','text/html')) - -> ContentType.append(response_headers, 'text/html') - - Although in most cases these two forms have similar result, - there are a few differences: - - - Since the ContentType header knows that it is a singleton, it - will raise an exception if already exists in the response_headers - - - The ContentType version uses the recommended RFC capitalization, - 'Content-Type'; while this is easy in this case, it is not easy - to remember in every case, such as 'ETag' or 'WWW-Authenticate'. - - - The ContentType version can validate the content; while this case - is easy to inspect that the former is correct -- this isn't - always true, for example in ContentDisposition or more - complicated headers. - - remove_header(response_headers, 'content-type') # from paste.response - -> ContentType.remove(response_headers) - - No pratical difference other than consistency with the rest - of the module; same as the ``replace`` method. - - "public, no-store, max-age=%d" % 7*24*60*60 - -> CacheControl(public=True, no_store=True, - max_age= CacheControl.ONE_WEEK) - - While the former is the actual header that should be sent, it is - quite easy to make mistakes in header construction; or specify - invalid values that look correct but violate the specification. +This contains general support for HTTP/1.1 message headers [1] in a +manner that supports WSGI ``environ`` [2] and ``response_headers``[3]. +Specifically, this module defines a ``HTTPHeader`` class whose instances +correspond to field-name items. The actual field-content for the +message-header is stored in the appropriate WSGI collection (either +the ``environ`` for requests, or ``response_headers`` for responses). + +Each ``HTTPHeader`` instance is a callable (defining ``__call__(``) +that takes one of the following: + + - an ``environ`` dictionary, returning the corresponding header + value by according to the WSGI's HTTP_ prefix mechanism, e.g., + ``UserAgent(environ)`` returns ``environ.get('HTTP_USER_AGENT')`` + + - a ``response_headers`` list, giving a comma-delimited string for + each corresponding ``header_value`` tuple entries (see below). + + - a sequence of string ``*args`` that are comma-delimited into a + single string value, e.g. ``ContentType("text/html","text/plain")`` + returns ``"text/html, text/plain"`` + + - a set of ``*kwargs`` keyword arguments that are used to create + a header value, in a manner dependent upon the particular header in + question (to make value construction easier and error-free): + ``ContentDisposition(max_age=ContentDisposition.ONEWEEK)`` returns + ``"public, max-age=60480"`` + +Each ``HTTPHeader`` instance also provides several methods wich act on +a WSGI collection, for setting or getting header values. The first +argument of these methods is the collection, and all remaining arguments +are equivalent to invoking ``__call__(*args, **kwargs)``. + + ``delete(collection)`` + + This method removes all entries of the corresponding header from + the given collection (``environ`` or ``response_headers``), e.g., + ``UserAgent.remove(environ)`` deletes the 'HTTP_USER_AGENT' entry + from the ``environ``. + + ``update(collection, *args, **kwargs)`` + + This method does an in-place replacement of the given header entry, + for example: ``ContentLength(response_headers,len(body))`` + +This particular approach to managing headers within a WSGI collection +has several advantages: + + 1. Typos in the header name are easily detected since they become a + ``NameError`` when executed. The approach of using header strings + directly can be problematic; for example, the following should + allways return ``None``: ``environ.get("HTTP_ACCEPT_LANGUAGES")`` + + 2. For specific headers with validation, using ``__call__`` will + result in an automatic header value check. For example, the + ContentDisposition header will reject a value having ``maxage`` + or ``max_age`` (the appropriate parameter is ``max_age``). + + 3. When appending/replacing headers, the field-name has the suggested + RFC capitalization (e.g. ``Content-Type`` or ``ETag``) for + user-agents that incorrectly use case-sensitive matches. + + 4. Some headers (such as ``Content-Type``) are singeltons, that is, + only one entry of this type may occur in a given set of + ``response_headers``. This module knows about those cases and + enforces this cardnality constraint. + + 5. The exact details of WSGI header management are abstracted so + the programmer need not worry about operational differences + between ``environ`` dictionary or ``response_headers`` list. + + 6. Sorting of ``HTTPHeaders`` is done following the RFC suggestion + that general-headers come first, followed by request and response + headers, and finishing with entity-headers. + + 7. Special care is given to exceptional cases such as ``Set-Cookie`` + which violates the RFC's recommendation about combining header + content into a single entry using comma separation [1] + +A particular difficulty with HTTPHeaders is a categorization of +sorts as described in section 4.2: + + Multiple message-header fields with the same field-name MAY be + present in a message if and only if the entire field-value for that + header field is defined as a comma-separated list [i.e., #(values)]. + It MUST be possible to combine the multiple header fields into one + "field-name: field-value" pair, without changing the semantics of + the message, by appending each subsequent field-value to the first, + each separated by a comma. + +This creates three fundamentally different kinds of headers: + + - Those that do not have a #(values) production, and hence are + singular and may only occur once in a set of response fields; + this case is handled by the ``SingleValueHeader`` subclass. + + - Those which have the #(values) production and follow the + combining rule outlined above; our ``MultiValueHeader`` case. + + - Those which are multi-valued, but cannot be combined (such as the + ``Set-Cookie`` header due to its ``Expires`` parameter); or where + combining them into a single header entry would cause common + user-agents to fail (``WWW-Authenticate``, ``Warning``) since + they fail to handle dates even when properly quoted. This case + is handled by ``MultiEntryHeader``. + +Since this project does not have time to provide rigorous support +and validation for all headers, it does a basic construction of +headers listed in RFC 2616 (plus a few others) so that they can +be obtained by simply doing ``from paste.httpheaders import *``; +the name of the header instance is the "common name" less any +dashes to give CamelCase style names. + +[1] http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2 +[2] http://www.python.org/peps/pep-0333.html#environ-variables +[3] http://www.python.org/peps/pep-0333.html#the-start-response-callable """ -__all__ = ['get_header', 'HTTPHeader', 'normalize_headers' ] +__all__ = ['get_header', 'HTTPHeader', 'normalize_headers' + # additionally, all headers are exported +] _headers = {} -def get_header(name, raiseError=True): - """ - This function finds the corresponding ``HTTPHeader`` for the - ``name`` provided. So that python-style names can be used, - underscores are converted to dashes before the lookup. - """ - if isinstance(name,HTTPHeader): - return name - retval = _headers.get(name.strip().lower().replace("_","-")) - if not retval and raiseError: - raise AssertionError("'%s' is an unknown header" % name) - return retval - -def list_headers(general=True, request=True, response=True, entity=True): - " list all headers for a given category " - search = [] - for (bool,strval) in ((general,'general'), (request,'request'), - (response,'response'), (entity,'entity')): - if bool: - search.append(strval) - search = tuple(search) - for head in _headers.values(): - if head.category in search: - retval.append(head) - retval.sort() - return retval - class HTTPHeader(object): """ HTTPHeader instances represent a particular ``field-name`` of an @@ -132,17 +170,19 @@ class HTTPHeader(object): Collection Methods: - ``append()`` appends the given field-value onto a WSGI - ``response_headers`` list object - - ``remove()`` removes all field-value occurances of this + ``delete()`` remove the all occurances (if any) of the given header in the collection provided - ``replace()`` replaces (if they exist) all field-value items + ``update()`` replaces (if they exist) all field-value items in the given collection with the value provided + ``tuples()`` returns a set of (field-name, field-value) tuples + sutable for extending ``response_headers`` + The collected versions of initialized header instances are immediately - registered and accessable through the ``get_header`` function. + registered and accessable through the ``get_header`` function. Do not + inherit from this directly, use one of ``SingleValueHeader``, + ``MultiValueHeader``, or ``MultiEntryHeader`` as appropriate. """ #@@: add field-name validation def __new__(cls, name, category): @@ -170,6 +210,7 @@ class HTTPHeader(object): 'response': 3, 'entity': 4 }[category] _headers[name.lower()] = self self._environ_name = 'HTTP_'+ self.name.upper().replace("-","_") + self._headers_name = self.name.lower() assert self.version in ('1.1','1.0','0.9') assert isinstance(self,(SingleValueHeader,MultiValueHeader, MultiEntryHeader)) @@ -192,7 +233,7 @@ class HTTPHeader(object): def __repr__(self): return '<HTTPHeader %s>' % self.name - def construct(**kwargs): + def construct(self, **kwargs): """ construct field-value(s) via keyword arguments @@ -202,7 +243,11 @@ class HTTPHeader(object): be specialized for specific headers. """ result = [] - for (k,v) in kwargs.items(): + keys = kwargs.keys() + keys.sort() + for k in keys: + v = kwargs[k] + k = k.lower().replace("_","-") if v in (None,True): result.append(str(k)) else: @@ -266,6 +311,57 @@ class HTTPHeader(object): assert type(item) == str return self.format(*args) + def delete(self, collection): + """ + This method removes all occurances of the header in the + given collection. It does not return the value removed. + """ + if type(collection) == dict: + if self._environ_name in collection: + del collection[self._environ_name] + return self + assert list == type(collection) + i = 0 + while i < len(collection): + if collection[i][0].lower() == self._headers_name: + del collection[i] + continue + i += 1 + return self + + def update(self, collection, *args, **kwargs): + """ + This method replaces (in-place when possible) all occurances of + the given header with the provided value. If no value is + provided, this is the same as ``remove``. + """ + value = self.__call__(*args, **kwargs) + if value is None: + return self.remove(connection) + if type(collection) == dict: + collection[self._environ_name] = value + return self + assert list == type(collection) + i = 0 + found = False + while i < len(collection): + if collection[i][0].lower() == self._headers_name: + if found: + del collection[i] + continue + collection[i] = (self.name, value) + found = True + i += 1 + if not found: + collection.append((self.name, value)) + return self + + def tuples(self, *args, **kwargs): + value = self.__call__(*args, **kwargs) + if not value: + return () + return ((self.name, value),) + class SingleValueHeader(HTTPHeader): """ The field-value is a single value and therefore all results @@ -281,22 +377,10 @@ class SingleValueHeader(HTTPHeader): class MultiValueHeader(HTTPHeader): """ - This header is multi-valued, however, results can be combined by - concatinating with a comma, as described by section 4.2 of RFC 2616: - - Multiple message-header fields with the same field-name MAY - be present in a message if and only if the entire - field-value for that header field is defined as a - comma-separated list [i.e., #(values)]. It MUST be possible - to combine the multiple header fields into one "field-name: - field-value" pair, without changing the semantics of the - message, by appending each subsequent field-value to the - first, each separated by a comma. The order in which header - fields with the same field-name are received is therefore - significant to the interpretation of the combined field - value, and thus a proxy MUST NOT change the order of these - field values when a message is forwarded. + This header is multi-valued and values can be combined by + concatinating with a comma, as described by section 4.2 of RFC 2616. """ + def format(self, *values): if not values: return None @@ -306,14 +390,55 @@ class MultiEntryHeader(HTTPHeader): """ This header is multi-valued, but the values should not be combined with a comma since the header is not in compliance with RFC 2616 - (Set-Cookie) or which common user-agents do not behave well when the - header values are combined. + (Set-Cookie due to Expires parameter) or which common user-agents do + not behave well when the header values are combined. + + The values returned for this case are _always_ a list instead + of a string. """ + + def update(self, collection, *args, **kwargs): + #@@: This needs to be implemented to handle lists + raise NotImplementedError() + def format(self, *values): if not values: return None return list(values) + def tuples(self, *args, **kwargs): + values = self.__call__(*args, **kwargs) + if not values: + return () + return tuple([(self.name, value) for value in values]) + +def get_header(name, raiseError=True): + """ + This function finds the corresponding ``HTTPHeader`` for the + ``name`` provided. So that python-style names can be used, + underscores are converted to dashes before the lookup. + """ + if isinstance(name,HTTPHeader): + return name + retval = _headers.get(name.strip().lower().replace("_","-")) + if not retval and raiseError: + raise AssertionError("'%s' is an unknown header" % name) + return retval + +def list_headers(general=True, request=True, response=True, entity=True): + " list all headers for a given category " + search = [] + for (bool,strval) in ((general,'general'), (request,'request'), + (response,'response'), (entity,'entity')): + if bool: + search.append(strval) + search = tuple(search) + for head in _headers.values(): + if head.category in search: + retval.append(head) + retval.sort() + return retval + def normalize_headers(response_headers, strict=True): """ This alters the underlying response_headers to use the common @@ -358,6 +483,7 @@ for (name, category, version, style, comment) in \ ,("Cookie" ,'request' ,'1.0','multi-value','RFC 2109/Netscape') ,("Connection" ,'general' ,'1.1','multi-value','RFC 2616 $14.10') ,("Content-Encoding" ,'entity' ,'1.0','multi-value','RFC 2616 $14.11') +,("Content-Disposition",'entity' ,'1.1','multi-value','RFC 2616 $15.5' ) ,("Content-Language" ,'entity' ,'1.1','multi-value','RFC 2616 $14.12') ,("Content-Length" ,'entity' ,'1.0','singular' ,'RFC 2616 $14.13') ,("Content-Location" ,'entity' ,'1.1','singular' ,'RFC 2616 $14.14') @@ -412,3 +538,4 @@ for head in _headers.values(): headname = head.name.replace("-","") locals()[headname] = head __all__.append(headname) + |