summaryrefslogtreecommitdiff
path: root/paste/httpheaders.py
diff options
context:
space:
mode:
authorcce <devnull@localhost>2005-12-27 09:47:16 +0000
committercce <devnull@localhost>2005-12-27 09:47:16 +0000
commitc03aa906ac6bf43cdb818cb60497eed8ac207fb0 (patch)
tree3e1f69f17feef35cc9f455ca768d0f5d0d7ee1f5 /paste/httpheaders.py
parent5f6623a67f995758deffb9ddfdd71a7134b7c25f (diff)
downloadpaste-c03aa906ac6bf43cdb818cb60497eed8ac207fb0.tar.gz
major re-work of httpheaders
Diffstat (limited to 'paste/httpheaders.py')
-rw-r--r--paste/httpheaders.py364
1 files changed, 277 insertions, 87 deletions
diff --git a/paste/httpheaders.py b/paste/httpheaders.py
index f80a510..7c23ec1 100644
--- a/paste/httpheaders.py
+++ b/paste/httpheaders.py
@@ -3,35 +3,62 @@
# the MIT License: http://www.opensource.org/licenses/mit-license.php
# Some of this code was funded by http://prometheusresearch.com
"""
-HTTP Headers
-
-This contains useful information about various HTTP Headers; eventually
-including parsers/constructors for them. It is modeled after the
-HTTPExceptions class; only that a header such as 'Content-Type' is
-converted into Python as HTTP_CONTENT_TYPE. Each HTTPHeader is a string
-value, with various attributes describing how that header behaves; the
-string value is the "common form" as described in RFC 2616. It also
-overrides sorting so that general headers go first, followed by
-request/response headers, and then entity headers.
-
-It is planned that HTTPHeader will grow three methods:
-
- ``parse()`` This will parse content of the corresponding header
- and return a dictionary of its components for
- ``singular`` headers, and a list of dict items for
- the other headers.
-
- ``compose()`` This will take N keyword arguments corresponding
- to the various parts of the header and will produce
- a well-formed header value. For example, the
- cashe_control and content_disposition code in
- fileapp.py could move here.
-
- ``__call__()`` This will be similar to ``compose()`` only that it
- will return a (header, value) tuple suitable for
- a WSGI ``response_headers`` list.
+HTTP Message Headers
+This contains general support for message headers as defined by HTTP/1.1
+specification, RFC 2616 (in particular section 4.2). This module
+defines the ``HTTPHeader`` class, and corresponding instances for common
+headers. Here are some snippets of how you'd use it:
+
+ environ.get('HTTP_ACCEPT_LANGUAGE')
+ -> AcceptLanguage(environ)
+
+ In this usage, the header is passed the ``environ``, and extracts
+ the appropriate field-value. The primary advantage is that a typo
+ in the header is a NameError; environ.get('HTTP_ACCEPT_LANGUAGES'),
+ by contrast, might be a rather hard bug to track down.
+
+ header_value(response_headers, 'content-type') # from paste.response
+ -> ContentType(response_headers)
+
+ This usage is similar in that typos are easily noticed; but also
+ the syntax is the same -- the HTTPHeader can hide the technical
+ difference between ``environ`` and ``response_headers`` so that
+ your code remains focused on the task.
+
+ response_headers.append(('content-type','text/html'))
+ -> ContentType.append(response_headers, 'text/html')
+
+ Although in most cases these two forms have similar result,
+ there are a few differences:
+
+ - Since the ContentType header knows that it is a singleton, it
+ will raise an exception if it already exists in the response_headers
+
+ - The ContentType version uses the recommended RFC capitalization,
+ 'Content-Type'; while this is easy in this case, it is not easy
+ to remember in every case, such as 'ETag' or 'WWW-Authenticate'.
+
+ - The ContentType version can validate the content; while this case
+ is easy to inspect that the former is correct -- this isn't
+ always true, for example in ContentDisposition or more
+ complicated headers.
+
+ remove_header(response_headers, 'content-type') # from paste.response
+ -> ContentType.remove(response_headers)
+
+ No practical difference other than consistency with the rest
+ of the module; same as the ``replace`` method.
+
+ "public, no-store, max-age=%d" % (7*24*60*60)
+ -> CacheControl(public=True, no_store=True,
+ max_age= CacheControl.ONE_WEEK)
+
+ While the former is the actual header that should be sent, it is
+ quite easy to make mistakes in header construction; or specify
+ invalid values that look correct but violate the specification.
"""
+
__all__ = ['get_header', 'HTTPHeader', 'normalize_headers' ]
_headers = {}
@@ -49,43 +76,81 @@ def get_header(name, raiseError=True):
raise AssertionError("'%s' is an unknown header" % name)
return retval
+def list_headers(general=True, request=True, response=True, entity=True):
+    " list all registered headers in the selected categories, sorted "
+    search = []
+    for (wanted, strval) in ((general,'general'), (request,'request'),
+                             (response,'response'), (entity,'entity')):
+        if wanted:
+            search.append(strval)
+    retval = []   # was never initialised: every call raised NameError
+    for head in _headers.values():
+        if head.category in search:
+            retval.append(head)
+    retval.sort()
+    return retval
+
class HTTPHeader(object):
"""
- HTTP header field names in their normalized "common form" as given
- by their source specification.
-
- Constructor Arguments:
-
- ``name`` This is the primary string value of the
- header name and is meant to reflect the
- "common form" of the header as provided in
- its corresponding specification.
-
- ``category`` The kind of header field, one of:
- - ``general``
- - ``request``
- - ``response``
- - ``entity``
- Category is there to follow the RFC's suggestion
- that general headers go first and entity headers
- go last.
-
- ``version`` The version of HTTP with which the header
- should be recognized (ie, don't send 1.1
- headers to a 1.0 client).
-
- ``style`` The style of the header is one of three forms:
- - ``singular`` (one entry, one value)
- - ``multi-value`` (one entry, comma separated)
- - ``multi-entry`` (values have their own entry)
- Style is intended to inform wrappers about the
- cardality and storage semantics for the header.
+ HTTPHeader instances represent a particular ``field-name`` of an
+ HTTP message header. They do not hold a field-value, but instead
+ provide operations that work on its corresponding values. Storage of
+ the actual field values is done with WSGI ``environ`` or
+ ``response_headers`` as appropriate. Typically, sub-classes that
+ represent a specific HTTP header, such as ContentDisposition, are
+ singletons. Once constructed the HTTPHeader instances themselves
+ are immutable and stateless.
+
+ For purposes of documentation a "container" refers to either a
+ WSGI ``environ`` dictionary, or a ``response_headers`` list.
+
+ Member variables (and correspondingly constructor arguments).
+
+ ``name`` the ``field-name`` of the header, in "common form"
+ as presented in RFC 2616; e.g. 'Content-Type'
+
+ ``category`` one of 'general', 'request', 'response', or 'entity'
+
+ ``version`` version of HTTP (informational) with which the
+ header should be recognized
+
+ ``sort_order`` sorting order to be applied before sorting on
+ field-name when ordering headers in a response
+
+ Special Methods:
+
+ ``__call__`` The primary method of the HTTPHeader instance is
+ to make it a callable, it takes either a collection,
+ a string value, or keyword arguments and attempts
+ to find/construct a valid field-value
+
+ ``__lt__`` This method is used so that HTTPHeader objects
+ can be sorted in a manner suggested by RFC 2616.
+
+ ``__str__`` The string-value for instances of this class is
+ the ``field-name``.
+
+ Collection Methods:
+
+ ``append()`` appends the given field-value onto a WSGI
+ ``response_headers`` list object
+
+ ``remove()`` removes all field-value occurrences of this
+ header in the collection provided
+
+ ``replace()`` replaces (if they exist) all field-value items
+ in the given collection with the value provided
The collected versions of initialized header instances are immediately
registered and accessible through the ``get_header`` function.
"""
#@@: add field-name validation
- def __new__(cls, name, category, version, style):
+ def __new__(cls, name, category):
+ """
+ We use the ``__new__`` operator to ensure that only one
+ ``HTTPHeader`` instance exists for each field-name, and to
+ register the header so that it can be found/enumerated.
+ """
self = get_header(name, raiseError=False)
if self:
# Allow the registration to happen again, but assert
@@ -93,28 +158,21 @@ class HTTPHeader(object):
assert self.name == name, \
"duplicate registration with different capitalization"
assert self.category == category, \
- "duplicate registration with different category "
- assert self.version == version, \
- "duplicate registration with different HTTP version"
- assert self.style == style, \
- "duplicate registration with different value cardnality"
+ "duplicate registration with different category"
assert cls == self.__class__, \
"duplicate registration with different class"
- else:
- assert version, "registration requires a HTTP Version"
- assert isinstance(version,str), "HTTP version is a string"
- assert category in ('general', 'request', 'response', 'entity')
- assert style in ('singular', 'multi-value', 'multi-entry')
- self = object.__new__(cls)
- self.name = name
- self.version = version
- self.style = style
- self.category = category
- self._catsort = {'general': 1, 'request': 2, 'response': 2,
- 'entity': 3}[category]
- assert self.name.lower() not in _headers
- _headers[self.name.lower()] = self
- return self
+ return self
+
+ self = object.__new__(cls)
+ self.name = name
+ self.category = category
+ self.sort_order = {'general': 1, 'request': 2,
+ 'response': 3, 'entity': 4 }[category]
+ _headers[name.lower()] = self
+ self._environ_name = 'HTTP_'+ self.name.upper().replace("-","_")
+ assert self.version in ('1.1','1.0','0.9')
+ assert isinstance(self,(SingleValueHeader,MultiValueHeader,
+ MultiEntryHeader))
def __str__(self):
return self.name
@@ -126,13 +184,135 @@ class HTTPHeader(object):
list.sort() methods use the less-than operator for this purpose.
"""
if isinstance(other,HTTPHeader):
- if self._catsort != other._catsort:
- return self._catsort < other._catsort
+ if self.sort_order != other.sort_order:
+ return self.sort_order < other.sort_order
return self.name < other.name
return False
def __repr__(self):
- return '<%s %s>' % (self.__class__.__name__, self.name)
+ return '<HTTPHeader %s>' % self.name
+
+    def construct(self, **kwargs):
+        """
+        construct field-value(s) via keyword arguments
+
+        The base implementation of this method simply provides a list
+        of ``key`` / ``key=value`` items, using the convention that a
+        ``None`` or ``True`` value does not include an equal sign;
+        numbers are emitted bare and any other value is double-quoted.
+        It is intended that this be specialized for specific headers.
+        """
+        result = []
+        for (k,v) in kwargs.items():
+            # identity test: ``v in (None,True)`` also matched 1 and 1.0
+            # (1 == True), which silently dropped values like max_age=1
+            if v is None or v is True:
+                result.append(str(k))
+            elif isinstance(v,(float,int)):
+                result.append('%s=%s' % (k,v))
+            else:
+                result.append('%s="%s"' % (k,v))
+        return result
+
+    def format(self, *values):
+        """
+        produce a return value appropriate for this kind of header
+
+        The base class only handles the empty case, returning ``None``;
+        being given any actual values raises ``NotImplementedError``.
+        """
+        if values:
+            raise NotImplementedError()
+        return None
+
+    def __call__(self, *args, **kwargs):
+        """
+        Find or construct the field-value(s) for this header; what
+        happens depends upon the arguments:
+
+          - No arguments at all: ``None`` is returned.
+
+          - Keyword arguments only: equivalent to
+            ``format(*construct(**kwargs))``.
+
+          - A single list: it is treated as a WSGI ``response_headers``
+            list of ``(name, value)`` pairs; the values whose name
+            matches this header (case-insensitively) are collected and
+            passed to ``format()``.
+
+          - A single dict: it is treated as a WSGI ``environ`` (it must
+            contain ``wsgi.version``); the corresponding HTTP_ entry is
+            passed to ``format()``, or ``None`` is returned when the
+            entry is absent.
+
+          - Anything else: every argument is asserted to be a ``str``
+            and all of them are passed to ``format()``.
+
+        Keyword arguments are only consulted when no positional
+        arguments are given.  Passing anything alongside a WSGI list
+        or dict trips an assertion.
+        """
+        if not args:
+            if not kwargs:
+                return None
+            return self.format(*self.construct(**kwargs))
+        first = args[0]
+        if type(first) == list:
+            assert 1 == len(args)
+            wanted = self.name.lower()
+            found = [value for header, value in first
+                     if header.lower() == wanted]
+            return self.format(*found)
+        if type(first) == dict:
+            assert 1 == len(args) and 'wsgi.version' in first
+            value = first.get(self._environ_name)
+            if value is None:
+                return None
+            return self.format(value)
+        for item in args:
+            assert type(item) == str
+        return self.format(*args)
+
+class SingleValueHeader(HTTPHeader):
+    """
+    A header whose field-value is a single value.
+
+    Whatever is constructed or gathered from a collection is asserted
+    to contain exactly one value, and that value is returned as-is.
+    """
+
+    def format(self, *values):
+        if values:
+            assert len(values) == 1, \
+                   "found more than one value for singelton"
+            return values[0]
+        return None
+
+class MultiValueHeader(HTTPHeader):
+    """
+    A header that may carry several values, where the values can be
+    merged into one by joining them with a comma, as described by
+    section 4.2 of RFC 2616:
+
+        Multiple message-header fields with the same field-name MAY
+        be present in a message if and only if the entire
+        field-value for that header field is defined as a
+        comma-separated list [i.e., #(values)]. It MUST be possible
+        to combine the multiple header fields into one "field-name:
+        field-value" pair, without changing the semantics of the
+        message, by appending each subsequent field-value to the
+        first, each separated by a comma. The order in which header
+        fields with the same field-name are received is therefore
+        significant to the interpretation of the combined field
+        value, and thus a proxy MUST NOT change the order of these
+        field values when a message is forwarded.
+    """
+    def format(self, *values):
+        if values:
+            # keep the given order; RFC 2616 makes it significant
+            return ", ".join(values)
+        return None
+
+class MultiEntryHeader(HTTPHeader):
+    """
+    A header that may carry several values which must NOT be merged
+    with a comma: either the header is not in compliance with RFC 2616
+    (Set-Cookie), or common user-agents do not behave well when its
+    values are combined.  The values are returned as a list instead.
+    """
+    def format(self, *values):
+        # an empty list is falsy, so "no values" still yields None
+        return list(values) or None
def normalize_headers(response_headers, strict=True):
"""
@@ -152,7 +332,7 @@ def normalize_headers(response_headers, strict=True):
category[newhead] = 4
continue
response_headers[idx] = (str(head),val)
- category[str(head)] = head._catsort
+ category[str(head)] = head.sort_order
def compare(a,b):
ac = category[a[0]]
bc = category[b[0]]
@@ -164,7 +344,6 @@ def normalize_headers(response_headers, strict=True):
#
# For now, construct a minimalistic version of the field-names; at a
# later date more complicated headers may sprout content constructors.
-# This creates WSGI style HTTP_HEADER_NAME instances of HTTPHeader.
#
for (name, category, version, style, comment) in \
(("Accept" ,'request' ,'1.1','multi-value','RFC 2616 $14.1' )
@@ -216,9 +395,20 @@ for (name, category, version, style, comment) in \
,("Via" ,'general' ,'1.1','multi-value','RFC 2616 $14.45')
,("Warning" ,'general' ,'1.1','multi-entry','RFC 2616 $14.46')
,("WWW-Authenticate" ,'response','1.0','multi-entry','RFC 2616 $14.47')):
- head = HTTPHeader(name, category, version, style)
- head.__doc__ = comment
- pyname = 'HTTP_' + name.replace("-","_").upper()
- locals()[pyname] = head
- __all__.append(pyname)
+ cname = name.replace("-","")
+ bname = { 'multi-value': 'MultiValueHeader',
+ 'multi-entry': 'MultiEntryHeader',
+ 'singular' : 'SingleValueHeader'}[style]
+ exec """\
+class %(cname)s(%(bname)s):
+ "%(comment)s"
+ version = "%(version)s"
+%(cname)s('%(name)s','%(category)s');
+""" % { 'cname': cname, 'name': name,
+ 'category': category, 'bname': bname,
+ 'comment': comment, 'version': version } in globals(), globals()
+for head in _headers.values():
+ headname = head.name.replace("-","")
+ locals()[headname] = head
+ __all__.append(headname)