summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorIra Lun <sammyrosajoe@gmail.com>2017-08-29 20:39:46 +0100
committerIra Lun <sammyrosajoe@gmail.com>2017-08-29 20:39:46 +0100
commitf576cc863b16b41861eda315dd630c6119599b8d (patch)
treec058cee03cb1b182489b790f47961a74cf2b723e /src
parentdab3c4f955c164a4f49baceee4c1cc843b1ff676 (diff)
downloadwebob-f576cc863b16b41861eda315dd630c6119599b8d.tar.gz
Add AcceptCharsetValidHeader class, docs and tests.
Diffstat (limited to 'src')
-rw-r--r--src/webob/acceptparse.py417
1 files changed, 417 insertions, 0 deletions
diff --git a/src/webob/acceptparse.py b/src/webob/acceptparse.py
index 46e3db4..c318f3f 100644
--- a/src/webob/acceptparse.py
+++ b/src/webob/acceptparse.py
@@ -352,6 +352,423 @@ class AcceptCharset(object):
return generator(value=value)
+class AcceptCharsetValidHeader(AcceptCharset):
+ """
+ Represent a valid ``Accept-Charset`` header.
+
+ A valid header is one that conforms to :rfc:`RFC 7231, section 5.3.3
+ <7231#section-5.3.3>`.
+
+ This object should not be modified. To add to the header, we can use the
+ addition operators (``+`` and ``+=``), which return a new object (see the
+ docstring for :meth:`AcceptCharsetValidHeader.__add__`).
+ """
+
+ @property
+ def header_value(self):
+ """(``str``) The header value."""
+ return self._header_value
+
+ @property
+ def parsed(self):
+ """
+ (``list``) Parsed form of the header.
+
+ A list of (charset, quality value) tuples.
+ """
+ return self._parsed
+
+ def __init__(self, header_value):
+ """
+ Create an :class:`AcceptCharsetValidHeader` instance.
+
+ :param header_value: (``str``) header value.
+ :raises ValueError: if `header_value` is an invalid value for an
+ ``Accept-Charset`` header.
+ """
+ self._header_value = header_value
+ self._parsed = list(self.parse(header_value))
+ self._parsed_nonzero = [
+ item for item in self.parsed if item[1] # item[1] is the qvalue
+ ]
+
+ def __add__(self, other):
+ """
+ Add to header, creating a new header object.
+
+ `other` can be:
+
+ * ``None``
+ * a ``str`` header value
+ * a ``dict``, where keys are charsets and values are qvalues
+ * a ``tuple`` or ``list``, where each item is a charset ``str`` or a
+ ``tuple`` or ``list`` (charset, qvalue) pair (``str``\ s and pairs
+ can be mixed within the ``tuple`` or ``list``)
+ * an :class:`AcceptCharsetValidHeader`, :class:`AcceptCharsetNoHeader`,
+ or :class:`AcceptCharsetInvalidHeader` instance
+ * object of any other type that returns a value for ``__str__``
+
+ If `other` is a valid header value or another
+ :class:`AcceptCharsetValidHeader` instance, the two header values are
+ joined with ``', '``, and a new :class:`AcceptCharsetValidHeader`
+ instance with the new header value is returned.
+
+ If `other` is ``None``, an :class:`AcceptCharsetNoHeader` instance, an
+ invalid header value, or an :class:`AcceptCharsetInvalidHeader`
+ instance, a new :class:`AcceptCharsetValidHeader` instance with the
+ same header value as ``self`` is returned.
+ """
+ if isinstance(other, AcceptCharsetValidHeader):
+ return create_accept_charset_header(
+ header_value=self.header_value + ', ' + other.header_value,
+ )
+
+ if isinstance(
+ other, (AcceptCharsetNoHeader, AcceptCharsetInvalidHeader)
+ ):
+ return self.__class__(header_value=self.header_value)
+
+ return self._add_instance_and_non_accept_charset_type(
+ instance=self, other=other,
+ )
+
+ def __bool__(self):
+ """
+ Return whether ``self`` represents a valid ``Accept-Charset`` header.
+
+ Return ``True`` if ``self`` represents a valid header, and ``False`` if
+ it represents an invalid header, or the header not being in the
+ request.
+
+ For this class, it always returns ``True``.
+ """
+ return True
+ __nonzero__ = __bool__ # Python 2
+
+ def __contains__(self, offer):
+ """
+ Return ``bool`` indicating whether `offer` is acceptable.
+
+ .. warning::
+
+ The behavior of :meth:`AcceptCharsetValidHeader.__contains__` is
+ currently being maintained for backward compatibility, but it will
+ change in the future to better conform to the RFC.
+
+ :param offer: (``str``) charset offer
+ :return: (``bool``) Whether ``offer`` is acceptable according to the
+ header.
+
+ This does not fully conform to :rfc:`RFC 7231, section 5.3.3
+ <7231#section-5.3.3>`: it incorrect interprets ``*`` to mean 'match any
+ charset in the header', rather than 'match any charset that is not
+ mentioned elsewhere in the header'::
+
+ >>> 'UTF-8' in AcceptCharsetValidHeader('UTF-8;q=0, *')
+ True
+ """
+ warnings.warn(
+ 'The behavior of AcceptCharsetValidHeader.__contains__ is '
+ 'currently being maintained for backward compatibility, but it '
+ 'will change in the future to better conform to the RFC.',
+ DeprecationWarning,
+ )
+ for mask, quality in self._parsed_nonzero:
+ if self._old_match(mask, offer):
+ return True
+ return False
+
+ def __iter__(self):
+ """
+ Return all the items with non-0 qvalues, in order of preference.
+
+ .. warning::
+
+ The behavior of this method is currently maintained for backward
+ compatibility, but will change in the future.
+
+ :return: iterator of all the items (charset or ``*``) in the header
+ with non-0 qvalues, in descending order of qvalue. If two
+ items have the same qvalue, they are returned in the order of
+ their positions in the header, from left to right.
+
+ Please note that this is a simple filter for the items in the header
+ with non-0 qvalues, and is not necessarily the same as what the client
+ prefers, e.g. ``'utf-7;q=0, *'`` means 'everything but utf-7', but
+ ``list(instance)`` would return only ``['*']``.
+ """
+ warnings.warn(
+ 'The behavior of AcceptCharsetValidHeader.__iter__ is currently '
+ 'maintained for backward compatibility, but will change in the '
+ 'future.',
+ DeprecationWarning,
+ )
+ for m,q in sorted(
+ self._parsed_nonzero,
+ key=lambda i: i[1],
+ reverse=True
+ ):
+ yield m
+
+ def __radd__(self, other):
+ """
+ Add to header, creating a new header object.
+
+ See the docstring for :meth:`AcceptCharsetValidHeader.__add__`.
+ """
+ return self._add_instance_and_non_accept_charset_type(
+ instance=self, other=other, instance_on_the_right=True,
+ )
+
+ def __repr__(self):
+ return '<{} ({!r})>'.format(self.__class__.__name__, str(self))
+
+ def __str__(self):
+ r"""
+ Return a tidied up version of the header value.
+
+ e.g. If the ``header_value`` is ``', \t,iso-8859-5;q=0.000 \t,
+ utf-8;q=1.000, UTF-7, unicode-1-1;q=0.210 ,'``, ``str(instance)``
+ returns ``'iso-8859-5;q=0, utf-8, UTF-7, unicode-1-1;q=0.21'``.
+ """
+ return ', '.join(
+ _item_qvalue_pair_to_header_element(pair=tuple_)
+ for tuple_ in self.parsed
+ )
+
+ def _add_instance_and_non_accept_charset_type(
+ self, instance, other, instance_on_the_right=False,
+ ):
+ if not other:
+ return self.__class__(header_value=instance.header_value)
+
+ other_header_value = self._python_value_to_header_str(value=other)
+
+ try:
+ self.parse(value=other_header_value)
+ except ValueError: # invalid header value
+ return self.__class__(header_value=instance.header_value)
+
+ new_header_value = (
+ (other_header_value + ', ' + instance.header_value)
+ if instance_on_the_right
+ else (instance.header_value + ', ' + other_header_value)
+ )
+ return self.__class__(header_value=new_header_value)
+
+ def _old_match(self, mask, offer):
+ """
+ Return whether charset offer matches header item (charset or ``*``).
+
+ .. warning::
+
+ This is maintained for backward compatibility, and will be
+ deprecated in the future.
+
+ This method was WebOb's old criterion for deciding whether a charset
+ matches a header item (charset or ``*``), used in
+
+ - :meth:`AcceptCharsetValidHeader.__contains__`
+ - :meth:`AcceptCharsetValidHeader.best_match`
+ - :meth:`AcceptCharsetValidHeader.quality`
+
+ It does not conform to :rfc:`RFC 7231, section 5.3.3
+ <7231#section-5.3.3>` in that it does not interpret ``*`` values in the
+ header correctly: ``*`` should only match charsets not mentioned
+ elsewhere in the header.
+ """
+ return mask == '*' or offer.lower() == mask.lower()
+
+ def acceptable_offers(self, offers):
+ """
+ Return the offers that are acceptable according to the header.
+
+ The offers are returned in descending order of preference, where
+ preference is indicated by the qvalue of the charset or ``*`` in the
+ header matching the offer.
+
+ This uses the matching rules described in :rfc:`RFC 7231, section 5.3.3
+ <7231#section-5.3.3>`.
+
+ :param offers: ``iterable`` of ``str`` charsets
+ :return: A list of tuples of the form (charset, qvalue), in descending
+ order of qvalue. Where two offers have the same qvalue, they
+ are returned in the same order as their order in `offers`.
+ """
+ lowercased_parsed = [
+ (charset.lower(), qvalue) for (charset, qvalue) in self.parsed
+ ]
+ lowercased_offers = [offer.lower() for offer in offers]
+
+ not_acceptable_charsets = set()
+ acceptable_charsets = dict()
+ asterisk_qvalue = None
+
+ for charset, qvalue in lowercased_parsed:
+ if charset == '*':
+ if asterisk_qvalue is None:
+ asterisk_qvalue = qvalue
+ elif (
+ charset not in acceptable_charsets and charset not in
+ not_acceptable_charsets
+ # if we have not already encountered this charset in the header
+ ):
+ if qvalue == 0.0:
+ not_acceptable_charsets.add(charset)
+ else:
+ acceptable_charsets[charset] = qvalue
+ acceptable_charsets = list(acceptable_charsets.items())
+ # Sort acceptable_charsets by qvalue, descending order
+ acceptable_charsets.sort(key=lambda tuple_: tuple_[1], reverse=True)
+
+ filtered_offers = []
+ for index, offer in enumerate(lowercased_offers):
+ # If offer matches a non-* charset with q=0, it is filtered out
+ if any((
+ (offer == charset) for charset in not_acceptable_charsets
+ )):
+ continue
+
+ matched_charset_qvalue = None
+ for charset, qvalue in acceptable_charsets:
+ if offer == charset:
+ matched_charset_qvalue = qvalue
+ break
+ else:
+ if asterisk_qvalue:
+ matched_charset_qvalue = asterisk_qvalue
+ if matched_charset_qvalue is not None: # if there was a match
+ filtered_offers.append((
+ offers[index], matched_charset_qvalue, index
+ ))
+
+ # sort by position in `offers` argument, ascending
+ filtered_offers.sort(key=lambda tuple_: tuple_[2])
+ # When qvalues are tied, position in `offers` is the tiebreaker.
+
+ # sort by qvalue, descending
+ filtered_offers.sort(key=lambda tuple_: tuple_[1], reverse=True)
+
+ return [(item[0], item[1]) for item in filtered_offers]
+ # (offer, qvalue), dropping the position
+
+ def best_match(self, offers, default_match=None):
+ """
+ Return the best match from the sequence of charset `offers`.
+
+ .. warning::
+
+ This is currently maintained for backward compatibility, and will be
+ deprecated in the future.
+
+ :meth:`AcceptCharsetValidHeader.best_match` has many issues, and
+ does not conform to :rfc:`RFC 7231 <7231>`.
+
+ Each charset in `offers` is checked against each non-``q=0`` item
+ (charset or ``*``) in the header. If the two are a match according to
+ WebOb's old criterion for a match, the quality value of the match is
+ the qvalue of the item from the header multiplied by the server quality
+ value of the offer (if the server quality value is not supplied, it is
+ 1).
+
+ The offer in the match with the highest quality value is the best
+ match. If there is more than one match with the highest qvalue, the one
+ that shows up first in `offers` is the best match.
+
+ :param offers: (iterable)
+
+ | Each item in the iterable may be a ``str`` charset, or
+ a (charset, server quality value) ``tuple`` or
+ ``list``. (The two may be mixed in the iterable.)
+
+ :param default_match: (optional, any type) the value to be returned if
+ there is no match
+
+ :return: (``str``, or the type of `default_match`)
+
+ | The offer that is the best match. If there is no match, the
+ value of `default_match` is returned.
+
+ The algorithm behind this method was written for the ``Accept`` header
+ rather than the ``Accept-Charset`` header. It uses the old criterion of
+ a match in :meth:`AcceptCharsetValidHeader._old_match`, which does not
+ conform to :rfc:`RFC 7231, section 5.3.3 <7231#section-5.3.3>`, in that
+ it does not interpret ``*`` values in the header correctly: ``*``
+ should only match charsets not mentioned elsewhere in the header::
+
+ >>> AcceptCharsetValidHeader('utf-8;q=0, *').best_match(['utf-8'])
+ 'utf-8'
+ """
+ warnings.warn(
+ 'The behavior of AcceptCharsetValidHeader.best_match is currently'
+ ' being maintained for backward compatibility, but it will be '
+ 'deprecated in the future, as it does not conform to the RFC.',
+ DeprecationWarning,
+ )
+ best_quality = -1
+ best_offer = default_match
+ matched_by = '*/*'
+ for offer in offers:
+ if isinstance(offer, (tuple, list)):
+ offer, server_quality = offer
+ else:
+ server_quality = 1
+ for mask, quality in self._parsed_nonzero:
+ possible_quality = server_quality * quality
+ if possible_quality < best_quality:
+ continue
+ elif possible_quality == best_quality:
+ # 'text/plain' overrides 'message/*' overrides '*/*'
+ # (if all match w/ the same q=)
+ # [We can see that this was written for the Accept header,
+ # not the Accept-Charset header.]
+ if matched_by.count('*') <= mask.count('*'):
+ continue
+ if self._old_match(mask, offer):
+ best_quality = possible_quality
+ best_offer = offer
+ matched_by = mask
+ return best_offer
+
+ def quality(self, offer):
+ """
+ Return quality value of given offer, or ``None`` if there is no match.
+
+ .. warning::
+
+ This is currently maintained for backward compatibility, and will be
+ deprecated in the future.
+
+ :param offer: (``str``) charset offer
+ :return: (``float`` or ``None``)
+
+ | The quality value from the charset that matches the `offer`,
+ or ``None`` if there is no match.
+
+ This uses the old criterion of a match in
+ :meth:`AcceptCharsetValidHeader._old_match`, which does not conform to
+ :rfc:`RFC 7231, section 5.3.3 <7231#section-5.3.3>`, in that it does
+ not interpret ``*`` values in the header correctly: ``*`` should only
+ match charsets not mentioned elsewhere in the header::
+
+ >>> AcceptCharsetValidHeader('utf-8;q=0, *').quality('utf-8')
+ 1.0
+ >>> AcceptCharsetValidHeader('utf-8;q=0.9, *').quality('utf-8')
+ 1.0
+ """
+ warnings.warn(
+ 'The behavior of AcceptCharsetValidHeader.quality is currently '
+ 'being maintained for backward compatibility, but it will be '
+ 'deprecated in the future, as it does not conform to the RFC.',
+ DeprecationWarning,
+ )
+ bestq = 0
+ for mask, q in self.parsed:
+ if self._old_match(mask, offer):
+ bestq = max(bestq, q)
+ return bestq or None
+
+
class AcceptEncoding(object):
"""
Represent an ``Accept-Encoding`` header.