summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author Ira Lun <sammyrosajoe@gmail.com> 2017-08-29 20:17:52 +0100
committer Ira Lun <sammyrosajoe@gmail.com> 2017-08-29 20:28:38 +0100
commit d5adf04bedb9b3b0ff59f5e44bb4cabfd2a9ee52 (patch)
tree 197eb9a4aaeb0f89eb53ae7eb3207aa7527bc36f /src
parent 0a42419b695e32af7ca248ebb1cc0729aac83a7e (diff)
download webob-d5adf04bedb9b3b0ff59f5e44bb4cabfd2a9ee52.tar.gz
Rewrite AcceptCharset class, and add docs and tests.
From the old AcceptCharset.parse: 'ISO-8859-1' was a default charset in early versions of HTTP/1.1, but this is no longer the case (https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Charset)
Diffstat (limited to 'src')
-rw-r--r-- src/webob/acceptparse.py 74
1 files changed, 60 insertions, 14 deletions
diff --git a/src/webob/acceptparse.py b/src/webob/acceptparse.py
index 7424228..46e3db4 100644
--- a/src/webob/acceptparse.py
+++ b/src/webob/acceptparse.py
@@ -285,25 +285,71 @@ class NilAccept(object):
return best_offer
-class AcceptCharset(Accept):
+class AcceptCharset(object):
"""
- Represents an ``Accept-Charset`` header.
+ Represent an ``Accept-Charset`` header.
+
+ Base class for :class:`AcceptCharsetValidHeader`,
+ :class:`AcceptCharsetNoHeader`, and :class:`AcceptCharsetInvalidHeader`.
"""
- @staticmethod
- def parse(value):
+
+ # RFC 7231 Section 3.1.1.2 "Charset":
+ # charset = token
+ charset_re = token_re
+ # RFC 7231 Section 5.3.3 "Accept-Charset":
+ # Accept-Charset = 1#( ( charset / "*" ) [ weight ] )
+ charset_n_weight_re = _item_n_weight_re(item_re=charset_re)
+ charset_n_weight_compiled_re = re.compile(charset_n_weight_re)
+ accept_charset_compiled_re = _list_1_or_more__compiled_re(
+ element_re=charset_n_weight_re,
+ )
+
+ @classmethod
+ def _python_value_to_header_str(cls, value):
+ if isinstance(value, str):
+ header_str = value
+ else:
+ if hasattr(value, 'items'):
+ value = sorted(
+ value.items(),
+ key=lambda item: item[1],
+ reverse=True,
+ )
+ if isinstance(value, (tuple, list)):
+ result = []
+ for item in value:
+ if isinstance(item, (tuple, list)):
+ item = _item_qvalue_pair_to_header_element(pair=item)
+ result.append(item)
+ header_str = ', '.join(result)
+ else:
+ header_str = str(value)
+ return header_str
+
+ @classmethod
+ def parse(cls, value):
"""
- Parse ``Accept-Charset`` header.
+ Parse an ``Accept-Charset`` header.
- Return iterator of ``(charset, qvalue)`` pairs.
+ :param value: (``str``) header value
+ :return: If `value` is a valid ``Accept-Charset`` header, returns an
+ iterator of (charset, quality value) tuples, as parsed from
+ the header from left to right.
+ :raises ValueError: if `value` is an invalid header
"""
- latin1_found = False
- for m, q in Accept.parse(value):
- _m = m.lower()
- if _m == '*' or _m == 'iso-8859-1':
- latin1_found = True
- yield _m, q
- if not latin1_found:
- yield ('iso-8859-1', 1)
+ # Check if header is valid
+ # Using Python stdlib's `re` module, there is currently no way to check
+ # the match *and* get all the groups using the same regex, so we have
+ # to use one regex to check the match, and another to get the groups.
+ if cls.accept_charset_compiled_re.match(value) is None:
+ raise ValueError('Invalid value for an Accept-Charset header.')
+ def generator(value):
+ for match in (cls.charset_n_weight_compiled_re.finditer(value)):
+ charset = match.group(1)
+ qvalue = match.group(2)
+ qvalue = float(qvalue) if qvalue else 1.0
+ yield (charset, qvalue)
+ return generator(value=value)
class AcceptEncoding(object):