diff options
| author | Ira Lun <sammyrosajoe@gmail.com> | 2017-08-29 20:17:52 +0100 |
|---|---|---|
| committer | Ira Lun <sammyrosajoe@gmail.com> | 2017-08-29 20:28:38 +0100 |
| commit | d5adf04bedb9b3b0ff59f5e44bb4cabfd2a9ee52 (patch) | |
| tree | 197eb9a4aaeb0f89eb53ae7eb3207aa7527bc36f /src | |
| parent | 0a42419b695e32af7ca248ebb1cc0729aac83a7e (diff) | |
| download | webob-d5adf04bedb9b3b0ff59f5e44bb4cabfd2a9ee52.tar.gz | |
Rewrite AcceptCharset class, and add docs and tests.
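The old AcceptCharset.parse implicitly added 'ISO-8859-1' with a qvalue of 1
when the header listed neither '*' nor 'ISO-8859-1'. 'ISO-8859-1' was a default
charset in early versions of HTTP/1.1, but this is no longer the case
(https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Charset),
so the rewritten parse no longer adds it.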
Diffstat (limited to 'src')
| -rw-r--r-- | src/webob/acceptparse.py | 74 |
1 file changed, 60 insertions, 14 deletions
diff --git a/src/webob/acceptparse.py b/src/webob/acceptparse.py index 7424228..46e3db4 100644 --- a/src/webob/acceptparse.py +++ b/src/webob/acceptparse.py @@ -285,25 +285,71 @@ class NilAccept(object): return best_offer -class AcceptCharset(Accept): +class AcceptCharset(object): """ - Represents an ``Accept-Charset`` header. + Represent an ``Accept-Charset`` header. + + Base class for :class:`AcceptCharsetValidHeader`, + :class:`AcceptCharsetNoHeader`, and :class:`AcceptCharsetInvalidHeader`. """ - @staticmethod - def parse(value): + + # RFC 7231 Section 3.1.1.2 "Charset": + # charset = token + charset_re = token_re + # RFC 7231 Section 5.3.3 "Accept-Charset": + # Accept-Charset = 1#( ( charset / "*" ) [ weight ] ) + charset_n_weight_re = _item_n_weight_re(item_re=charset_re) + charset_n_weight_compiled_re = re.compile(charset_n_weight_re) + accept_charset_compiled_re = _list_1_or_more__compiled_re( + element_re=charset_n_weight_re, + ) + + @classmethod + def _python_value_to_header_str(cls, value): + if isinstance(value, str): + header_str = value + else: + if hasattr(value, 'items'): + value = sorted( + value.items(), + key=lambda item: item[1], + reverse=True, + ) + if isinstance(value, (tuple, list)): + result = [] + for item in value: + if isinstance(item, (tuple, list)): + item = _item_qvalue_pair_to_header_element(pair=item) + result.append(item) + header_str = ', '.join(result) + else: + header_str = str(value) + return header_str + + @classmethod + def parse(cls, value): """ - Parse ``Accept-Charset`` header. + Parse an ``Accept-Charset`` header. - Return iterator of ``(charset, qvalue)`` pairs. + :param value: (``str``) header value + :return: If `value` is a valid ``Accept-Charset`` header, returns an + iterator of (charset, quality value) tuples, as parsed from + the header from left to right. 
+ :raises ValueError: if `value` is an invalid header """ - latin1_found = False - for m, q in Accept.parse(value): - _m = m.lower() - if _m == '*' or _m == 'iso-8859-1': - latin1_found = True - yield _m, q - if not latin1_found: - yield ('iso-8859-1', 1) + # Check if header is valid + # Using Python stdlib's `re` module, there is currently no way to check + # the match *and* get all the groups using the same regex, so we have + # to use one regex to check the match, and another to get the groups. + if cls.accept_charset_compiled_re.match(value) is None: + raise ValueError('Invalid value for an Accept-Charset header.') + def generator(value): + for match in (cls.charset_n_weight_compiled_re.finditer(value)): + charset = match.group(1) + qvalue = match.group(2) + qvalue = float(qvalue) if qvalue else 1.0 + yield (charset, qvalue) + return generator(value=value) class AcceptEncoding(object): |
