Rewrite AcceptCharset class, and add docs and tests.

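Drop the implicit 'ISO-8859-1' entry that the old AcceptCharset.parse appended whenever the header listed neither '*' nor 'iso-8859-1': 'ISO-8859-1' was a default charset in early versions of HTTP/1.1, but this is no longer the case (see https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Charset).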
author: Ira Lun <sammyrosajoe@gmail.com> 2017-08-29 20:17:52 +0100
committer: Ira Lun <sammyrosajoe@gmail.com> 2017-08-29 20:28:38 +0100
commit: d5adf04bedb9b3b0ff59f5e44bb4cabfd2a9ee52 (patch)
tree: 197eb9a4aaeb0f89eb53ae7eb3207aa7527bc36f /src
parent: 0a42419b695e32af7ca248ebb1cc0729aac83a7e (diff)
download: webob-d5adf04bedb9b3b0ff59f5e44bb4cabfd2a9ee52.tar.gz
1 file changed, 60 insertions, 14 deletions
diff --git a/src/webob/acceptparse.py b/src/webob/acceptparse.py
index 7424228..46e3db4 100644
--- a/src/webob/acceptparse.py
+++ b/src/webob/acceptparse.py
@@ -285,25 +285,71 @@ class NilAccept(object):
         return best_offer
 
 
-class AcceptCharset(Accept):
+class AcceptCharset(object):
     """
-    Represents an ``Accept-Charset`` header.
+    Represent an ``Accept-Charset`` header.
+
+    Base class for :class:`AcceptCharsetValidHeader`,
+    :class:`AcceptCharsetNoHeader`, and :class:`AcceptCharsetInvalidHeader`.
     """
-    @staticmethod
-    def parse(value):
+
+    # RFC 7231 Section 3.1.1.2 "Charset":
+    # charset = token
+    charset_re = token_re
+    # RFC 7231 Section 5.3.3 "Accept-Charset":
+    # Accept-Charset = 1#( ( charset / "*" ) [ weight ] )
+    charset_n_weight_re = _item_n_weight_re(item_re=charset_re)
+    charset_n_weight_compiled_re = re.compile(charset_n_weight_re)
+    accept_charset_compiled_re = _list_1_or_more__compiled_re(
+        element_re=charset_n_weight_re,
+    )
+
+    @classmethod
+    def _python_value_to_header_str(cls, value):
+        if isinstance(value, str):
+            header_str = value
+        else:
+            if hasattr(value, 'items'):
+                value = sorted(
+                    value.items(),
+                    key=lambda item: item[1],
+                    reverse=True,
+                )
+            if isinstance(value, (tuple, list)):
+                result = []
+                for item in value:
+                    if isinstance(item, (tuple, list)):
+                        item = _item_qvalue_pair_to_header_element(pair=item)
+                    result.append(item)
+                header_str = ', '.join(result)
+            else:
+                header_str = str(value)
+        return header_str
+
+    @classmethod
+    def parse(cls, value):
         """
-        Parse ``Accept-Charset`` header.
+        Parse an ``Accept-Charset`` header.
 
-        Return iterator of ``(charset, qvalue)`` pairs.
+        :param value: (``str``) header value
+        :return: If `value` is a valid ``Accept-Charset`` header, returns an
+                 iterator of (charset, quality value) tuples, as parsed from
+                 the header from left to right.
+        :raises ValueError: if `value` is an invalid header
         """
-        latin1_found = False
-        for m, q in Accept.parse(value):
-            _m = m.lower()
-            if _m == '*' or _m == 'iso-8859-1':
-                latin1_found = True
-            yield _m, q
-        if not latin1_found:
-            yield ('iso-8859-1', 1)
+        # Check if header is valid
+        # Using Python stdlib's `re` module, there is currently no way to check
+        # the match *and* get all the groups using the same regex, so we have
+        # to use one regex to check the match, and another to get the groups.
+        if cls.accept_charset_compiled_re.match(value) is None:
+            raise ValueError('Invalid value for an Accept-Charset header.')
+        def generator(value):
+            for match in (cls.charset_n_weight_compiled_re.finditer(value)):
+                charset = match.group(1)
+                qvalue = match.group(2)
+                qvalue = float(qvalue) if qvalue else 1.0
+                yield (charset, qvalue)
+        return generator(value=value)
 
 
 class AcceptEncoding(object):
author	Ira Lun <sammyrosajoe@gmail.com>	2017-08-29 20:17:52 +0100
committer	Ira Lun <sammyrosajoe@gmail.com>	2017-08-29 20:28:38 +0100
commit	d5adf04bedb9b3b0ff59f5e44bb4cabfd2a9ee52 (patch)
tree	197eb9a4aaeb0f89eb53ae7eb3207aa7527bc36f /src
parent	0a42419b695e32af7ca248ebb1cc0729aac83a7e (diff)
download	webob-d5adf04bedb9b3b0ff59f5e44bb4cabfd2a9ee52.tar.gz