Add serializer stage that decodes/re-encodes response content

author: Jordan Cook <jordan.cook@pioneer.com> 2022-06-10 12:36:08 -0500
committer: Jordan Cook <jordan.cook@pioneer.com> 2022-06-10 14:12:27 -0500
commit: 0c46f50cc8976c7dcb178788852deae5bac226cb (patch)
tree: f541d637780dfad07a63222c90e1fff28586aa97 /requests_cache/serializers
parent: 0b0bb1ef0a1dc4120123595154049948a6a2e209 (diff)
download: requests-cache-0c46f50cc8976c7dcb178788852deae5bac226cb.tar.gz
3 files changed, 106 insertions, 1 deletions
diff --git a/requests_cache/serializers/__init__.py b/requests_cache/serializers/__init__.py
index 6328ea7..36197e1 100644
--- a/requests_cache/serializers/__init__.py
+++ b/requests_cache/serializers/__init__.py
@@ -21,7 +21,7 @@ class that raises an ``ImportError`` at initialization time instead of at import
 # flake8: noqa: F401
 from typing import Union
 
-from .cattrs import CattrStage
+from .cattrs import CattrStage, DecodedBodyStage
 from .pipeline import SerializerPipeline, Stage
 from .preconf import (
     bson_document_serializer,
diff --git a/requests_cache/serializers/cattrs.py b/requests_cache/serializers/cattrs.py
index 708975b..6138d42 100644
--- a/requests_cache/serializers/cattrs.py
+++ b/requests_cache/serializers/cattrs.py
@@ -17,12 +17,18 @@ from typing import Callable, Dict, ForwardRef, MutableMapping
 
 from cattr import GenConverter
 from requests.cookies import RequestsCookieJar, cookiejar_from_dict
+from requests.exceptions import JSONDecodeError
 from requests.structures import CaseInsensitiveDict
 from urllib3._collections import HTTPHeaderDict
 
 from ..models import CachedResponse
 from .pipeline import Stage
 
+try:
+    import ujson as json
+except ImportError:
+    import json  # type: ignore
+
 
 class CattrStage(Stage):
     """Base serializer class that does pre/post-processing with  ``cattrs``. This can be used either
@@ -43,6 +49,53 @@ class CattrStage(Stage):
         return self.converter.structure(value, cl=CachedResponse)
 
 
+class DecodedBodyStage(CattrStage):
+    """Converter that decodes the response body into a human-readable format (if possible) when
+    serializing, and re-encodes it to reconstruct the original response. Supported bodies are
+    JSON objects/arrays (``application/json``) and ``text/*``. All others will be saved as-is.
+
+    Notes:
+
+    * This needs access to the response object for decoding, so this is used _instead_ of
+      CattrStage, not before/after it.
+    * Decoded responses are saved in a separate ``_decoded_content`` attribute, to ensure that
+      ``_content`` is always binary.
+    """
+
+    def dumps(self, value: CachedResponse) -> Dict:
+        """Unstructure a response, swapping ``_content`` for a decoded body when possible"""
+        response_dict = super().dumps(value)
+        # Compare only the media type, ignoring case and parameters such as ``charset``
+        content_type = (value.headers.get('Content-Type') or '').split(';')[0].strip().lower()
+
+        if content_type == 'application/json':
+            try:
+                decoded = value.json()
+            except JSONDecodeError:
+                decoded = None  # Invalid JSON; keep the raw body
+            # Scalars stay raw: loads() only re-encodes objects and arrays as JSON
+            if isinstance(decoded, (dict, list)):
+                response_dict['_decoded_content'] = decoded
+                response_dict.pop('_content', None)
+        elif content_type.startswith('text/'):
+            response_dict['_decoded_content'] = value.text
+            response_dict.pop('_content', None)
+        return response_dict
+
+    def loads(self, value: Dict) -> CachedResponse:
+        """Structure a response, re-encoding any decoded body into binary ``_content``"""
+        decoded = value.get('_decoded_content')
+        if isinstance(decoded, (dict, list)):  # JSON object or array
+            decoded = json.dumps(decoded)
+        if not isinstance(decoded, str):
+            return super().loads(value)  # Nothing to re-encode
+        response = super().loads({**value, '_decoded_content': decoded})
+        response._content = decoded.encode('utf-8')
+        response._decoded_content = ''
+        response.encoding = 'utf-8'  # Set encoding explicitly so requests doesn't have to guess
+        return response
+
+
 def init_converter(
     factory: Callable[..., GenConverter] = None,
     convert_datetime: bool = True,
diff --git a/requests_cache/serializers/content_decoder.py b/requests_cache/serializers/content_decoder.py
new file mode 100644
index 0000000..71bc3fd
--- /dev/null
+++ b/requests_cache/serializers/content_decoder.py
@@ -0,0 +1,52 @@
+import json
+from typing import Dict
+
+from requests.exceptions import JSONDecodeError
+
+from requests_cache.models.response import CachedResponse
+from requests_cache.serializers.cattrs import CattrStage
+
+
+class DecodeBodyStage(CattrStage):
+    """Converter that decodes the response body into a human-readable format when serializing
+    (if possible), and re-encodes it to reconstruct the original response. Supported bodies are
+    JSON objects/arrays (``application/json``) and ``text/*``. All others will be saved as-is.
+
+    This needs access to the CachedResponse object for decoding, so this is used _instead_ of
+    CattrStage, not before/after it.
+    """
+
+    def dumps(self, value: CachedResponse) -> Dict:
+        """Unstructure a response, swapping ``_content`` for decoded ``content`` when possible"""
+        response_dict = super().dumps(value)
+        # Compare only the media type, ignoring case and parameters such as ``charset``
+        content_type = (value.headers.get('Content-Type') or '').split(';')[0].strip().lower()
+
+        if content_type == 'application/json':
+            try:
+                decoded = value.json()
+            except JSONDecodeError:
+                decoded = None  # Invalid JSON; keep the raw body
+            # Scalars stay raw: loads() only re-encodes objects and arrays as JSON
+            if isinstance(decoded, (dict, list)):
+                response_dict['content'] = decoded
+                response_dict.pop('_content', None)
+        elif content_type.startswith('text/'):
+            response_dict['content'] = value.text
+            response_dict.pop('_content', None)
+        return response_dict
+
+    def loads(self, value: Dict) -> CachedResponse:
+        """Structure a response, re-encoding decoded ``content`` into binary ``_content``"""
+        # Check for None, not truthiness, so an empty body ('' or {}) is restored too
+        if value.get('content') is not None:
+            value['_content'] = value.pop('content')
+        content = value.setdefault('_content', None)
+        if isinstance(content, (dict, list)):  # JSON object or array
+            content = json.dumps(content)
+        if not isinstance(content, str):
+            return super().loads(value)  # Nothing to re-encode
+        value['_content'] = content.encode('utf-8')
+        response = super().loads(value)
+        response.encoding = 'utf-8'  # Set encoding explicitly so requests doesn't have to guess
+        return response
author	Jordan Cook <jordan.cook@pioneer.com>	2022-06-10 12:36:08 -0500
committer	Jordan Cook <jordan.cook@pioneer.com>	2022-06-10 14:12:27 -0500
commit	0c46f50cc8976c7dcb178788852deae5bac226cb (patch)
tree	f541d637780dfad07a63222c90e1fff28586aa97 /requests_cache/serializers
parent	0b0bb1ef0a1dc4120123595154049948a6a2e209 (diff)
download	requests-cache-0c46f50cc8976c7dcb178788852deae5bac226cb.tar.gz