summaryrefslogtreecommitdiff
path: root/requests_cache/serializers
diff options
context:
space:
mode:
authorJordan Cook <jordan.cook@pioneer.com>2022-06-10 12:36:08 -0500
committerJordan Cook <jordan.cook@pioneer.com>2022-06-10 14:12:27 -0500
commit0c46f50cc8976c7dcb178788852deae5bac226cb (patch)
treef541d637780dfad07a63222c90e1fff28586aa97 /requests_cache/serializers
parent0b0bb1ef0a1dc4120123595154049948a6a2e209 (diff)
downloadrequests-cache-0c46f50cc8976c7dcb178788852deae5bac226cb.tar.gz
Add serializer stage that decodes/re-encodes response content
Diffstat (limited to 'requests_cache/serializers')
-rw-r--r--requests_cache/serializers/__init__.py2
-rw-r--r--requests_cache/serializers/cattrs.py53
-rw-r--r--requests_cache/serializers/content_decoder.py52
3 files changed, 106 insertions, 1 deletions
diff --git a/requests_cache/serializers/__init__.py b/requests_cache/serializers/__init__.py
index 6328ea7..36197e1 100644
--- a/requests_cache/serializers/__init__.py
+++ b/requests_cache/serializers/__init__.py
@@ -21,7 +21,7 @@ class that raises an ``ImportError`` at initialization time instead of at import
# flake8: noqa: F401
from typing import Union
-from .cattrs import CattrStage
+from .cattrs import CattrStage, DecodedBodyStage
from .pipeline import SerializerPipeline, Stage
from .preconf import (
bson_document_serializer,
diff --git a/requests_cache/serializers/cattrs.py b/requests_cache/serializers/cattrs.py
index 708975b..6138d42 100644
--- a/requests_cache/serializers/cattrs.py
+++ b/requests_cache/serializers/cattrs.py
@@ -17,12 +17,18 @@ from typing import Callable, Dict, ForwardRef, MutableMapping
from cattr import GenConverter
from requests.cookies import RequestsCookieJar, cookiejar_from_dict
+from requests.exceptions import JSONDecodeError
from requests.structures import CaseInsensitiveDict
from urllib3._collections import HTTPHeaderDict
from ..models import CachedResponse
from .pipeline import Stage
+try:
+ import ujson as json
+except ImportError:
+ import json # type: ignore
+
class CattrStage(Stage):
"""Base serializer class that does pre/post-processing with ``cattrs``. This can be used either
@@ -43,6 +49,53 @@ class CattrStage(Stage):
return self.converter.structure(value, cl=CachedResponse)
+class DecodedBodyStage(CattrStage):
+    """Converter that decodes the response body into a human-readable format (if possible) when
+    serializing, and re-encodes it to reconstruct the original response. Supported Content-Types
+    are ``application/json`` and ``text/*``. All other types will be saved as-is.
+
+    Notes:
+
+    * This needs access to the response object for decoding, so this is used _instead_ of
+      CattrStage, not before/after it.
+    * Decoded responses are saved in a separate ``_decoded_content`` attribute, to ensure that
+      ``_content`` is always binary.
+    * Only the media type is compared (case-insensitively); parameters such as
+      ``; charset=utf-8`` are ignored.
+    * Only JSON objects and arrays are stored decoded. A JSON scalar or string body cannot be
+      told apart from plain text when loading, so it is left in ``_content`` unchanged.
+    """
+
+    def dumps(self, value: CachedResponse) -> Dict:
+        """Convert a response to a dict, replacing ``_content`` with ``_decoded_content`` when
+        the body can be decoded as JSON or text.
+        """
+        response_dict = super().dumps(value)
+
+        # Strip media type parameters (e.g. "application/json; charset=utf-8") before comparing
+        content_type = (value.headers.get('Content-Type') or '').split(';')[0].strip().lower()
+
+        # Decode body as JSON
+        if content_type == 'application/json':
+            try:
+                decoded = value.json()
+            except JSONDecodeError:
+                # Not valid JSON after all; keep the original binary body
+                pass
+            else:
+                # Scalars and strings would be ambiguous with text bodies in loads()
+                if isinstance(decoded, (dict, list)):
+                    response_dict['_decoded_content'] = decoded
+                    response_dict.pop('_content', None)
+
+        # Decode body as text
+        elif content_type.startswith('text/'):
+            response_dict['_decoded_content'] = value.text
+            response_dict.pop('_content', None)
+
+        # Otherwise, it is most likely a binary body
+        return response_dict
+
+    def loads(self, value: Dict) -> CachedResponse:
+        """Convert a dict back to a response, re-encoding ``_decoded_content`` (if present) into
+        binary ``_content``.
+        """
+        # Re-encode JSON bodies; both objects and arrays may have been stored by dumps()
+        if isinstance(value.get('_decoded_content'), (dict, list)):
+            value['_decoded_content'] = json.dumps(value['_decoded_content'])
+
+        # No decoded text to restore: the body (if any) is already binary
+        if not isinstance(value.get('_decoded_content'), str):
+            return super().loads(value)
+
+        response = super().loads(value)
+        response._content = response._decoded_content.encode('utf-8')
+        response._decoded_content = ''
+        response.encoding = 'utf-8'  # Set encoding explicitly so requests doesn't have to guess
+        return response
+
+
def init_converter(
factory: Callable[..., GenConverter] = None,
convert_datetime: bool = True,
diff --git a/requests_cache/serializers/content_decoder.py b/requests_cache/serializers/content_decoder.py
new file mode 100644
index 0000000..71bc3fd
--- /dev/null
+++ b/requests_cache/serializers/content_decoder.py
@@ -0,0 +1,52 @@
+import json
+from typing import Dict
+
+from requests.exceptions import JSONDecodeError
+
+from requests_cache.models.response import CachedResponse
+from requests_cache.serializers.cattrs import CattrStage
+
+
+class DecodeBodyStage(CattrStage):
+    """Converter that decodes the response body into a human-readable format when serializing
+    (if possible), and re-encodes it to reconstruct the original response. Supported Content-Types
+    are ``application/json`` and ``text/*``. All other types will be saved as-is.
+
+    This needs access to the CachedResponse object for decoding, so this is used _instead_ of
+    CattrStage, not before/after it.
+
+    Only the media type is compared (case-insensitively); parameters such as ``; charset=utf-8``
+    are ignored. Only JSON objects and arrays are stored decoded, since a JSON scalar or string
+    body cannot be told apart from plain text when loading.
+
+    NOTE(review): this looks like a near-duplicate of ``DecodedBodyStage`` in
+    ``requests_cache/serializers/cattrs.py``, differing mainly in the key used for the decoded
+    body (``content`` here vs. ``_decoded_content`` there) -- confirm whether both are needed.
+    """
+
+    def dumps(self, value: CachedResponse) -> Dict:
+        """Convert a response to a dict, replacing ``_content`` with a decoded ``content`` item
+        when the body can be decoded as JSON or text.
+        """
+        response_dict = super().dumps(value)
+
+        # Strip media type parameters (e.g. "application/json; charset=utf-8") before comparing
+        content_type = (value.headers.get('Content-Type') or '').split(';')[0].strip().lower()
+
+        # Decode body as JSON
+        if content_type == 'application/json':
+            try:
+                decoded = value.json()
+            except JSONDecodeError:
+                # Not valid JSON after all; keep the original binary body
+                pass
+            else:
+                # Scalars and strings would be ambiguous with text bodies in loads()
+                if isinstance(decoded, (dict, list)):
+                    response_dict['content'] = decoded
+                    response_dict.pop('_content', None)
+
+        # Decode body as text
+        elif content_type.startswith('text/'):
+            response_dict['content'] = value.text
+            response_dict.pop('_content', None)
+
+        # Otherwise, it is most likely a binary body
+        return response_dict
+
+    def loads(self, value: Dict) -> CachedResponse:
+        """Convert a dict back to a response, re-encoding a decoded ``content`` item (if present)
+        into binary ``_content``.
+        """
+        # Compare against None rather than truthiness, so that empty bodies ('' or {}) are
+        # restored instead of being replaced with None
+        decoded = value.pop('content', None)
+        if decoded is not None:
+            value['_content'] = decoded
+        value.setdefault('_content', None)
+
+        # Re-encode JSON bodies; both objects and arrays may have been stored by dumps()
+        if isinstance(value['_content'], (dict, list)):
+            value['_content'] = json.dumps(value['_content'])
+
+        # Already binary (or missing): nothing to re-encode
+        if not isinstance(value['_content'], str):
+            return super().loads(value)
+
+        value['_content'] = value['_content'].encode('utf-8')
+        response = super().loads(value)
+        # Since we know the encoding, set that explicitly so requests doesn't have to guess it
+        response.encoding = 'utf-8'
+        return response