diff options
author | Jordan Cook <jordan.cook@pioneer.com> | 2022-06-10 12:36:08 -0500 |
---|---|---|
committer | Jordan Cook <jordan.cook@pioneer.com> | 2022-06-10 14:12:27 -0500 |
commit | 0c46f50cc8976c7dcb178788852deae5bac226cb (patch) | |
tree | f541d637780dfad07a63222c90e1fff28586aa97 /requests_cache/serializers | |
parent | 0b0bb1ef0a1dc4120123595154049948a6a2e209 (diff) | |
download | requests-cache-0c46f50cc8976c7dcb178788852deae5bac226cb.tar.gz |
Add serializer stage that decodes/re-encodes response content
Diffstat (limited to 'requests_cache/serializers')
-rw-r--r-- | requests_cache/serializers/__init__.py | 2 | ||||
-rw-r--r-- | requests_cache/serializers/cattrs.py | 53 | ||||
-rw-r--r-- | requests_cache/serializers/content_decoder.py | 52 |
3 files changed, 106 insertions, 1 deletions
diff --git a/requests_cache/serializers/__init__.py b/requests_cache/serializers/__init__.py
index 6328ea7..36197e1 100644
--- a/requests_cache/serializers/__init__.py
+++ b/requests_cache/serializers/__init__.py
@@ -21,7 +21,7 @@ class that raises an ``ImportError`` at initialization time instead of at import
 # flake8: noqa: F401
 from typing import Union
 
-from .cattrs import CattrStage
+from .cattrs import CattrStage, DecodedBodyStage
 from .pipeline import SerializerPipeline, Stage
 from .preconf import (
     bson_document_serializer,
diff --git a/requests_cache/serializers/cattrs.py b/requests_cache/serializers/cattrs.py
index 708975b..6138d42 100644
--- a/requests_cache/serializers/cattrs.py
+++ b/requests_cache/serializers/cattrs.py
@@ -17,12 +17,18 @@ from typing import Callable, Dict, ForwardRef, MutableMapping
 
 from cattr import GenConverter
 from requests.cookies import RequestsCookieJar, cookiejar_from_dict
+from requests.exceptions import JSONDecodeError
 from requests.structures import CaseInsensitiveDict
 from urllib3._collections import HTTPHeaderDict
 
 from ..models import CachedResponse
 from .pipeline import Stage
 
+try:
+    import ujson as json
+except ImportError:
+    import json  # type: ignore
+
 
 class CattrStage(Stage):
     """Base serializer class that does pre/post-processing with ``cattrs``. This can be used either
@@ -43,6 +49,53 @@ class CattrStage(Stage):
         return self.converter.structure(value, cl=CachedResponse)
 
 
+class DecodedBodyStage(CattrStage):
+    """Converter that decodes the response body into a human-readable format (if possible) when
+    serializing, and re-encodes it to reconstruct the original response. Supported Content-Types
+    are ``application/json`` and ``text/*``. All other types will be saved as-is.
+
+    Notes:
+
+    * This needs access to the response object for decoding, so this is used _instead_ of
+      CattrStage, not before/after it.
+    * Decoded responses are saved in a separate ``_decoded_content`` attribute, to ensure that
+      ``_content`` is always binary.
+    """
+
+    def dumps(self, value: CachedResponse) -> Dict:
+        response_dict = super().dumps(value)
+
+        # Decode body as JSON
+        if value.headers.get('Content-Type') == 'application/json':
+            try:
+                response_dict['_decoded_content'] = value.json()
+                response_dict.pop('_content', None)
+            except JSONDecodeError:
+                pass
+
+        # Decode body as text
+        if value.headers.get('Content-Type', '').startswith('text/'):
+            response_dict['_decoded_content'] = value.text
+            response_dict.pop('_content', None)
+
+        # Otherwise, it is most likely a binary body
+        return response_dict
+
+    def loads(self, value: Dict) -> CachedResponse:
+        # Re-encode JSON and text bodies
+        if isinstance(value.get('_decoded_content'), dict):
+            value['_decoded_content'] = json.dumps(value['_decoded_content'])
+
+        if isinstance(value.get('_decoded_content'), str):
+            response = super().loads(value)
+            response._content = response._decoded_content.encode('utf-8')
+            response._decoded_content = ''
+            response.encoding = 'utf-8'  # Set encoding explicitly so requests doesn't have to guess
+            return response
+        else:
+            return super().loads(value)
+
+
 def init_converter(
     factory: Callable[..., GenConverter] = None,
     convert_datetime: bool = True,
diff --git a/requests_cache/serializers/content_decoder.py b/requests_cache/serializers/content_decoder.py
new file mode 100644
index 0000000..71bc3fd
--- /dev/null
+++ b/requests_cache/serializers/content_decoder.py
@@ -0,0 +1,52 @@
+import json
+from typing import Dict
+
+from requests.exceptions import JSONDecodeError
+
+from requests_cache.models.response import CachedResponse
+from requests_cache.serializers.cattrs import CattrStage
+
+
+class DecodeBodyStage(CattrStage):
+    """Converter that decodes the response body into a human-readable format when serializing
+    (if possible), and re-encodes it to reconstruct the original response. Supported Content-Types
+    are ``application/json`` and ``text/*``. All other types will be saved as-is.
+
+    This needs access to the CachedResponse object for decoding, so this is used _instead_ of
+    CattrStage, not before/after it.
+    """
+
+    def dumps(self, value: CachedResponse) -> Dict:
+        response_dict = super().dumps(value)
+        # Decode body as JSON
+        if value.headers.get('Content-Type') == 'application/json':
+            try:
+                response_dict['content'] = value.json()
+                response_dict.pop('_content', None)
+            except JSONDecodeError:
+                pass
+
+        # Decode body as text
+        if value.headers.get('Content-Type', '').startswith('text/'):
+            response_dict['content'] = value.text
+            response_dict.pop('_content', None)
+
+        # Otherwise, it is most likely a binary body
+        return response_dict
+
+    def loads(self, value: Dict) -> CachedResponse:
+        if value.get('content'):
+            value['_content'] = value.pop('content')
+        value.setdefault('_content', None)
+
+        # Re-encode JSON and text bodies
+        if isinstance(value['_content'], dict):
+            value['_content'] = json.dumps(value['_content'])
+        if isinstance(value['_content'], str):
+            value['_content'] = value['_content'].encode('utf-8')
+            response = super().loads(value)
+            # Since we know the encoding, set that explicitly so requests doesn't have to guess it
+            response.encoding = 'utf-8'
+            return response
+        else:
+            return super().loads(value)