diff options
-rw-r--r-- | examples/performance_test.py | 101
-rw-r--r-- | poetry.lock | 35
-rw-r--r-- | pyproject.toml | 3
-rw-r--r-- | requests_cache/serializers/__init__.py | 3
-rw-r--r-- | requests_cache/serializers/base.py | 11
-rw-r--r-- | requests_cache/serializers/json.py | 78
-rw-r--r-- | requests_cache/serializers/json_serializer.py | 23
-rw-r--r-- | requests_cache/serializers/pickle_serializer.py (renamed from requests_cache/serializers/pickle.py) | 0
8 files changed, 130 insertions, 124 deletions
diff --git a/examples/performance_test.py b/examples/performance_test.py
index b3dc1ef..4d092b1 100644
--- a/examples/performance_test.py
+++ b/examples/performance_test.py
@@ -1,81 +1,108 @@
 """A manual test to compare performance of different serializers"""
 # flake8: noqa: F401
 """
-CPU Results:
-jsonpickle.encode x10000: 5.673
-jsonpickle.decode x10000: 5.448
-pickle.dumps x10000: 0.256
-pickle.loads x10000: 0.260
-cattrs.unstructure x10000: 0.002
-cattrs.structure x10000: 0.002
-cattrs + pickle.dumps x10000: 0.251
-cattrs + pickle.loads x10000: 0.253
+CPU Results (x10000 iterations):
+jsonpickle.encode: 8.846
+jsonpickle.decode: 9.166
+pickle.dumps: 0.433
+pickle.loads: 0.734
+cattrs.unstructure: 1.124
+cattrs.structure: 1.048
+cattrs+pickle.dumps: 1.219
+cattrs+pickle.loads: 1.189
+cattrs+json.dumps: 2.005
+cattrs+json.loads: 2.312
+cattrs+ujson.dumps: 1.803
+cattrs+ujson.loads: 2.128
 """
+import json
+import os
 import pickle
+import sys
+from os.path import abspath, dirname, join
 from time import perf_counter as time
 
 import jsonpickle
+import ujson
+from cattr.preconf.json import make_converter
 from memory_profiler import profile
 from rich import print
 
+# Add project path
+sys.path.insert(0, os.path.abspath('..'))
+
 from requests_cache import CachedSession
-from requests_cache.serializers import PickleSerializer
+from requests_cache.serializers import BaseSerializer, JSONSerializer, PickleSerializer
 
 ITERATIONS = 10000
 
 session = CachedSession()
-session.cache.clear()
+r = session.get('https://httpbin.org/get?x=y')
 r = session.get('https://httpbin.org/get?x=y')
 
 
 def test_jsonpickle():
-    start = time()
-    serialized = [jsonpickle.encode(r, use_base85=True) for i in range(ITERATIONS)]
-    print(f'jsonpickle.encode x{ITERATIONS}: {time() - start:.3f}')
-
-    start = time()
-    deserialized = [jsonpickle.decode(obj) for obj in serialized]
-    print(f'jsonpickle.decode x{ITERATIONS}: {time() - start:.3f}')
+    base_test('jsonpickle', jsonpickle.encode, jsonpickle.decode)
 
 
 def test_pickle():
-    start = time()
-    serialized = [pickle.dumps(r) for i in range(ITERATIONS)]
-    print(f'pickle.dumps x{ITERATIONS}: {time() - start:.3f}')
-
-    start = time()
-    serialized = [pickle.dumps(r) for i in range(ITERATIONS)]
-    print(f'pickle.loads x{ITERATIONS}: {time() - start:.3f}')
+    base_test('pickle', pickle.dumps, pickle.loads)
 
 
 def test_cattrs():
     s = PickleSerializer()
-    start = time()
-    serialized = [s.unstructure(r) for i in range(ITERATIONS)]
-    print(f'cattrs.unstructure x{ITERATIONS}: {time() - start:.3f}')
-
-    start = time()
-    deserialized = [s.structure(obj) for obj in serialized]
-    print(f'cattrs.structure x{ITERATIONS}: {time() - start:.3f}')
+    base_test('cattrs', s.unstructure, s.structure)
 
 
 def test_cattrs_pickle():
     s = PickleSerializer()
+    base_test('PickleSerializer', s.dumps, s.loads)
+
+
+def test_cattrs_json():
+    s = BaseSerializer(converter_factory=make_converter)
+    base_test(
+        'json',
+        lambda obj: json.dumps(s.unstructure(obj)),
+        lambda obj: s.structure(json.loads(obj)),
+    )
+
+
+def test_cattrs_ujson():
+    s = BaseSerializer(converter_factory=make_converter)
+    base_test(
+        'ujson',
+        lambda obj: ujson.dumps(s.unstructure(obj)),
+        lambda obj: s.structure(ujson.loads(obj)),
+    )
+
+
+def base_test(module, serialize, deserialize):
     start = time()
-    serialized = [s.dumps(r) for i in range(ITERATIONS)]
-    print(f'cattrs + pickle.dumps x{ITERATIONS}: {time() - start:.3f}')
+    serialized = [serialize(r) for i in range(ITERATIONS)]
+    print(f'{module}.{serialize.__name__} x{ITERATIONS}: {time() - start:.3f}')
 
     start = time()
-    deserialized = [s.loads(obj) for obj in serialized]
-    print(f'cattrs + pickle.loads x{ITERATIONS}: {time() - start:.3f}')
+    deserialized = [deserialize(obj) for obj in serialized]
+    print(f'{module}.{deserialize.__name__} x{ITERATIONS}: {time() - start:.3f}')
+
+
+def dumps(self, response: CachedResponse) -> bytes:
+    return json.dumps(super().unstructure(response), indent=2)  # , cls=ResponseJSONEncoder)
+
+
+def loads(self, obj: bytes) -> CachedResponse:
+    return super().structure(json.loads(obj))
 
 
 if __name__ == '__main__':
     print('CPU:')
-    test_jsonpickle()
+    # test_jsonpickle()
     test_pickle()
     test_cattrs()
     test_cattrs_pickle()
+    test_cattrs_json()
+    test_cattrs_ujson()
 
     # Memory
     # print('\nMemory:')
diff --git a/poetry.lock b/poetry.lock
index 603063f..a4f15aa 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -990,6 +990,14 @@ optional = false
 python-versions = "*"
 
 [[package]]
+name = "ujson"
+version = "4.0.2"
+description = "Ultra fast JSON encoder and decoder for Python"
+category = "main"
+optional = true
+python-versions = ">=3.6"
+
+[[package]]
 name = "url-normalize"
 version = "1.4.3"
 description = "URL normalization for Python"
@@ -1048,12 +1056,12 @@ testing = ["pytest (>=4.6)", "pytest-checkdocs (>=1.2.3)", "pytest-flake8", "pyt
 [extras]
 backends = ["boto3", "pymongo", "redis"]
 docs = ["m2r2", "Sphinx", "sphinx-autodoc-typehints", "sphinx-copybutton", "sphinx-rtd-theme", "sphinxcontrib-apidoc"]
-json = ["cattrs"]
+json = ["cattrs", "ujson"]
 
 [metadata]
 lock-version = "1.1"
 python-versions = "^3.6"
-content-hash = "4eb795b93133ea5502e95f4f8e2776ac3eb538640142dfbbfa35ce02709de3cc"
+content-hash = "8576ddbf5e4165bad17f8818200e49999185d8e8e75c6ea11af199aac0881d13"
 
 [metadata.files]
 alabaster = [
@@ -1667,6 +1675,29 @@ typing-extensions = [
     {file = "typing_extensions-3.10.0.0-py3-none-any.whl", hash = "sha256:779383f6086d90c99ae41cf0ff39aac8a7937a9283ce0a414e5dd782f4c94a84"},
     {file = "typing_extensions-3.10.0.0.tar.gz", hash = "sha256:50b6f157849174217d0656f99dc82fe932884fb250826c18350e159ec6cdf342"},
 ]
+ujson = [
+    {file = "ujson-4.0.2-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:e390df0dcc7897ffb98e17eae1f4c442c39c91814c298ad84d935a3c5c7a32fa"},
+    {file = "ujson-4.0.2-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:84b1dca0d53b0a8d58835f72ea2894e4d6cf7a5dd8f520ab4cbd698c81e49737"},
+    {file = "ujson-4.0.2-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:91396a585ba51f84dc71c8da60cdc86de6b60ba0272c389b6482020a1fac9394"},
+    {file = "ujson-4.0.2-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:eb6b25a7670c7537a5998e695fa62ff13c7f9c33faf82927adf4daa460d5f62e"},
+    {file = "ujson-4.0.2-cp36-cp36m-win_amd64.whl", hash = "sha256:f8aded54c2bc554ce20b397f72101737dd61ee7b81c771684a7dd7805e6cca0c"},
+    {file = "ujson-4.0.2-cp37-cp37m-macosx_10_14_x86_64.whl", hash = "sha256:30962467c36ff6de6161d784cd2a6aac1097f0128b522d6e9291678e34fb2b47"},
+    {file = "ujson-4.0.2-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:fc51e545d65689c398161f07fd405104956ec27f22453de85898fa088b2cd4bb"},
+    {file = "ujson-4.0.2-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:e6e90330670c78e727d6637bb5a215d3e093d8e3570d439fd4922942f88da361"},
+    {file = "ujson-4.0.2-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:5e1636b94c7f1f59a8ead4c8a7bab1b12cc52d4c21ababa295ffec56b445fd2a"},
+    {file = "ujson-4.0.2-cp37-cp37m-win_amd64.whl", hash = "sha256:e2cadeb0ddc98e3963bea266cc5b884e5d77d73adf807f0bda9eca64d1c509d5"},
+    {file = "ujson-4.0.2-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:a214ba5a21dad71a43c0f5aef917cd56a2d70bc974d845be211c66b6742a471c"},
+    {file = "ujson-4.0.2-cp38-cp38-manylinux1_i686.whl", hash = "sha256:0190d26c0e990c17ad072ec8593647218fe1c675d11089cd3d1440175b568967"},
+    {file = "ujson-4.0.2-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:f273a875c0b42c2a019c337631bc1907f6fdfbc84210cc0d1fff0e2019bbfaec"},
+    {file = "ujson-4.0.2-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:d3a87888c40b5bfcf69b4030427cd666893e826e82cc8608d1ba8b4b5e04ea99"},
+    {file = "ujson-4.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:7333e8bc45ea28c74ae26157eacaed5e5629dbada32e0103c23eb368f93af108"},
+    {file = "ujson-4.0.2-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:b3a6dcc660220539aa718bcc9dbd6dedf2a01d19c875d1033f028f212e36d6bb"},
+    {file = "ujson-4.0.2-cp39-cp39-manylinux1_i686.whl", hash = "sha256:0ea07fe57f9157118ca689e7f6db72759395b99121c0ff038d2e38649c626fb1"},
+    {file = "ujson-4.0.2-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:4d6d061563470cac889c0a9fd367013a5dbd8efc36ad01ab3e67a57e56cad720"},
+    {file = "ujson-4.0.2-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:b5c70704962cf93ec6ea3271a47d952b75ae1980d6c56b8496cec2a722075939"},
+    {file = "ujson-4.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:aad6d92f4d71e37ea70e966500f1951ecd065edca3a70d3861b37b176dd6702c"},
+    {file = "ujson-4.0.2.tar.gz", hash = "sha256:c615a9e9e378a7383b756b7e7a73c38b22aeb8967a8bfbffd4741f7ffd043c4d"},
+]
 url-normalize = [
     {file = "url-normalize-1.4.3.tar.gz", hash = "sha256:d23d3a070ac52a67b83a1c59a0e68f8608d1cd538783b401bc9de2c0fac999b2"},
     {file = "url_normalize-1.4.3-py2.py3-none-any.whl", hash = "sha256:ec3c301f04e5bb676d333a7fa162fa977ad2ca04b7e652bfc9fac4e405728eed"},
diff --git a/pyproject.toml b/pyproject.toml
index 74fa59b..d3246bf 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -28,6 +28,7 @@ url-normalize = "^1.4"
 
 # Optional serialization dependencies
 cattrs = {version = "^1.7", python = "^3.7", optional = true}
+ujson = {version = "^4.0.2", optional = true}
 
 # Optional backend dependencies
 boto3 = { version = "^1.15", optional = true }
@@ -44,7 +45,7 @@ sphinxcontrib-apidoc = { version = "^0.3", optional = true }
 
 [tool.poetry.extras]
 backends = ["boto3", "pymongo", "redis"]
-json = ["cattrs"]
+json = ["cattrs", "ujson"]
 docs = ["docutils", "m2r2", "Sphinx", "sphinx-autodoc-typehints", "sphinx-copybutton", "sphinx-material", "sphinx-rtd-theme", "sphinxcontrib-apidoc"]
 
diff --git a/requests_cache/serializers/__init__.py b/requests_cache/serializers/__init__.py
index c2cd5b2..dc06fc4 100644
--- a/requests_cache/serializers/__init__.py
+++ b/requests_cache/serializers/__init__.py
@@ -1,3 +1,4 @@
 # flake8: noqa: F401
 from .base import BaseSerializer
-from .pickle import PickleSerializer, SafePickleSerializer
+from .json_serializer import JSONSerializer
+from .pickle_serializer import PickleSerializer, SafePickleSerializer
diff --git a/requests_cache/serializers/base.py b/requests_cache/serializers/base.py
index bb3bb83..3bd1ac2 100644
--- a/requests_cache/serializers/base.py
+++ b/requests_cache/serializers/base.py
@@ -1,6 +1,6 @@
 from abc import abstractmethod
 from datetime import datetime, timedelta
-from typing import Any
+from typing import Any, Callable
 
 from requests.cookies import RequestsCookieJar, cookiejar_from_dict
 from requests.structures import CaseInsensitiveDict
@@ -18,9 +18,9 @@ class BaseSerializer:
     Subclasses must provide ``dumps`` and ``loads`` methods.
""" - def __init__(self, *args, **kwargs): + def __init__(self, *args, converter_factory=None, **kwargs): super().__init__(*args, **kwargs) - self.converter = init_converter() + self.converter = init_converter(factory=converter_factory) def unstructure(self, obj: Any) -> Any: if not isinstance(obj, CachedResponse) or not self.converter: @@ -41,7 +41,7 @@ class BaseSerializer: pass -def init_converter(): +def init_converter(factory: Callable = None): """Make a converter to structure and unstructure some of the nested objects within a response, if cattrs is installed. """ @@ -50,7 +50,8 @@ def init_converter(): except ImportError: return None - converter = GenConverter(omit_if_default=True) + factory = factory or GenConverter + converter = factory(omit_if_default=True) # Convert datetimes to and from iso-formatted strings converter.register_unstructure_hook(datetime, lambda obj: obj.isoformat() if obj else None) diff --git a/requests_cache/serializers/json.py b/requests_cache/serializers/json.py deleted file mode 100644 index cc2975e..0000000 --- a/requests_cache/serializers/json.py +++ /dev/null @@ -1,78 +0,0 @@ -# TODO: Handle CachedHTTPResponse, PreparedRequest, RequestsCookieJar, CachedResponse (history) -import json -from base64 import b64decode, b64encode -from datetime import datetime, timedelta -from json import JSONDecoder, JSONEncoder -from typing import Union - -from requests.cookies import RequestsCookieJar, cookiejar_from_dict -from requests.structures import CaseInsensitiveDict - -from ..response import CachedResponse - - -class ResponseJSONSerializer: - def dumps(self, response: CachedResponse) -> str: - """Serialize a CachedResponse into JSON""" - return json.dumps(response.to_dict(), cls=ResponseJSONEncoder, indent=2) - - def loads(self, obj: str) -> CachedResponse: - """Deserialize a CachedResponse from JSON""" - response = json.loads(obj, cls=ResponseJSONDecoder) - - response['_content'] = b64decode(response['_content'].encode()) - response['_raw_response_attrs'] = response.pop('raw') - response['_request_attrs'] = response.pop('request') - response['cookies'] = cookiejar_from_dict(response.get('cookies', {})) - response['headers'] = CaseInsensitiveDict(response.get('headers', {})) - response['history'] = [self.loads(redirect) for redirect in response.get('history', [])] - - return CachedResponse(**response) - - -class ResponseJSONEncoder(JSONEncoder): - """Serialize a CachedResponse as JSON""" - - def default(self, obj): - if isinstance(obj, bytes): - return b64encode(obj).decode() - if isinstance(obj, datetime): - return obj.isoformat() - elif isinstance(obj, RequestsCookieJar): - return dict(obj) - # elif isinstance(obj, CookieJar): - # cookies = RequestsCookieJar() - # cookies.update(obj) - # return dict(cookies) - elif isinstance(obj, timedelta): - return { - '__type__': 'timedelta', - 'days': obj.days, - 'seconds': obj.seconds, - 'microseconds': obj.microseconds, - } - return super().default(obj) - - -class ResponseJSONDecoder(JSONDecoder): - """Deserialize a CachedResponse from JSON""" - - def __init__(self, **kwargs): - super().__init__(object_hook=self.object_hook, **kwargs) - - def object_hook(self, obj): - """Check for and handle custom types before they get deserialized by JSONDecoder""" - if isinstance(obj, str): - return try_parse_isoformat(obj) - elif isinstance(obj, dict) and obj.get('__type__', None) == 'timedelta': - obj.pop('__type__') - return timedelta(**obj) - return obj - - -def try_parse_isoformat(obj: str) -> Union[datetime, str]: - """Attempt to 
parse an ISO-formatted datetime string; if it fails, just return the string""" - try: - return datetime.fromisoformat(obj) - except (AttributeError, TypeError, ValueError): - return obj diff --git a/requests_cache/serializers/json_serializer.py b/requests_cache/serializers/json_serializer.py new file mode 100644 index 0000000..4080387 --- /dev/null +++ b/requests_cache/serializers/json_serializer.py @@ -0,0 +1,23 @@ +# Use ultrajson, if installed, otherwise stdlib json +try: + import ujson as json + from cattr.preconf.ujson import make_converter +except ImportError: + import json + from cattr.preconf.json import make_converter + +from ..models import CachedResponse +from .base import BaseSerializer + + +class JSONSerializer(BaseSerializer): + """Serializer that converts responses to JSON""" + + def __init__(self, *args, **kwargs): + super().__init__(*args, converter_factory=make_converter, **kwargs) + + def dumps(self, response: CachedResponse) -> bytes: + return json.dumps(super().unstructure(response), indent=2) + + def loads(self, obj: bytes) -> CachedResponse: + return super().structure(json.loads(obj)) diff --git a/requests_cache/serializers/pickle.py b/requests_cache/serializers/pickle_serializer.py index 86a4e59..86a4e59 100644 --- a/requests_cache/serializers/pickle.py +++ b/requests_cache/serializers/pickle_serializer.py |
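
Below is a minimal usage sketch (not part of the commit above) showing how the new JSONSerializer might be exercised end to end. It assumes only the classes visible in this diff, plus the optional cattrs dependency, and mirrors the round trip that examples/performance_test.py benchmarks.

# Hypothetical round trip with the JSONSerializer added in json_serializer.py.
# Assumes the optional 'cattrs' extra is installed (ujson is used if available).
from requests_cache import CachedSession
from requests_cache.serializers import JSONSerializer

session = CachedSession()
session.get('https://httpbin.org/get?x=y')             # first call populates the cache
response = session.get('https://httpbin.org/get?x=y')  # repeat call returns a CachedResponse

serializer = JSONSerializer()
encoded = serializer.dumps(response)   # CachedResponse -> JSON (ujson if installed, else stdlib json)
decoded = serializer.loads(encoded)    # JSON -> CachedResponse
assert decoded.url == response.url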
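The converter_factory hook added to BaseSerializer can also be used on its own, as test_cattrs_ujson does in the benchmark script; a sketch under the same assumptions (cattrs and ujson installed):

# Hypothetical direct use of BaseSerializer's new converter_factory argument,
# pairing a ujson-preconfigured cattrs converter with ujson for encoding.
import ujson
from cattr.preconf.ujson import make_converter

from requests_cache import CachedSession
from requests_cache.serializers import BaseSerializer

session = CachedSession()
session.get('https://httpbin.org/get?x=y')
response = session.get('https://httpbin.org/get?x=y')

s = BaseSerializer(converter_factory=make_converter)
encoded = ujson.dumps(s.unstructure(response))   # dict from the cattrs converter -> JSON string
decoded = s.structure(ujson.loads(encoded))      # JSON string -> dict -> CachedResponse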