diff options
-rw-r--r-- | HISTORY.md | 6 | ||||
-rw-r--r-- | docs/user_guide/backends/dynamodb.md | 17 | ||||
-rw-r--r-- | examples/cloudformation.yml | 18 | ||||
-rw-r--r-- | requests_cache/backends/__init__.py | 4 | ||||
-rw-r--r-- | requests_cache/backends/dynamodb.py | 80 | ||||
-rw-r--r-- | tests/conftest.py | 31 | ||||
-rw-r--r-- | tests/integration/base_storage_test.py | 5 | ||||
-rw-r--r-- | tests/integration/test_dynamodb.py | 70 | ||||
-rw-r--r-- | tests/integration/test_filesystem.py | 3 |
9 files changed, 164 insertions, 70 deletions
@@ -28,8 +28,11 @@ * MongoDB: * Store responses in plain (human-readable) document format instead of fully serialized binary * Add optional integration with MongoDB TTL to improve performance for removing expired responses + * Disabled by default. See 'Backends: MongoDB' docs for details. * DynamoDB: * Create default table in on-demand mode instead of provisioned + * Add optional integration with DynamoDB TTL to improve performance for removing expired responses + * This is enabled by default, but may be disabled * SQLite, Redis, MongoDB, and GridFS: Close open database connections when `CachedSession` is used as a contextmanager, or if `CachedSession.close()` is called **Type hints:** @@ -98,7 +101,8 @@ Internal module changes: **Backends:** * Filesystem and SQLite backends: Add better error message if parent path exists but isn't a directory -* Redis: Improve performance by using native Redis TTL for cache expiration +* Redis: Add optional integration with Redis TTL to improve performance for removing expired responses + * This is enabled by default, but may be disabled **Other features:** * Support `expire_after` param for `CachedSession.send()` diff --git a/docs/user_guide/backends/dynamodb.md b/docs/user_guide/backends/dynamodb.md index f807a77..247b044 100644 --- a/docs/user_guide/backends/dynamodb.md +++ b/docs/user_guide/backends/dynamodb.md @@ -60,9 +60,24 @@ Here are the details you will need: ``` ::: -Then deploy with: +To deploy with the [AWS CLI](https://aws.amazon.com/cli): ``` aws cloudformation deploy \ --stack-name requests-cache \ --template-file examples/cloudformation.yml ``` + +## Expiration +DynamoDB natively supports TTL on a per-item basis, and can automatically remove expired responses from +the cache. This will be set by default, according to normal {ref}`expiration settings <expiration>`. + +```{warning} +DynamoDB does not remove expired items immediately. 
See +[How It Works: DynamoDB Time to Live](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/howitworks-ttl.html) +for more details. +``` + +If needed, you can disable this behavior with the `ttl` argument: +```python +>>> backend = DynamoDbCache(ttl=False) +``` diff --git a/examples/cloudformation.yml b/examples/cloudformation.yml index ef5d749..e88e0ab 100644 --- a/examples/cloudformation.yml +++ b/examples/cloudformation.yml @@ -24,9 +24,15 @@ Resources: KeyType: HASH - AttributeName: key KeyType: RANGE - BillingMode: PAY_PER_REQUEST - # Uncomment for provisioned throughput instead of on-demand - # BillingMode: PROVISIONED - # ProvisionedThroughput: - # WriteCapacityUnits: 2 - # ReadCapacityUnits: 2 + # BillingMode: PAY_PER_REQUEST + + # Optional: Use provisioned throughput instead of on-demand + BillingMode: PROVISIONED + ProvisionedThroughput: + WriteCapacityUnits: 2 + ReadCapacityUnits: 2 + + # Optional: Enable DynamoDB's TTL feature + TimeToLiveSpecification: + AttributeName: ttl + Enabled: true diff --git a/requests_cache/backends/__init__.py b/requests_cache/backends/__init__.py index 9f87908..7695b8f 100644 --- a/requests_cache/backends/__init__.py +++ b/requests_cache/backends/__init__.py @@ -15,9 +15,9 @@ logger = getLogger(__name__) # Import all backend classes for which dependencies are installed try: - from .dynamodb import DynamoDbCache, DynamoDbDict, DynamoDocumentDict + from .dynamodb import DynamoDbCache, DynamoDbDict, DynamoDbDocumentDict except ImportError as e: - DynamoDbCache = DynamoDbDict = DynamoDocumentDict = get_placeholder_class(e) # type: ignore + DynamoDbCache = DynamoDbDict = DynamoDbDocumentDict = get_placeholder_class(e) # type: ignore try: from .gridfs import GridFSCache, GridFSPickleDict except ImportError as e: diff --git a/requests_cache/backends/dynamodb.py b/requests_cache/backends/dynamodb.py index 17f4661..8161ae9 100644 --- a/requests_cache/backends/dynamodb.py +++ b/requests_cache/backends/dynamodb.py @@ 
-4,6 +4,7 @@ :classes-only: :nosignatures: """ +from time import time from typing import Dict, Iterable import boto3 @@ -24,21 +25,27 @@ class DynamoDbCache(BaseCache): namespace: Name of DynamoDB hash map connection: :boto3:`DynamoDB Resource <services/dynamodb.html#DynamoDB.ServiceResource>` object to use instead of creating a new one + ttl: Use DynamoDB TTL to automatically remove expired items kwargs: Additional keyword arguments for :py:meth:`~boto3.session.Session.resource` """ def __init__( - self, table_name: str = 'http_cache', connection: ServiceResource = None, **kwargs + self, + table_name: str = 'http_cache', + ttl: bool = True, + connection: ServiceResource = None, + **kwargs, ): super().__init__(cache_name=table_name, **kwargs) - self.responses = DynamoDocumentDict( - table_name, 'responses', connection=connection, **kwargs + self.responses = DynamoDbDocumentDict( + table_name, 'responses', ttl=ttl, connection=connection, **kwargs ) self.redirects = DynamoDbDict( - table_name, 'redirects', connection=self.responses.connection, **kwargs + table_name, 'redirects', ttl=False, connection=self.responses.connection, **kwargs ) +# TODO: Add screenshot of viewing responses in AWS console class DynamoDbDict(BaseStorage): """A dictionary-like interface for DynamoDB table @@ -47,6 +54,7 @@ class DynamoDbDict(BaseStorage): namespace: Name of DynamoDB hash map connection: :boto3:`DynamoDB Resource <services/dynamodb.html#DynamoDB.ServiceResource>` object to use instead of creating a new one + ttl: Use DynamoDB TTL to automatically remove expired items kwargs: Additional keyword arguments for :py:meth:`~boto3.session.Session.resource` """ @@ -54,6 +62,7 @@ class DynamoDbDict(BaseStorage): self, table_name: str, namespace: str, + ttl: bool = True, connection: ServiceResource = None, **kwargs, ): @@ -61,36 +70,48 @@ class DynamoDbDict(BaseStorage): connection_kwargs = get_valid_kwargs(boto3.Session, kwargs, extras=['endpoint_url']) self.connection = connection or 
boto3.resource('dynamodb', **connection_kwargs) self.namespace = namespace + self.table_name = table_name + self.ttl = ttl - self._create_table(table_name) - self._table = self.connection.Table(table_name) - self._table.wait_until_exists() + self._table = self.connection.Table(self.table_name) + self._create_table() + if ttl: + self._enable_ttl() - def _create_table(self, table_name: str): + def _create_table(self): """Create a default table if one does not already exist""" try: self.connection.create_table( AttributeDefinitions=[ - { - 'AttributeName': 'namespace', - 'AttributeType': 'S', - }, - { - 'AttributeName': 'key', - 'AttributeType': 'S', - }, + {'AttributeName': 'namespace', 'AttributeType': 'S'}, + {'AttributeName': 'key', 'AttributeType': 'S'}, ], - TableName=table_name, + TableName=self.table_name, KeySchema=[ {'AttributeName': 'namespace', 'KeyType': 'HASH'}, {'AttributeName': 'key', 'KeyType': 'RANGE'}, ], - BillingMode="PAY_PER_REQUEST", + BillingMode='PAY_PER_REQUEST', ) - except ClientError: - pass + self._table.wait_until_exists() + # Ignore error if table already exists + except ClientError as e: + if e.response['Error']['Code'] != 'ResourceInUseException': + raise + + def _enable_ttl(self): + """Enable TTL, if not already enabled""" + try: + self.connection.meta.client.update_time_to_live( + TableName=self.table_name, + TimeToLiveSpecification={'AttributeName': 'ttl', 'Enabled': True}, + ) + # Ignore error if TTL is already enabled + except ClientError as e: + if e.response['Error']['Code'] != 'ValidationException': + raise - def composite_key(self, key: str) -> Dict[str, str]: + def _composite_key(self, key: str) -> Dict[str, str]: return {'namespace': self.namespace, 'key': str(key)} def _scan(self): @@ -104,20 +125,25 @@ class DynamoDbDict(BaseStorage): ) def __getitem__(self, key): - result = self._table.get_item(Key=self.composite_key(key)) + result = self._table.get_item(Key=self._composite_key(key)) if 'Item' not in result: raise 
KeyError - # Depending on the serializer, the value may be either a string or Binary object + # With a custom serializer, the value may be a Binary object raw_value = result['Item']['value'] return raw_value.value if isinstance(raw_value, Binary) else raw_value def __setitem__(self, key, value): - item = {**self.composite_key(key), 'value': value} + item = {**self._composite_key(key), 'value': value} + + # If enabled, set TTL value as a timestamp in unix format + if self.ttl and getattr(value, 'ttl', None): + item['ttl'] = int(time() + value.ttl) + self._table.put_item(Item=item) def __delitem__(self, key): - response = self._table.delete_item(Key=self.composite_key(key), ReturnValues='ALL_OLD') + response = self._table.delete_item(Key=self._composite_key(key), ReturnValues='ALL_OLD') if 'Attributes' not in response: raise KeyError @@ -138,13 +164,13 @@ class DynamoDbDict(BaseStorage): """Delete multiple keys from the cache. Does not raise errors for missing keys.""" with self._table.batch_writer() as batch: for key in keys: - batch.delete_item(Key=self.composite_key(key)) + batch.delete_item(Key=self._composite_key(key)) def clear(self): self.bulk_delete((k for k in self)) -class DynamoDocumentDict(DynamoDbDict): +class DynamoDbDocumentDict(DynamoDbDict): """Same as :class:`DynamoDbDict`, but serializes values before saving. By default, responses are only partially serialized into a DynamoDB-compatible document format. 
diff --git a/tests/conftest.py b/tests/conftest.py index dc77368..96c35a7 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -25,7 +25,16 @@ from timeout_decorator import timeout from requests_cache import ALL_METHODS, CachedSession, install_cache, uninstall_cache -CACHE_NAME = 'pytest_cache' +# Configure logging to show log output when tests fail (or with pytest -s) +basicConfig( + level='INFO', + format='%(message)s', + datefmt='[%m-%d %H:%M:%S]', + handlers=[RichHandler(rich_tracebacks=True, markup=True)], +) +# getLogger('requests_cache').setLevel('DEBUG') +logger = getLogger(__name__) + # Allow running longer stress tests with an environment variable STRESS_TEST_MULTIPLIER = int(os.getenv('STRESS_TEST_MULTIPLIER', '1')) @@ -49,7 +58,6 @@ HTTPBIN_FORMATS = [ 'robots.txt', 'xml', ] - HTTPDATE_STR = 'Fri, 16 APR 2021 21:13:00 GMT' HTTPDATE_DATETIME = datetime(2021, 4, 16, 21, 13) EXPIRED_DT = datetime.now() - timedelta(1) @@ -66,28 +74,11 @@ MOCKED_URL_404 = 'http+mock://requests-cache.com/nonexistent' MOCKED_URL_500 = 'http+mock://requests-cache.com/answer?q=this-statement-is-false' MOCK_PROTOCOLS = ['mock://', 'http+mock://', 'https+mock://'] +CACHE_NAME = 'pytest_cache' PROJECT_DIR = Path(__file__).parent.parent.absolute() SAMPLE_DATA_DIR = PROJECT_DIR / 'tests' / 'sample_data' SAMPLE_CACHE_FILES = list(SAMPLE_DATA_DIR.glob('sample.db.*')) -AWS_OPTIONS = { - 'endpoint_url': 'http://localhost:8000', - 'region_name': 'us-east-1', - 'aws_access_key_id': 'placeholder', - 'aws_secret_access_key': 'placeholder', -} - - -# Configure logging to show log output when tests fail (or with pytest -s) -basicConfig( - level='INFO', - format='%(message)s', - datefmt='[%m-%d %H:%M:%S]', - handlers=[RichHandler(rich_tracebacks=True, markup=True)], -) -# getLogger('requests_cache').setLevel('DEBUG') -logger = getLogger(__name__) - def httpbin(path): """Get the url for either a local or remote httpbin instance""" diff --git a/tests/integration/base_storage_test.py 
b/tests/integration/base_storage_test.py index 776d494..7d38643 100644 --- a/tests/integration/base_storage_test.py +++ b/tests/integration/base_storage_test.py @@ -18,8 +18,9 @@ class BaseStorageTest: num_instances: int = 10 # Max number of cache instances to test def init_cache(self, cache_name=CACHE_NAME, index=0, clear=True, **kwargs): + kwargs = {**self.init_kwargs, **kwargs} kwargs.setdefault('serializer', 'pickle') - cache = self.storage_class(cache_name, f'table_{index}', **self.init_kwargs, **kwargs) + cache = self.storage_class(cache_name, f'table_{index}', **kwargs) if clear: cache.clear() return cache @@ -98,7 +99,7 @@ class BaseStorageTest: def test_picklable_dict(self): if self.picklable: - cache = self.init_cache() + cache = self.init_cache(serializer='pickle') original_obj = BasicDataclass( bool_attr=True, datetime_attr=datetime(2022, 2, 2), diff --git a/tests/integration/test_dynamodb.py b/tests/integration/test_dynamodb.py index 857ab6c..84c1008 100644 --- a/tests/integration/test_dynamodb.py +++ b/tests/integration/test_dynamodb.py @@ -1,13 +1,27 @@ +from collections import OrderedDict +from decimal import Decimal from unittest.mock import patch import pytest +from botocore.exceptions import ClientError -from requests_cache.backends import DynamoDbCache, DynamoDbDict, DynamoDocumentDict +from requests_cache.backends import DynamoDbCache, DynamoDbDict, DynamoDbDocumentDict from requests_cache.serializers import dynamodb_document_serializer -from tests.conftest import AWS_OPTIONS, HTTPBIN_FORMATS, HTTPBIN_METHODS, fail_if_no_connection +from tests.conftest import HTTPBIN_FORMATS, HTTPBIN_METHODS, fail_if_no_connection from tests.integration.base_cache_test import TEST_SERIALIZERS, BaseCacheTest from tests.integration.base_storage_test import BaseStorageTest +AWS_OPTIONS = { + 'endpoint_url': 'http://localhost:8000', + 'region_name': 'us-east-1', + 'aws_access_key_id': 'placeholder', + 'aws_secret_access_key': 'placeholder', +} +DYNAMODB_OPTIONS = { 
+ **AWS_OPTIONS, + 'serializer': None, # Use class default serializer +} + # Add extra DynamoDB-specific format to list of serializers to test against DYNAMODB_SERIALIZERS = [dynamodb_document_serializer] + list(TEST_SERIALIZERS.values()) @@ -24,7 +38,7 @@ def ensure_connection(): class TestDynamoDbDict(BaseStorageTest): storage_class = DynamoDbDict - init_kwargs = AWS_OPTIONS + init_kwargs = DYNAMODB_OPTIONS @patch('requests_cache.backends.dynamodb.boto3.resource') def test_connection_kwargs(self, mock_resource): @@ -32,19 +46,55 @@ class TestDynamoDbDict(BaseStorageTest): DynamoDbDict('test_table', 'namespace', region_name='us-east-2', invalid_kwarg='???') mock_resource.assert_called_with('dynamodb', region_name='us-east-2') + def test_create_table_error(self): + """An error other than 'table already exists' should be reraised""" + cache = self.init_cache() + error = ClientError({'Error': {'Code': 'NullPointerException'}}, 'CreateTable') + with patch.object(cache.connection.meta.client, 'update_time_to_live', side_effect=error): + with pytest.raises(ClientError): + cache._enable_ttl() + + def test_enable_ttl_error(self): + """An error other than 'ttl already enabled' should be reraised""" + cache = self.init_cache() + error = ClientError({'Error': {'Code': 'NullPointerException'}}, 'CreateTable') + with patch.object(cache.connection, 'create_table', side_effect=error): + with pytest.raises(ClientError): + cache._create_table() + + @pytest.mark.parametrize('ttl_enabled', [True, False]) + def test_ttl(self, ttl_enabled): + """DynamoDB's TTL removal process can take up to 48 hours to run, so just test if the + 'ttl' attribute is set correctly if enabled, and not set if disabled. 
+ """ + cache = self.init_cache(ttl=ttl_enabled) + item = OrderedDict(foo='bar') + item.ttl = 60 + cache['key'] = item + + # 'ttl' is a reserved word, so to retrieve it we need to alias it + item = cache._table.get_item( + Key=cache._composite_key('key'), + ProjectionExpression='#t', + ExpressionAttributeNames={'#t': 'ttl'}, + ) + ttl_value = item['Item'].get('ttl') + + if ttl_enabled: + assert isinstance(ttl_value, Decimal) + else: + assert ttl_value is None + -class TestDynamoDocumentDict(BaseStorageTest): - storage_class = DynamoDocumentDict - init_kwargs = AWS_OPTIONS +class TestDynamoDbDocumentDict(BaseStorageTest): + storage_class = DynamoDbDocumentDict + init_kwargs = DYNAMODB_OPTIONS picklable = True class TestDynamoDbCache(BaseCacheTest): backend_class = DynamoDbCache - init_kwargs = { - 'serializer': None, - **AWS_OPTIONS, - } # Use class default serializer instead of pickle + init_kwargs = DYNAMODB_OPTIONS @pytest.mark.parametrize('serializer', DYNAMODB_SERIALIZERS) @pytest.mark.parametrize('method', HTTPBIN_METHODS) diff --git a/tests/integration/test_filesystem.py b/tests/integration/test_filesystem.py index 1b81dff..4181690 100644 --- a/tests/integration/test_filesystem.py +++ b/tests/integration/test_filesystem.py @@ -19,7 +19,8 @@ class TestFileDict(BaseStorageTest): rmtree(CACHE_NAME, ignore_errors=True) def init_cache(self, index=0, clear=True, **kwargs): - cache = FileDict(f'{CACHE_NAME}_{index}', serializer='pickle', use_temp=True, **kwargs) + kwargs.setdefault('serializer', 'pickle') + cache = FileDict(f'{CACHE_NAME}_{index}', use_temp=True, **kwargs) if clear: cache.clear() return cache |