 HISTORY.md                             |  6
 docs/user_guide/backends/dynamodb.md   | 17
 examples/cloudformation.yml            | 18
 requests_cache/backends/__init__.py    |  4
 requests_cache/backends/dynamodb.py    | 80
 tests/conftest.py                      | 31
 tests/integration/base_storage_test.py |  5
 tests/integration/test_dynamodb.py     | 70
 tests/integration/test_filesystem.py   |  3
 9 files changed, 164 insertions(+), 70 deletions(-)
diff --git a/HISTORY.md b/HISTORY.md
index f174881..bc97907 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -28,8 +28,11 @@
* MongoDB:
* Store responses in plain (human-readable) document format instead of fully serialized binary
* Add optional integration with MongoDB TTL to improve performance for removing expired responses
+ * Disabled by default. See 'Backends: MongoDB' docs for details.
* DynamoDB:
* Create default table in on-demand mode instead of provisioned
+ * Add optional integration with DynamoDB TTL to improve performance for removing expired responses
+ * This is enabled by default, but may be disabled
* SQLite, Redis, MongoDB, and GridFS: Close open database connections when `CachedSession` is used as a contextmanager, or if `CachedSession.close()` is called
**Type hints:**
@@ -98,7 +101,8 @@ Internal module changes:
**Backends:**
* Filesystem and SQLite backends: Add better error message if parent path exists but isn't a directory
-* Redis: Improve performance by using native Redis TTL for cache expiration
+* Redis: Add optional integration with Redis TTL to improve performance for removing expired responses
+ * This is enabled by default, but may be disabled
**Other features:**
* Support `expire_after` param for `CachedSession.send()`
diff --git a/docs/user_guide/backends/dynamodb.md b/docs/user_guide/backends/dynamodb.md
index f807a77..247b044 100644
--- a/docs/user_guide/backends/dynamodb.md
+++ b/docs/user_guide/backends/dynamodb.md
@@ -60,9 +60,24 @@ Here are the details you will need:
```
:::
-Then deploy with:
+To deploy with the [AWS CLI](https://aws.amazon.com/cli):
```
aws cloudformation deploy \
--stack-name requests-cache \
--template-file examples/cloudformation.yml
```
+
+## Expiration
+DynamoDB natively supports TTL on a per-item basis, and can automatically remove expired responses from
+the cache. This will be set by default, according to normal {ref}`expiration settings <expiration>`.
+
+```{warning}
+DynamoDB does not remove expired items immediately. See
+[How It Works: DynamoDB Time to Live](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/howitworks-ttl.html)
+for more details.
+```
+
+If needed, you can disable this behavior with the `ttl` argument:
+```python
+>>> backend = DynamoDbCache(ttl=False)
+```
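For context on the `ttl` option documented above, a minimal usage sketch (assumptions: the public `requests_cache` API shown elsewhere in this diff, and AWS credentials already configured in the environment):
```python
from requests_cache import CachedSession
from requests_cache.backends import DynamoDbCache

# TTL integration is enabled by default; pass ttl=False to opt out
backend = DynamoDbCache(table_name='http_cache', region_name='us-east-1')
session = CachedSession(backend=backend, expire_after=3600)

# Expired items are eventually removed by DynamoDB itself rather than by the client
response = session.get('https://example.com')
```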
diff --git a/examples/cloudformation.yml b/examples/cloudformation.yml
index ef5d749..e88e0ab 100644
--- a/examples/cloudformation.yml
+++ b/examples/cloudformation.yml
@@ -24,9 +24,15 @@ Resources:
KeyType: HASH
- AttributeName: key
KeyType: RANGE
- BillingMode: PAY_PER_REQUEST
- # Uncomment for provisioned throughput instead of on-demand
- # BillingMode: PROVISIONED
- # ProvisionedThroughput:
- # WriteCapacityUnits: 2
- # ReadCapacityUnits: 2
+ # BillingMode: PAY_PER_REQUEST
+
+ # Optional: Use provisioned throughput instead of on-demand
+ BillingMode: PROVISIONED
+ ProvisionedThroughput:
+ WriteCapacityUnits: 2
+ ReadCapacityUnits: 2
+
+ # Optional: Enable DynamoDB's TTL feature
+ TimeToLiveSpecification:
+ AttributeName: ttl
+ Enabled: true
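To sanity-check a deployed stack, a hedged sketch using boto3's `describe_time_to_live` (the table name `http_cache` is an assumption; use whatever `TableName` the template resolves to):
```python
import boto3

# Confirm that the TimeToLiveSpecification above took effect
client = boto3.client('dynamodb', region_name='us-east-1')
ttl_status = client.describe_time_to_live(TableName='http_cache')
print(ttl_status['TimeToLiveDescription']['TimeToLiveStatus'])  # 'ENABLED' once active
```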
diff --git a/requests_cache/backends/__init__.py b/requests_cache/backends/__init__.py
index 9f87908..7695b8f 100644
--- a/requests_cache/backends/__init__.py
+++ b/requests_cache/backends/__init__.py
@@ -15,9 +15,9 @@ logger = getLogger(__name__)
# Import all backend classes for which dependencies are installed
try:
- from .dynamodb import DynamoDbCache, DynamoDbDict, DynamoDocumentDict
+ from .dynamodb import DynamoDbCache, DynamoDbDict, DynamoDbDocumentDict
except ImportError as e:
- DynamoDbCache = DynamoDbDict = DynamoDocumentDict = get_placeholder_class(e) # type: ignore
+ DynamoDbCache = DynamoDbDict = DynamoDbDocumentDict = get_placeholder_class(e) # type: ignore
try:
from .gridfs import GridFSCache, GridFSPickleDict
except ImportError as e:
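The renamed class is re-exported through the same optional-import pattern as the other backends. A rough sketch of that pattern (not the package's actual `get_placeholder_class` implementation):
```python
# If an optional dependency (e.g. boto3) isn't installed, bind the backend
# names to a placeholder class that re-raises the original ImportError on use.
def get_placeholder_class(original_error: Exception):
    class Placeholder:
        def __init__(self, *args, **kwargs):
            raise original_error

    return Placeholder
```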
diff --git a/requests_cache/backends/dynamodb.py b/requests_cache/backends/dynamodb.py
index 17f4661..8161ae9 100644
--- a/requests_cache/backends/dynamodb.py
+++ b/requests_cache/backends/dynamodb.py
@@ -4,6 +4,7 @@
:classes-only:
:nosignatures:
"""
+from time import time
from typing import Dict, Iterable
import boto3
@@ -24,21 +25,27 @@ class DynamoDbCache(BaseCache):
namespace: Name of DynamoDB hash map
connection: :boto3:`DynamoDB Resource <services/dynamodb.html#DynamoDB.ServiceResource>`
object to use instead of creating a new one
+ ttl: Use DynamoDB TTL to automatically remove expired items
kwargs: Additional keyword arguments for :py:meth:`~boto3.session.Session.resource`
"""
def __init__(
- self, table_name: str = 'http_cache', connection: ServiceResource = None, **kwargs
+ self,
+ table_name: str = 'http_cache',
+ ttl: bool = True,
+ connection: ServiceResource = None,
+ **kwargs,
):
super().__init__(cache_name=table_name, **kwargs)
- self.responses = DynamoDocumentDict(
- table_name, 'responses', connection=connection, **kwargs
+ self.responses = DynamoDbDocumentDict(
+ table_name, 'responses', ttl=ttl, connection=connection, **kwargs
)
self.redirects = DynamoDbDict(
- table_name, 'redirects', connection=self.responses.connection, **kwargs
+ table_name, 'redirects', ttl=False, connection=self.responses.connection, **kwargs
)
+# TODO: Add screenshot of viewing responses in AWS console
class DynamoDbDict(BaseStorage):
"""A dictionary-like interface for DynamoDB table
@@ -47,6 +54,7 @@ class DynamoDbDict(BaseStorage):
namespace: Name of DynamoDB hash map
connection: :boto3:`DynamoDB Resource <services/dynamodb.html#DynamoDB.ServiceResource>`
object to use instead of creating a new one
+ ttl: Use DynamoDB TTL to automatically remove expired items
kwargs: Additional keyword arguments for :py:meth:`~boto3.session.Session.resource`
"""
@@ -54,6 +62,7 @@ class DynamoDbDict(BaseStorage):
self,
table_name: str,
namespace: str,
+ ttl: bool = True,
connection: ServiceResource = None,
**kwargs,
):
@@ -61,36 +70,48 @@ class DynamoDbDict(BaseStorage):
connection_kwargs = get_valid_kwargs(boto3.Session, kwargs, extras=['endpoint_url'])
self.connection = connection or boto3.resource('dynamodb', **connection_kwargs)
self.namespace = namespace
+ self.table_name = table_name
+ self.ttl = ttl
- self._create_table(table_name)
- self._table = self.connection.Table(table_name)
- self._table.wait_until_exists()
+ self._table = self.connection.Table(self.table_name)
+ self._create_table()
+ if ttl:
+ self._enable_ttl()
- def _create_table(self, table_name: str):
+ def _create_table(self):
"""Create a default table if one does not already exist"""
try:
self.connection.create_table(
AttributeDefinitions=[
- {
- 'AttributeName': 'namespace',
- 'AttributeType': 'S',
- },
- {
- 'AttributeName': 'key',
- 'AttributeType': 'S',
- },
+ {'AttributeName': 'namespace', 'AttributeType': 'S'},
+ {'AttributeName': 'key', 'AttributeType': 'S'},
],
- TableName=table_name,
+ TableName=self.table_name,
KeySchema=[
{'AttributeName': 'namespace', 'KeyType': 'HASH'},
{'AttributeName': 'key', 'KeyType': 'RANGE'},
],
- BillingMode="PAY_PER_REQUEST",
+ BillingMode='PAY_PER_REQUEST',
)
- except ClientError:
- pass
+ self._table.wait_until_exists()
+ # Ignore error if table already exists
+ except ClientError as e:
+ if e.response['Error']['Code'] != 'ResourceInUseException':
+ raise
+
+ def _enable_ttl(self):
+ """Enable TTL, if not already enabled"""
+ try:
+ self.connection.meta.client.update_time_to_live(
+ TableName=self.table_name,
+ TimeToLiveSpecification={'AttributeName': 'ttl', 'Enabled': True},
+ )
+ # Ignore error if TTL is already enabled
+ except ClientError as e:
+ if e.response['Error']['Code'] != 'ValidationException':
+ raise
- def composite_key(self, key: str) -> Dict[str, str]:
+ def _composite_key(self, key: str) -> Dict[str, str]:
return {'namespace': self.namespace, 'key': str(key)}
def _scan(self):
@@ -104,20 +125,25 @@ class DynamoDbDict(BaseStorage):
)
def __getitem__(self, key):
- result = self._table.get_item(Key=self.composite_key(key))
+ result = self._table.get_item(Key=self._composite_key(key))
if 'Item' not in result:
raise KeyError
- # Depending on the serializer, the value may be either a string or Binary object
+ # With a custom serializer, the value may be a Binary object
raw_value = result['Item']['value']
return raw_value.value if isinstance(raw_value, Binary) else raw_value
def __setitem__(self, key, value):
- item = {**self.composite_key(key), 'value': value}
+ item = {**self._composite_key(key), 'value': value}
+
+ # If enabled, set TTL value as a timestamp in unix format
+ if self.ttl and getattr(value, 'ttl', None):
+ item['ttl'] = int(time() + value.ttl)
+
self._table.put_item(Item=item)
def __delitem__(self, key):
- response = self._table.delete_item(Key=self.composite_key(key), ReturnValues='ALL_OLD')
+ response = self._table.delete_item(Key=self._composite_key(key), ReturnValues='ALL_OLD')
if 'Attributes' not in response:
raise KeyError
@@ -138,13 +164,13 @@ class DynamoDbDict(BaseStorage):
"""Delete multiple keys from the cache. Does not raise errors for missing keys."""
with self._table.batch_writer() as batch:
for key in keys:
- batch.delete_item(Key=self.composite_key(key))
+ batch.delete_item(Key=self._composite_key(key))
def clear(self):
self.bulk_delete((k for k in self))
-class DynamoDocumentDict(DynamoDbDict):
+class DynamoDbDocumentDict(DynamoDbDict):
"""Same as :class:`DynamoDbDict`, but serializes values before saving.
By default, responses are only partially serialized into a DynamoDB-compatible document format.
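The TTL handling added in `__setitem__` amounts to converting a relative expiration (seconds remaining) into the absolute unix timestamp that DynamoDB's TTL feature expects. A standalone sketch of that conversion (the helper name is illustrative only):
```python
from time import time

def ttl_timestamp(ttl_seconds: float) -> int:
    """Convert seconds-until-expiration into the absolute epoch timestamp
    stored in the item's 'ttl' attribute."""
    return int(time() + ttl_seconds)

# e.g. a cached response expiring in one hour
item = {'namespace': 'responses', 'key': 'abc123', 'value': b'...', 'ttl': ttl_timestamp(3600)}
```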
diff --git a/tests/conftest.py b/tests/conftest.py
index dc77368..96c35a7 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -25,7 +25,16 @@ from timeout_decorator import timeout
from requests_cache import ALL_METHODS, CachedSession, install_cache, uninstall_cache
-CACHE_NAME = 'pytest_cache'
+# Configure logging to show log output when tests fail (or with pytest -s)
+basicConfig(
+ level='INFO',
+ format='%(message)s',
+ datefmt='[%m-%d %H:%M:%S]',
+ handlers=[RichHandler(rich_tracebacks=True, markup=True)],
+)
+# getLogger('requests_cache').setLevel('DEBUG')
+logger = getLogger(__name__)
+
# Allow running longer stress tests with an environment variable
STRESS_TEST_MULTIPLIER = int(os.getenv('STRESS_TEST_MULTIPLIER', '1'))
@@ -49,7 +58,6 @@ HTTPBIN_FORMATS = [
'robots.txt',
'xml',
]
-
HTTPDATE_STR = 'Fri, 16 APR 2021 21:13:00 GMT'
HTTPDATE_DATETIME = datetime(2021, 4, 16, 21, 13)
EXPIRED_DT = datetime.now() - timedelta(1)
@@ -66,28 +74,11 @@ MOCKED_URL_404 = 'http+mock://requests-cache.com/nonexistent'
MOCKED_URL_500 = 'http+mock://requests-cache.com/answer?q=this-statement-is-false'
MOCK_PROTOCOLS = ['mock://', 'http+mock://', 'https+mock://']
+CACHE_NAME = 'pytest_cache'
PROJECT_DIR = Path(__file__).parent.parent.absolute()
SAMPLE_DATA_DIR = PROJECT_DIR / 'tests' / 'sample_data'
SAMPLE_CACHE_FILES = list(SAMPLE_DATA_DIR.glob('sample.db.*'))
-AWS_OPTIONS = {
- 'endpoint_url': 'http://localhost:8000',
- 'region_name': 'us-east-1',
- 'aws_access_key_id': 'placeholder',
- 'aws_secret_access_key': 'placeholder',
-}
-
-
-# Configure logging to show log output when tests fail (or with pytest -s)
-basicConfig(
- level='INFO',
- format='%(message)s',
- datefmt='[%m-%d %H:%M:%S]',
- handlers=[RichHandler(rich_tracebacks=True, markup=True)],
-)
-# getLogger('requests_cache').setLevel('DEBUG')
-logger = getLogger(__name__)
-
def httpbin(path):
"""Get the url for either a local or remote httpbin instance"""
diff --git a/tests/integration/base_storage_test.py b/tests/integration/base_storage_test.py
index 776d494..7d38643 100644
--- a/tests/integration/base_storage_test.py
+++ b/tests/integration/base_storage_test.py
@@ -18,8 +18,9 @@ class BaseStorageTest:
num_instances: int = 10 # Max number of cache instances to test
def init_cache(self, cache_name=CACHE_NAME, index=0, clear=True, **kwargs):
+ kwargs = {**self.init_kwargs, **kwargs}
kwargs.setdefault('serializer', 'pickle')
- cache = self.storage_class(cache_name, f'table_{index}', **self.init_kwargs, **kwargs)
+ cache = self.storage_class(cache_name, f'table_{index}', **kwargs)
if clear:
cache.clear()
return cache
@@ -98,7 +99,7 @@ class BaseStorageTest:
def test_picklable_dict(self):
if self.picklable:
- cache = self.init_cache()
+ cache = self.init_cache(serializer='pickle')
original_obj = BasicDataclass(
bool_attr=True,
datetime_attr=datetime(2022, 2, 2),
diff --git a/tests/integration/test_dynamodb.py b/tests/integration/test_dynamodb.py
index 857ab6c..84c1008 100644
--- a/tests/integration/test_dynamodb.py
+++ b/tests/integration/test_dynamodb.py
@@ -1,13 +1,27 @@
+from collections import OrderedDict
+from decimal import Decimal
from unittest.mock import patch
import pytest
+from botocore.exceptions import ClientError
-from requests_cache.backends import DynamoDbCache, DynamoDbDict, DynamoDocumentDict
+from requests_cache.backends import DynamoDbCache, DynamoDbDict, DynamoDbDocumentDict
from requests_cache.serializers import dynamodb_document_serializer
-from tests.conftest import AWS_OPTIONS, HTTPBIN_FORMATS, HTTPBIN_METHODS, fail_if_no_connection
+from tests.conftest import HTTPBIN_FORMATS, HTTPBIN_METHODS, fail_if_no_connection
from tests.integration.base_cache_test import TEST_SERIALIZERS, BaseCacheTest
from tests.integration.base_storage_test import BaseStorageTest
+AWS_OPTIONS = {
+ 'endpoint_url': 'http://localhost:8000',
+ 'region_name': 'us-east-1',
+ 'aws_access_key_id': 'placeholder',
+ 'aws_secret_access_key': 'placeholder',
+}
+DYNAMODB_OPTIONS = {
+ **AWS_OPTIONS,
+ 'serializer': None, # Use class default serializer
+}
+
# Add extra DynamoDB-specific format to list of serializers to test against
DYNAMODB_SERIALIZERS = [dynamodb_document_serializer] + list(TEST_SERIALIZERS.values())
@@ -24,7 +38,7 @@ def ensure_connection():
class TestDynamoDbDict(BaseStorageTest):
storage_class = DynamoDbDict
- init_kwargs = AWS_OPTIONS
+ init_kwargs = DYNAMODB_OPTIONS
@patch('requests_cache.backends.dynamodb.boto3.resource')
def test_connection_kwargs(self, mock_resource):
@@ -32,19 +46,55 @@ class TestDynamoDbDict(BaseStorageTest):
DynamoDbDict('test_table', 'namespace', region_name='us-east-2', invalid_kwarg='???')
mock_resource.assert_called_with('dynamodb', region_name='us-east-2')
+    def test_create_table_error(self):
+        """An error other than 'table already exists' should be reraised"""
+        cache = self.init_cache()
+        error = ClientError({'Error': {'Code': 'NullPointerException'}}, 'CreateTable')
+        with patch.object(cache.connection, 'create_table', side_effect=error):
+            with pytest.raises(ClientError):
+                cache._create_table()
+
+    def test_enable_ttl_error(self):
+        """An error other than 'ttl already enabled' should be reraised"""
+        cache = self.init_cache()
+        error = ClientError({'Error': {'Code': 'NullPointerException'}}, 'UpdateTimeToLive')
+        with patch.object(cache.connection.meta.client, 'update_time_to_live', side_effect=error):
+            with pytest.raises(ClientError):
+                cache._enable_ttl()
+
+ @pytest.mark.parametrize('ttl_enabled', [True, False])
+ def test_ttl(self, ttl_enabled):
+ """DynamoDB's TTL removal process can take up to 48 hours to run, so just test if the
+ 'ttl' attribute is set correctly if enabled, and not set if disabled.
+ """
+ cache = self.init_cache(ttl=ttl_enabled)
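+        # OrderedDict is used because, unlike a plain dict, it supports attribute assignment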
+ item = OrderedDict(foo='bar')
+ item.ttl = 60
+ cache['key'] = item
+
+ # 'ttl' is a reserved word, so to retrieve it we need to alias it
+ item = cache._table.get_item(
+ Key=cache._composite_key('key'),
+ ProjectionExpression='#t',
+ ExpressionAttributeNames={'#t': 'ttl'},
+ )
+ ttl_value = item['Item'].get('ttl')
+
+ if ttl_enabled:
+ assert isinstance(ttl_value, Decimal)
+ else:
+ assert ttl_value is None
+
-class TestDynamoDocumentDict(BaseStorageTest):
- storage_class = DynamoDocumentDict
- init_kwargs = AWS_OPTIONS
+class TestDynamoDbDocumentDict(BaseStorageTest):
+ storage_class = DynamoDbDocumentDict
+ init_kwargs = DYNAMODB_OPTIONS
picklable = True
class TestDynamoDbCache(BaseCacheTest):
backend_class = DynamoDbCache
- init_kwargs = {
- 'serializer': None,
- **AWS_OPTIONS,
- } # Use class default serializer instead of pickle
+ init_kwargs = DYNAMODB_OPTIONS
@pytest.mark.parametrize('serializer', DYNAMODB_SERIALIZERS)
@pytest.mark.parametrize('method', HTTPBIN_METHODS)
diff --git a/tests/integration/test_filesystem.py b/tests/integration/test_filesystem.py
index 1b81dff..4181690 100644
--- a/tests/integration/test_filesystem.py
+++ b/tests/integration/test_filesystem.py
@@ -19,7 +19,8 @@ class TestFileDict(BaseStorageTest):
rmtree(CACHE_NAME, ignore_errors=True)
def init_cache(self, index=0, clear=True, **kwargs):
- cache = FileDict(f'{CACHE_NAME}_{index}', serializer='pickle', use_temp=True, **kwargs)
+ kwargs.setdefault('serializer', 'pickle')
+ cache = FileDict(f'{CACHE_NAME}_{index}', use_temp=True, **kwargs)
if clear:
cache.clear()
return cache