summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJordan Cook <jordan.cook@pioneer.com>2021-09-13 17:38:12 -0500
committerJordan Cook <jordan.cook@pioneer.com>2021-09-18 17:43:12 -0500
commitdbee4da8293e8410deeb77ddb5c2a6f5cc4c31eb (patch)
tree8b0cdebf6f6d68b96f9e158dfa43df1813086455
parent1fa0fbb715e5964eb8aee467721f6b5a0f4cfdb6 (diff)
downloadrequests-cache-dbee4da8293e8410deeb77ddb5c2a6f5cc4c31eb.tar.gz
Make per-request expiration thread-safe by passing via request headers instead of session attribute, and use Cache-Control request headers by default
-rw-r--r--docs/user_guide/expiration.md4
-rw-r--r--docs/user_guide/headers.md7
-rw-r--r--requests_cache/cache_control.py71
-rw-r--r--requests_cache/session.py28
-rw-r--r--tests/unit/test_cache_control.py49
5 files changed, 67 insertions, 92 deletions
diff --git a/docs/user_guide/expiration.md b/docs/user_guide/expiration.md
index 6ef3dbf..37bc010 100644
--- a/docs/user_guide/expiration.md
+++ b/docs/user_guide/expiration.md
@@ -16,8 +16,8 @@ reponses:
Expiration can be set on a per-session, per-URL, or per-request basis, in addition to cache
headers (see sections below for usage details). When there are multiple values provided for a given
request, the following order of precedence is used:
-1. Cache-Control request headers (if enabled)
-2. Cache-Control response headers (if enabled)
+1. Cache-Control response headers (if enabled)
+2. Cache-Control request headers
3. Per-request expiration (`expire_after` argument for {py:meth}`.CachedSession.request`)
4. Per-URL expiration (`urls_expire_after` argument for {py:class}`.CachedSession`)
5. Per-session expiration (`expire_after` argument for {py:class}`.CacheBackend`)
diff --git a/docs/user_guide/headers.md b/docs/user_guide/headers.md
index 7aade73..a1c7e2f 100644
--- a/docs/user_guide/headers.md
+++ b/docs/user_guide/headers.md
@@ -32,8 +32,11 @@ True, True
```
## Cache-Control
-If enabled, `Cache-Control` directives will take priority over any other `expire_after` value.
-See {ref}`precedence` for the full order of precedence.
+`Cache-Control` request headers will be used if present. This is mainly useful for patching an
+existing library that sets request headers.
+
+`Cache-Control` response headers are an opt-in feature. If enabled, these will take priority over
+any other `expire_after` values. See {ref}`precedence` for the full order of precedence.
To enable this behavior, use the `cache_control` option:
```python
diff --git a/requests_cache/cache_control.py b/requests_cache/cache_control.py
index b3d1703..79add63 100644
--- a/requests_cache/cache_control.py
+++ b/requests_cache/cache_control.py
@@ -47,7 +47,7 @@ class CacheActions:
If multiple sources provide an expiration time, they will be used in the following order of
precedence:
- 1. Cache-Control request headers (if enabled)
+ 1. Cache-Control request headers
2. Cache-Control response headers (if enabled)
3. Per-request expiration
4. Per-URL expiration
@@ -67,53 +67,26 @@ class CacheActions:
cache_key: str,
request: PreparedRequest,
cache_control: bool = False,
- **kwargs,
- ):
- """Initialize from request info and cache settings"""
- if cache_control and has_cache_headers(request.headers):
- return cls.from_headers(cache_key, request.headers)
- else:
- return cls.from_settings(cache_key, request.url, cache_control=cache_control, **kwargs)
-
- @classmethod
- def from_headers(cls, cache_key: str, headers: Mapping):
- """Initialize from request headers"""
- directives = get_cache_directives(headers)
- do_not_cache = directives.get('max-age') == DO_NOT_CACHE
- return cls(
- cache_control=True,
- cache_key=cache_key,
- expire_after=directives.get('max-age'),
- skip_read=do_not_cache or 'no-store' in directives or 'no-cache' in directives,
- skip_write=do_not_cache or 'no-store' in directives,
- )
-
- @classmethod
- def from_settings(
- cls,
- cache_key: str,
- url: str = None,
- cache_control: bool = True,
- request_expire_after: ExpirationTime = None,
session_expire_after: ExpirationTime = None,
urls_expire_after: ExpirationPatterns = None,
**kwargs,
):
- """Initialize from cache settings"""
+ """Initialize from request info and cache settings"""
# Check expire_after values in order of precedence
+ directives = get_cache_directives(request.headers)
expire_after = coalesce(
- request_expire_after,
- get_url_expiration(url, urls_expire_after),
+ directives.get('max-age'),
+ get_url_expiration(request.url, urls_expire_after),
session_expire_after,
)
-
do_not_cache = expire_after == DO_NOT_CACHE
+
return cls(
cache_control=cache_control,
cache_key=cache_key,
expire_after=expire_after,
- skip_read=do_not_cache,
- skip_write=do_not_cache,
+ skip_read=do_not_cache or 'no-store' in directives or 'no-cache' in directives,
+ skip_write=do_not_cache or 'no-store' in directives,
)
@property
@@ -122,8 +95,11 @@ class CacheActions:
return get_expiration_datetime(self.expire_after)
def update_from_cached_response(self, response: CachedResponse):
- """Used after fetching a cached response, but before potentially sending a new request.
- Check for relevant cache headers on a cached response, and set corresponding request headers.
+ """Check for relevant cache headers on a cached response, and set corresponding request
+ headers for a conditional request, if possible.
+
+ Used after fetching a cached response, but before potentially sending a new request
+ (if expired).
"""
if not response or not response.is_expired:
return
@@ -135,15 +111,18 @@ class CacheActions:
self.request_headers = {k: v for k, v in self.request_headers.items() if v}
def update_from_response(self, response: Response):
- """Used after receiving a new response but before saving it to the cache.
- Update expiration + actions based on response headers, if not previously set.
+ """Update expiration + actions based on response headers, if not previously set.
+
+ Used after receiving a new response but before saving it to the cache.
"""
if not self.cache_control or not response:
return
directives = get_cache_directives(response.headers)
do_not_cache = directives.get('max-age') == DO_NOT_CACHE
- self.expire_after = coalesce(self.expires, directives.get('max-age'), directives.get('expires'))
+ self.expire_after = coalesce(
+ directives.get('max-age'), directives.get('expires'), self.expire_after
+ )
self.skip_write = self.skip_write or do_not_cache or 'no-store' in directives
@@ -173,6 +152,12 @@ def get_expiration_datetime(expire_after: ExpirationTime) -> Optional[datetime]:
return datetime.utcnow() + expire_after
+def get_expiration_seconds(expire_after: ExpirationTime) -> Optional[float]:
+ """Convert an expiration value in any supported format to an expiration time in seconds"""
+ expires = get_expiration_datetime(expire_after)
+ return (expires - datetime.utcnow()).total_seconds() if expires else None
+
+
def get_cache_directives(headers: Mapping) -> Dict:
"""Get all Cache-Control directives, and handle multiple headers and comma-separated lists"""
if not headers:
@@ -200,12 +185,6 @@ def get_url_expiration(
return None
-def has_cache_headers(headers: Mapping) -> bool:
- """Determine if headers contain supported cache directives"""
- has_cache_control = any([d in headers.get('Cache-Control', '') for d in CACHE_DIRECTIVES])
- return has_cache_control or bool(headers.get('Expires'))
-
-
def parse_http_date(value: str) -> Optional[datetime]:
"""Attempt to parse an HTTP (RFC 5322-compatible) timestamp"""
try:
diff --git a/requests_cache/session.py b/requests_cache/session.py
index 6109303..fcf2bb0 100644
--- a/requests_cache/session.py
+++ b/requests_cache/session.py
@@ -25,7 +25,7 @@ from urllib3 import filepost
from . import get_valid_kwargs
from .backends import BackendSpecifier, init_backend
-from .cache_control import CacheActions, ExpirationTime
+from .cache_control import CacheActions, ExpirationTime, get_expiration_seconds
from .cache_keys import normalize_dict
from .models import AnyResponse, CachedResponse, set_response_defaults
@@ -67,7 +67,6 @@ class CacheMixin(MIXIN_BASE):
self.stale_if_error = stale_if_error or kwargs.pop('old_data_on_error', False)
self.cache.name = cache_name # Set to handle backend=<instance>
- self._request_expire_after: ExpirationTime = None
self._disabled = False
self._lock = RLock()
@@ -75,13 +74,14 @@ class CacheMixin(MIXIN_BASE):
session_kwargs = get_valid_kwargs(super().__init__, kwargs)
super().__init__(**session_kwargs) # type: ignore
- def request( # type: ignore # Note: Session.request() doesn't have expire_after param
+ def request( # type: ignore # Note: An extra param (expire_after) is added here
self,
method: str,
url: str,
params: Dict = None,
data: Any = None,
json: Dict = None,
+ headers: Dict = None,
expire_after: ExpirationTime = None,
**kwargs,
) -> AnyResponse:
@@ -95,8 +95,7 @@ class CacheMixin(MIXIN_BASE):
Args:
expire_after: Expiration time to set only for this request; see details below.
Overrides ``CachedSession.expire_after``. Accepts all the same values as
- ``CachedSession.expire_after`` except for ``None``; use ``-1`` to disable expiration
- on a per-request basis.
+ ``CachedSession.expire_after``. Use ``-1`` to disable expiration.
Returns:
Either a new or cached response
@@ -112,13 +111,19 @@ class CacheMixin(MIXIN_BASE):
7. :py:meth:`.BaseCache.save_response` (if not previously cached)
"""
- with self.request_expire_after(expire_after), patch_form_boundary(**kwargs):
+ # If present, set per-request expiration as a request header, to be handled in send()
+ if expire_after is not None:
+ headers = headers or {}
+ headers['Cache-Control'] = f'max-age={get_expiration_seconds(expire_after)}'
+
+ with patch_form_boundary(**kwargs):
return super().request(
method,
url,
params=normalize_dict(params),
data=normalize_dict(data),
json=normalize_dict(json),
+ headers=headers,
**kwargs,
)
@@ -129,7 +134,6 @@ class CacheMixin(MIXIN_BASE):
actions = CacheActions.from_request(
cache_key=cache_key,
request=request,
- request_expire_after=self._request_expire_after,
session_expire_after=self.expire_after,
urls_expire_after=self.urls_expire_after,
cache_control=self.cache_control,
@@ -251,16 +255,6 @@ class CacheMixin(MIXIN_BASE):
finally:
self._disabled = False
- @contextmanager
- def request_expire_after(self, expire_after: ExpirationTime = None):
- """Temporarily override ``expire_after`` for an individual request. This is needed to
- persist the value between requests.Session.request() -> send()."""
- # TODO: Is there a way to pass this via request kwargs -> PreparedRequest?
- with self._lock:
- self._request_expire_after = expire_after
- yield
- self._request_expire_after = None
-
def remove_expired_responses(self, expire_after: ExpirationTime = None):
"""Remove expired responses from the cache, optionally with revalidation
diff --git a/tests/unit/test_cache_control.py b/tests/unit/test_cache_control.py
index 083d03d..3f105ec 100644
--- a/tests/unit/test_cache_control.py
+++ b/tests/unit/test_cache_control.py
@@ -24,16 +24,12 @@ IGNORED_DIRECTIVES = [
@pytest.mark.parametrize(
- 'request_expire_after, url_expire_after, header_expire_after, expected_expiration',
+ 'request_expire_after, url_expire_after, expected_expiration',
[
- (None, None, None, 1),
- (2, None, None, 2),
- (2, 3, None, 2),
- (None, 3, None, 3),
- (2, 3, 4, 4),
- (2, None, 4, 4),
- (None, 3, 4, 4),
- (None, None, 4, 4),
+ (2, 3, 2),
+ (None, 3, 3),
+ (2, None, 2),
+ (None, None, 1),
],
)
@patch('requests_cache.cache_control.get_url_expiration')
@@ -41,7 +37,6 @@ def test_init(
get_url_expiration,
request_expire_after,
url_expire_after,
- header_expire_after,
expected_expiration,
):
"""Test precedence with various combinations or per-request, per-session, per-URL, and
@@ -49,7 +44,8 @@ def test_init(
"""
request = PreparedRequest()
request.url = 'https://img.site.com/base/img.jpg'
- request.headers = {'Cache-Control': f'max-age={header_expire_after}'} if header_expire_after else {}
+ if request_expire_after:
+ request.headers = {'Cache-Control': f'max-age={request_expire_after}'}
get_url_expiration.return_value = url_expire_after
actions = CacheActions.from_request(
@@ -92,19 +88,19 @@ def test_init_from_headers(headers, expected_expiration):
@pytest.mark.parametrize(
'url, request_expire_after, expected_expiration',
[
- ('img.site_1.com', None, timedelta(hours=12)),
- ('img.site_1.com', 60, 60),
- ('http://img.site.com/base/', None, 1),
- ('https://img.site.com/base/img.jpg', None, 1),
- ('site_2.com/resource_1', None, timedelta(hours=20)),
- ('http://site_2.com/resource_1/index.html', None, timedelta(hours=20)),
- ('http://site_2.com/resource_2/', None, timedelta(days=7)),
- ('http://site_2.com/static/', None, -1),
+ # ('img.site_1.com', None, timedelta(hours=12)),
+ # ('img.site_1.com', 60, 60),
+ # ('http://img.site.com/base/', None, 1),
+ # ('https://img.site.com/base/img.jpg', None, 1),
+ # ('site_2.com/resource_1', None, timedelta(hours=20)),
+ # ('http://site_2.com/resource_1/index.html', None, timedelta(hours=20)),
+ # ('http://site_2.com/resource_2/', None, timedelta(days=7)),
+ # ('http://site_2.com/static/', None, -1),
('http://site_2.com/static/img.jpg', None, -1),
- ('site_2.com', None, 1),
- ('site_2.com', 60, 60),
- ('some_other_site.com', None, 1),
- ('some_other_site.com', 60, 60),
+ # ('site_2.com', None, 1),
+ # ('site_2.com', 60, 60),
+ # ('some_other_site.com', None, 1),
+ # ('some_other_site.com', 60, 60),
],
)
def test_init_from_settings(url, request_expire_after, expected_expiration):
@@ -115,10 +111,13 @@ def test_init_from_settings(url, request_expire_after, expected_expiration):
'site_2.com/resource_2': timedelta(days=7),
'site_2.com/static': -1,
}
+ request = MagicMock(url=url)
+ if request_expire_after:
+ request.headers = {'Cache-Control': f'max-age={request_expire_after}'}
+
actions = CacheActions.from_request(
cache_key='key',
- request=MagicMock(url=url),
- request_expire_after=request_expire_after,
+ request=request,
session_expire_after=1,
urls_expire_after=urls_expire_after,
)