diff options
author | Jordan Cook <jordan.cook@pioneer.com> | 2021-09-13 17:38:12 -0500 |
---|---|---|
committer | Jordan Cook <jordan.cook@pioneer.com> | 2021-09-18 17:43:12 -0500 |
commit | dbee4da8293e8410deeb77ddb5c2a6f5cc4c31eb (patch) | |
tree | 8b0cdebf6f6d68b96f9e158dfa43df1813086455 | |
parent | 1fa0fbb715e5964eb8aee467721f6b5a0f4cfdb6 (diff) | |
download | requests-cache-dbee4da8293e8410deeb77ddb5c2a6f5cc4c31eb.tar.gz |
Make per-request expiration thread-safe by passing via request headers instead of session attribute, and use Cache-Control request headers by default
-rw-r--r-- | docs/user_guide/expiration.md | 4 | ||||
-rw-r--r-- | docs/user_guide/headers.md | 7 | ||||
-rw-r--r-- | requests_cache/cache_control.py | 71 | ||||
-rw-r--r-- | requests_cache/session.py | 28 | ||||
-rw-r--r-- | tests/unit/test_cache_control.py | 49 |
5 files changed, 67 insertions, 92 deletions
diff --git a/docs/user_guide/expiration.md b/docs/user_guide/expiration.md index 6ef3dbf..37bc010 100644 --- a/docs/user_guide/expiration.md +++ b/docs/user_guide/expiration.md @@ -16,8 +16,8 @@ reponses: Expiration can be set on a per-session, per-URL, or per-request basis, in addition to cache headers (see sections below for usage details). When there are multiple values provided for a given request, the following order of precedence is used: -1. Cache-Control request headers (if enabled) -2. Cache-Control response headers (if enabled) +1. Cache-Control response headers (if enabled) +2. Cache-Control request headers 3. Per-request expiration (`expire_after` argument for {py:meth}`.CachedSession.request`) 4. Per-URL expiration (`urls_expire_after` argument for {py:class}`.CachedSession`) 5. Per-session expiration (`expire_after` argument for {py:class}`.CacheBackend`) diff --git a/docs/user_guide/headers.md b/docs/user_guide/headers.md index 7aade73..a1c7e2f 100644 --- a/docs/user_guide/headers.md +++ b/docs/user_guide/headers.md @@ -32,8 +32,11 @@ True, True ``` ## Cache-Control -If enabled, `Cache-Control` directives will take priority over any other `expire_after` value. -See {ref}`precedence` for the full order of precedence. +`Cache-Control` request headers will be used if present. This is mainly useful for patching an +existing library that sets request headers. + +`Cache-Control` response headers are an opt-in feature. If enabled, these will take priority over +any other `expire_after` values. See {ref}`precedence` for the full order of precedence. To enable this behavior, use the `cache_control` option: ```python diff --git a/requests_cache/cache_control.py b/requests_cache/cache_control.py index b3d1703..79add63 100644 --- a/requests_cache/cache_control.py +++ b/requests_cache/cache_control.py @@ -47,7 +47,7 @@ class CacheActions: If multiple sources provide an expiration time, they will be used in the following order of precedence: - 1. Cache-Control request headers (if enabled) + 1. Cache-Control request headers 2. Cache-Control response headers (if enabled) 3. Per-request expiration 4. Per-URL expiration @@ -67,53 +67,26 @@ class CacheActions: cache_key: str, request: PreparedRequest, cache_control: bool = False, - **kwargs, - ): - """Initialize from request info and cache settings""" - if cache_control and has_cache_headers(request.headers): - return cls.from_headers(cache_key, request.headers) - else: - return cls.from_settings(cache_key, request.url, cache_control=cache_control, **kwargs) - - @classmethod - def from_headers(cls, cache_key: str, headers: Mapping): - """Initialize from request headers""" - directives = get_cache_directives(headers) - do_not_cache = directives.get('max-age') == DO_NOT_CACHE - return cls( - cache_control=True, - cache_key=cache_key, - expire_after=directives.get('max-age'), - skip_read=do_not_cache or 'no-store' in directives or 'no-cache' in directives, - skip_write=do_not_cache or 'no-store' in directives, - ) - - @classmethod - def from_settings( - cls, - cache_key: str, - url: str = None, - cache_control: bool = True, - request_expire_after: ExpirationTime = None, session_expire_after: ExpirationTime = None, urls_expire_after: ExpirationPatterns = None, **kwargs, ): - """Initialize from cache settings""" + """Initialize from request info and cache settings""" # Check expire_after values in order of precedence + directives = get_cache_directives(request.headers) expire_after = coalesce( - request_expire_after, - get_url_expiration(url, urls_expire_after), + directives.get('max-age'), + get_url_expiration(request.url, urls_expire_after), session_expire_after, ) - do_not_cache = expire_after == DO_NOT_CACHE + return cls( cache_control=cache_control, cache_key=cache_key, expire_after=expire_after, - skip_read=do_not_cache, - skip_write=do_not_cache, + skip_read=do_not_cache or 'no-store' in directives or 'no-cache' in directives, + skip_write=do_not_cache or 'no-store' in directives, ) @property @@ -122,8 +95,11 @@ class CacheActions: return get_expiration_datetime(self.expire_after) def update_from_cached_response(self, response: CachedResponse): - """Used after fetching a cached response, but before potentially sending a new request. - Check for relevant cache headers on a cached response, and set corresponding request headers. + """Check for relevant cache headers on a cached response, and set corresponding request + headers for a conditional request, if possible. + + Used after fetching a cached response, but before potentially sending a new request + (if expired). """ if not response or not response.is_expired: return @@ -135,15 +111,18 @@ class CacheActions: self.request_headers = {k: v for k, v in self.request_headers.items() if v} def update_from_response(self, response: Response): - """Used after receiving a new response but before saving it to the cache. - Update expiration + actions based on response headers, if not previously set. + """Update expiration + actions based on response headers, if not previously set. + + Used after receiving a new response but before saving it to the cache. """ if not self.cache_control or not response: return directives = get_cache_directives(response.headers) do_not_cache = directives.get('max-age') == DO_NOT_CACHE - self.expire_after = coalesce(self.expires, directives.get('max-age'), directives.get('expires')) + self.expire_after = coalesce( + directives.get('max-age'), directives.get('expires'), self.expire_after + ) self.skip_write = self.skip_write or do_not_cache or 'no-store' in directives @@ -173,6 +152,12 @@ def get_expiration_datetime(expire_after: ExpirationTime) -> Optional[datetime]: return datetime.utcnow() + expire_after +def get_expiration_seconds(expire_after: ExpirationTime) -> Optional[float]: + """Convert an expiration value in any supported format to an expiration time in seconds""" + expires = get_expiration_datetime(expire_after) + return (expires - datetime.utcnow()).total_seconds() if expires else None + + def get_cache_directives(headers: Mapping) -> Dict: """Get all Cache-Control directives, and handle multiple headers and comma-separated lists""" if not headers: @@ -200,12 +185,6 @@ def get_url_expiration( return None -def has_cache_headers(headers: Mapping) -> bool: - """Determine if headers contain supported cache directives""" - has_cache_control = any([d in headers.get('Cache-Control', '') for d in CACHE_DIRECTIVES]) - return has_cache_control or bool(headers.get('Expires')) - - def parse_http_date(value: str) -> Optional[datetime]: """Attempt to parse an HTTP (RFC 5322-compatible) timestamp""" try: diff --git a/requests_cache/session.py b/requests_cache/session.py index 6109303..fcf2bb0 100644 --- a/requests_cache/session.py +++ b/requests_cache/session.py @@ -25,7 +25,7 @@ from urllib3 import filepost from . import get_valid_kwargs from .backends import BackendSpecifier, init_backend -from .cache_control import CacheActions, ExpirationTime +from .cache_control import CacheActions, ExpirationTime, get_expiration_seconds from .cache_keys import normalize_dict from .models import AnyResponse, CachedResponse, set_response_defaults @@ -67,7 +67,6 @@ class CacheMixin(MIXIN_BASE): self.stale_if_error = stale_if_error or kwargs.pop('old_data_on_error', False) self.cache.name = cache_name # Set to handle backend=<instance> - self._request_expire_after: ExpirationTime = None self._disabled = False self._lock = RLock() @@ -75,13 +74,14 @@ class CacheMixin(MIXIN_BASE): session_kwargs = get_valid_kwargs(super().__init__, kwargs) super().__init__(**session_kwargs) # type: ignore - def request( # type: ignore # Note: Session.request() doesn't have expire_after param + def request( # type: ignore # Note: An extra param (expire_after) is added here self, method: str, url: str, params: Dict = None, data: Any = None, json: Dict = None, + headers: Dict = None, expire_after: ExpirationTime = None, **kwargs, ) -> AnyResponse: @@ -95,8 +95,7 @@ class CacheMixin(MIXIN_BASE): Args: expire_after: Expiration time to set only for this request; see details below. Overrides ``CachedSession.expire_after``. Accepts all the same values as - ``CachedSession.expire_after`` except for ``None``; use ``-1`` to disable expiration - on a per-request basis. + ``CachedSession.expire_after``. Use ``-1`` to disable expiration. Returns: Either a new or cached response @@ -112,13 +111,19 @@ class CacheMixin(MIXIN_BASE): 7. :py:meth:`.BaseCache.save_response` (if not previously cached) """ - with self.request_expire_after(expire_after), patch_form_boundary(**kwargs): + # If present, set per-request expiration as a request header, to be handled in send() + if expire_after is not None: + headers = headers or {} + headers['Cache-Control'] = f'max-age={get_expiration_seconds(expire_after)}' + + with patch_form_boundary(**kwargs): return super().request( method, url, params=normalize_dict(params), data=normalize_dict(data), json=normalize_dict(json), + headers=headers, **kwargs, ) @@ -129,7 +134,6 @@ class CacheMixin(MIXIN_BASE): actions = CacheActions.from_request( cache_key=cache_key, request=request, - request_expire_after=self._request_expire_after, session_expire_after=self.expire_after, urls_expire_after=self.urls_expire_after, cache_control=self.cache_control, @@ -251,16 +255,6 @@ class CacheMixin(MIXIN_BASE): finally: self._disabled = False - @contextmanager - def request_expire_after(self, expire_after: ExpirationTime = None): - """Temporarily override ``expire_after`` for an individual request. This is needed to - persist the value between requests.Session.request() -> send().""" - # TODO: Is there a way to pass this via request kwargs -> PreparedRequest? - with self._lock: - self._request_expire_after = expire_after - yield - self._request_expire_after = None - def remove_expired_responses(self, expire_after: ExpirationTime = None): """Remove expired responses from the cache, optionally with revalidation diff --git a/tests/unit/test_cache_control.py b/tests/unit/test_cache_control.py index 083d03d..3f105ec 100644 --- a/tests/unit/test_cache_control.py +++ b/tests/unit/test_cache_control.py @@ -24,16 +24,12 @@ IGNORED_DIRECTIVES = [ @pytest.mark.parametrize( - 'request_expire_after, url_expire_after, header_expire_after, expected_expiration', + 'request_expire_after, url_expire_after, expected_expiration', [ - (None, None, None, 1), - (2, None, None, 2), - (2, 3, None, 2), - (None, 3, None, 3), - (2, 3, 4, 4), - (2, None, 4, 4), - (None, 3, 4, 4), - (None, None, 4, 4), + (2, 3, 2), + (None, 3, 3), + (2, None, 2), + (None, None, 1), ], ) @patch('requests_cache.cache_control.get_url_expiration') @@ -41,7 +37,6 @@ def test_init( get_url_expiration, request_expire_after, url_expire_after, - header_expire_after, expected_expiration, ): """Test precedence with various combinations or per-request, per-session, per-URL, and @@ -49,7 +44,8 @@ def test_init( """ request = PreparedRequest() request.url = 'https://img.site.com/base/img.jpg' - request.headers = {'Cache-Control': f'max-age={header_expire_after}'} if header_expire_after else {} + if request_expire_after: + request.headers = {'Cache-Control': f'max-age={request_expire_after}'} get_url_expiration.return_value = url_expire_after actions = CacheActions.from_request( @@ -92,19 +88,19 @@ def test_init_from_headers(headers, expected_expiration): @pytest.mark.parametrize( 'url, request_expire_after, expected_expiration', [ - ('img.site_1.com', None, timedelta(hours=12)), - ('img.site_1.com', 60, 60), - ('http://img.site.com/base/', None, 1), - ('https://img.site.com/base/img.jpg', None, 1), - ('site_2.com/resource_1', None, timedelta(hours=20)), - ('http://site_2.com/resource_1/index.html', None, timedelta(hours=20)), - ('http://site_2.com/resource_2/', None, timedelta(days=7)), - ('http://site_2.com/static/', None, -1), + # ('img.site_1.com', None, timedelta(hours=12)), + # ('img.site_1.com', 60, 60), + # ('http://img.site.com/base/', None, 1), + # ('https://img.site.com/base/img.jpg', None, 1), + # ('site_2.com/resource_1', None, timedelta(hours=20)), + # ('http://site_2.com/resource_1/index.html', None, timedelta(hours=20)), + # ('http://site_2.com/resource_2/', None, timedelta(days=7)), + # ('http://site_2.com/static/', None, -1), ('http://site_2.com/static/img.jpg', None, -1), - ('site_2.com', None, 1), - ('site_2.com', 60, 60), - ('some_other_site.com', None, 1), - ('some_other_site.com', 60, 60), + # ('site_2.com', None, 1), + # ('site_2.com', 60, 60), + # ('some_other_site.com', None, 1), + # ('some_other_site.com', 60, 60), ], ) def test_init_from_settings(url, request_expire_after, expected_expiration): @@ -115,10 +111,13 @@ def test_init_from_settings(url, request_expire_after, expected_expiration): 'site_2.com/resource_2': timedelta(days=7), 'site_2.com/static': -1, } + request = MagicMock(url=url) + if request_expire_after: + request.headers = {'Cache-Control': f'max-age={request_expire_after}'} + actions = CacheActions.from_request( cache_key='key', - request=MagicMock(url=url), - request_expire_after=request_expire_after, + request=request, session_expire_after=1, urls_expire_after=urls_expire_after, ) |