summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJordan Cook <JWCook@users.noreply.github.com>2021-10-10 14:05:53 -0500
committerGitHub <noreply@github.com>2021-10-10 14:05:53 -0500
commit73cc36133b7eb3d4f714b2199042d7b6fc6bd902 (patch)
tree185169048d1b0b584fdef39c9505f8ea6255b7c3
parente42f6753d017b8d9f9a176c7bc957b13e17f5df5 (diff)
parentd9ea0f1391ce3fdcf9e38bda1979ed7877e5b44a (diff)
downloadrequests-cache-73cc36133b7eb3d4f714b2199042d7b6fc6bd902.tar.gz
Merge pull request #430 from JWCook/cache-control
Fix behavior for cache_control=True with expire_after, and support revalidation with max-age=0
-rw-r--r--HISTORY.md2
-rw-r--r--requests_cache/cache_control.py56
-rw-r--r--requests_cache/session.py29
-rw-r--r--tests/integration/base_cache_test.py28
-rw-r--r--tests/unit/test_cache_control.py82
5 files changed, 144 insertions, 53 deletions
diff --git a/HISTORY.md b/HISTORY.md
index f86478f..ccbfe96 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -7,6 +7,8 @@
* Make per-request expiration thread-safe for both `CachedSession.request()` and `CachedSession.send()`
* Handle some additional corner cases when normalizing request data
* Some micro-optimizations for request matching
+* Fix issue with cache headers not being used correctly if `cache_control=True` is used with an `expire_after` value
+* Support immediate expiration + revalidation for `Cache-Control: max-age=0` and `Expires: 0`
* Fix license metadata as shown on PyPI
## 0.8.1 (2021-09-15)
diff --git a/requests_cache/cache_control.py b/requests_cache/cache_control.py
index 10a6e4e..2cad7b4 100644
--- a/requests_cache/cache_control.py
+++ b/requests_cache/cache_control.py
@@ -24,6 +24,7 @@ if TYPE_CHECKING:
from .models import CachedResponse
__all__ = ['DO_NOT_CACHE', 'CacheActions']
+
# May be set by either headers or expire_after param to disable caching
DO_NOT_CACHE = 0
# Supported Cache-Control directives
@@ -41,10 +42,10 @@ class CacheActions:
"""A class that translates cache settings and headers into specific actions to take for a
given cache item. Actions include:
- * Reading from the cache
- * Writing to the cache
- * Setting cache expiration
- * Adding request headers
+ * Read from the cache
+ * Write to the cache
+ * Set cache expiration
+ * Add headers for conditional requests
If multiple sources provide an expiration time, they will be used in the following order of
precedence:
@@ -54,14 +55,17 @@ class CacheActions:
3. Per-request expiration
4. Per-URL expiration
5. Per-session expiration
+
+ See :ref:`headers` for more details about behavior.
"""
cache_control: bool = field(default=False)
cache_key: str = field(default=None)
expire_after: ExpirationTime = field(default=None)
- request_headers: Dict[str, str] = field(factory=dict)
+ request_directives: Dict[str, str] = field(factory=dict)
skip_read: bool = field(default=False)
skip_write: bool = field(default=False)
+ validation_headers: Dict[str, str] = field(factory=dict)
@classmethod
def from_request(
@@ -75,22 +79,30 @@ class CacheActions:
**kwargs,
):
"""Initialize from request info and cache settings"""
- # Check expire_after values in order of precedence
directives = get_cache_directives(request.headers)
+ logger.debug(f'Cache directives from request headers: {directives}')
+
+ # Check expiration values in order of precedence
expire_after = coalesce(
directives.get('max-age'),
request_expire_after,
get_url_expiration(request.url, urls_expire_after),
session_expire_after,
)
- do_not_cache = expire_after == DO_NOT_CACHE
+
+ # Check conditions for reading from the cache, based on request headers and possibly
+ # expire_after options. With cache_control=True, don't consider expire_after options, since
+ # these can be overridden by response headers.
+ check_expiration = directives.get('max-age') if cache_control else expire_after
+ do_not_cache = check_expiration == DO_NOT_CACHE or 'no-store' in directives
return cls(
cache_control=cache_control,
cache_key=cache_key,
expire_after=expire_after,
- skip_read=do_not_cache or 'no-store' in directives or 'no-cache' in directives,
- skip_write=do_not_cache or 'no-store' in directives,
+ request_directives=directives,
+ skip_read=do_not_cache or 'no-cache' in directives,
+ skip_write=do_not_cache,
)
@property
@@ -109,25 +121,39 @@ class CacheActions:
return
if response.headers.get('ETag'):
- self.request_headers['If-None-Match'] = response.headers['ETag']
+ self.validation_headers['If-None-Match'] = response.headers['ETag']
if response.headers.get('Last-Modified'):
- self.request_headers['If-Modified-Since'] = response.headers['Last-Modified']
- self.request_headers = {k: v for k, v in self.request_headers.items() if v}
+ self.validation_headers['If-Modified-Since'] = response.headers['Last-Modified']
def update_from_response(self, response: Response):
"""Update expiration + actions based on response headers, if not previously set.
Used after receiving a new response but before saving it to the cache.
"""
- if not self.cache_control or not response:
+ if not response or not self.cache_control:
return
directives = get_cache_directives(response.headers)
- do_not_cache = directives.get('max-age') == DO_NOT_CACHE
+ logger.debug(f'Cache directives from response headers: {directives}')
+
+ # Check expiration headers
self.expire_after = coalesce(
directives.get('max-age'), directives.get('expires'), self.expire_after
)
- self.skip_write = self.skip_write or do_not_cache or 'no-store' in directives
+
+ # If expiration is 0 and there's a validator, save it to the cache and revalidate on use
+ has_validator = response.headers.get('ETag') or response.headers.get('Last-Modified')
+ if self.expire_after == DO_NOT_CACHE and has_validator:
+ self.expire_after = datetime.utcnow()
+
+ # Check conditions for writing to the cache
+ self.skip_write = any(
+ [
+ self.expire_after == DO_NOT_CACHE,
+ 'no-store' in directives,
+ self.skip_write,
+ ]
+ )
def coalesce(*values: Any, default=None) -> Any:
diff --git a/requests_cache/session.py b/requests_cache/session.py
index a2234be..22bfd50 100644
--- a/requests_cache/session.py
+++ b/requests_cache/session.py
@@ -16,7 +16,7 @@
from contextlib import contextmanager
from logging import getLogger
from threading import RLock
-from typing import TYPE_CHECKING, Any, Callable, Dict, Iterable, Optional
+from typing import TYPE_CHECKING, Callable, Dict, Iterable, Optional
from requests import PreparedRequest, Response
from requests import Session as OriginalSession
@@ -78,9 +78,6 @@ class CacheMixin(MIXIN_BASE):
self,
method: str,
url: str,
- params: Dict = None,
- data: Any = None,
- json: Dict = None,
headers: Dict = None,
expire_after: ExpirationTime = None,
**kwargs,
@@ -116,15 +113,7 @@ class CacheMixin(MIXIN_BASE):
headers['Cache-Control'] = f'max-age={get_expiration_seconds(expire_after)}'
with patch_form_boundary(**kwargs):
- return super().request(
- method,
- url,
- params=params,
- data=data,
- json=json,
- headers=headers,
- **kwargs,
- )
+ return super().request(method, url, headers=headers, **kwargs)
def send(
self, request: PreparedRequest, expire_after: ExpirationTime = None, **kwargs
@@ -182,10 +171,10 @@ class CacheMixin(MIXIN_BASE):
) -> AnyResponse:
"""Send the request and cache the response, unless disabled by settings or headers.
- If applicable, also add request headers to check if the remote resource has been modified.
- If we get a 304 Not Modified response, return the expired cache item.
+ If applicable, also add headers to make a conditional request. If we get a 304 Not Modified
+ response, return the stale cache item.
"""
- request.headers.update(actions.request_headers)
+ request.headers.update(actions.validation_headers)
response = super().send(request, **kwargs)
actions.update_from_response(response)
@@ -202,9 +191,9 @@ class CacheMixin(MIXIN_BASE):
self, request: PreparedRequest, actions: CacheActions, cached_response: CachedResponse, **kwargs
) -> AnyResponse:
"""Attempt to resend the request and cache the new response. If the request fails, delete
- the expired cache item.
+ the stale cache item.
"""
- logger.debug('Expired response; attempting to re-send request')
+ logger.debug('Stale response; attempting to re-send request')
try:
return self._send_and_cache(request, actions, cached_response, **kwargs)
except Exception:
@@ -215,10 +204,10 @@ class CacheMixin(MIXIN_BASE):
self, request: PreparedRequest, actions: CacheActions, cached_response: CachedResponse, **kwargs
) -> AnyResponse:
"""Attempt to resend the request and cache the new response. If there are any errors, ignore
- them and and return the expired cache item.
+ them and and return the stale cache item.
"""
# Attempt to send the request and cache the new response
- logger.debug('Expired response; attempting to re-send request')
+ logger.debug('Stale response; attempting to re-send request')
try:
response = self._send_and_cache(request, actions, cached_response, **kwargs)
response.raise_for_status()
diff --git a/tests/integration/base_cache_test.py b/tests/integration/base_cache_test.py
index b435b4f..8f1c5f2 100644
--- a/tests/integration/base_cache_test.py
+++ b/tests/integration/base_cache_test.py
@@ -5,11 +5,12 @@ from io import BytesIO
from threading import Thread
from time import sleep, time
from typing import Dict, Type
+from unittest.mock import MagicMock, patch
from urllib.parse import parse_qs, urlparse
import pytest
import requests
-from requests.models import PreparedRequest
+from requests import PreparedRequest, Session
from requests_cache import ALL_METHODS, CachedResponse, CachedSession
from requests_cache.backends.base import BaseCache
@@ -168,7 +169,7 @@ class BaseCacheTest:
({'ETag': ETAG, 'Last-Modified': LAST_MODIFIED}, True),
],
)
- def test_304_not_modified(self, cached_response_headers, expected_from_cache):
+ def test_conditional_request(self, cached_response_headers, expected_from_cache):
"""Test behavior of ETag and Last-Modified headers and 304 responses.
When a cached response contains one of these headers, corresponding request headers should
@@ -184,6 +185,29 @@ class BaseCacheTest:
response = session.get(httpbin('cache'))
assert response.from_cache == expected_from_cache
+ def test_conditional_request__max_age_0(self):
+ """With both max-age=0 and a validator, the response should be saved and revalidated on next
+ request
+ """
+ url = httpbin('response-headers')
+ response_headers = {'Cache-Control': 'max-age=0', 'ETag': ETAG}
+ session = self.init_session(cache_control=True)
+
+ # This endpoint returns request params as response headers
+ session.get(url, params=response_headers)
+
+ # It doesn't respond to conditional requests, but let's just pretend it does
+ with patch.object(Session, 'send', return_value=MagicMock(status_code=304)):
+ response = session.get(url, params=response_headers)
+
+ assert response.from_cache is True
+ assert response.is_expired is True
+
+ # cache_key = list(session.cache.responses.keys())[0]
+ # response = session.cache.responses[cache_key]
+ # response.request.url = httpbin(f'etag/{ETAG}')
+ # response = session.get(httpbin('response-headers'), params=response_headers)
+
@pytest.mark.parametrize('stream', [True, False])
def test_response_decode(self, stream):
"""Test that gzip-compressed raw responses (including streamed responses) can be manually
diff --git a/tests/unit/test_cache_control.py b/tests/unit/test_cache_control.py
index 3f105ec..2388f3d 100644
--- a/tests/unit/test_cache_control.py
+++ b/tests/unit/test_cache_control.py
@@ -88,19 +88,19 @@ def test_init_from_headers(headers, expected_expiration):
@pytest.mark.parametrize(
'url, request_expire_after, expected_expiration',
[
- # ('img.site_1.com', None, timedelta(hours=12)),
- # ('img.site_1.com', 60, 60),
- # ('http://img.site.com/base/', None, 1),
- # ('https://img.site.com/base/img.jpg', None, 1),
- # ('site_2.com/resource_1', None, timedelta(hours=20)),
- # ('http://site_2.com/resource_1/index.html', None, timedelta(hours=20)),
- # ('http://site_2.com/resource_2/', None, timedelta(days=7)),
- # ('http://site_2.com/static/', None, -1),
+ ('img.site_1.com', None, timedelta(hours=12)),
+ ('img.site_1.com', 60, 60),
+ ('http://img.site.com/base/', None, 1),
+ ('https://img.site.com/base/img.jpg', None, 1),
+ ('site_2.com/resource_1', None, timedelta(hours=20)),
+ ('http://site_2.com/resource_1/index.html', None, timedelta(hours=20)),
+ ('http://site_2.com/resource_2/', None, timedelta(days=7)),
+ ('http://site_2.com/static/', None, -1),
('http://site_2.com/static/img.jpg', None, -1),
- # ('site_2.com', None, 1),
- # ('site_2.com', 60, 60),
- # ('some_other_site.com', None, 1),
- # ('some_other_site.com', 60, 60),
+ ('site_2.com', None, 1),
+ ('site_2.com', 60, 60),
+ ('some_other_site.com', None, 1),
+ ('some_other_site.com', 60, 60),
],
)
def test_init_from_settings(url, request_expire_after, expected_expiration):
@@ -125,7 +125,42 @@ def test_init_from_settings(url, request_expire_after, expected_expiration):
@pytest.mark.parametrize(
- 'response_headers, expected_request_headers',
+ 'cache_control, headers, expire_after, expected_expiration, expected_skip_read',
+ [
+ (False, {'Cache-Control': 'max-age=60'}, 1, 60, False),
+ (False, {}, 1, 1, False),
+ (False, {}, 0, 0, True),
+ (True, {'Cache-Control': 'max-age=60'}, 1, 60, False),
+ (True, {'Cache-Control': 'max-age=0'}, 1, 0, True),
+ (True, {'Cache-Control': 'no-store'}, 1, 1, True),
+ (True, {'Cache-Control': 'no-cache'}, 1, 1, True),
+ (True, {}, 1, 1, False),
+ (True, {}, 0, 0, False),
+ ],
+)
+def test_init_from_settings_and_headers(
+ cache_control, headers, expire_after, expected_expiration, expected_skip_read
+):
+ """Test behavior with both cache settings and request headers. The only variation in behavior
+ with cache_control=True is that expire_after=0 should *not* cause the cache read to be skipped.
+ """
+ request = MagicMock(
+ url='https://img.site.com/base/img.jpg',
+ headers=headers,
+ )
+
+ actions = CacheActions.from_request(
+ cache_key='key',
+ cache_control=cache_control,
+ request=request,
+ session_expire_after=expire_after,
+ )
+ assert actions.expire_after == expected_expiration
+ assert actions.skip_read == expected_skip_read
+
+
+@pytest.mark.parametrize(
+ 'response_headers, expected_validation_headers',
[
({}, {}),
({'ETag': ETAG}, {'If-None-Match': ETAG}),
@@ -136,7 +171,7 @@ def test_init_from_settings(url, request_expire_after, expected_expiration):
),
],
)
-def test_update_from_cached_response(response_headers, expected_request_headers):
+def test_update_from_cached_response(response_headers, expected_validation_headers):
"""Test that conditional request headers are added if the cached response is expired"""
actions = CacheActions.from_request(
cache_key='key',
@@ -145,7 +180,7 @@ def test_update_from_cached_response(response_headers, expected_request_headers)
cached_response = CachedResponse(headers=response_headers, expires=datetime.now() - timedelta(1))
actions.update_from_cached_response(cached_response)
- assert actions.request_headers == expected_request_headers
+ assert actions.validation_headers == expected_validation_headers
def test_update_from_cached_response__ignored():
@@ -159,7 +194,7 @@ def test_update_from_cached_response__ignored():
)
actions.update_from_cached_response(cached_response)
- assert actions.request_headers == {}
+ assert actions.validation_headers == {}
@pytest.mark.parametrize(
@@ -203,6 +238,21 @@ def test_update_from_response__ignored():
assert actions.expire_after is None
+@patch('requests_cache.cache_control.datetime')
+def test_update_from_response__revalidate(mock_datetime):
+ """If expiration is 0 and there's a validator the response should be cached and revalidated"""
+ url = 'https://img.site.com/base/img.jpg'
+ actions = CacheActions.from_request(
+ cache_key='key',
+ request=MagicMock(url=url),
+ cache_control=True,
+ )
+ actions.update_from_response(
+ MagicMock(url=url, headers={'Cache-Control': 'max-age=0', 'ETag': ETAG})
+ )
+ assert actions.expire_after == mock_datetime.utcnow()
+
+
@pytest.mark.parametrize('directive', IGNORED_DIRECTIVES)
def test_ignored_headers(directive):
"""Ensure that currently unimplemented Cache-Control headers do not affect behavior"""