summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJordan Cook <jordan.cook@pioneer.com>2021-10-13 15:22:30 -0500
committerJordan Cook <jordan.cook@pioneer.com>2021-10-13 16:32:51 -0500
commit7ed709ae1b737b1d28c2bf56ce8cc1eb0d3c7b7f (patch)
treec477dc04055ffddff0df9e83fdc8835842d86ec9
parent73cc36133b7eb3d4f714b2199042d7b6fc6bd902 (diff)
downloadrequests-cache-7ed709ae1b737b1d28c2bf56ce8cc1eb0d3c7b7f.tar.gz
Fix immediate expiration with Expires: 0 and reorganize cache header logic a bit
-rw-r--r--requests_cache/cache_control.py54
-rw-r--r--tests/integration/base_cache_test.py11
-rw-r--r--tests/unit/test_cache_control.py29
3 files changed, 41 insertions, 53 deletions
diff --git a/requests_cache/cache_control.py b/requests_cache/cache_control.py
index 2cad7b4..604820f 100644
--- a/requests_cache/cache_control.py
+++ b/requests_cache/cache_control.py
@@ -90,19 +90,18 @@ class CacheActions:
session_expire_after,
)
- # Check conditions for reading from the cache, based on request headers and possibly
- # expire_after options. With cache_control=True, don't consider expire_after options, since
- # these can be overridden by response headers.
+ # Check conditions for caching based on request headers. Also check expire_after options
+ # unless cache_control=True, in which case these may be overridden by response headers.
check_expiration = directives.get('max-age') if cache_control else expire_after
- do_not_cache = check_expiration == DO_NOT_CACHE or 'no-store' in directives
+ skip_write = check_expiration == DO_NOT_CACHE or 'no-store' in directives
return cls(
cache_control=cache_control,
cache_key=cache_key,
expire_after=expire_after,
request_directives=directives,
- skip_read=do_not_cache or 'no-cache' in directives,
- skip_write=do_not_cache,
+ skip_read=skip_write or 'no-cache' in directives,
+ skip_write=skip_write,
)
@property
@@ -111,8 +110,8 @@ class CacheActions:
return get_expiration_datetime(self.expire_after)
def update_from_cached_response(self, response: CachedResponse):
- """Check for relevant cache headers on a cached response, and set corresponding request
- headers for a conditional request, if possible.
+ """Check for relevant cache headers from a cached response, and set headers for a
+ conditional request, if possible.
Used after fetching a cached response, but before potentially sending a new request
(if expired).
@@ -126,7 +125,7 @@ class CacheActions:
self.validation_headers['If-Modified-Since'] = response.headers['Last-Modified']
def update_from_response(self, response: Response):
- """Update expiration + actions based on response headers, if not previously set.
+ """Update expiration + actions based on headers from a new response.
Used after receiving a new response but before saving it to the cache.
"""
@@ -136,24 +135,17 @@ class CacheActions:
directives = get_cache_directives(response.headers)
logger.debug(f'Cache directives from response headers: {directives}')
- # Check expiration headers
+ # Check headers for expiration, validators, and other cache directives
self.expire_after = coalesce(
directives.get('max-age'), directives.get('expires'), self.expire_after
)
+ has_validator = response.headers.get('ETag') or response.headers.get('Last-Modified')
+ no_store = 'no-store' in directives or 'no-store' in self.request_directives
# If expiration is 0 and there's a validator, save it to the cache and revalidate on use
- has_validator = response.headers.get('ETag') or response.headers.get('Last-Modified')
- if self.expire_after == DO_NOT_CACHE and has_validator:
- self.expire_after = datetime.utcnow()
-
- # Check conditions for writing to the cache
- self.skip_write = any(
- [
- self.expire_after == DO_NOT_CACHE,
- 'no-store' in directives,
- self.skip_write,
- ]
- )
+ # Otherwise, skip writing to the cache if specified by expiration or other headers
+ expire_immediately = try_int(self.expire_after) == DO_NOT_CACHE
+ self.skip_write = (expire_immediately or no_store) and not has_validator
def coalesce(*values: Any, default=None) -> Any:
@@ -167,12 +159,11 @@ def get_expiration_datetime(expire_after: ExpirationTime) -> Optional[datetime]:
if expire_after is None or expire_after == -1:
return None
# Expire immediately
- elif expire_after == DO_NOT_CACHE:
+ elif try_int(expire_after) == DO_NOT_CACHE:
return datetime.utcnow()
# Already a datetime or datetime str
if isinstance(expire_after, str):
- expire_after = parse_http_date(expire_after)
- return to_utc(expire_after) if expire_after else None
+ return parse_http_date(expire_after)
elif isinstance(expire_after, datetime):
return to_utc(expire_after)
@@ -193,8 +184,10 @@ def get_cache_directives(headers: Mapping) -> Dict:
if not headers:
return {}
- cache_directives = headers.get('Cache-Control', '').split(',')
- kv_directives = dict([split_kv_directive(value) for value in cache_directives])
+ kv_directives = {}
+ if headers.get('Cache-Control'):
+ cache_directives = headers['Cache-Control'].split(',')
+ kv_directives = dict([split_kv_directive(value) for value in cache_directives])
if 'Expires' in headers:
kv_directives['expires'] = headers['Expires']
@@ -218,7 +211,8 @@ def get_url_expiration(
def parse_http_date(value: str) -> Optional[datetime]:
"""Attempt to parse an HTTP (RFC 5322-compatible) timestamp"""
try:
- return parsedate_to_datetime(value)
+ expire_after = parsedate_to_datetime(value)
+ return to_utc(expire_after)
except (TypeError, ValueError):
logger.debug(f'Failed to parse timestamp: {value}')
return None
@@ -246,8 +240,8 @@ def to_utc(dt: datetime):
return dt
-def try_int(value: Optional[str]) -> Optional[int]:
- """Convert a string value to an int, if possible, otherwise ``None``"""
+def try_int(value: Any) -> Optional[int]:
+ """Convert a value to an int, if possible, otherwise ``None``"""
return int(str(value)) if str(value).isnumeric() else None
diff --git a/tests/integration/base_cache_test.py b/tests/integration/base_cache_test.py
index 8f1c5f2..9f3e1b0 100644
--- a/tests/integration/base_cache_test.py
+++ b/tests/integration/base_cache_test.py
@@ -185,12 +185,14 @@ class BaseCacheTest:
response = session.get(httpbin('cache'))
assert response.from_cache == expected_from_cache
- def test_conditional_request__max_age_0(self):
+ @pytest.mark.parametrize('validator_headers', [{'ETag': ETAG}, {'Last-Modified': LAST_MODIFIED}])
+ @pytest.mark.parametrize('cache_headers', [{'Cache-Control': 'max-age=0'}, {'Expires': '0'}])
+ def test_conditional_request__max_age_0(self, cache_headers, validator_headers):
"""With both max-age=0 and a validator, the response should be saved and revalidated on next
request
"""
url = httpbin('response-headers')
- response_headers = {'Cache-Control': 'max-age=0', 'ETag': ETAG}
+ response_headers = {**cache_headers, **validator_headers}
session = self.init_session(cache_control=True)
# This endpoint returns request params as response headers
@@ -203,11 +205,6 @@ class BaseCacheTest:
assert response.from_cache is True
assert response.is_expired is True
- # cache_key = list(session.cache.responses.keys())[0]
- # response = session.cache.responses[cache_key]
- # response.request.url = httpbin(f'etag/{ETAG}')
- # response = session.get(httpbin('response-headers'), params=response_headers)
-
@pytest.mark.parametrize('stream', [True, False])
def test_response_decode(self, stream):
"""Test that gzip-compressed raw responses (including streamed responses) can be manually
diff --git a/tests/unit/test_cache_control.py b/tests/unit/test_cache_control.py
index 2388f3d..cb8ea96 100644
--- a/tests/unit/test_cache_control.py
+++ b/tests/unit/test_cache_control.py
@@ -221,6 +221,7 @@ def test_update_from_response(headers, expected_expiration):
actions.update_from_response(MagicMock(url=url, headers=headers))
if expected_expiration == DO_NOT_CACHE:
+ assert not actions.expire_after # May be either 0 or None
assert actions.skip_write is True
else:
assert actions.expire_after == expected_expiration
@@ -229,28 +230,24 @@ def test_update_from_response(headers, expected_expiration):
def test_update_from_response__ignored():
url = 'https://img.site.com/base/img.jpg'
- actions = CacheActions.from_request(
- cache_key='key',
- request=MagicMock(url=url),
- cache_control=False,
- )
+ actions = CacheActions.from_request(cache_key='key', request=MagicMock(url=url), cache_control=False)
actions.update_from_response(MagicMock(url=url, headers={'Cache-Control': 'max-age=5'}))
assert actions.expire_after is None
+@pytest.mark.parametrize('validator_headers', [{'ETag': ETAG}, {'Last-Modified': LAST_MODIFIED}])
+@pytest.mark.parametrize('cache_headers', [{'Cache-Control': 'max-age=0'}, {'Expires': '0'}])
@patch('requests_cache.cache_control.datetime')
-def test_update_from_response__revalidate(mock_datetime):
- """If expiration is 0 and there's a validator the response should be cached and revalidated"""
+def test_update_from_response__revalidate(mock_datetime, cache_headers, validator_headers):
+ """If expiration is 0 and there's a validator, the response should be cached, but with immediate
+ expiration
+ """
url = 'https://img.site.com/base/img.jpg'
- actions = CacheActions.from_request(
- cache_key='key',
- request=MagicMock(url=url),
- cache_control=True,
- )
- actions.update_from_response(
- MagicMock(url=url, headers={'Cache-Control': 'max-age=0', 'ETag': ETAG})
- )
- assert actions.expire_after == mock_datetime.utcnow()
+ headers = {**cache_headers, **validator_headers}
+ actions = CacheActions.from_request(cache_key='key', request=MagicMock(url=url), cache_control=True)
+ actions.update_from_response(MagicMock(url=url, headers=headers))
+ assert actions.expires == mock_datetime.utcnow()
+ assert actions.skip_write is False
@pytest.mark.parametrize('directive', IGNORED_DIRECTIVES)