From 94331ef108ad160faddb48cfc6a259c8a2497c99 Mon Sep 17 00:00:00 2001 From: Simon Leiner Date: Tue, 14 Mar 2023 18:23:40 +0100 Subject: Allow regexes for URL expiration patterns This allows for more fine-grained control over URL patterns than globbing in the rare cases where that is needed. --- requests_cache/policy/__init__.py | 8 ++++++-- requests_cache/policy/expiration.py | 19 ++++++++++++++----- requests_cache/policy/settings.py | 4 ++-- 3 files changed, 22 insertions(+), 9 deletions(-) (limited to 'requests_cache') diff --git a/requests_cache/policy/__init__.py b/requests_cache/policy/__init__.py index dbd5bab..48b384a 100644 --- a/requests_cache/policy/__init__.py +++ b/requests_cache/policy/__init__.py @@ -4,12 +4,16 @@ additional settings and features specific to requests-cache. # flake8: noqa: E402,F401 # isort: skip_file from datetime import datetime, timedelta -from typing import Callable, Dict, Union, MutableMapping +from typing import Callable, Dict, Pattern as RegexPattern, Union, MutableMapping from requests import Response ExpirationTime = Union[None, int, float, str, datetime, timedelta] -ExpirationPatterns = Dict[str, ExpirationTime] +ExpirationPattern = Union[ # Either a glob expression as str or a compiled regex pattern + str, + RegexPattern, +] +ExpirationPatterns = Dict[ExpirationPattern, ExpirationTime] FilterCallback = Callable[[Response], bool] KeyCallback = Callable[..., str] HeaderDict = MutableMapping[str, str] diff --git a/requests_cache/policy/expiration.py b/requests_cache/policy/expiration.py index 1041e36..1350951 100644 --- a/requests_cache/policy/expiration.py +++ b/requests_cache/policy/expiration.py @@ -5,9 +5,10 @@ from fnmatch import fnmatch from logging import getLogger from math import ceil from typing import Optional +from typing import Pattern as RegexPattern from .._utils import try_int -from . import ExpirationPatterns, ExpirationTime +from . import ExpirationPattern, ExpirationPatterns, ExpirationTime # Special expiration values that may be set by either headers or keyword args DO_NOT_CACHE = 0x0D0E0200020704 # Per RFC 4824 @@ -89,7 +90,7 @@ def _to_utc(dt: datetime): return dt -def _url_match(url: str, pattern: str) -> bool: +def _url_match(url: str, pattern: ExpirationPattern) -> bool: """Determine if a URL matches a pattern Args: @@ -103,7 +104,15 @@ def _url_match(url: str, pattern: str) -> bool: True >>> url_match('https://httpbin.org/stream/2', 'httpbin.org/*/1') False + >>> url_match('https://httpbin.org/stream/2', re.compile('httpbin.org/*/\\d+')) + True + >>> url_match('https://httpbin.org/stream/x', re.compile('httpbin.org/*/\\d+')) + False """ - url = url.split('://')[-1] - pattern = pattern.split('://')[-1].rstrip('*') + '**' - return fnmatch(url, pattern) + if isinstance(pattern, RegexPattern): + match = pattern.search(url) + return match is not None + else: + url = url.split('://')[-1] + pattern = pattern.split('://')[-1].rstrip('*') + '**' + return fnmatch(url, pattern) diff --git a/requests_cache/policy/settings.py b/requests_cache/policy/settings.py index 8e8e26b..7c4dce8 100644 --- a/requests_cache/policy/settings.py +++ b/requests_cache/policy/settings.py @@ -4,7 +4,7 @@ from attr import define, field from .._utils import get_valid_kwargs from ..models import RichMixin -from . import ExpirationTime, FilterCallback, KeyCallback +from . import ExpirationPattern, ExpirationTime, FilterCallback, KeyCallback ALL_METHODS = ('GET', 'HEAD', 'OPTIONS', 'POST', 'PUT', 'PATCH', 'DELETE') DEFAULT_CACHE_NAME = 'http_cache' @@ -36,7 +36,7 @@ class CacheSettings(RichMixin): only_if_cached: bool = field(default=False) stale_if_error: Union[bool, ExpirationTime] = field(default=False) stale_while_revalidate: Union[bool, ExpirationTime] = field(default=False) - urls_expire_after: Dict[str, ExpirationTime] = field(factory=dict) + urls_expire_after: Dict[ExpirationPattern, ExpirationTime] = field(factory=dict) @classmethod def from_kwargs(cls, **kwargs): -- cgit v1.2.1