diff options
author | Jordan Cook <jordan.cook@pioneer.com> | 2022-04-11 21:03:50 -0500 |
---|---|---|
committer | Jordan Cook <jordan.cook@pioneer.com> | 2022-04-16 21:08:42 -0500 |
commit | d6ee9143965d53dae44ca3a98802b2cc7ad6eeb7 (patch) | |
tree | 96e38c3f7289a0ff3a46c23df3f1e5b3b7c1a940 /docs | |
parent | 166f5690fb8d5b067f839fa8ffb9421cf1b8a7e7 (diff) | |
download | requests-cache-d6ee9143965d53dae44ca3a98802b2cc7ad6eeb7.tar.gz |
Move detailed backend docs from rst docstrings to md files
Diffstat (limited to 'docs')
-rw-r--r-- | docs/api/requests_cache.backends.base.md | 8 | ||||
-rw-r--r-- | docs/api/requests_cache.backends.dynamodb.md | 49 | ||||
-rw-r--r-- | docs/api/requests_cache.backends.filesystem.md | 56 | ||||
-rw-r--r-- | docs/api/requests_cache.backends.gridfs.md | 23 | ||||
-rw-r--r-- | docs/api/requests_cache.backends.mongodb.md | 105 | ||||
-rw-r--r-- | docs/api/requests_cache.backends.redis.md | 66 | ||||
-rw-r--r-- | docs/api/requests_cache.backends.sqlite.md | 90 | ||||
-rw-r--r-- | docs/api/requests_cache.session.md | 17 | ||||
-rw-r--r-- | docs/conf.py | 46 | ||||
-rw-r--r-- | docs/reference.md | 7 | ||||
-rw-r--r-- | docs/session.rst | 3 |
11 files changed, 456 insertions, 14 deletions
diff --git a/docs/api/requests_cache.backends.base.md b/docs/api/requests_cache.backends.base.md new file mode 100644 index 0000000..f4601a8 --- /dev/null +++ b/docs/api/requests_cache.backends.base.md @@ -0,0 +1,8 @@ +# Base +```{eval-rst} +.. automodule:: requests_cache.backends.base + :members: + :undoc-members: + :inherited-members: + :show-inheritance: +```
\ No newline at end of file diff --git a/docs/api/requests_cache.backends.dynamodb.md b/docs/api/requests_cache.backends.dynamodb.md new file mode 100644 index 0000000..a4fcb23 --- /dev/null +++ b/docs/api/requests_cache.backends.dynamodb.md @@ -0,0 +1,49 @@ +# DynamoDB +```{image} ../_static/dynamodb.png +``` + +[DynamoDB](https://aws.amazon.com/dynamodb) is a highly scalable NoSQL document database hosted on +[Amazon Web Services](https://aws.amazon.com). + +## Use Cases +In terms of features, DynamoDB is roughly comparable to MongoDB and other NoSQL databases. It is a +fully managed service, making it very convenient to use if you are already on AWS. It is an +especially good fit for serverless applications running on +[AWS Lambda](https://aws.amazon.com/lambda). + +```{warning} +DynamoDB item sizes are limited to 400KB. If you need to cache larger responses, consider +using a different backend. +``` + +## Creating Tables +Tables will be automatically created if they don't already exist. This is convenient if you just +want to quickly test out DynamoDB as a cache backend, but in a production environment you will +likely want to create the tables yourself, for example with [CloudFormation](https://aws.amazon.com/cloudformation/) or [Terraform](https://www.terraform.io/). Here are the +details you'll need: + +- Tables: two tables, named `responses` and `redirects` +- Partition key (aka namespace): `namespace` +- Range key (aka sort key): `key` +- Attributes: `namespace` (string) and `key` (string) + +## Connection Options +The DynamoDB backend accepts any keyword arguments for {py:meth}`boto3.session.Session.resource`. +These can be passed via {py:class}`.DynamoDbCache`: +```python +>>> backend = DynamoDbCache(region_name='us-west-2') +>>> session = CachedSession('http_cache', backend=backend) +``` + +## API Reference +```{eval-rst} +.. automodsumm:: requests_cache.backends.dynamodb + :classes-only: + :nosignatures: + +.. 
automodule:: requests_cache.backends.dynamodb + :members: + :undoc-members: + :inherited-members: + :show-inheritance: +``` diff --git a/docs/api/requests_cache.backends.filesystem.md b/docs/api/requests_cache.backends.filesystem.md new file mode 100644 index 0000000..f1137b0 --- /dev/null +++ b/docs/api/requests_cache.backends.filesystem.md @@ -0,0 +1,56 @@ +# Filesystem +```{image} ../_static/files-generic.png +``` + +This backend stores responses in files on the local filesystem, with one file per response. + +## Use Cases +This backend is useful if you would like to use your cached response data outside of requests-cache, +for example: + +- Manually viewing cached responses without the need for extra tools (e.g., with a simple text editor) +- Using cached responses as sample data for automated tests +- Reading cached responses directly from another application or library, without depending on requests-cache + +## File Formats +By default, responses are saved as pickle files. If you want to save responses in a human-readable +format, you can use one of the other available {ref}`serializers`. 
For example, to save responses as +JSON files: +```python +>>> session = CachedSession('~/http_cache', backend='filesystem', serializer='json') +>>> session.get('https://httpbin.org/get') +>>> print(list(session.cache.paths())) +> ['/home/user/http_cache/4dc151d95200ec.json'] +``` + +Or as YAML (requires `pyyaml`): +```python +>>> session = CachedSession('~/http_cache', backend='filesystem', serializer='yaml') +>>> session.get('https://httpbin.org/get') +>>> print(list(session.cache.paths())) +> ['/home/user/http_cache/4dc151d95200ec.yaml'] +``` + +## Cache Files +- See {ref}`files` for general info on specifying cache paths +- The path for a given response will be in the format `<cache_name>/<cache_key>` +- Redirects are stored in a separate SQLite database, located at `<cache_name>/redirects.sqlite` +- Use {py:meth}`.FileCache.paths` to get a list of all cached response paths + +## Performance and Limitations +- Write performance will vary based on the serializer used, in the range of roughly 1-3ms per write. +- This backend stores response files in a single directory, and does not currently implement fan-out. This means that on most filesystems, storing a very large number of responses will result in reduced performance. +- This backend currently uses a simple threading lock rather than a file lock system, so it is not an ideal choice for highly parallel applications. + +## API Reference +```{eval-rst} +.. automodsumm:: requests_cache.backends.filesystem + :classes-only: + :nosignatures: + +.. 
automodule:: requests_cache.backends.filesystem + :members: + :undoc-members: + :inherited-members: + :show-inheritance: +``` diff --git a/docs/api/requests_cache.backends.gridfs.md b/docs/api/requests_cache.backends.gridfs.md new file mode 100644 index 0000000..18a2583 --- /dev/null +++ b/docs/api/requests_cache.backends.gridfs.md @@ -0,0 +1,23 @@ +# GridFS +```{image} ../_static/mongodb.png +``` + +[GridFS](https://docs.mongodb.com/manual/core/gridfs/) is a specification for storing large files +in MongoDB. + +## Use Cases +Use this backend if you are using MongoDB and expect to store responses **larger than 16MB**. See +{py:mod}`~requests_cache.backends.mongodb` for more general info. + +## API Reference +```{eval-rst} +.. automodsumm:: requests_cache.backends.gridfs + :classes-only: + :nosignatures: + +.. automodule:: requests_cache.backends.gridfs + :members: + :undoc-members: + :inherited-members: + :show-inheritance: +``` diff --git a/docs/api/requests_cache.backends.mongodb.md b/docs/api/requests_cache.backends.mongodb.md new file mode 100644 index 0000000..5b643d9 --- /dev/null +++ b/docs/api/requests_cache.backends.mongodb.md @@ -0,0 +1,105 @@ +# MongoDB +```{image} ../_static/mongodb.png +``` + +[MongoDB](https://www.mongodb.com) is a NoSQL document database. It stores data in collections +of documents, which are more flexible and less strictly structured than tables in a relational +database. + +## Use Cases +MongoDB scales well and is a good option for larger applications. For raw caching performance, it is +not quite as fast as {py:mod}`~requests_cache.backends.redis`, but may be preferable if you already +have an instance running, or if it has a specific feature you want to use. See below for some +relevant examples. 
+ +## Viewing Responses +Unlike most of the other backends, response data can be easily viewed via the +[MongoDB shell](https://www.mongodb.com/docs/mongodb-shell/#mongodb-binary-bin.mongosh), +[Compass](https://www.mongodb.com/products/compass), or any other interface for MongoDB. This is +possible because its internal document format ([BSON](https://www.mongodb.com/json-and-bson)) +supports all the types needed to store a response as a plain document rather than a fully serialized +blob. + +Here is an example response viewed in +[MongoDB for VSCode](https://code.visualstudio.com/docs/azure/mongodb): + +:::{admonition} Screenshot +:class: toggle + +```{image} ../_static/mongodb_vscode.png +``` +::: + +## Expiration +MongoDB [natively supports TTL](https://www.mongodb.com/docs/v4.0/core/index-ttl), and can +automatically remove expired responses from the cache. + +**Notes:** +- TTL is set for a whole collection, and cannot be set on a per-document basis. +- It will persist until explicitly removed or overwritten, or if the collection is deleted. +- Expired items are + [not guaranteed to be removed immediately](https://www.mongodb.com/docs/v4.0/core/index-ttl/#timing-of-the-delete-operation). + Typically it happens within 60 seconds. +- If you want, you can rely entirely on MongoDB TTL instead of requests-cache + {ref}`expiration settings <expiration>`. +- Or you can set both values, to be certain that you don't get an expired response before MongoDB + removes it. +- If you intend to reuse expired responses, e.g. with {ref}`conditional-requests` or `stale_if_error`, + you can set TTL to a larger value than your session `expire_after`, or disable it altogether. 
+ +**Examples:** +Create a TTL index: +```python +>>> backend = MongoCache() +>>> backend.set_ttl(3600) +``` + +Overwrite it with a new value: +```python +>>> backend = MongoCache() +>>> backend.set_ttl(timedelta(days=1), overwrite=True) +``` + +Remove the TTL index: +```python +>>> backend = MongoCache() +>>> backend.set_ttl(None, overwrite=True) +``` + +Use both MongoDB TTL and requests-cache expiration: +```python +>>> ttl = timedelta(days=1) +>>> backend = MongoCache() +>>> backend.set_ttl(ttl) +>>> session = CachedSession(backend=backend, expire_after=ttl) +``` + +**Recommended:** Set MongoDB TTL to a longer value than your {py:class}`.CachedSession` expiration. +This allows expired responses to be eventually cleaned up, but still be reused for conditional +requests for some period of time: +```python +>>> backend = MongoCache() +>>> backend.set_ttl(timedelta(days=7)) +>>> session = CachedSession(backend=backend, expire_after=timedelta(days=1)) +``` + +## Connection Options +The MongoDB backend accepts any keyword arguments for {py:class}`pymongo.mongo_client.MongoClient`. +These can be passed via {py:class}`.MongoCache`: +```python +>>> backend = MongoCache(host='192.168.1.63', port=27017) +>>> session = CachedSession('http_cache', backend=backend) +``` + +## API Reference +```{eval-rst} +.. automodsumm:: requests_cache.backends.mongodb + :classes-only: + :nosignatures: + +.. automodule:: requests_cache.backends.mongodb + :members: + :undoc-members: + :inherited-members: + :show-inheritance: +``` diff --git a/docs/api/requests_cache.backends.redis.md b/docs/api/requests_cache.backends.redis.md new file mode 100644 index 0000000..7e6f8a7 --- /dev/null +++ b/docs/api/requests_cache.backends.redis.md @@ -0,0 +1,66 @@ +# Redis +```{image} ../_static/redis.png +``` + +[Redis](https://redis.io) is an in-memory data store with on-disk persistence. 
+ +## Use Cases +Redis offers a high-performance cache that scales exceptionally well, making it an ideal choice for +larger applications, especially those that make a large volume of concurrent requests. + +## Persistence +Redis operates on data in memory, and by default also persists data to snapshots on disk. This is +optimized for performance, with a minor risk of data loss, and is usually the best configuration +for a cache. If you need different behavior, the frequency and type of persistence can be customized +or disabled entirely. See [Redis Persistence](https://redis.io/topics/persistence) for details. + +## Expiration +Redis natively supports TTL on a per-key basis, and can automatically remove expired responses from +the cache. This will be set by default, according to normal {ref}`expiration settings <expiration>`. + +If you intend to reuse expired responses, e.g. with {ref}`conditional-requests` or `stale_if_error`, +you can disable this behavior with the `ttl` argument: +```python +>>> backend = RedisCache(ttl=False) +``` + +## Connection Options +The Redis backend accepts any keyword arguments for {py:class}`redis.client.Redis`. These can be +passed via {py:class}`.RedisCache`: +```python +>>> backend = RedisCache(host='192.168.1.63', port=6379) +>>> session = CachedSession('http_cache', backend=backend) +``` + +Or you can pass an existing `Redis` object: +```python +>>> from redis import Redis +>>> connection = Redis(host='192.168.1.63', port=6379) +>>> backend = RedisCache(connection=connection) +>>> session = CachedSession('http_cache', backend=backend) +``` + +## Redislite +If you can't easily set up your own Redis server, another option is +[redislite](https://github.com/yahoo/redislite). It contains its own lightweight, embedded Redis +database, and can be used as a drop-in replacement for redis-py. 
Usage example: +```python +>>> from redislite import Redis +>>> from requests_cache import CachedSession, RedisCache + +>>> backend = RedisCache(connection=Redis()) +>>> session = CachedSession(backend=backend) +``` + +## API Reference +```{eval-rst} +.. automodsumm:: requests_cache.backends.redis + :classes-only: + :nosignatures: + +.. automodule:: requests_cache.backends.redis + :members: + :undoc-members: + :inherited-members: + :show-inheritance: +``` diff --git a/docs/api/requests_cache.backends.sqlite.md b/docs/api/requests_cache.backends.sqlite.md new file mode 100644 index 0000000..eea2407 --- /dev/null +++ b/docs/api/requests_cache.backends.sqlite.md @@ -0,0 +1,90 @@ +# SQLite +```{image} ../_static/sqlite.png +``` + +[SQLite](https://www.sqlite.org/) is a fast and lightweight SQL database engine that stores data +either in memory or in a single file on disk. + +## Use Cases +Despite its simplicity, SQLite is a powerful tool. For example, it's the primary storage system for +a number of common applications including Dropbox, Firefox, and Chrome. It's well suited for +caching, and requires no extra configuration or dependencies, which is why it's the default backend +for requests-cache. + +## Cache Files +- See {ref}`files` for general info on specifying cache paths +- If you specify a name without an extension, the default extension `.sqlite` will be used + +### In-Memory Caching +SQLite also supports [in-memory databases](https://www.sqlite.org/inmemorydb.html). 
+You can enable this (in "shared" memory mode) with the `use_memory` option: +```python +>>> session = CachedSession('http_cache', use_memory=True) +``` + +Or specify a memory URI with additional options: +```python +>>> session = CachedSession(':file:memdb1?mode=memory') +``` + +Or just `:memory:`, if you are only using the cache from a single thread: +```python +>>> session = CachedSession(':memory:') +``` + +## Performance +When working with average-sized HTTP responses (\< 1MB) and using a modern SSD for file storage, you +can expect speeds of around: +- Write: 2-8ms +- Read: 0.2-0.6ms + +Of course, this will vary based on hardware specs, response size, and other factors. + +## Concurrency +SQLite supports concurrent access, so it is safe to use from a multi-threaded and/or multi-process +application. It supports unlimited concurrent reads. Writes, however, are queued and run in serial, +so if you need to make large volumes of concurrent requests, you may want to consider a different +backend that's specifically made for that kind of workload, like {py:class}`.RedisCache`. + +## Hosting Services and Filesystem Compatibility +There are some caveats to using SQLite with some hosting services, based on what kind of storage is +available: + +- NFS: + - SQLite may be used on a NFS, but is usually only safe to use from a single process at a time. + See the [SQLite FAQ](https://www.sqlite.org/faq.html#q5) for details. + - PythonAnywhere is one example of a host that uses NFS-backed storage. Using SQLite from a + multiprocess application will likely result in `sqlite3.OperationalError: database is locked`. +- Ephemeral storage: + - Heroku [explicitly disables SQLite](https://devcenter.heroku.com/articles/sqlite3) on its dynos. 
+ - AWS [EC2](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/InstanceStorage.html), + [Lambda (depending on configuration)](https://aws.amazon.com/blogs/compute/choosing-between-aws-lambda-data-storage-options-in-web-apps/), + and some other AWS services use ephemeral storage that only persists for the lifetime of the + instance. This is fine for short-term caching. For longer-term persistence, you can use an + [attached EBS volume](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ebs-attaching-volume.html). + +## Connection Options +The SQLite backend accepts any keyword arguments for {py:func}`sqlite3.connect`. These can be passed +via {py:class}`.CachedSession`: +```python +>>> session = CachedSession('http_cache', timeout=30) +``` + +Or via {py:class}`.SQLiteCache`: +```python +>>> backend = SQLiteCache('http_cache', timeout=30) +>>> session = CachedSession(backend=backend) +``` + +## API Reference +```{eval-rst} +.. automodsumm:: requests_cache.backends.sqlite + :classes-only: + :nosignatures: + +.. automodule:: requests_cache.backends.sqlite + :members: + :undoc-members: + :inherited-members: + :show-inheritance: +``` diff --git a/docs/api/requests_cache.session.md b/docs/api/requests_cache.session.md new file mode 100644 index 0000000..eb37f90 --- /dev/null +++ b/docs/api/requests_cache.session.md @@ -0,0 +1,17 @@ +# Session +```{eval-rst} +.. automodule:: requests_cache.session + +.. autosummary:: + :nosignatures: + + CachedSession + CacheMixin + +.. Show inherited method docs on CachedSession instead of CachedMixin +.. autoclass:: CachedSession + :show-inheritance: + :inherited-members: + +.. autoclass:: CacheMixin +``` diff --git a/docs/conf.py b/docs/conf.py index ed48e1b..03f58f2 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,22 +1,37 @@ -# requests-cache documentation build configuration file +"""requests-cache documentation build config. 
+ +Notes: + +* MyST-flavored markdown is used instead of rST for all user guide docs +* API reference docs are generated based on module docstrings +* Google-style docstrings are used throughout the project +* apidoc is used to generate source files for the majority of module docs +* The `api/` directory contains manually written docs for some modules +* The `_templates` directory contains some Sphinx templates that modify auto-generated sources +""" import os import sys -from os.path import abspath, dirname, join +from os.path import join +from pathlib import Path +from shutil import copy # Add project path sys.path.insert(0, os.path.abspath('..')) from requests_cache import __version__ # noqa: E402 -PROJECT_DIR = abspath(dirname(dirname(__file__))) -PACKAGE_DIR = join(PROJECT_DIR, 'requests_cache') -TEMPLATE_DIR = join(PROJECT_DIR, 'docs', '_templates') +DOCS_DIR = Path(__file__).parent.absolute() +PROJECT_DIR = DOCS_DIR.parent +PACKAGE_DIR = PROJECT_DIR / 'requests_cache' +TEMPLATE_DIR = DOCS_DIR / '_templates' +EXTRA_APIDOC_DIR = DOCS_DIR / 'api' +APIDOC_DIR = DOCS_DIR / 'modules' # General information about the project. 
project = 'requests-cache' needs_sphinx = '4.0' master_doc = 'index' -source_suffix = ['.rst', '.md'] +source_suffix = ['.md', '.rst'] version = release = __version__ html_static_path = ['_static'] exclude_patterns = ['_build'] @@ -51,8 +66,13 @@ myst_enable_extensions = [ 'smartquotes', ] -# Exclude auto-generated page for top-level __init__.py -exclude_patterns = ['_build', 'modules/requests_cache.rst'] +# Ignore auto-generated pages for which manually written docs exist +exclude_patterns = [ + '_build', + f'{APIDOC_DIR.stem}/requests_cache.rst', + f'{APIDOC_DIR.stem}/requests_cache.backends.*.rst', + f'{EXTRA_APIDOC_DIR.stem}/*', +] # Enable automatic links to other projects' Sphinx docs intersphinx_mapping = { @@ -87,8 +107,8 @@ autodoc_typehints = 'description' always_document_param_types = True # Use apidoc to auto-generate rst sources -apidoc_module_dir = PACKAGE_DIR -apidoc_output_dir = 'modules' +apidoc_module_dir = str(PACKAGE_DIR) +apidoc_output_dir = APIDOC_DIR.stem apidoc_excluded_paths = ['session.py'] apidoc_extra_args = [f'--templatedir={TEMPLATE_DIR}'] # Note: Must be an absolute path apidoc_module_first = True @@ -131,6 +151,7 @@ def setup(app): """Run some additional steps after the Sphinx builder is initialized""" app.add_css_file('collapsible_container.css') app.connect('builder-inited', patch_automodapi) + app.connect('builder-inited', copy_module_docs) def patch_automodapi(app): @@ -141,3 +162,8 @@ def patch_automodapi(app): from sphinx_automodapi.utils import find_mod_objs automodsumm.find_mod_objs = lambda *args: find_mod_objs(args[0], onlylocals=True) + + +def copy_module_docs(app): + for doc in EXTRA_APIDOC_DIR.iterdir(): + copy(doc, APIDOC_DIR) diff --git a/docs/reference.md b/docs/reference.md index 9d784b9..bc74488 100644 --- a/docs/reference.md +++ b/docs/reference.md @@ -11,12 +11,17 @@ from requests_cache import CachedSession, RedisCache, json_serializer ``` ::: +<!-- +TODO: +* move rst backend docs to md +* Copy/overwrite from 
extra_modules/ to modules/ +--> ## Primary Modules The following modules include the majority of the API relevant for most users: ```{toctree} :maxdepth: 2 -session +modules/requests_cache.session modules/requests_cache.patcher modules/requests_cache.backends modules/requests_cache.models diff --git a/docs/session.rst b/docs/session.rst deleted file mode 100644 index efbf260..0000000 --- a/docs/session.rst +++ /dev/null @@ -1,3 +0,0 @@ -Session -======= -.. automodule:: requests_cache.session |