diff options
| author | Mike Bayer <mike_mp@zzzcomputing.com> | 2014-08-29 13:35:03 -0400 |
|---|---|---|
| committer | Mike Bayer <mike_mp@zzzcomputing.com> | 2014-08-29 13:35:03 -0400 |
| commit | 3fb9aed2ccc7802a8706471ae2e50e1b3afb6d79 (patch) | |
| tree | bfcc508a01b8c08afa4bbe3d8eb706e8416c90df /lib/sqlalchemy | |
| parent | 9d25eae8eec8c090339a01c03445bdf04a0840f5 (diff) | |
| download | sqlalchemy-3fb9aed2ccc7802a8706471ae2e50e1b3afb6d79.tar.gz | |
- add some more docs to yield_per
Diffstat (limited to 'lib/sqlalchemy')
| -rw-r--r-- | lib/sqlalchemy/orm/query.py | 56 |
1 file changed, 38 insertions, 18 deletions
```diff
diff --git a/lib/sqlalchemy/orm/query.py b/lib/sqlalchemy/orm/query.py
index 61fbd1be8..a3711db6a 100644
--- a/lib/sqlalchemy/orm/query.py
+++ b/lib/sqlalchemy/orm/query.py
@@ -705,24 +705,44 @@ class Query(object):
     def yield_per(self, count):
         """Yield only ``count`` rows at a time.
 
-        WARNING: use this method with caution; if the same instance is present
-        in more than one batch of rows, end-user changes to attributes will be
-        overwritten.
-
-        In particular, it's usually impossible to use this setting with
-        eagerly loaded collections (i.e. any lazy='joined' or 'subquery')
-        since those collections will be cleared for a new load when
-        encountered in a subsequent result batch. In the case of 'subquery'
-        loading, the full result for all rows is fetched which generally
-        defeats the purpose of :meth:`~sqlalchemy.orm.query.Query.yield_per`.
-
-        Also note that while :meth:`~sqlalchemy.orm.query.Query.yield_per`
-        will set the ``stream_results`` execution option to True, currently
-        this is only understood by
-        :mod:`~sqlalchemy.dialects.postgresql.psycopg2` dialect which will
-        stream results using server side cursors instead of pre-buffer all
-        rows for this query. Other DBAPIs pre-buffer all rows before making
-        them available.
+        The purpose of this method is when fetching very large result sets
+        (> 10K rows), to batch results in sub-collections and yield them
+        out partially, so that the Python interpreter doesn't need to declare
+        very large areas of memory which is both time consuming and leads
+        to excessive memory use. The performance from fetching hundreds of
+        thousands of rows can often double when a suitable yield-per setting
+        (e.g. approximately 1000) is used, even with DBAPIs that buffer
+        rows (which are most).
+
+        The :meth:`.yield_per` method **is not compatible with most
+        eager loading schemes, including joinedload and subqueryload**.
+        See the warning below.
+
+        .. warning::
+
+            Use this method with caution; if the same instance is
+            present in more than one batch of rows, end-user changes
+            to attributes will be overwritten.
+
+            In particular, it's usually impossible to use this setting
+            with eagerly loaded collections (i.e. any lazy='joined' or
+            'subquery') since those collections will be cleared for a
+            new load when encountered in a subsequent result batch.
+            In the case of 'subquery' loading, the full result for all
+            rows is fetched which generally defeats the purpose of
+            :meth:`~sqlalchemy.orm.query.Query.yield_per`.
+
+            Also note that while
+            :meth:`~sqlalchemy.orm.query.Query.yield_per` will set the
+            ``stream_results`` execution option to True, currently
+            this is only understood by
+            :mod:`~sqlalchemy.dialects.postgresql.psycopg2` dialect
+            which will stream results using server side cursors
+            instead of pre-buffer all rows for this query. Other
+            DBAPIs **pre-buffer all rows** before making them
+            available. The memory use of raw database rows is much less
+            than that of an ORM-mapped object, but should still be taken into
+            consideration when benchmarking.
 
         """
         self._yield_per = count
```
