summaryrefslogtreecommitdiff
path: root/lib/sqlalchemy
diff options
context:
space:
mode:
authorMike Bayer <mike_mp@zzzcomputing.com>2014-08-29 13:35:03 -0400
committerMike Bayer <mike_mp@zzzcomputing.com>2014-08-29 13:35:03 -0400
commit3fb9aed2ccc7802a8706471ae2e50e1b3afb6d79 (patch)
treebfcc508a01b8c08afa4bbe3d8eb706e8416c90df /lib/sqlalchemy
parent9d25eae8eec8c090339a01c03445bdf04a0840f5 (diff)
downloadsqlalchemy-3fb9aed2ccc7802a8706471ae2e50e1b3afb6d79.tar.gz
- add some more docs to yield_per
Diffstat (limited to 'lib/sqlalchemy')
-rw-r--r--lib/sqlalchemy/orm/query.py56
1 file changed, 38 insertions, 18 deletions
diff --git a/lib/sqlalchemy/orm/query.py b/lib/sqlalchemy/orm/query.py
index 61fbd1be8..a3711db6a 100644
--- a/lib/sqlalchemy/orm/query.py
+++ b/lib/sqlalchemy/orm/query.py
@@ -705,24 +705,44 @@ class Query(object):
def yield_per(self, count):
"""Yield only ``count`` rows at a time.
- WARNING: use this method with caution; if the same instance is present
- in more than one batch of rows, end-user changes to attributes will be
- overwritten.
-
- In particular, it's usually impossible to use this setting with
- eagerly loaded collections (i.e. any lazy='joined' or 'subquery')
- since those collections will be cleared for a new load when
- encountered in a subsequent result batch. In the case of 'subquery'
- loading, the full result for all rows is fetched which generally
- defeats the purpose of :meth:`~sqlalchemy.orm.query.Query.yield_per`.
-
- Also note that while :meth:`~sqlalchemy.orm.query.Query.yield_per`
- will set the ``stream_results`` execution option to True, currently
- this is only understood by
- :mod:`~sqlalchemy.dialects.postgresql.psycopg2` dialect which will
- stream results using server side cursors instead of pre-buffer all
- rows for this query. Other DBAPIs pre-buffer all rows before making
- them available.
+ The purpose of this method is when fetching very large result sets
+ (> 10K rows), to batch results in sub-collections and yield them
+ out partially, so that the Python interpreter doesn't need to allocate
+ very large areas of memory, which is both time-consuming and leads
+ to excessive memory use. The performance from fetching hundreds of
+ thousands of rows can often double when a suitable yield-per setting
+ (e.g. approximately 1000) is used, even with DBAPIs that buffer
+ rows (as most do).
+
+ The :meth:`.yield_per` method **is not compatible with most
+ eager loading schemes, including joinedload and subqueryload**.
+ See the warning below.
+
+ .. warning::
+
+ Use this method with caution; if the same instance is
+ present in more than one batch of rows, end-user changes
+ to attributes will be overwritten.
+
+ In particular, it's usually impossible to use this setting
+ with eagerly loaded collections (i.e. any lazy='joined' or
+ 'subquery') since those collections will be cleared for a
+ new load when encountered in a subsequent result batch.
+ In the case of 'subquery' loading, the full result for all
+ rows is fetched which generally defeats the purpose of
+ :meth:`~sqlalchemy.orm.query.Query.yield_per`.
+
+ Also note that while
+ :meth:`~sqlalchemy.orm.query.Query.yield_per` will set the
+ ``stream_results`` execution option to True, currently
+ this is only understood by
+ :mod:`~sqlalchemy.dialects.postgresql.psycopg2` dialect
+ which will stream results using server side cursors
+ instead of pre-buffering all rows for this query. Other
+ DBAPIs **pre-buffer all rows** before making them
+ available. The memory use of raw database rows is much less
+ than that of an ORM-mapped object, but should still be taken into
+ consideration when benchmarking.
"""
self._yield_per = count