summaryrefslogtreecommitdiff
path: root/lib/sqlalchemy
diff options
context:
space:
mode:
authorMike Bayer <mike_mp@zzzcomputing.com>2017-03-23 17:55:43 -0400
committerMike Bayer <mike_mp@zzzcomputing.com>2017-04-26 16:08:57 -0400
commit19d2424e05fadf66044d914b13a405cb212b4100 (patch)
tree351141593947112a00bed97a3d355a01136bf5ec /lib/sqlalchemy
parent029d0f75385298f8056c04eba1d2f9563126a8a6 (diff)
downloadsqlalchemy-19d2424e05fadf66044d914b13a405cb212b4100.tar.gz
Add selectin loading
Adding a new kind of relationship loader that is a cross between the "immediateload" and the "subquery" eager loader, using an IN criteria to load related items in bulk immediately after the lead query result is loaded. Change-Id: If13713fba9b465865aef8fd50b5b6b977fe3ef7d Fixes: #3944
Diffstat (limited to 'lib/sqlalchemy')
-rw-r--r--lib/sqlalchemy/orm/__init__.py2
-rw-r--r--lib/sqlalchemy/orm/loading.py59
-rw-r--r--lib/sqlalchemy/orm/query.py14
-rw-r--r--lib/sqlalchemy/orm/relationships.py7
-rw-r--r--lib/sqlalchemy/orm/strategies.py176
-rw-r--r--lib/sqlalchemy/orm/strategy_options.py56
6 files changed, 306 insertions, 8 deletions
diff --git a/lib/sqlalchemy/orm/__init__.py b/lib/sqlalchemy/orm/__init__.py
index 449173548..8d46324e8 100644
--- a/lib/sqlalchemy/orm/__init__.py
+++ b/lib/sqlalchemy/orm/__init__.py
@@ -240,6 +240,8 @@ lazyload = strategy_options.lazyload._unbound_fn
lazyload_all = strategy_options.lazyload_all._unbound_all_fn
subqueryload = strategy_options.subqueryload._unbound_fn
subqueryload_all = strategy_options.subqueryload_all._unbound_all_fn
+selectinload = strategy_options.selectinload._unbound_fn
+selectinload_all = strategy_options.selectinload_all._unbound_all_fn
immediateload = strategy_options.immediateload._unbound_fn
noload = strategy_options.noload._unbound_fn
raiseload = strategy_options.raiseload._unbound_fn
diff --git a/lib/sqlalchemy/orm/loading.py b/lib/sqlalchemy/orm/loading.py
index f749cdd1d..3733d50e1 100644
--- a/lib/sqlalchemy/orm/loading.py
+++ b/lib/sqlalchemy/orm/loading.py
@@ -18,6 +18,7 @@ from .. import util
from . import attributes, exc as orm_exc
from ..sql import util as sql_util
from . import strategy_options
+from . import path_registry
from .util import _none_set, state_str
from .base import _SET_DEFERRED_EXPIRED, _DEFER_FOR_STATE
@@ -31,6 +32,7 @@ def instances(query, cursor, context):
"""Return an ORM result as an iterator."""
context.runid = _new_runid()
+ context.post_load_paths = {}
filtered = query._has_mapper_entities
@@ -77,6 +79,10 @@ def instances(query, cursor, context):
rows = [keyed_tuple([proc(row) for proc in process])
for row in fetch]
+ for path, post_load in \
+ context.post_load_paths.items():
+ post_load.invoke(context, path)
+
if filtered:
rows = util.unique_list(rows, filter_fn)
@@ -346,6 +352,7 @@ def _instance_processor(
session_id = context.session.hash_key
version_check = context.version_check
runid = context.runid
+ post_load = PostLoad.for_context(context, load_path, only_load_props)
if refresh_state:
refresh_identity_key = refresh_state.key
@@ -452,6 +459,9 @@ def _instance_processor(
else:
state._commit_all(dict_, session_identity_map)
+ if post_load:
+ post_load.add_state(state, True)
+
else:
# partial population routines, for objects that were already
# in the Session, but a row matches them; apply eager loaders
@@ -475,6 +485,9 @@ def _instance_processor(
state._commit(dict_, to_load)
+ if post_load and context.invoke_all_eagers:
+ post_load.add_state(state, False)
+
return instance
if mapper.polymorphic_map and not _polymorphic_from and not refresh_state:
@@ -632,6 +645,52 @@ def _decorate_polymorphic_switch(
return polymorphic_instance
+class PostLoad(object):
+ """Track loaders and states for "post load" operations.
+
+ """
+ __slots__ = 'loaders', 'states', 'load_keys'
+
+ def __init__(self):
+ self.loaders = {}
+ self.states = util.OrderedDict()
+ self.load_keys = None
+
+ def add_state(self, state, overwrite):
+ self.states[state] = overwrite
+
+ def invoke(self, context, path):
+ if not self.states:
+ return
+ path = path_registry.PathRegistry.coerce(path)
+ for key, loader, arg, kw in self.loaders.values():
+ loader(
+ context, path, self.states.items(),
+ self.load_keys, *arg, **kw)
+ self.states.clear()
+
+ @classmethod
+ def for_context(cls, context, path, only_load_props):
+ pl = context.post_load_paths.get(path.path)
+ if pl is not None and only_load_props:
+ pl.load_keys = only_load_props
+ return pl
+
+ @classmethod
+ def path_exists(self, context, path, key):
+ return path.path in context.post_load_paths and \
+ key in context.post_load_paths[path.path].loaders
+
+ @classmethod
+ def callable_for_path(
+ cls, context, path, attr_key, loader_callable, *arg, **kw):
+ if path.path in context.post_load_paths:
+ pl = context.post_load_paths[path.path]
+ else:
+ pl = context.post_load_paths[path.path] = PostLoad()
+ pl.loaders[attr_key] = (attr_key, loader_callable, arg, kw)
+
+
def load_scalar_attributes(mapper, state, attribute_names):
"""initiate a column-based attribute refresh operation."""
diff --git a/lib/sqlalchemy/orm/query.py b/lib/sqlalchemy/orm/query.py
index 272ef77fb..a2f83818c 100644
--- a/lib/sqlalchemy/orm/query.py
+++ b/lib/sqlalchemy/orm/query.py
@@ -753,9 +753,14 @@ class Query(object):
(e.g. approximately 1000) is used, even with DBAPIs that buffer
rows (which are most).
- The :meth:`.Query.yield_per` method **is not compatible with most
- eager loading schemes, including subqueryload and joinedload with
- collections**. For this reason, it may be helpful to disable
+ The :meth:`.Query.yield_per` method **is not compatible
+ subqueryload eager loading or joinedload eager loading when
+ using collections**. It is potentially compatible with "select in"
+ eager loading, **provided the databse driver supports multiple,
+ independent cursors** (pysqlite and psycopg2 are known to work,
+ MySQL and SQL Server ODBC drivers do not).
+
+ Therefore in some cases, it may be helpful to disable
eager loads, either unconditionally with
:meth:`.Query.enable_eagerloads`::
@@ -4103,7 +4108,8 @@ class QueryContext(object):
'primary_columns', 'secondary_columns', 'eager_order_by',
'eager_joins', 'create_eager_joins', 'propagate_options',
'attributes', 'statement', 'from_clause', 'whereclause',
- 'order_by', 'labels', '_for_update_arg', 'runid', 'partials'
+ 'order_by', 'labels', '_for_update_arg', 'runid', 'partials',
+ 'post_load_paths'
)
def __init__(self, query):
diff --git a/lib/sqlalchemy/orm/relationships.py b/lib/sqlalchemy/orm/relationships.py
index 1005e7eeb..43f53aec5 100644
--- a/lib/sqlalchemy/orm/relationships.py
+++ b/lib/sqlalchemy/orm/relationships.py
@@ -527,6 +527,13 @@ class RelationshipProperty(StrategizedProperty):
a subquery of the original statement, for each collection
requested.
+ * ``selectin`` - items should be loaded "eagerly" as the parents
+ are loaded, using one or more additional SQL statements, which
+ issues a JOIN to the immediate parent object, specifying primary
+ key identifiers using an IN clause.
+
+ .. versionadded:: 1.2
+
* ``noload`` - no loading should occur at any time. This is to
support "write-only" attributes, or attributes which are
populated in some manner specific to the application.
diff --git a/lib/sqlalchemy/orm/strategies.py b/lib/sqlalchemy/orm/strategies.py
index 10131c80d..ddf5397fd 100644
--- a/lib/sqlalchemy/orm/strategies.py
+++ b/lib/sqlalchemy/orm/strategies.py
@@ -14,10 +14,10 @@ from ..sql import util as sql_util, visitors
from .. import sql
from . import (
attributes, interfaces, exc as orm_exc, loading,
- unitofwork, util as orm_util
+ unitofwork, util as orm_util, query
)
from .state import InstanceState
-from .util import _none_set
+from .util import _none_set, aliased
from . import properties
from .interfaces import (
LoaderStrategy, StrategizedProperty
@@ -1744,6 +1744,178 @@ class JoinedLoader(AbstractRelationshipLoader):
populators["eager"].append((self.key, load_scalar_from_joined_exec))
+@log.class_logger
+@properties.RelationshipProperty.strategy_for(lazy="selectin")
+class SelectInLoader(AbstractRelationshipLoader):
+ __slots__ = (
+ 'join_depth', '_parent_alias', '_in_expr', '_parent_pk_cols',
+ '_zero_idx'
+ )
+
+ _chunksize = 500
+
+ def __init__(self, parent, strategy_key):
+ super(SelectInLoader, self).__init__(parent, strategy_key)
+ self.join_depth = self.parent_property.join_depth
+ self._parent_alias = aliased(self.parent.class_)
+ pa_insp = inspect(self._parent_alias)
+ self._parent_pk_cols = pk_cols = [
+ pa_insp._adapt_element(col) for col in self.parent.primary_key]
+ if len(pk_cols) > 1:
+ self._in_expr = sql.tuple_(*pk_cols)
+ self._zero_idx = False
+ else:
+ self._in_expr = pk_cols[0]
+ self._zero_idx = True
+
+ def init_class_attribute(self, mapper):
+ self.parent_property.\
+ _get_strategy((("lazy", "select"),)).\
+ init_class_attribute(mapper)
+
+ def create_row_processor(
+ self, context, path, loadopt, mapper,
+ result, adapter, populators):
+ if not self.parent.class_manager[self.key].impl.supports_population:
+ raise sa_exc.InvalidRequestError(
+ "'%s' does not support object "
+ "population - eager loading cannot be applied." %
+ self
+ )
+
+ selectin_path = (
+ context.query._current_path or orm_util.PathRegistry.root) + path
+
+ if loading.PostLoad.path_exists(context, selectin_path, self.key):
+ return
+
+ path_w_prop = path[self.parent_property]
+ selectin_path_w_prop = selectin_path[self.parent_property]
+
+ # build up a path indicating the path from the leftmost
+ # entity to the thing we're subquery loading.
+ with_poly_info = path_w_prop.get(
+ context.attributes,
+ "path_with_polymorphic", None)
+
+ if with_poly_info is not None:
+ effective_entity = with_poly_info.entity
+ else:
+ effective_entity = self.mapper
+
+ if not path_w_prop.contains(context.attributes, "loader"):
+ if self.join_depth:
+ if selectin_path_w_prop.length / 2 > self.join_depth:
+ return
+ elif selectin_path_w_prop.contains_mapper(self.mapper):
+ return
+
+ loading.PostLoad.callable_for_path(
+ context, selectin_path, self.key,
+ self._load_for_path, effective_entity)
+
+ @util.dependencies("sqlalchemy.ext.baked")
+ def _load_for_path(
+ self, baked, context, path, states, load_only, effective_entity):
+
+ if load_only and self.key not in load_only:
+ return
+
+ our_states = [
+ (state.key[1], state, overwrite)
+ for state, overwrite in states
+ ]
+
+ pk_cols = self._parent_pk_cols
+ pa = self._parent_alias
+
+ q = baked.BakedQuery(
+ # TODO: use strategy-local cache
+ self.mapper._compiled_cache,
+ lambda session: session.query(
+ query.Bundle("pk", *pk_cols), effective_entity
+ )
+ )
+
+ q.add_criteria(
+ lambda q: q.select_from(pa).join(
+ getattr(pa,
+ self.parent_property.key).of_type(effective_entity)).
+ filter(
+ self._in_expr.in_(
+ sql.bindparam('primary_keys', expanding=True))
+ ).order_by(*pk_cols)
+ )
+
+ orig_query = context.query
+
+ q._add_lazyload_options(
+ orig_query._with_options, path[self.parent_property]
+ )
+
+ if orig_query._populate_existing:
+ q.add_criteria(
+ lambda q: q.populate_existing()
+ )
+
+ if self.parent_property.order_by:
+ def _setup_outermost_orderby(q):
+ # imitate the same method that
+ # subquery eager loading does it, looking for the
+ # adapted "secondary" table
+ eagerjoin = q._from_obj[0]
+ eager_order_by = \
+ eagerjoin._target_adapter.\
+ copy_and_process(
+ util.to_list(
+ self.parent_property.order_by
+ )
+ )
+ return q.order_by(*eager_order_by)
+
+ q.add_criteria(
+ _setup_outermost_orderby
+ )
+
+ uselist = self.uselist
+ _empty_result = () if uselist else None
+
+ while our_states:
+ chunk = our_states[0:self._chunksize]
+ our_states = our_states[self._chunksize:]
+
+ data = {
+ k: [vv[1] for vv in v]
+ for k, v in itertools.groupby(
+ q(context.session).params(
+ primary_keys=[
+ key[0] if self._zero_idx else key
+ for key, state, overwrite in chunk]
+ ),
+ lambda x: x[0]
+ )
+ }
+
+ for key, state, overwrite in chunk:
+ if not overwrite and self.key in state.dict:
+ continue
+
+ collection = data.get(key, _empty_result)
+
+ if not uselist and collection:
+ if len(collection) > 1:
+ util.warn(
+ "Multiple rows returned with "
+ "uselist=False for eagerly-loaded "
+ "attribute '%s' "
+ % self)
+ state.get_impl(self.key).set_committed_value(
+ state, state.dict, collection[0])
+ else:
+ state.get_impl(self.key).set_committed_value(
+ state, state.dict, collection)
+
+
def single_parent_validator(desc, prop):
def _do_check(state, value, oldvalue, initiator):
if value is not None and initiator.key == prop.key:
diff --git a/lib/sqlalchemy/orm/strategy_options.py b/lib/sqlalchemy/orm/strategy_options.py
index 423d3f558..df13f05db 100644
--- a/lib/sqlalchemy/orm/strategy_options.py
+++ b/lib/sqlalchemy/orm/strategy_options.py
@@ -900,11 +900,13 @@ def subqueryload(loadopt, attr):
query(User).options(subqueryload(User.orders))
# subquery-load Order.items and then Item.keywords
- query(Order).options(subqueryload(Order.items).subqueryload(Item.keywords))
+ query(Order).options(
+ subqueryload(Order.items).subqueryload(Item.keywords))
# lazily load Order.items, but when Items are loaded,
# subquery-load the keywords collection
- query(Order).options(lazyload(Order.items).subqueryload(Item.keywords))
+ query(Order).options(
+ lazyload(Order.items).subqueryload(Item.keywords))
.. seealso::
@@ -928,6 +930,50 @@ def subqueryload_all(*keys):
@loader_option()
+def selectinload(loadopt, attr):
+ """Indicate that the given attribute should be loaded using
+ SELECT IN eager loading.
+
+ This function is part of the :class:`.Load` interface and supports
+ both method-chained and standalone operation.
+
+ examples::
+
+ # selectin-load the "orders" collection on "User"
+ query(User).options(selectinload(User.orders))
+
+ # selectin-load Order.items and then Item.keywords
+ query(Order).options(
+ selectinload(Order.items).selectinload(Item.keywords))
+
+ # lazily load Order.items, but when Items are loaded,
+ # selectin-load the keywords collection
+ query(Order).options(
+ lazyload(Order.items).selectinload(Item.keywords))
+
+ .. versionadded:: 1.2
+
+ .. seealso::
+
+ :ref:`loading_toplevel`
+
+ :ref:`selectin_eager_loading`
+
+ """
+ return loadopt.set_relationship_strategy(attr, {"lazy": "selectin"})
+
+
+@selectinload._add_unbound_fn
+def selectinload(*keys):
+ return _UnboundLoad._from_keys(_UnboundLoad.selectinload, keys, False, {})
+
+
+@selectinload._add_unbound_all_fn
+def selectinload_all(*keys):
+ return _UnboundLoad._from_keys(_UnboundLoad.selectinload, keys, True, {})
+
+
+@loader_option()
def lazyload(loadopt, attr):
"""Indicate that the given attribute should be loaded using "lazy"
loading.
@@ -960,6 +1006,10 @@ def immediateload(loadopt, attr):
"""Indicate that the given attribute should be loaded using
an immediate load with a per-attribute SELECT statement.
+ The :func:`.immediateload` option is superseded in general
+ by the :func:`.selectinload` option, which performs the same task
+ more efficiently by emitting a SELECT for all loaded objects.
+
This function is part of the :class:`.Load` interface and supports
both method-chained and standalone operation.
@@ -967,6 +1017,8 @@ def immediateload(loadopt, attr):
:ref:`loading_toplevel`
+ :ref:`selectin_eager_loading`
+
"""
loader = loadopt.set_relationship_strategy(attr, {"lazy": "immediate"})
return loader