diff options
| author | Mike Bayer <mike_mp@zzzcomputing.com> | 2017-03-23 17:55:43 -0400 |
|---|---|---|
| committer | Mike Bayer <mike_mp@zzzcomputing.com> | 2017-04-26 16:08:57 -0400 |
| commit | 19d2424e05fadf66044d914b13a405cb212b4100 (patch) | |
| tree | 351141593947112a00bed97a3d355a01136bf5ec /lib/sqlalchemy | |
| parent | 029d0f75385298f8056c04eba1d2f9563126a8a6 (diff) | |
| download | sqlalchemy-19d2424e05fadf66044d914b13a405cb212b4100.tar.gz | |
Add selectin loading
Adding a new kind of relationship loader that is
a cross between the "immediateload" and the "subquery"
eager loader, using an IN criteria to load related items
in bulk immediately after the lead query result is loaded.
Change-Id: If13713fba9b465865aef8fd50b5b6b977fe3ef7d
Fixes: #3944
Diffstat (limited to 'lib/sqlalchemy')
| -rw-r--r-- | lib/sqlalchemy/orm/__init__.py | 2 | ||||
| -rw-r--r-- | lib/sqlalchemy/orm/loading.py | 59 | ||||
| -rw-r--r-- | lib/sqlalchemy/orm/query.py | 14 | ||||
| -rw-r--r-- | lib/sqlalchemy/orm/relationships.py | 7 | ||||
| -rw-r--r-- | lib/sqlalchemy/orm/strategies.py | 176 | ||||
| -rw-r--r-- | lib/sqlalchemy/orm/strategy_options.py | 56 |
6 files changed, 306 insertions, 8 deletions
diff --git a/lib/sqlalchemy/orm/__init__.py b/lib/sqlalchemy/orm/__init__.py index 449173548..8d46324e8 100644 --- a/lib/sqlalchemy/orm/__init__.py +++ b/lib/sqlalchemy/orm/__init__.py @@ -240,6 +240,8 @@ lazyload = strategy_options.lazyload._unbound_fn lazyload_all = strategy_options.lazyload_all._unbound_all_fn subqueryload = strategy_options.subqueryload._unbound_fn subqueryload_all = strategy_options.subqueryload_all._unbound_all_fn +selectinload = strategy_options.selectinload._unbound_fn +selectinload_all = strategy_options.selectinload_all._unbound_all_fn immediateload = strategy_options.immediateload._unbound_fn noload = strategy_options.noload._unbound_fn raiseload = strategy_options.raiseload._unbound_fn diff --git a/lib/sqlalchemy/orm/loading.py b/lib/sqlalchemy/orm/loading.py index f749cdd1d..3733d50e1 100644 --- a/lib/sqlalchemy/orm/loading.py +++ b/lib/sqlalchemy/orm/loading.py @@ -18,6 +18,7 @@ from .. import util from . import attributes, exc as orm_exc from ..sql import util as sql_util from . import strategy_options +from . import path_registry from .util import _none_set, state_str from .base import _SET_DEFERRED_EXPIRED, _DEFER_FOR_STATE @@ -31,6 +32,7 @@ def instances(query, cursor, context): """Return an ORM result as an iterator.""" context.runid = _new_runid() + context.post_load_paths = {} filtered = query._has_mapper_entities @@ -77,6 +79,10 @@ def instances(query, cursor, context): rows = [keyed_tuple([proc(row) for proc in process]) for row in fetch] + for path, post_load in \ + context.post_load_paths.items(): + post_load.invoke(context, path) + if filtered: rows = util.unique_list(rows, filter_fn) @@ -346,6 +352,7 @@ def _instance_processor( session_id = context.session.hash_key version_check = context.version_check runid = context.runid + post_load = PostLoad.for_context(context, load_path, only_load_props) if refresh_state: refresh_identity_key = refresh_state.key @@ -452,6 +459,9 @@ def _instance_processor( else: state._commit_all(dict_, session_identity_map) + if post_load: + post_load.add_state(state, True) + else: # partial population routines, for objects that were already # in the Session, but a row matches them; apply eager loaders @@ -475,6 +485,9 @@ def _instance_processor( state._commit(dict_, to_load) + if post_load and context.invoke_all_eagers: + post_load.add_state(state, False) + return instance if mapper.polymorphic_map and not _polymorphic_from and not refresh_state: @@ -632,6 +645,52 @@ def _decorate_polymorphic_switch( return polymorphic_instance +class PostLoad(object): + """Track loaders and states for "post load" operations. + + """ + __slots__ = 'loaders', 'states', 'load_keys' + + def __init__(self): + self.loaders = {} + self.states = util.OrderedDict() + self.load_keys = None + + def add_state(self, state, overwrite): + self.states[state] = overwrite + + def invoke(self, context, path): + if not self.states: + return + path = path_registry.PathRegistry.coerce(path) + for key, loader, arg, kw in self.loaders.values(): + loader( + context, path, self.states.items(), + self.load_keys, *arg, **kw) + self.states.clear() + + @classmethod + def for_context(cls, context, path, only_load_props): + pl = context.post_load_paths.get(path.path) + if pl is not None and only_load_props: + pl.load_keys = only_load_props + return pl + + @classmethod + def path_exists(self, context, path, key): + return path.path in context.post_load_paths and \ + key in context.post_load_paths[path.path].loaders + + @classmethod + def callable_for_path( + cls, context, path, attr_key, loader_callable, *arg, **kw): + if path.path in context.post_load_paths: + pl = context.post_load_paths[path.path] + else: + pl = context.post_load_paths[path.path] = PostLoad() + pl.loaders[attr_key] = (attr_key, loader_callable, arg, kw) + + def load_scalar_attributes(mapper, state, attribute_names): """initiate a column-based attribute refresh operation.""" diff --git a/lib/sqlalchemy/orm/query.py b/lib/sqlalchemy/orm/query.py index 272ef77fb..a2f83818c 100644 --- a/lib/sqlalchemy/orm/query.py +++ b/lib/sqlalchemy/orm/query.py @@ -753,9 +753,14 @@ class Query(object): (e.g. approximately 1000) is used, even with DBAPIs that buffer rows (which are most). - The :meth:`.Query.yield_per` method **is not compatible with most - eager loading schemes, including subqueryload and joinedload with - collections**. For this reason, it may be helpful to disable + The :meth:`.Query.yield_per` method **is not compatible + subqueryload eager loading or joinedload eager loading when + using collections**. It is potentially compatible with "select in" + eager loading, **provided the databse driver supports multiple, + independent cursors** (pysqlite and psycopg2 are known to work, + MySQL and SQL Server ODBC drivers do not). + + Therefore in some cases, it may be helpful to disable eager loads, either unconditionally with :meth:`.Query.enable_eagerloads`:: @@ -4103,7 +4108,8 @@ class QueryContext(object): 'primary_columns', 'secondary_columns', 'eager_order_by', 'eager_joins', 'create_eager_joins', 'propagate_options', 'attributes', 'statement', 'from_clause', 'whereclause', - 'order_by', 'labels', '_for_update_arg', 'runid', 'partials' + 'order_by', 'labels', '_for_update_arg', 'runid', 'partials', + 'post_load_paths' ) def __init__(self, query): diff --git a/lib/sqlalchemy/orm/relationships.py b/lib/sqlalchemy/orm/relationships.py index 1005e7eeb..43f53aec5 100644 --- a/lib/sqlalchemy/orm/relationships.py +++ b/lib/sqlalchemy/orm/relationships.py @@ -527,6 +527,13 @@ class RelationshipProperty(StrategizedProperty): a subquery of the original statement, for each collection requested. + * ``selectin`` - items should be loaded "eagerly" as the parents + are loaded, using one or more additional SQL statements, which + issues a JOIN to the immediate parent object, specifying primary + key identifiers using an IN clause. + + .. versionadded:: 1.2 + * ``noload`` - no loading should occur at any time. This is to support "write-only" attributes, or attributes which are populated in some manner specific to the application. diff --git a/lib/sqlalchemy/orm/strategies.py b/lib/sqlalchemy/orm/strategies.py index 10131c80d..ddf5397fd 100644 --- a/lib/sqlalchemy/orm/strategies.py +++ b/lib/sqlalchemy/orm/strategies.py @@ -14,10 +14,10 @@ from ..sql import util as sql_util, visitors from .. import sql from . import ( attributes, interfaces, exc as orm_exc, loading, - unitofwork, util as orm_util + unitofwork, util as orm_util, query ) from .state import InstanceState -from .util import _none_set +from .util import _none_set, aliased from . import properties from .interfaces import ( LoaderStrategy, StrategizedProperty @@ -1744,6 +1744,178 @@ class JoinedLoader(AbstractRelationshipLoader): populators["eager"].append((self.key, load_scalar_from_joined_exec)) +@log.class_logger +@properties.RelationshipProperty.strategy_for(lazy="selectin") +class SelectInLoader(AbstractRelationshipLoader): + __slots__ = ( + 'join_depth', '_parent_alias', '_in_expr', '_parent_pk_cols', + '_zero_idx' + ) + + _chunksize = 500 + + def __init__(self, parent, strategy_key): + super(SelectInLoader, self).__init__(parent, strategy_key) + self.join_depth = self.parent_property.join_depth + self._parent_alias = aliased(self.parent.class_) + pa_insp = inspect(self._parent_alias) + self._parent_pk_cols = pk_cols = [ + pa_insp._adapt_element(col) for col in self.parent.primary_key] + if len(pk_cols) > 1: + self._in_expr = sql.tuple_(*pk_cols) + self._zero_idx = False + else: + self._in_expr = pk_cols[0] + self._zero_idx = True + + def init_class_attribute(self, mapper): + self.parent_property.\ + _get_strategy((("lazy", "select"),)).\ + init_class_attribute(mapper) + + def create_row_processor( + self, context, path, loadopt, mapper, + result, adapter, populators): + if not self.parent.class_manager[self.key].impl.supports_population: + raise sa_exc.InvalidRequestError( + "'%s' does not support object " + "population - eager loading cannot be applied." % + self + ) + + selectin_path = ( + context.query._current_path or orm_util.PathRegistry.root) + path + + if loading.PostLoad.path_exists(context, selectin_path, self.key): + return + + path_w_prop = path[self.parent_property] + selectin_path_w_prop = selectin_path[self.parent_property] + + # build up a path indicating the path from the leftmost + # entity to the thing we're subquery loading. + with_poly_info = path_w_prop.get( + context.attributes, + "path_with_polymorphic", None) + + if with_poly_info is not None: + effective_entity = with_poly_info.entity + else: + effective_entity = self.mapper + + if not path_w_prop.contains(context.attributes, "loader"): + if self.join_depth: + if selectin_path_w_prop.length / 2 > self.join_depth: + return + elif selectin_path_w_prop.contains_mapper(self.mapper): + return + + loading.PostLoad.callable_for_path( + context, selectin_path, self.key, + self._load_for_path, effective_entity) + + @util.dependencies("sqlalchemy.ext.baked") + def _load_for_path( + self, baked, context, path, states, load_only, effective_entity): + + if load_only and self.key not in load_only: + return + + our_states = [ + (state.key[1], state, overwrite) + for state, overwrite in states + ] + + pk_cols = self._parent_pk_cols + pa = self._parent_alias + + q = baked.BakedQuery( + # TODO: use strategy-local cache + self.mapper._compiled_cache, + lambda session: session.query( + query.Bundle("pk", *pk_cols), effective_entity + ) + ) + + q.add_criteria( + lambda q: q.select_from(pa).join( + getattr(pa, + self.parent_property.key).of_type(effective_entity)). + filter( + self._in_expr.in_( + sql.bindparam('primary_keys', expanding=True)) + ).order_by(*pk_cols) + ) + + orig_query = context.query + + q._add_lazyload_options( + orig_query._with_options, path[self.parent_property] + ) + + if orig_query._populate_existing: + q.add_criteria( + lambda q: q.populate_existing() + ) + + if self.parent_property.order_by: + def _setup_outermost_orderby(q): + # imitate the same method that + # subquery eager loading does it, looking for the + # adapted "secondary" table + eagerjoin = q._from_obj[0] + eager_order_by = \ + eagerjoin._target_adapter.\ + copy_and_process( + util.to_list( + self.parent_property.order_by + ) + ) + return q.order_by(*eager_order_by) + + q.add_criteria( + _setup_outermost_orderby + ) + + uselist = self.uselist + _empty_result = () if uselist else None + + while our_states: + chunk = our_states[0:self._chunksize] + our_states = our_states[self._chunksize:] + + data = { + k: [vv[1] for vv in v] + for k, v in itertools.groupby( + q(context.session).params( + primary_keys=[ + key[0] if self._zero_idx else key + for key, state, overwrite in chunk] + ), + lambda x: x[0] + ) + } + + for key, state, overwrite in chunk: + if not overwrite and self.key in state.dict: + continue + + collection = data.get(key, _empty_result) + + if not uselist and collection: + if len(collection) > 1: + util.warn( + "Multiple rows returned with " + "uselist=False for eagerly-loaded " + "attribute '%s' " + % self) + state.get_impl(self.key).set_committed_value( + state, state.dict, collection[0]) + else: + state.get_impl(self.key).set_committed_value( + state, state.dict, collection) + + def single_parent_validator(desc, prop): def _do_check(state, value, oldvalue, initiator): if value is not None and initiator.key == prop.key: diff --git a/lib/sqlalchemy/orm/strategy_options.py b/lib/sqlalchemy/orm/strategy_options.py index 423d3f558..df13f05db 100644 --- a/lib/sqlalchemy/orm/strategy_options.py +++ b/lib/sqlalchemy/orm/strategy_options.py @@ -900,11 +900,13 @@ def subqueryload(loadopt, attr): query(User).options(subqueryload(User.orders)) # subquery-load Order.items and then Item.keywords - query(Order).options(subqueryload(Order.items).subqueryload(Item.keywords)) + query(Order).options( + subqueryload(Order.items).subqueryload(Item.keywords)) # lazily load Order.items, but when Items are loaded, # subquery-load the keywords collection - query(Order).options(lazyload(Order.items).subqueryload(Item.keywords)) + query(Order).options( + lazyload(Order.items).subqueryload(Item.keywords)) .. seealso:: @@ -928,6 +930,50 @@ def subqueryload_all(*keys): @loader_option() +def selectinload(loadopt, attr): + """Indicate that the given attribute should be loaded using + SELECT IN eager loading. + + This function is part of the :class:`.Load` interface and supports + both method-chained and standalone operation. + + examples:: + + # selectin-load the "orders" collection on "User" + query(User).options(selectinload(User.orders)) + + # selectin-load Order.items and then Item.keywords + query(Order).options( + selectinload(Order.items).selectinload(Item.keywords)) + + # lazily load Order.items, but when Items are loaded, + # selectin-load the keywords collection + query(Order).options( + lazyload(Order.items).selectinload(Item.keywords)) + + .. versionadded:: 1.2 + + .. seealso:: + + :ref:`loading_toplevel` + + :ref:`selectin_eager_loading` + + """ + return loadopt.set_relationship_strategy(attr, {"lazy": "selectin"}) + + +@selectinload._add_unbound_fn +def selectinload(*keys): + return _UnboundLoad._from_keys(_UnboundLoad.selectinload, keys, False, {}) + + +@selectinload._add_unbound_all_fn +def selectinload_all(*keys): + return _UnboundLoad._from_keys(_UnboundLoad.selectinload, keys, True, {}) + + +@loader_option() def lazyload(loadopt, attr): """Indicate that the given attribute should be loaded using "lazy" loading. @@ -960,6 +1006,10 @@ def immediateload(loadopt, attr): """Indicate that the given attribute should be loaded using an immediate load with a per-attribute SELECT statement. + The :func:`.immediateload` option is superseded in general + by the :func:`.selectinload` option, which performs the same task + more efficiently by emitting a SELECT for all loaded objects. + This function is part of the :class:`.Load` interface and supports both method-chained and standalone operation. @@ -967,6 +1017,8 @@ def immediateload(loadopt, attr): :ref:`loading_toplevel` + :ref:`selectin_eager_loading` + """ loader = loadopt.set_relationship_strategy(attr, {"lazy": "immediate"}) return loader |
