selectinload omit join

The "selectin" loader strategy now omits the JOIN in the case of a simple one-to-many load, where it instead relies upon the foreign key columns of the related table in order to match up to primary keys in the parent table. This optimization can be disabled by setting the :paramref:`.relationship.omit_join` flag to False. Many thanks to Jayson Reis for the efforts on this. As part of this change, the horizontal shard extension no longer relies upon the _mapper_zero() method to get the query-bound mapper, instead using the more generalized _bind_mapper() (which will use mapper_zero if no explicit FROM is present). A short check for the particular recursive condition is added to BundleEntity, and it no longer assigns itself as the "namespace" of its ColumnEntity objects, which previously created a reference cycle. Co-authored-by: Mike Bayer <mike_mp@zzzcomputing.com> Fixes: #4340 Change-Id: I649587e1c07b684ecd63f7d10054cd165891baf4 Pull-request: https://bitbucket.org/zzzeek/sqlalchemy/pull-requests/7
author: Jayson Reis <santosdosreis@gmail.com> 2018-10-01 12:58:46 -0400
committer: Mike Bayer <mike_mp@zzzcomputing.com> 2018-10-10 13:28:02 -0400
commit: 21fbb5e38f04affb8ac4502428672b3a629c2bec (patch)
tree: bc7742dbb9181cd42f144125c0a4bcb277b876c8 /lib/sqlalchemy
parent: bb65193bffeb106e63dab535a024565d7fc2e26d (diff)
download: sqlalchemy-21fbb5e38f04affb8ac4502428672b3a629c2bec.tar.gz
4 files changed, 121 insertions, 33 deletions
diff --git a/lib/sqlalchemy/ext/horizontal_shard.py b/lib/sqlalchemy/ext/horizontal_shard.py
index 425d28963..f86e4fc93 100644
--- a/lib/sqlalchemy/ext/horizontal_shard.py
+++ b/lib/sqlalchemy/ext/horizontal_shard.py
@@ -45,7 +45,7 @@ class ShardedQuery(Query):
         def iter_for_shard(shard_id):
             context.attributes['shard_id'] = context.identity_token = shard_id
             result = self._connection_from_session(
-                mapper=self._mapper_zero(),
+                mapper=self._bind_mapper(),
                 shard_id=shard_id).execute(
                 context.statement,
                 self._params)
diff --git a/lib/sqlalchemy/orm/query.py b/lib/sqlalchemy/orm/query.py
index e96996a39..bfddb5cfe 100644
--- a/lib/sqlalchemy/orm/query.py
+++ b/lib/sqlalchemy/orm/query.py
@@ -4020,13 +4020,17 @@ class _BundleEntity(_QueryEntity):
                 if isinstance(expr, Bundle):
                     _BundleEntity(self, expr)
                 else:
-                    _ColumnEntity(self, expr, namespace=self)
+                    _ColumnEntity(self, expr)
 
         self.supports_single_entity = self.bundle.single_entity
 
     @property
     def mapper(self):
-        return self.entity_zero.mapper
+        ezero = self.entity_zero
+        if ezero is not None:
+            return ezero.mapper
+        else:
+            return None
 
     @property
     def entities(self):
diff --git a/lib/sqlalchemy/orm/relationships.py b/lib/sqlalchemy/orm/relationships.py
index 27a75d45c..818f1c0ae 100644
--- a/lib/sqlalchemy/orm/relationships.py
+++ b/lib/sqlalchemy/orm/relationships.py
@@ -116,7 +116,8 @@ class RelationshipProperty(StrategizedProperty):
                  bake_queries=True,
                  _local_remote_pairs=None,
                  query_class=None,
-                 info=None):
+                 info=None,
+                 omit_join=None):
         """Provide a relationship between two mapped classes.
 
         This corresponds to a parent-child or associative table relationship.
@@ -816,6 +817,13 @@ class RelationshipProperty(StrategizedProperty):
           the full set of related objects, to prevent modifications of the
           collection from resulting in persistence operations.
 
+        :param omit_join:
+          Allows manual control over the "selectin" automatic join
+          optimization.  Set to ``False`` to disable the "omit join" feature
+          added in SQLAlchemy 1.3.
+
+          .. versionadded:: 1.3
+
 
         """
         super(RelationshipProperty, self).__init__()
@@ -843,6 +851,7 @@ class RelationshipProperty(StrategizedProperty):
         self.doc = doc
         self.active_history = active_history
         self.join_depth = join_depth
+        self.omit_join = omit_join
         self.local_remote_pairs = _local_remote_pairs
         self.extension = extension
         self.bake_queries = bake_queries
diff --git a/lib/sqlalchemy/orm/strategies.py b/lib/sqlalchemy/orm/strategies.py
index d7597d3b2..b9abf0647 100644
--- a/lib/sqlalchemy/orm/strategies.py
+++ b/lib/sqlalchemy/orm/strategies.py
@@ -1837,8 +1837,8 @@ class JoinedLoader(AbstractRelationshipLoader):
 @properties.RelationshipProperty.strategy_for(lazy="selectin")
 class SelectInLoader(AbstractRelationshipLoader, util.MemoizedSlots):
     __slots__ = (
-        'join_depth', '_parent_alias', '_in_expr', '_parent_pk_cols',
-        '_zero_idx', '_bakery'
+        'join_depth', 'omit_join', '_parent_alias', '_in_expr',
+        '_pk_cols', '_zero_idx', '_bakery'
     )
 
     _chunksize = 500
@@ -1846,9 +1846,46 @@ class SelectInLoader(AbstractRelationshipLoader, util.MemoizedSlots):
     def __init__(self, parent, strategy_key):
         super(SelectInLoader, self).__init__(parent, strategy_key)
         self.join_depth = self.parent_property.join_depth
+
+        if self.parent_property.omit_join is not None:
+            self.omit_join = self.parent_property.omit_join
+        else:
+            lazyloader = self.parent_property._get_strategy(
+                (("lazy", "select"),))
+            self.omit_join = self.parent._get_clause[0].compare(
+                lazyloader._rev_lazywhere,
+                use_proxies=True,
+                equivalents=self.parent._equivalent_columns
+            )
+        if self.omit_join:
+            self._init_for_omit_join()
+        else:
+            self._init_for_join()
+
+    def _init_for_omit_join(self):
+        pk_to_fk = dict(
+            self.parent_property._join_condition.local_remote_pairs
+        )
+        pk_to_fk.update(
+            (equiv, pk_to_fk[k])
+            for k in list(pk_to_fk)
+            for equiv in self.parent._equivalent_columns.get(k, ())
+        )
+
+        self._pk_cols = fk_cols = [
+            pk_to_fk[col]
+            for col in self.parent.primary_key if col in pk_to_fk]
+        if len(fk_cols) > 1:
+            self._in_expr = sql.tuple_(*fk_cols)
+            self._zero_idx = False
+        else:
+            self._in_expr = fk_cols[0]
+            self._zero_idx = True
+
+    def _init_for_join(self):
         self._parent_alias = aliased(self.parent.class_)
         pa_insp = inspect(self._parent_alias)
-        self._parent_pk_cols = pk_cols = [
+        self._pk_cols = pk_cols = [
             pa_insp._adapt_element(col) for col in self.parent.primary_key]
         if len(pk_cols) > 1:
             self._in_expr = sql.tuple_(*pk_cols)
@@ -1922,8 +1959,24 @@ class SelectInLoader(AbstractRelationshipLoader, util.MemoizedSlots):
             for state, overwrite in states
         ]
 
-        pk_cols = self._parent_pk_cols
-        pa = self._parent_alias
+        pk_cols = self._pk_cols
+        in_expr = self._in_expr
+
+        if self.omit_join:
+            # in "omit join" mode, the primary key column and the
+            # "in" expression are in terms of the related entity.  So
+            # if the related entity is polymorphic or otherwise aliased,
+            # we need to adapt our "_pk_cols" and "_in_expr" to that
+            # entity.   in non-"omit join" mode, these are against the
+            # parent entity and do not need adaption.
+            insp = inspect(effective_entity)
+            if insp.is_aliased_class:
+                pk_cols = [
+                    insp._adapt_element(col)
+                    for col in pk_cols
+                ]
+                in_expr = insp._adapt_element(in_expr)
+                pk_cols = [insp._adapt_element(col) for col in pk_cols]
 
         q = self._bakery(
             lambda session: session.query(
@@ -1931,15 +1984,30 @@ class SelectInLoader(AbstractRelationshipLoader, util.MemoizedSlots):
             ), self
         )
 
+        if self.omit_join:
+            # the Bundle we have in the "omit_join" case is against raw, non
+            # annotated columns, so to ensure the Query knows its primary
+            # entity, we add it explicitly.  If we made the Bundle against
+            # annotated columns, we hit a performance issue in this specific
+            # case, which is detailed in issue #4347.
+            q.add_criteria(lambda q: q.select_from(effective_entity))
+        else:
+            # in the non-omit_join case, the Bundle is against the annotated/
+            # mapped column of the parent entity, but the #4347 issue does not
+            # occur in this case.
+            pa = self._parent_alias
+            q.add_criteria(
+                lambda q: q.select_from(pa).join(
+                    getattr(pa, self.parent_property.key).of_type(
+                        effective_entity)
+                )
+            )
+
         q.add_criteria(
-            lambda q: q.select_from(pa).join(
-                getattr(pa,
-                        self.parent_property.key).of_type(effective_entity)).
-            filter(
-                self._in_expr.in_(
-                    sql.bindparam('primary_keys', expanding=True))
-            ).order_by(*pk_cols)
-        )
+            lambda q: q.filter(
+                in_expr.in_(
+                    sql.bindparam("primary_keys", expanding=True))
+            ).order_by(*pk_cols))
 
         orig_query = context.query
 
@@ -1954,23 +2022,30 @@ class SelectInLoader(AbstractRelationshipLoader, util.MemoizedSlots):
             )
 
         if self.parent_property.order_by:
-            def _setup_outermost_orderby(q):
-                # imitate the same method that
-                # subquery eager loading does it, looking for the
-                # adapted "secondary" table
-                eagerjoin = q._from_obj[0]
-                eager_order_by = \
-                    eagerjoin._target_adapter.\
-                    copy_and_process(
-                        util.to_list(
-                            self.parent_property.order_by
+            if self.omit_join:
+                eager_order_by = self.parent_property.order_by
+                if insp.is_aliased_class:
+                    eager_order_by = [
+                        insp._adapt_element(elem) for elem in
+                        eager_order_by
+                    ]
+                q.add_criteria(
+                    lambda q: q.order_by(*eager_order_by)
+                )
+            else:
+                def _setup_outermost_orderby(q):
+                    # imitate the same method that subquery eager loading uses,
+                    # looking for the adapted "secondary" table
+                    eagerjoin = q._from_obj[0]
+                    eager_order_by = \
+                        eagerjoin._target_adapter.\
+                        copy_and_process(
+                            util.to_list(self.parent_property.order_by)
                         )
-                    )
-                return q.order_by(*eager_order_by)
-
-            q.add_criteria(
-                _setup_outermost_orderby
-            )
+                    return q.order_by(*eager_order_by)
+                q.add_criteria(
+                    _setup_outermost_orderby
+                )
 
         uselist = self.uselist
         _empty_result = () if uselist else None
author	Jayson Reis <santosdosreis@gmail.com>	2018-10-01 12:58:46 -0400
committer	Mike Bayer <mike_mp@zzzcomputing.com>	2018-10-10 13:28:02 -0400
commit	21fbb5e38f04affb8ac4502428672b3a629c2bec (patch)
tree	bc7742dbb9181cd42f144125c0a4bcb277b876c8 /lib/sqlalchemy
parent	bb65193bffeb106e63dab535a024565d7fc2e26d (diff)
download	sqlalchemy-21fbb5e38f04affb8ac4502428672b3a629c2bec.tar.gz