diff options
| author | Mike Bayer <mike_mp@zzzcomputing.com> | 2020-03-30 11:04:24 -0400 |
|---|---|---|
| committer | Mike Bayer <mike_mp@zzzcomputing.com> | 2020-03-30 11:43:00 -0400 |
| commit | d3cc4cdcf10db1f50fb2806358532821bc6ed796 (patch) | |
| tree | e0b47b4c1df0c4e1907f0c9a02a0aaf4ebb8a7e1 /lib/sqlalchemy | |
| parent | ffcc16b114f14459f1b90b10e126fc31afef682a (diff) | |
| download | sqlalchemy-d3cc4cdcf10db1f50fb2806358532821bc6ed796.tar.gz | |
Remove ORDER BY pk from subqueryload, selectinload
Modified the queries used by subqueryload and selectinload to no longer
ORDER BY the primary key of the parent entity; this ordering was there to
allow the rows as they come in to be copied into lists directly with a
minimal level of Python-side collation. However, these ORDER BY clauses
can negatively impact the performance of the query as in many scenarios
these columns are derived from a subquery or are otherwise not actual
primary key columns such that SQL planners cannot make use of indexes. The
Python-side collation uses the native itertools.group_by() to collate the
incoming rows, and has been modified to allow multiple
row-groups-per-parent to be assembled together using list.extend(), which
should still allow for relatively fast Python-side performance. There will
still be an ORDER BY present for a relationship that includes an explicit
order_by parameter, however this is the only ORDER BY that will be added to
the query for both kinds of loading.
Fixes: #5162
Change-Id: I8befd1303c1af7cc24cbf005f39bc01c8b2745f3
Diffstat (limited to 'lib/sqlalchemy')
| -rw-r--r-- | lib/sqlalchemy/orm/strategies.py | 24 |
1 files changed, 11 insertions, 13 deletions
diff --git a/lib/sqlalchemy/orm/strategies.py b/lib/sqlalchemy/orm/strategies.py index 4da601530..a55295fc7 100644 --- a/lib/sqlalchemy/orm/strategies.py +++ b/lib/sqlalchemy/orm/strategies.py @@ -1142,7 +1142,7 @@ class SubqueryLoader(PostLoader): to_join, local_attr, parent_alias = self._prep_for_joins( left_alias, subq_path ) - q = q.order_by(*local_attr) + q = q.add_columns(*local_attr) q = self._apply_joins( q, to_join, left_alias, parent_alias, effective_entity @@ -1394,10 +1394,9 @@ class SubqueryLoader(PostLoader): return self._data.get(key, default) def _load(self): - self._data = dict( - (k, [vv[0] for vv in v]) - for k, v in itertools.groupby(self.subq, lambda x: x[1:]) - ) + self._data = collections.defaultdict(list) + for k, v in itertools.groupby(self.subq, lambda x: x[1:]): + self._data[k].extend(vv[0] for vv in v) def loader(self, state, dict_, row): if self._data is None: @@ -2428,7 +2427,7 @@ class SelectInLoader(PostLoader, util.MemoizedSlots): q.add_criteria( lambda q: q.filter( in_expr.in_(sql.bindparam("primary_keys", expanding=True)) - ).order_by(*pk_cols) + ) ) orig_query = context.query @@ -2525,13 +2524,12 @@ class SelectInLoader(PostLoader, util.MemoizedSlots): for key, state, state_dict, overwrite in chunk ] - data = { - k: [vv[1] for vv in v] - for k, v in itertools.groupby( - q(context.session).params(primary_keys=primary_keys), - lambda x: x[0], - ) - } + data = collections.defaultdict(list) + for k, v in itertools.groupby( + q(context.session).params(primary_keys=primary_keys), + lambda x: x[0], + ): + data[k].extend(vv[1] for vv in v) for key, state, state_dict, overwrite in chunk: |
