summaryrefslogtreecommitdiff
path: root/lib/sqlalchemy
diff options
context:
space:
mode:
authorMike Bayer <mike_mp@zzzcomputing.com>2020-03-30 11:04:24 -0400
committerMike Bayer <mike_mp@zzzcomputing.com>2020-03-30 11:43:00 -0400
commitd3cc4cdcf10db1f50fb2806358532821bc6ed796 (patch)
treee0b47b4c1df0c4e1907f0c9a02a0aaf4ebb8a7e1 /lib/sqlalchemy
parentffcc16b114f14459f1b90b10e126fc31afef682a (diff)
downloadsqlalchemy-d3cc4cdcf10db1f50fb2806358532821bc6ed796.tar.gz
Remove ORDER BY pk from subqueryload, selectinload
Modified the queries used by subqueryload and selectinload to no longer ORDER BY the primary key of the parent entity; this ordering was there to allow the rows as they come in to be copied into lists directly with a minimal level of Python-side collation. However, these ORDER BY clauses can negatively impact the performance of the query as in many scenarios these columns are derived from a subquery or are otherwise not actual primary key columns such that SQL planners cannot make use of indexes. The Python-side collation uses the native itertools.group_by() to collate the incoming rows, and has been modified to allow multiple row-groups-per-parent to be assembled together using list.extend(), which should still allow for relatively fast Python-side performance. There will still be an ORDER BY present for a relationship that includes an explicit order_by parameter, however this is the only ORDER BY that will be added to the query for both kinds of loading. Fixes: #5162 Change-Id: I8befd1303c1af7cc24cbf005f39bc01c8b2745f3
Diffstat (limited to 'lib/sqlalchemy')
-rw-r--r--lib/sqlalchemy/orm/strategies.py24
1 files changed, 11 insertions, 13 deletions
diff --git a/lib/sqlalchemy/orm/strategies.py b/lib/sqlalchemy/orm/strategies.py
index 4da601530..a55295fc7 100644
--- a/lib/sqlalchemy/orm/strategies.py
+++ b/lib/sqlalchemy/orm/strategies.py
@@ -1142,7 +1142,7 @@ class SubqueryLoader(PostLoader):
to_join, local_attr, parent_alias = self._prep_for_joins(
left_alias, subq_path
)
- q = q.order_by(*local_attr)
+
q = q.add_columns(*local_attr)
q = self._apply_joins(
q, to_join, left_alias, parent_alias, effective_entity
@@ -1394,10 +1394,9 @@ class SubqueryLoader(PostLoader):
return self._data.get(key, default)
def _load(self):
- self._data = dict(
- (k, [vv[0] for vv in v])
- for k, v in itertools.groupby(self.subq, lambda x: x[1:])
- )
+ self._data = collections.defaultdict(list)
+ for k, v in itertools.groupby(self.subq, lambda x: x[1:]):
+ self._data[k].extend(vv[0] for vv in v)
def loader(self, state, dict_, row):
if self._data is None:
@@ -2428,7 +2427,7 @@ class SelectInLoader(PostLoader, util.MemoizedSlots):
q.add_criteria(
lambda q: q.filter(
in_expr.in_(sql.bindparam("primary_keys", expanding=True))
- ).order_by(*pk_cols)
+ )
)
orig_query = context.query
@@ -2525,13 +2524,12 @@ class SelectInLoader(PostLoader, util.MemoizedSlots):
for key, state, state_dict, overwrite in chunk
]
- data = {
- k: [vv[1] for vv in v]
- for k, v in itertools.groupby(
- q(context.session).params(primary_keys=primary_keys),
- lambda x: x[0],
- )
- }
+ data = collections.defaultdict(list)
+ for k, v in itertools.groupby(
+ q(context.session).params(primary_keys=primary_keys),
+ lambda x: x[0],
+ ):
+ data[k].extend(vv[1] for vv in v)
for key, state, state_dict, overwrite in chunk: