summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--doc/build/changelog/changelog_11.rst19
-rw-r--r--doc/build/changelog/migration_11.rst41
-rw-r--r--lib/sqlalchemy/orm/query.py26
-rw-r--r--lib/sqlalchemy/sql/util.py22
-rw-r--r--test/orm/test_query.py144
5 files changed, 229 insertions, 23 deletions
diff --git a/doc/build/changelog/changelog_11.rst b/doc/build/changelog/changelog_11.rst
index 5c3ad7163..2473a02a2 100644
--- a/doc/build/changelog/changelog_11.rst
+++ b/doc/build/changelog/changelog_11.rst
@@ -22,6 +22,25 @@
:version: 1.1.0b1
.. change::
+ :tags: bug, orm
+ :tickets: 3641
+
+ A refinement to the logic which adds columns to the resulting SQL when
+ :meth:`.Query.distinct` is combined with :meth:`.Query.order_by` such
+ that columns which are already present will not be added
+ a second time, even if they are labeled with a different name.
+ Regardless of this change, the extra columns added to the SQL have
+ never been returned in the final result, so this change only impacts
+ the string form of the statement as well as its behavior when used in
+ a Core execution context. Additionally, columns are no longer added
+ when the DISTINCT ON format is used, provided the query is not
+ wrapped inside a subquery due to joined eager loading.
+
+ .. seealso::
+
+ :ref:`change_3641`
+
+ .. change::
:tags: feature, sql
:tickets: 3292, 3095
diff --git a/doc/build/changelog/migration_11.rst b/doc/build/changelog/migration_11.rst
index 07223be34..3d65ede80 100644
--- a/doc/build/changelog/migration_11.rst
+++ b/doc/build/changelog/migration_11.rst
@@ -16,7 +16,7 @@ What's New in SQLAlchemy 1.1?
some issues may be moved to later milestones in order to allow
for a timely release.
- Document last updated: January 29, 2016
+ Document last updated: Feburary 9, 2016
Introduction
============
@@ -487,6 +487,45 @@ associated with any bound :class:`.Engine`, then the fallback to the
:ticket:`3081`
+.. _change_3641:
+
+Columns no longer added redundantly with DISTINCT + ORDER BY
+------------------------------------------------------------
+
+A query such as the following will now augment only those columns
+that are missing from the SELECT list, without duplicates::
+
+ q = session.query(User.id, User.name.label('name')).\
+ distinct().\
+ order_by(User.id, User.name, User.fullname)
+
+Produces::
+
+ SELECT DISTINCT user.id AS a_id, user.name AS name,
+ user.fullname AS a_fullname
+ FROM a ORDER BY user.id, user.name, user.fullname
+
+Previously, it would produce::
+
+ SELECT DISTINCT user.id AS a_id, user.name AS name, user.name AS a_name,
+ user.fullname AS a_fullname
+ FROM a ORDER BY user.id, user.name, user.fullname
+
+Where above, the ``user.name`` column is added unnecessarily. The results
+would not be affected, as the additional columns are not included in the
+result in any case, but the columns are unnecessary.
+
+Additionally, when the Postgresql DISTINCT ON format is used by passing
+expressions to :meth:`.Query.distinct`, the above "column adding" logic
+is disabled entirely.
+
+When the query is being bundled into a subquery for the purposes of
+joined eager loading, the "augment column list" rules are are necessarily
+more aggressive so that the ORDER BY can still be satisifed, so this case
+remains unchanged.
+
+:ticket:`3641`
+
New Features and Improvements - Core
====================================
diff --git a/lib/sqlalchemy/orm/query.py b/lib/sqlalchemy/orm/query.py
index 08600c357..8a25f570a 100644
--- a/lib/sqlalchemy/orm/query.py
+++ b/lib/sqlalchemy/orm/query.py
@@ -3254,12 +3254,11 @@ class Query(object):
# then append eager joins onto that
if context.order_by:
- order_by_col_expr = list(
- chain(*[
- sql_util.unwrap_order_by(o)
- for o in context.order_by
- ])
- )
+ order_by_col_expr = \
+ sql_util.expand_column_list_from_order_by(
+ context.primary_columns,
+ context.order_by
+ )
else:
context.order_by = None
order_by_col_expr = []
@@ -3319,15 +3318,12 @@ class Query(object):
if not context.order_by:
context.order_by = None
- if self._distinct and context.order_by:
- order_by_col_expr = list(
- chain(*[
- sql_util.unwrap_order_by(o)
- for o in context.order_by
- ])
- )
- context.primary_columns += order_by_col_expr
-
+ if self._distinct is True and context.order_by:
+ context.primary_columns += \
+ sql_util.expand_column_list_from_order_by(
+ context.primary_columns,
+ context.order_by
+ )
context.froms += tuple(context.eager_joins.values())
statement = sql.select(
diff --git a/lib/sqlalchemy/sql/util.py b/lib/sqlalchemy/sql/util.py
index 1dcf0ee66..7e294d85f 100644
--- a/lib/sqlalchemy/sql/util.py
+++ b/lib/sqlalchemy/sql/util.py
@@ -176,6 +176,28 @@ def unwrap_order_by(clause):
return result
+def expand_column_list_from_order_by(collist, order_by):
+ """Given the columns clause and ORDER BY of a selectable,
+ return a list of column expressions that can be added to the collist
+ corresponding to the ORDER BY, without repeating those already
+ in the collist.
+
+ """
+ cols_already_present = set([
+ col.element if col._order_by_label_element is not None
+ else col for col in collist
+ ])
+
+ return [
+ col for col in
+ chain(*[
+ unwrap_order_by(o)
+ for o in order_by
+ ])
+ if col not in cols_already_present
+ ]
+
+
def clause_is_present(clause, search):
"""Given a target clause and a second to search within, return True
if the target is plainly present in the search without any
diff --git a/test/orm/test_query.py b/test/orm/test_query.py
index 6445ffefd..c4c62c319 100644
--- a/test/orm/test_query.py
+++ b/test/orm/test_query.py
@@ -2695,7 +2695,9 @@ class CountTest(QueryTest):
eq_(q.distinct().count(), 3)
-class DistinctTest(QueryTest):
+class DistinctTest(QueryTest, AssertsCompiledSQL):
+ __dialect__ = 'default'
+
def test_basic(self):
User = self.classes.User
@@ -2709,19 +2711,22 @@ class DistinctTest(QueryTest):
order_by(desc(User.name)).all()
)
- def test_joined(self):
- """test that orderbys from a joined table get placed into the columns
- clause when DISTINCT is used"""
-
+ def test_columns_augmented_roundtrip_one(self):
User, Address = self.classes.User, self.classes.Address
sess = create_session()
q = sess.query(User).join('addresses').distinct(). \
order_by(desc(Address.email_address))
- assert [User(id=7), User(id=9), User(id=8)] == q.all()
+ eq_(
+ [User(id=7), User(id=9), User(id=8)],
+ q.all()
+ )
+
+ def test_columns_augmented_roundtrip_two(self):
+ User, Address = self.classes.User, self.classes.Address
- sess.expunge_all()
+ sess = create_session()
# test that it works on embedded joinedload/LIMIT subquery
q = sess.query(User).join('addresses').distinct(). \
@@ -2739,6 +2744,131 @@ class DistinctTest(QueryTest):
] == q.all()
self.assert_sql_count(testing.db, go, 1)
+ def test_columns_augmented_roundtrip_three(self):
+ User, Address = self.classes.User, self.classes.Address
+
+ sess = create_session()
+
+ q = sess.query(User.id, User.name.label('foo'), Address.id).\
+ filter(User.name == 'jack').\
+ distinct().\
+ order_by(User.id, User.name, Address.email_address)
+
+ # even though columns are added, they aren't in the result
+ eq_(
+ q.all(),
+ [(7, 'jack', 3), (7, 'jack', 4), (7, 'jack', 2),
+ (7, 'jack', 5), (7, 'jack', 1)]
+ )
+ for row in q:
+ eq_(row.keys(), ['id', 'foo', 'id'])
+
+ def test_columns_augmented_sql_one(self):
+ User, Address = self.classes.User, self.classes.Address
+
+ sess = create_session()
+
+ q = sess.query(User.id, User.name.label('foo'), Address.id).\
+ distinct().\
+ order_by(User.id, User.name, Address.email_address)
+
+ # Address.email_address is added because of DISTINCT,
+ # however User.id, User.name are not b.c. they're already there,
+ # even though User.name is labeled
+ self.assert_compile(
+ q,
+ "SELECT DISTINCT users.id AS users_id, users.name AS foo, "
+ "addresses.id AS addresses_id, "
+ "addresses.email_address AS addresses_email_address FROM users, "
+ "addresses ORDER BY users.id, users.name, addresses.email_address"
+ )
+
+ def test_columns_augmented_sql_two(self):
+ User, Address = self.classes.User, self.classes.Address
+
+ sess = create_session()
+
+ q = sess.query(User).\
+ options(joinedload(User.addresses)).\
+ distinct().\
+ order_by(User.name, Address.email_address).\
+ limit(5)
+
+ # addresses.email_address is added to inner query so that
+ # it is available in ORDER BY
+ self.assert_compile(
+ q,
+ "SELECT anon_1.users_id AS anon_1_users_id, "
+ "anon_1.users_name AS anon_1_users_name, "
+ "anon_1.addresses_email_address AS "
+ "anon_1_addresses_email_address, "
+ "addresses_1.id AS addresses_1_id, "
+ "addresses_1.user_id AS addresses_1_user_id, "
+ "addresses_1.email_address AS addresses_1_email_address "
+ "FROM (SELECT DISTINCT users.id AS users_id, "
+ "users.name AS users_name, "
+ "addresses.email_address AS addresses_email_address "
+ "FROM users, addresses "
+ "ORDER BY users.name, addresses.email_address "
+ "LIMIT :param_1) AS anon_1 LEFT OUTER JOIN "
+ "addresses AS addresses_1 "
+ "ON anon_1.users_id = addresses_1.user_id "
+ "ORDER BY anon_1.users_name, "
+ "anon_1.addresses_email_address, addresses_1.id"
+ )
+
+ def test_columns_augmented_sql_three(self):
+ User, Address = self.classes.User, self.classes.Address
+
+ sess = create_session()
+
+ q = sess.query(User.id, User.name.label('foo'), Address.id).\
+ distinct(User.name).\
+ order_by(User.id, User.name, Address.email_address)
+
+ # no columns are added when DISTINCT ON is used
+ self.assert_compile(
+ q,
+ "SELECT DISTINCT ON (users.name) users.id AS users_id, "
+ "users.name AS foo, addresses.id AS addresses_id FROM users, "
+ "addresses ORDER BY users.id, users.name, addresses.email_address",
+ dialect='postgresql'
+ )
+
+ def test_columns_augmented_sql_four(self):
+ User, Address = self.classes.User, self.classes.Address
+
+ sess = create_session()
+
+ q = sess.query(User).join('addresses').\
+ distinct(Address.email_address). \
+ options(joinedload('addresses')).\
+ order_by(desc(Address.email_address)).limit(2)
+
+ # but for the subquery / eager load case, we still need to make
+ # the inner columns available for the ORDER BY even though its
+ # a DISTINCT ON
+ self.assert_compile(
+ q,
+ "SELECT anon_1.users_id AS anon_1_users_id, "
+ "anon_1.users_name AS anon_1_users_name, "
+ "anon_1.addresses_email_address AS "
+ "anon_1_addresses_email_address, "
+ "addresses_1.id AS addresses_1_id, "
+ "addresses_1.user_id AS addresses_1_user_id, "
+ "addresses_1.email_address AS addresses_1_email_address "
+ "FROM (SELECT DISTINCT ON (addresses.email_address) "
+ "users.id AS users_id, users.name AS users_name, "
+ "addresses.email_address AS addresses_email_address "
+ "FROM users JOIN addresses ON users.id = addresses.user_id "
+ "ORDER BY addresses.email_address DESC "
+ "LIMIT %(param_1)s) AS anon_1 "
+ "LEFT OUTER JOIN addresses AS addresses_1 "
+ "ON anon_1.users_id = addresses_1.user_id "
+ "ORDER BY anon_1.addresses_email_address DESC, addresses_1.id",
+ dialect='postgresql'
+ )
+
class PrefixWithTest(QueryTest, AssertsCompiledSQL):