diff options
| -rw-r--r-- | doc/build/changelog/changelog_11.rst | 19 | ||||
| -rw-r--r-- | doc/build/changelog/migration_11.rst | 41 | ||||
| -rw-r--r-- | lib/sqlalchemy/orm/query.py | 26 | ||||
| -rw-r--r-- | lib/sqlalchemy/sql/util.py | 22 | ||||
| -rw-r--r-- | test/orm/test_query.py | 144 |
5 files changed, 229 insertions, 23 deletions
diff --git a/doc/build/changelog/changelog_11.rst b/doc/build/changelog/changelog_11.rst index 5c3ad7163..2473a02a2 100644 --- a/doc/build/changelog/changelog_11.rst +++ b/doc/build/changelog/changelog_11.rst @@ -22,6 +22,25 @@ :version: 1.1.0b1 .. change:: + :tags: bug, orm + :tickets: 3641 + + A refinement to the logic which adds columns to the resulting SQL when + :meth:`.Query.distinct` is combined with :meth:`.Query.order_by` such + that columns which are already present will not be added + a second time, even if they are labeled with a different name. + Regardless of this change, the extra columns added to the SQL have + never been returned in the final result, so this change only impacts + the string form of the statement as well as its behavior when used in + a Core execution context. Additionally, columns are no longer added + when the DISTINCT ON format is used, provided the query is not + wrapped inside a subquery due to joined eager loading. + + .. seealso:: + + :ref:`change_3641` + + .. change:: :tags: feature, sql :tickets: 3292, 3095 diff --git a/doc/build/changelog/migration_11.rst b/doc/build/changelog/migration_11.rst index 07223be34..3d65ede80 100644 --- a/doc/build/changelog/migration_11.rst +++ b/doc/build/changelog/migration_11.rst @@ -16,7 +16,7 @@ What's New in SQLAlchemy 1.1? some issues may be moved to later milestones in order to allow for a timely release. - Document last updated: January 29, 2016 + Document last updated: Feburary 9, 2016 Introduction ============ @@ -487,6 +487,45 @@ associated with any bound :class:`.Engine`, then the fallback to the :ticket:`3081` +.. _change_3641: + +Columns no longer added redundantly with DISTINCT + ORDER BY +------------------------------------------------------------ + +A query such as the following will now augment only those columns +that are missing from the SELECT list, without duplicates:: + + q = session.query(User.id, User.name.label('name')).\ + distinct().\ + order_by(User.id, User.name, User.fullname) + +Produces:: + + SELECT DISTINCT user.id AS a_id, user.name AS name, + user.fullname AS a_fullname + FROM a ORDER BY user.id, user.name, user.fullname + +Previously, it would produce:: + + SELECT DISTINCT user.id AS a_id, user.name AS name, user.name AS a_name, + user.fullname AS a_fullname + FROM a ORDER BY user.id, user.name, user.fullname + +Where above, the ``user.name`` column is added unnecessarily. The results +would not be affected, as the additional columns are not included in the +result in any case, but the columns are unnecessary. + +Additionally, when the Postgresql DISTINCT ON format is used by passing +expressions to :meth:`.Query.distinct`, the above "column adding" logic +is disabled entirely. + +When the query is being bundled into a subquery for the purposes of +joined eager loading, the "augment column list" rules are are necessarily +more aggressive so that the ORDER BY can still be satisifed, so this case +remains unchanged. + +:ticket:`3641` + New Features and Improvements - Core ==================================== diff --git a/lib/sqlalchemy/orm/query.py b/lib/sqlalchemy/orm/query.py index 08600c357..8a25f570a 100644 --- a/lib/sqlalchemy/orm/query.py +++ b/lib/sqlalchemy/orm/query.py @@ -3254,12 +3254,11 @@ class Query(object): # then append eager joins onto that if context.order_by: - order_by_col_expr = list( - chain(*[ - sql_util.unwrap_order_by(o) - for o in context.order_by - ]) - ) + order_by_col_expr = \ + sql_util.expand_column_list_from_order_by( + context.primary_columns, + context.order_by + ) else: context.order_by = None order_by_col_expr = [] @@ -3319,15 +3318,12 @@ class Query(object): if not context.order_by: context.order_by = None - if self._distinct and context.order_by: - order_by_col_expr = list( - chain(*[ - sql_util.unwrap_order_by(o) - for o in context.order_by - ]) - ) - context.primary_columns += order_by_col_expr - + if self._distinct is True and context.order_by: + context.primary_columns += \ + sql_util.expand_column_list_from_order_by( + context.primary_columns, + context.order_by + ) context.froms += tuple(context.eager_joins.values()) statement = sql.select( diff --git a/lib/sqlalchemy/sql/util.py b/lib/sqlalchemy/sql/util.py index 1dcf0ee66..7e294d85f 100644 --- a/lib/sqlalchemy/sql/util.py +++ b/lib/sqlalchemy/sql/util.py @@ -176,6 +176,28 @@ def unwrap_order_by(clause): return result +def expand_column_list_from_order_by(collist, order_by): + """Given the columns clause and ORDER BY of a selectable, + return a list of column expressions that can be added to the collist + corresponding to the ORDER BY, without repeating those already + in the collist. + + """ + cols_already_present = set([ + col.element if col._order_by_label_element is not None + else col for col in collist + ]) + + return [ + col for col in + chain(*[ + unwrap_order_by(o) + for o in order_by + ]) + if col not in cols_already_present + ] + + def clause_is_present(clause, search): """Given a target clause and a second to search within, return True if the target is plainly present in the search without any diff --git a/test/orm/test_query.py b/test/orm/test_query.py index 6445ffefd..c4c62c319 100644 --- a/test/orm/test_query.py +++ b/test/orm/test_query.py @@ -2695,7 +2695,9 @@ class CountTest(QueryTest): eq_(q.distinct().count(), 3) -class DistinctTest(QueryTest): +class DistinctTest(QueryTest, AssertsCompiledSQL): + __dialect__ = 'default' + def test_basic(self): User = self.classes.User @@ -2709,19 +2711,22 @@ class DistinctTest(QueryTest): order_by(desc(User.name)).all() ) - def test_joined(self): - """test that orderbys from a joined table get placed into the columns - clause when DISTINCT is used""" - + def test_columns_augmented_roundtrip_one(self): User, Address = self.classes.User, self.classes.Address sess = create_session() q = sess.query(User).join('addresses').distinct(). \ order_by(desc(Address.email_address)) - assert [User(id=7), User(id=9), User(id=8)] == q.all() + eq_( + [User(id=7), User(id=9), User(id=8)], + q.all() + ) + + def test_columns_augmented_roundtrip_two(self): + User, Address = self.classes.User, self.classes.Address - sess.expunge_all() + sess = create_session() # test that it works on embedded joinedload/LIMIT subquery q = sess.query(User).join('addresses').distinct(). \ @@ -2739,6 +2744,131 @@ class DistinctTest(QueryTest): ] == q.all() self.assert_sql_count(testing.db, go, 1) + def test_columns_augmented_roundtrip_three(self): + User, Address = self.classes.User, self.classes.Address + + sess = create_session() + + q = sess.query(User.id, User.name.label('foo'), Address.id).\ + filter(User.name == 'jack').\ + distinct().\ + order_by(User.id, User.name, Address.email_address) + + # even though columns are added, they aren't in the result + eq_( + q.all(), + [(7, 'jack', 3), (7, 'jack', 4), (7, 'jack', 2), + (7, 'jack', 5), (7, 'jack', 1)] + ) + for row in q: + eq_(row.keys(), ['id', 'foo', 'id']) + + def test_columns_augmented_sql_one(self): + User, Address = self.classes.User, self.classes.Address + + sess = create_session() + + q = sess.query(User.id, User.name.label('foo'), Address.id).\ + distinct().\ + order_by(User.id, User.name, Address.email_address) + + # Address.email_address is added because of DISTINCT, + # however User.id, User.name are not b.c. they're already there, + # even though User.name is labeled + self.assert_compile( + q, + "SELECT DISTINCT users.id AS users_id, users.name AS foo, " + "addresses.id AS addresses_id, " + "addresses.email_address AS addresses_email_address FROM users, " + "addresses ORDER BY users.id, users.name, addresses.email_address" + ) + + def test_columns_augmented_sql_two(self): + User, Address = self.classes.User, self.classes.Address + + sess = create_session() + + q = sess.query(User).\ + options(joinedload(User.addresses)).\ + distinct().\ + order_by(User.name, Address.email_address).\ + limit(5) + + # addresses.email_address is added to inner query so that + # it is available in ORDER BY + self.assert_compile( + q, + "SELECT anon_1.users_id AS anon_1_users_id, " + "anon_1.users_name AS anon_1_users_name, " + "anon_1.addresses_email_address AS " + "anon_1_addresses_email_address, " + "addresses_1.id AS addresses_1_id, " + "addresses_1.user_id AS addresses_1_user_id, " + "addresses_1.email_address AS addresses_1_email_address " + "FROM (SELECT DISTINCT users.id AS users_id, " + "users.name AS users_name, " + "addresses.email_address AS addresses_email_address " + "FROM users, addresses " + "ORDER BY users.name, addresses.email_address " + "LIMIT :param_1) AS anon_1 LEFT OUTER JOIN " + "addresses AS addresses_1 " + "ON anon_1.users_id = addresses_1.user_id " + "ORDER BY anon_1.users_name, " + "anon_1.addresses_email_address, addresses_1.id" + ) + + def test_columns_augmented_sql_three(self): + User, Address = self.classes.User, self.classes.Address + + sess = create_session() + + q = sess.query(User.id, User.name.label('foo'), Address.id).\ + distinct(User.name).\ + order_by(User.id, User.name, Address.email_address) + + # no columns are added when DISTINCT ON is used + self.assert_compile( + q, + "SELECT DISTINCT ON (users.name) users.id AS users_id, " + "users.name AS foo, addresses.id AS addresses_id FROM users, " + "addresses ORDER BY users.id, users.name, addresses.email_address", + dialect='postgresql' + ) + + def test_columns_augmented_sql_four(self): + User, Address = self.classes.User, self.classes.Address + + sess = create_session() + + q = sess.query(User).join('addresses').\ + distinct(Address.email_address). \ + options(joinedload('addresses')).\ + order_by(desc(Address.email_address)).limit(2) + + # but for the subquery / eager load case, we still need to make + # the inner columns available for the ORDER BY even though its + # a DISTINCT ON + self.assert_compile( + q, + "SELECT anon_1.users_id AS anon_1_users_id, " + "anon_1.users_name AS anon_1_users_name, " + "anon_1.addresses_email_address AS " + "anon_1_addresses_email_address, " + "addresses_1.id AS addresses_1_id, " + "addresses_1.user_id AS addresses_1_user_id, " + "addresses_1.email_address AS addresses_1_email_address " + "FROM (SELECT DISTINCT ON (addresses.email_address) " + "users.id AS users_id, users.name AS users_name, " + "addresses.email_address AS addresses_email_address " + "FROM users JOIN addresses ON users.id = addresses.user_id " + "ORDER BY addresses.email_address DESC " + "LIMIT %(param_1)s) AS anon_1 " + "LEFT OUTER JOIN addresses AS addresses_1 " + "ON anon_1.users_id = addresses_1.user_id " + "ORDER BY anon_1.addresses_email_address DESC, addresses_1.id", + dialect='postgresql' + ) + class PrefixWithTest(QueryTest, AssertsCompiledSQL): |
