5 files changed, 229 insertions, 23 deletions
diff --git a/doc/build/changelog/changelog_11.rst b/doc/build/changelog/changelog_11.rst
index 5c3ad7163..2473a02a2 100644
--- a/doc/build/changelog/changelog_11.rst
+++ b/doc/build/changelog/changelog_11.rst
@@ -22,6 +22,25 @@
     :version: 1.1.0b1
 
     .. change::
+        :tags: bug, orm
+        :tickets: 3641
+
+        A refinement to the logic which adds columns to the resulting SQL when
+        :meth:`.Query.distinct` is combined with :meth:`.Query.order_by` such
+        that columns which are already present will not be added
+        a second time, even if they are labeled with a different name.
+        Regardless of this change, the extra columns added to the SQL have
+        never been returned in the final result, so this change only impacts
+        the string form of the statement as well as its behavior when used in
+        a Core execution context.   Additionally, columns are no longer added
+        when the DISTINCT ON format is used, provided the query is not
+        wrapped inside a subquery due to joined eager loading.
+
+        .. seealso::
+
+            :ref:`change_3641`
+
+    .. change::
         :tags: feature, sql
         :tickets: 3292, 3095
 
diff --git a/doc/build/changelog/migration_11.rst b/doc/build/changelog/migration_11.rst
index 07223be34..3d65ede80 100644
--- a/doc/build/changelog/migration_11.rst
+++ b/doc/build/changelog/migration_11.rst
@@ -16,7 +16,7 @@ What's New in SQLAlchemy 1.1?
     some issues may be moved to later milestones in order to allow
     for a timely release.
 
-    Document last updated: January 29, 2016
+    Document last updated: Feburary 9, 2016
 
 Introduction
 ============
@@ -487,6 +487,45 @@ associated with any bound :class:`.Engine`, then the fallback to the
 
 :ticket:`3081`
 
+.. _change_3641:
+
+Columns no longer added redundantly with DISTINCT + ORDER BY
+------------------------------------------------------------
+
+A query such as the following will now augment only those columns
+that are missing from the SELECT list, without duplicates::
+
+    q = session.query(User.id, User.name.label('name')).\
+        distinct().\
+        order_by(User.id, User.name, User.fullname)
+
+Produces::
+
+    SELECT DISTINCT user.id AS a_id, user.name AS name,
+     user.fullname AS a_fullname
+    FROM a ORDER BY user.id, user.name, user.fullname
+
+Previously, it would produce::
+
+    SELECT DISTINCT user.id AS a_id, user.name AS name, user.name AS a_name,
+      user.fullname AS a_fullname
+    FROM a ORDER BY user.id, user.name, user.fullname
+
+Where above, the ``user.name`` column is added unnecessarily.  The results
+would not be affected, as the additional columns are not included in the
+result in any case, but the columns are unnecessary.
+
+Additionally, when the Postgresql DISTINCT ON format is used by passing
+expressions to :meth:`.Query.distinct`, the above "column adding" logic
+is disabled entirely.
+
+When the query is being bundled into a subquery for the purposes of
+joined eager loading, the "augment column list" rules are are necessarily
+more aggressive so that the ORDER BY can still be satisifed, so this case
+remains unchanged.
+
+:ticket:`3641`
+
 New Features and Improvements - Core
 ====================================
 
diff --git a/lib/sqlalchemy/orm/query.py b/lib/sqlalchemy/orm/query.py
index 08600c357..8a25f570a 100644
--- a/lib/sqlalchemy/orm/query.py
+++ b/lib/sqlalchemy/orm/query.py
@@ -3254,12 +3254,11 @@ class Query(object):
         # then append eager joins onto that
 
         if context.order_by:
-            order_by_col_expr = list(
-                chain(*[
-                    sql_util.unwrap_order_by(o)
-                    for o in context.order_by
-                ])
-            )
+            order_by_col_expr = \
+                sql_util.expand_column_list_from_order_by(
+                    context.primary_columns,
+                    context.order_by
+                )
         else:
             context.order_by = None
             order_by_col_expr = []
@@ -3319,15 +3318,12 @@ class Query(object):
         if not context.order_by:
             context.order_by = None
 
-        if self._distinct and context.order_by:
-            order_by_col_expr = list(
-                chain(*[
-                    sql_util.unwrap_order_by(o)
-                    for o in context.order_by
-                ])
-            )
-            context.primary_columns += order_by_col_expr
-
+        if self._distinct is True and context.order_by:
+            context.primary_columns += \
+                sql_util.expand_column_list_from_order_by(
+                    context.primary_columns,
+                    context.order_by
+                )
         context.froms += tuple(context.eager_joins.values())
 
         statement = sql.select(
diff --git a/lib/sqlalchemy/sql/util.py b/lib/sqlalchemy/sql/util.py
index 1dcf0ee66..7e294d85f 100644
--- a/lib/sqlalchemy/sql/util.py
+++ b/lib/sqlalchemy/sql/util.py
@@ -176,6 +176,28 @@ def unwrap_order_by(clause):
     return result
 
 
+def expand_column_list_from_order_by(collist, order_by):
+    """Given the columns clause and ORDER BY of a selectable,
+    return a list of column expressions that can be added to the collist
+    corresponding to the ORDER BY, without repeating those already
+    in the collist.
+
+    """
+    cols_already_present = set([
+        col.element if col._order_by_label_element is not None
+        else col for col in collist
+    ])
+
+    return [
+        col for col in
+        chain(*[
+            unwrap_order_by(o)
+            for o in order_by
+        ])
+        if col not in cols_already_present
+    ]
+
+
 def clause_is_present(clause, search):
     """Given a target clause and a second to search within, return True
     if the target is plainly present in the search without any
diff --git a/test/orm/test_query.py b/test/orm/test_query.py
index 6445ffefd..c4c62c319 100644
--- a/test/orm/test_query.py
+++ b/test/orm/test_query.py
@@ -2695,7 +2695,9 @@ class CountTest(QueryTest):
         eq_(q.distinct().count(), 3)
 
 
-class DistinctTest(QueryTest):
+class DistinctTest(QueryTest, AssertsCompiledSQL):
+    __dialect__ = 'default'
+
     def test_basic(self):
         User = self.classes.User
 
@@ -2709,19 +2711,22 @@ class DistinctTest(QueryTest):
             order_by(desc(User.name)).all()
         )
 
-    def test_joined(self):
-        """test that orderbys from a joined table get placed into the columns
-        clause when DISTINCT is used"""
-
+    def test_columns_augmented_roundtrip_one(self):
         User, Address = self.classes.User, self.classes.Address
 
         sess = create_session()
         q = sess.query(User).join('addresses').distinct(). \
             order_by(desc(Address.email_address))
 
-        assert [User(id=7), User(id=9), User(id=8)] == q.all()
+        eq_(
+            [User(id=7), User(id=9), User(id=8)],
+            q.all()
+        )
+
+    def test_columns_augmented_roundtrip_two(self):
+        User, Address = self.classes.User, self.classes.Address
 
-        sess.expunge_all()
+        sess = create_session()
 
         # test that it works on embedded joinedload/LIMIT subquery
         q = sess.query(User).join('addresses').distinct(). \
@@ -2739,6 +2744,131 @@ class DistinctTest(QueryTest):
             ] == q.all()
         self.assert_sql_count(testing.db, go, 1)
 
+    def test_columns_augmented_roundtrip_three(self):
+        User, Address = self.classes.User, self.classes.Address
+
+        sess = create_session()
+
+        q = sess.query(User.id, User.name.label('foo'), Address.id).\
+            filter(User.name == 'jack').\
+            distinct().\
+            order_by(User.id, User.name, Address.email_address)
+
+        # even though columns are added, they aren't in the result
+        eq_(
+            q.all(),
+            [(7, 'jack', 3), (7, 'jack', 4), (7, 'jack', 2),
+             (7, 'jack', 5), (7, 'jack', 1)]
+        )
+        for row in q:
+            eq_(row.keys(), ['id', 'foo', 'id'])
+
+    def test_columns_augmented_sql_one(self):
+        User, Address = self.classes.User, self.classes.Address
+
+        sess = create_session()
+
+        q = sess.query(User.id, User.name.label('foo'), Address.id).\
+            distinct().\
+            order_by(User.id, User.name, Address.email_address)
+
+        # Address.email_address is added because of DISTINCT,
+        # however User.id, User.name are not b.c. they're already there,
+        # even though User.name is labeled
+        self.assert_compile(
+            q,
+            "SELECT DISTINCT users.id AS users_id, users.name AS foo, "
+            "addresses.id AS addresses_id, "
+            "addresses.email_address AS addresses_email_address FROM users, "
+            "addresses ORDER BY users.id, users.name, addresses.email_address"
+        )
+
+    def test_columns_augmented_sql_two(self):
+        User, Address = self.classes.User, self.classes.Address
+
+        sess = create_session()
+
+        q = sess.query(User).\
+            options(joinedload(User.addresses)).\
+            distinct().\
+            order_by(User.name, Address.email_address).\
+            limit(5)
+
+        # addresses.email_address is added to inner query so that
+        # it is available in ORDER BY
+        self.assert_compile(
+            q,
+            "SELECT anon_1.users_id AS anon_1_users_id, "
+            "anon_1.users_name AS anon_1_users_name, "
+            "anon_1.addresses_email_address AS "
+            "anon_1_addresses_email_address, "
+            "addresses_1.id AS addresses_1_id, "
+            "addresses_1.user_id AS addresses_1_user_id, "
+            "addresses_1.email_address AS addresses_1_email_address "
+            "FROM (SELECT DISTINCT users.id AS users_id, "
+            "users.name AS users_name, "
+            "addresses.email_address AS addresses_email_address "
+            "FROM users, addresses "
+            "ORDER BY users.name, addresses.email_address "
+            "LIMIT :param_1) AS anon_1 LEFT OUTER JOIN "
+            "addresses AS addresses_1 "
+            "ON anon_1.users_id = addresses_1.user_id "
+            "ORDER BY anon_1.users_name, "
+            "anon_1.addresses_email_address, addresses_1.id"
+        )
+
+    def test_columns_augmented_sql_three(self):
+        User, Address = self.classes.User, self.classes.Address
+
+        sess = create_session()
+
+        q = sess.query(User.id, User.name.label('foo'), Address.id).\
+            distinct(User.name).\
+            order_by(User.id, User.name, Address.email_address)
+
+        # no columns are added when DISTINCT ON is used
+        self.assert_compile(
+            q,
+            "SELECT DISTINCT ON (users.name) users.id AS users_id, "
+            "users.name AS foo, addresses.id AS addresses_id FROM users, "
+            "addresses ORDER BY users.id, users.name, addresses.email_address",
+            dialect='postgresql'
+        )
+
+    def test_columns_augmented_sql_four(self):
+        User, Address = self.classes.User, self.classes.Address
+
+        sess = create_session()
+
+        q = sess.query(User).join('addresses').\
+            distinct(Address.email_address). \
+            options(joinedload('addresses')).\
+            order_by(desc(Address.email_address)).limit(2)
+
+        # but for the subquery / eager load case, we still need to make
+        # the inner columns available for the ORDER BY even though its
+        # a DISTINCT ON
+        self.assert_compile(
+            q,
+            "SELECT anon_1.users_id AS anon_1_users_id, "
+            "anon_1.users_name AS anon_1_users_name, "
+            "anon_1.addresses_email_address AS "
+            "anon_1_addresses_email_address, "
+            "addresses_1.id AS addresses_1_id, "
+            "addresses_1.user_id AS addresses_1_user_id, "
+            "addresses_1.email_address AS addresses_1_email_address "
+            "FROM (SELECT DISTINCT ON (addresses.email_address) "
+            "users.id AS users_id, users.name AS users_name, "
+            "addresses.email_address AS addresses_email_address "
+            "FROM users JOIN addresses ON users.id = addresses.user_id "
+            "ORDER BY addresses.email_address DESC  "
+            "LIMIT %(param_1)s) AS anon_1 "
+            "LEFT OUTER JOIN addresses AS addresses_1 "
+            "ON anon_1.users_id = addresses_1.user_id "
+            "ORDER BY anon_1.addresses_email_address DESC, addresses_1.id",
+            dialect='postgresql'
+        )
+
 
 class PrefixWithTest(QueryTest, AssertsCompiledSQL):