create second level deduping when use_labels is turned on

As of #4753 we allow duplicate columns. This creates some new problems: there can be duplicate columns in a subquery which are then not addressable on the outside because they are ambiguous (PostgreSQL has this behavior at least). Additionally, it creates situations where we are making an anon label of an anon label, which leaks into the query. New logic for generating anon labels handles this situation and also alters the .c collection of a subquery such that we only get the first column from the derived selectable that has a given name; the subsequent ones have a new deduping label with two underscores and are not exposed in .c. The dedupe logic when rendering the columns handles duplicate label names for different columns, vs. the same column repeated, as separate cases. Fixes: #4892 Change-Id: I929fbd8da14bcc239e0481c24bbd9b5ce826e8fa
author: Mike Bayer <mike_mp@zzzcomputing.com> 2019-10-05 18:27:44 -0400
committer: Mike Bayer <mike_mp@zzzcomputing.com> 2019-10-07 11:26:10 -0400
commit: e0396633e72bc09bd7cec715101d516ea87fa840 (patch)
tree: 9486b4566942af674e669eddc38c14bee1e2ecfd /lib/sqlalchemy/sql
parent: c6abd4766abb0396c9bf532d81d16226b970a35a (diff)
download: sqlalchemy-e0396633e72bc09bd7cec715101d516ea87fa840.tar.gz
2 files changed, 62 insertions, 7 deletions
diff --git a/lib/sqlalchemy/sql/elements.py b/lib/sqlalchemy/sql/elements.py
index 8ee157b6f..b6462b334 100644
--- a/lib/sqlalchemy/sql/elements.py
+++ b/lib/sqlalchemy/sql/elements.py
@@ -878,7 +878,13 @@ class ColumnElement(
         while self._is_clone_of is not None:
             self = self._is_clone_of
 
-        return _anonymous_label("%%(%d %s)s" % (id(self), seed or "anon"))
+        # as of 1.4 anonymous label for ColumnElement uses hash(), not id(),
+        # as the identifier, because a column and its annotated version are
+        # the same thing in a SQL statement
+        if isinstance(seed, _anonymous_label):
+            return _anonymous_label("%s%%(%d %s)s" % (seed, hash(self), ""))
+
+        return _anonymous_label("%%(%d %s)s" % (hash(self), seed or "anon"))
 
     @util.memoized_property
     def anon_label(self):
@@ -900,6 +906,10 @@ class ColumnElement(
     def _label_anon_label(self):
         return self._anon_label(getattr(self, "_label", None))
 
+    @util.memoized_property
+    def _dedupe_label_anon_label(self):
+        return self._anon_label(getattr(self, "_label", "anon") + "_")
+
 
 class WrapsColumnExpression(object):
     """Mixin that defines a :class:`.ColumnElement` as a wrapper with special
diff --git a/lib/sqlalchemy/sql/selectable.py b/lib/sqlalchemy/sql/selectable.py
index 33ba95717..ddbcdf91d 100644
--- a/lib/sqlalchemy/sql/selectable.py
+++ b/lib/sqlalchemy/sql/selectable.py
@@ -4174,33 +4174,78 @@ class Select(
             (name_for_col(c), c) for c in cols
         ).as_immutable()
 
-    @_memoized_property
-    def _columns_plus_names(self):
+    def _generate_columns_plus_names(self, anon_for_dupe_key):
         cols = _select_iterables(self._raw_columns)
 
+        # when use_labels is on:
+        # in all cases == if we see the same label name, use _label_anon_label
+        # for subsequent occurrences of that label
+        #
+        # anon_for_dupe_key == if we see the same column object multiple
+        # times under a particular name, whether it's the _label name or the
+        # anon label, apply _dedupe_label_anon_label to the subsequent
+        # occurrences of it.
+
         if self.use_labels:
-            names = set()
+            names = {}
 
             def name_for_col(c):
                 if c._label is None or not c._render_label_in_columns_clause:
                     return (None, c)
 
                 name = c._label
+
                 if name in names:
-                    name = c._label_anon_label
+                    # when looking to see if names[name] is the same column as
+                    # c, use hash(), so that an annotated version of the column
+                    # is seen as the same as the non-annotated
+                    if hash(names[name]) != hash(c):
+
+                        # different column under the same name.  apply
+                        # disambiguating label
+                        name = c._label_anon_label
+
+                        if anon_for_dupe_key and name in names:
+                            # here, c._label_anon_label is definitely unique to
+                            # that column identity (or annotated version), so
+                            # this should always be true.
+                            # this is also an infrequent codepath because
+                            # you need two levels of duplication to be here
+                            assert hash(names[name]) == hash(c)
+
+                            # the column under the disambiguating label is
+                            # already present.  apply the "dedupe" label to
+                            # subsequent occurrences of the column so that the
+                            # original stays non-ambiguous
+                            name = c._dedupe_label_anon_label
+                        else:
+                            names[name] = c
+                    elif anon_for_dupe_key:
+                        # same column under the same name. apply the "dedupe"
+                        # label so that the original stays non-ambiguous
+                        name = c._dedupe_label_anon_label
                 else:
-                    names.add(name)
+                    names[name] = c
                 return name, c
 
             return [name_for_col(c) for c in cols]
         else:
             return [(None, c) for c in cols]
 
+    @_memoized_property
+    def _columns_plus_names(self):
+        """generate label names plus columns to render in a SELECT."""
+
+        return self._generate_columns_plus_names(True)
+
     def _generate_fromclause_column_proxies(self, subquery):
+        """generate column proxies to place in the exported .c collection
+        of a subquery."""
+
         keys_seen = set()
         prox = []
 
-        for name, c in self._columns_plus_names:
+        for name, c in self._generate_columns_plus_names(False):
             if not hasattr(c, "_make_proxy"):
                 continue
             if name is None:
author	Mike Bayer <mike_mp@zzzcomputing.com>	2019-10-05 18:27:44 -0400
committer	Mike Bayer <mike_mp@zzzcomputing.com>	2019-10-07 11:26:10 -0400
commit	e0396633e72bc09bd7cec715101d516ea87fa840 (patch)
tree	9486b4566942af674e669eddc38c14bee1e2ecfd /lib/sqlalchemy/sql
parent	c6abd4766abb0396c9bf532d81d16226b970a35a (diff)
download	sqlalchemy-e0396633e72bc09bd7cec715101d516ea87fa840.tar.gz