diff options
| author | Mike Bayer <mike_mp@zzzcomputing.com> | 2019-10-05 18:27:44 -0400 |
|---|---|---|
| committer | Mike Bayer <mike_mp@zzzcomputing.com> | 2019-10-07 11:26:10 -0400 |
| commit | e0396633e72bc09bd7cec715101d516ea87fa840 (patch) | |
| tree | 9486b4566942af674e669eddc38c14bee1e2ecfd /lib/sqlalchemy/sql | |
| parent | c6abd4766abb0396c9bf532d81d16226b970a35a (diff) | |
| download | sqlalchemy-e0396633e72bc09bd7cec715101d516ea87fa840.tar.gz | |
create second level deduping when use_labels is turned on
As of #4753 we allow duplicate columns. This creates a new
problem: there can be duplicate columns in a subquery
which are then not addressable on the outside because they
are ambiguous (PostgreSQL has this behavior at least). Additionally,
it creates situations where we are making an anon label of an
anon label, which leaks into the query.
New logic for generating anon labels handles this situation and
also alters the .c collection
of a subquery such that we only get the first column
from the derived selectable that has that name; the subsequent ones
have a new deduping label with two underscores and are not exposed
in .c. The dedupe logic when rendering the columns will handle
duplicate label names for different columns, vs. the same column
repeated, as separate cases.
Fixes: #4892
Change-Id: I929fbd8da14bcc239e0481c24bbd9b5ce826e8fa
Diffstat (limited to 'lib/sqlalchemy/sql')
| -rw-r--r-- | lib/sqlalchemy/sql/elements.py | 12 | ||||
| -rw-r--r-- | lib/sqlalchemy/sql/selectable.py | 57 |
2 files changed, 62 insertions, 7 deletions
diff --git a/lib/sqlalchemy/sql/elements.py b/lib/sqlalchemy/sql/elements.py index 8ee157b6f..b6462b334 100644 --- a/lib/sqlalchemy/sql/elements.py +++ b/lib/sqlalchemy/sql/elements.py @@ -878,7 +878,13 @@ class ColumnElement( while self._is_clone_of is not None: self = self._is_clone_of - return _anonymous_label("%%(%d %s)s" % (id(self), seed or "anon")) + # as of 1.4 anonymous label for ColumnElement uses hash(), not id(), + # as the identifier, because a column and its annotated version are + # the same thing in a SQL statement + if isinstance(seed, _anonymous_label): + return _anonymous_label("%s%%(%d %s)s" % (seed, hash(self), "")) + + return _anonymous_label("%%(%d %s)s" % (hash(self), seed or "anon")) @util.memoized_property def anon_label(self): @@ -900,6 +906,10 @@ class ColumnElement( def _label_anon_label(self): return self._anon_label(getattr(self, "_label", None)) + @util.memoized_property + def _dedupe_label_anon_label(self): + return self._anon_label(getattr(self, "_label", "anon") + "_") + class WrapsColumnExpression(object): """Mixin that defines a :class:`.ColumnElement` as a wrapper with special diff --git a/lib/sqlalchemy/sql/selectable.py b/lib/sqlalchemy/sql/selectable.py index 33ba95717..ddbcdf91d 100644 --- a/lib/sqlalchemy/sql/selectable.py +++ b/lib/sqlalchemy/sql/selectable.py @@ -4174,33 +4174,78 @@ class Select( (name_for_col(c), c) for c in cols ).as_immutable() - @_memoized_property - def _columns_plus_names(self): + def _generate_columns_plus_names(self, anon_for_dupe_key): cols = _select_iterables(self._raw_columns) + # when use_labels is on: + # in all cases == if we see the same label name, use _label_anon_label + # for subsequent occurences of that label + # + # anon_for_dupe_key == if we see the same column object multiple + # times under a particular name, whether it's the _label name or the + # anon label, apply _dedupe_label_anon_label to the subsequent + # occurrences of it. 
+ if self.use_labels: - names = set() + names = {} def name_for_col(c): if c._label is None or not c._render_label_in_columns_clause: return (None, c) name = c._label + if name in names: - name = c._label_anon_label + # when looking to see if names[name] is the same column as + # c, use hash(), so that an annotated version of the column + # is seen as the same as the non-annotated + if hash(names[name]) != hash(c): + + # different column under the same name. apply + # disambiguating label + name = c._label_anon_label + + if anon_for_dupe_key and name in names: + # here, c._label_anon_label is definitely unique to + # that column identity (or annotated version), so + # this should always be true. + # this is also an infrequent codepath because + # you need two levels of duplication to be here + assert hash(names[name]) == hash(c) + + # the column under the disambiguating label is + # already present. apply the "dedupe" label to + # subsequent occurrences of the column so that the + # original stays non-ambiguous + name = c._dedupe_label_anon_label + else: + names[name] = c + elif anon_for_dupe_key: + # same column under the same name. apply the "dedupe" + # label so that the original stays non-ambiguous + name = c._dedupe_label_anon_label else: - names.add(name) + names[name] = c return name, c return [name_for_col(c) for c in cols] else: return [(None, c) for c in cols] + @_memoized_property + def _columns_plus_names(self): + """generate label names plus columns to render in a SELECT.""" + + return self._generate_columns_plus_names(True) + def _generate_fromclause_column_proxies(self, subquery): + """generate column proxies to place in the exported .c collection + of a subquery.""" + keys_seen = set() prox = [] - for name, c in self._columns_plus_names: + for name, c in self._generate_columns_plus_names(False): if not hasattr(c, "_make_proxy"): continue if name is None: |
