diff options
| -rw-r--r-- | doc/build/changelog/changelog_08.rst | 19 | ||||
| -rw-r--r-- | lib/sqlalchemy/orm/relationships.py | 23 | ||||
| -rw-r--r-- | lib/sqlalchemy/orm/strategies.py | 26 | ||||
| -rw-r--r-- | test/orm/test_subquery_relations.py | 194 |
4 files changed, 257 insertions, 5 deletions
diff --git a/doc/build/changelog/changelog_08.rst b/doc/build/changelog/changelog_08.rst index f37c0ec51..fb407b86f 100644 --- a/doc/build/changelog/changelog_08.rst +++ b/doc/build/changelog/changelog_08.rst @@ -11,6 +11,25 @@ :version: 0.8.3 .. change:: + :tags: feature, orm + :tickets: 2836 + :versions: 0.9.0 + + Added new option to :func:`.relationship` ``distinct_target_key``. + This enables the subquery eager loader strategy to apply a DISTINCT + to the innermost SELECT subquery, to assist in the case where + duplicate rows are generated by the innermost query which corresponds + to this relationship (there's not yet a general solution to the issue + of dupe rows within subquery eager loading, however, when joins outside + of the innermost subquery produce dupes). When the flag + is set to ``True``, the DISTINCT is rendered unconditionally, and when + it is set to ``None``, DISTINCT is rendered if the innermost relationship + targets columns that do not comprise a full primary key. + The option defaults to False in 0.8 (e.g. off by default in all cases), + None in 0.9 (e.g. automatic by default). Thanks to Alexander + for help with this. + + .. change:: :tags: bug, mysql :tickets: 2515 :versions: 0.9.0 diff --git a/lib/sqlalchemy/orm/relationships.py b/lib/sqlalchemy/orm/relationships.py index 2393df26b..dbc37a4eb 100644 --- a/lib/sqlalchemy/orm/relationships.py +++ b/lib/sqlalchemy/orm/relationships.py @@ -103,6 +103,7 @@ class RelationshipProperty(StrategizedProperty): enable_typechecks=True, join_depth=None, comparator_factory=None, single_parent=False, innerjoin=False, + distinct_target_key=None, doc=None, active_history=False, cascade_backrefs=True, @@ -372,6 +373,27 @@ class RelationshipProperty(StrategizedProperty): or when the reference is one-to-one or a collection that is guaranteed to have one or at least one entry. + :param distinct_target_key=None: + Indicate if a "subquery" eager load should apply the DISTINCT + keyword to the innermost SELECT statement. When left as ``None``, + the DISTINCT keyword will be applied in those cases when the target + columns do not comprise the full primary key of the target table. + When set to ``True``, the DISTINCT keyword is applied to the innermost + SELECT unconditionally. + + It may be desirable to set this flag to False when the DISTINCT is + reducing performance of the innermost subquery beyond that of what + duplicate innermost rows may be causing. + + .. versionadded:: 0.8.3 - distinct_target_key allows the + subquery eager loader to apply a DISTINCT modifier to the + innermost SELECT. + + .. versionchanged:: 0.9.0 - distinct_target_key now defaults to + ``None``, so that the feature enables itself automatically for + those cases where the innermost query targets a non-unique + key. + :param join_depth: when non-``None``, an integer value indicating how many levels deep "eager" loaders should join on a self-referring or cyclical @@ -621,6 +643,7 @@ class RelationshipProperty(StrategizedProperty): self.enable_typechecks = enable_typechecks self.query_class = query_class self.innerjoin = innerjoin + self.distinct_target_key = distinct_target_key self.doc = doc self.active_history = active_history self.join_depth = join_depth diff --git a/lib/sqlalchemy/orm/strategies.py b/lib/sqlalchemy/orm/strategies.py index 6ca737c64..009bf74a4 100644 --- a/lib/sqlalchemy/orm/strategies.py +++ b/lib/sqlalchemy/orm/strategies.py @@ -692,7 +692,7 @@ class SubqueryLoader(AbstractRelationshipLoader): elif subq_path.contains_mapper(self.mapper): return - subq_mapper, leftmost_mapper, leftmost_attr = \ + subq_mapper, leftmost_mapper, leftmost_attr, leftmost_relationship = \ self._get_leftmost(subq_path) orig_query = context.attributes.get( @@ -703,7 +703,8 @@ class SubqueryLoader(AbstractRelationshipLoader): # produce a subquery from it. left_alias = self._generate_from_original_query( orig_query, leftmost_mapper, - leftmost_attr, entity.mapper + leftmost_attr, leftmost_relationship, + entity.mapper ) # generate another Query that will join the @@ -752,11 +753,12 @@ class SubqueryLoader(AbstractRelationshipLoader): leftmost_mapper._columntoproperty[c].class_attribute for c in leftmost_cols ] - return subq_mapper, leftmost_mapper, leftmost_attr + return subq_mapper, leftmost_mapper, leftmost_attr, leftmost_prop def _generate_from_original_query(self, orig_query, leftmost_mapper, - leftmost_attr, entity_mapper + leftmost_attr, leftmost_relationship, + entity_mapper ): # reformat the original query # to look only for significant columns @@ -767,8 +769,22 @@ class SubqueryLoader(AbstractRelationshipLoader): if not q._from_obj and entity_mapper.isa(leftmost_mapper): q._set_select_from([entity_mapper], False) + target_cols = q._adapt_col_list(leftmost_attr) + # select from the identity columns of the outer - q._set_entities(q._adapt_col_list(leftmost_attr)) + q._set_entities(target_cols) + + distinct_target_key = leftmost_relationship.distinct_target_key + + if distinct_target_key is True: + q._distinct = True + elif distinct_target_key is None: + # if target_cols refer to a non-primary key or only + # part of a composite primary key, set the q as distinct + for t in set(c.table for c in target_cols): + if not set(target_cols).issuperset(t.primary_key): + q._distinct = True + break if q._order_by is False: q._order_by = leftmost_mapper.order_by diff --git a/test/orm/test_subquery_relations.py b/test/orm/test_subquery_relations.py index a6cc37691..176a30078 100644 --- a/test/orm/test_subquery_relations.py +++ b/test/orm/test_subquery_relations.py @@ -1563,3 +1563,197 @@ class CyclicalInheritingEagerTestTwo(fixtures.DeclarativeMappedTest, d = session.query(Director).options(subqueryload('*')).first() assert len(list(session)) == 3 + +class SubqueryloadDistinctTest(fixtures.DeclarativeMappedTest, + testing.AssertsCompiledSQL): + __dialect__ = 'default' + + run_inserts = 'once' + run_deletes = None + + @classmethod + def setup_classes(cls): + Base = cls.DeclarativeBasic + + class Director(Base): + __tablename__ = 'director' + id = Column(Integer, primary_key=True, + test_needs_autoincrement=True) + name = Column(String(50)) + + class DirectorPhoto(Base): + __tablename__ = 'director_photo' + id = Column(Integer, primary_key=True, + test_needs_autoincrement=True) + path = Column(String(255)) + director_id = Column(Integer, ForeignKey('director.id')) + director = relationship(Director, backref="photos") + + class Movie(Base): + __tablename__ = 'movie' + id = Column(Integer, primary_key=True, test_needs_autoincrement=True) + director_id = Column(Integer, ForeignKey('director.id')) + director = relationship(Director, backref="movies") + title = Column(String(50)) + credits = relationship("Credit", backref="movie") + + class Credit(Base): + __tablename__ = 'credit' + id = Column(Integer, primary_key=True, test_needs_autoincrement=True) + movie_id = Column(Integer, ForeignKey('movie.id')) + + @classmethod + def insert_data(cls): + Movie = cls.classes.Movie + Director = cls.classes.Director + DirectorPhoto = cls.classes.DirectorPhoto + Credit = cls.classes.Credit + + d = Director(name='Woody Allen') + d.photos = [DirectorPhoto(path='/1.jpg'), + DirectorPhoto(path='/2.jpg')] + d.movies = [Movie(title='Manhattan', credits=[Credit(), Credit()]), + Movie(title='Sweet and Lowdown', credits=[Credit()])] + sess = create_session() + sess.add_all([d]) + sess.flush() + + def test_distinct_strategy_opt_m2o(self): + self._run_test_m2o(True, None) + self._run_test_m2o(False, None) + + def test_distinct_unrelated_opt_m2o(self): + self._run_test_m2o(None, True) + self._run_test_m2o(None, False) + + def _run_test_m2o(self, + director_strategy_level, + photo_strategy_level): + + # test where the innermost is m2o, e.g. + # Movie->director + + Movie = self.classes.Movie + Director = self.classes.Director + + Movie.director.property.distinct_target_key = director_strategy_level + Director.photos.property.distinct_target_key = photo_strategy_level + + # the DISTINCT is controlled by + # only the Movie->director relationship, *not* the + # Director.photos + expect_distinct = director_strategy_level in (True, None) + + s = create_session() + + q = ( + s.query(Movie) + .options( + subqueryload(Movie.director) + .subqueryload(Director.photos) + ) + ) + ctx = q._compile_context() + + q2 = ctx.attributes[ + ('subquery', (inspect(Movie), inspect(Movie).attrs.director)) + ] + self.assert_compile( + q2, + 'SELECT director.id AS director_id, ' + 'director.name AS director_name, ' + 'anon_1.movie_director_id AS anon_1_movie_director_id ' + 'FROM (SELECT%s movie.director_id AS movie_director_id ' + 'FROM movie) AS anon_1 ' + 'JOIN director ON director.id = anon_1.movie_director_id ' + 'ORDER BY anon_1.movie_director_id' % ( + " DISTINCT" if expect_distinct else "") + ) + + ctx2 = q2._compile_context() + result = s.execute(q2) + rows = result.fetchall() + + if expect_distinct: + eq_(rows, [ + (1, 'Woody Allen', 1), + ]) + else: + eq_(rows, [ + (1, 'Woody Allen', 1), (1, 'Woody Allen', 1), + ]) + + q3 = ctx2.attributes[ + ('subquery', (inspect(Director), inspect(Director).attrs.photos)) + ] + + self.assert_compile( + q3, + 'SELECT director_photo.id AS director_photo_id, ' + 'director_photo.path AS director_photo_path, ' + 'director_photo.director_id AS director_photo_director_id, ' + 'director_1.id AS director_1_id ' + 'FROM (SELECT%s movie.director_id AS movie_director_id ' + 'FROM movie) AS anon_1 ' + 'JOIN director AS director_1 ON director_1.id = anon_1.movie_director_id ' + 'JOIN director_photo ON director_1.id = director_photo.director_id ' + 'ORDER BY director_1.id' % ( + " DISTINCT" if expect_distinct else "") + ) + result = s.execute(q3) + rows = result.fetchall() + if expect_distinct: + eq_(rows, [ + (1, u'/1.jpg', 1, 1), + (2, u'/2.jpg', 1, 1), + ]) + else: + eq_(rows, [ + (1, u'/1.jpg', 1, 1), + (2, u'/2.jpg', 1, 1), + (1, u'/1.jpg', 1, 1), + (2, u'/2.jpg', 1, 1), + ]) + + + movies = q.all() + + # check number of persistent objects in session + eq_(len(list(s)), 5) + + def test_cant_do_distinct_in_joins(self): + """the DISTINCT feature here works when the m2o is in the innermost + mapper, but when we are just joining along relationships outside + of that, we can still have dupes, and there's no solution to that. + + """ + Movie = self.classes.Movie + Credit = self.classes.Credit + + s = create_session() + + q = ( + s.query(Credit) + .options( + subqueryload(Credit.movie) + .subqueryload(Movie.director) + ) + ) + + ctx = q._compile_context() + + q2 = ctx.attributes[ + ('subquery', (inspect(Credit), Credit.movie.property)) + ] + ctx2 = q2._compile_context() + q3 = ctx2.attributes[ + ('subquery', (inspect(Movie), Movie.director.property)) + ] + + result = s.execute(q3) + eq_( + result.fetchall(), + [ + (1, 'Woody Allen', 1), (1, 'Woody Allen', 1), + ] + )
\ No newline at end of file |
