diff options
| author | Mike Bayer <mike_mp@zzzcomputing.com> | 2020-02-23 13:37:18 -0500 |
|---|---|---|
| committer | Mike Bayer <mike_mp@zzzcomputing.com> | 2020-03-06 11:01:51 -0500 |
| commit | 851fb8f5a661c66ee76308181118369c8c4df9e0 (patch) | |
| tree | b6c786e78e090752f5c0922d1f09d277ab94e365 /lib/sqlalchemy/sql/traversals.py | |
| parent | d72bda5ed23a46bcbf31d40684200dcb79012a33 (diff) | |
| download | sqlalchemy-851fb8f5a661c66ee76308181118369c8c4df9e0.tar.gz | |
Decouple compiler state from DML objects; make cacheable
Targeting select / insert / update / delete, the goal
is to minimize overhead of construction and generative methods
so that only the raw arguments passed are handled. An interim
stage that converts the raw state into more compiler-ready state
is added, which is analogous to the ORM QueryContext which will
also be rolled in to be a similar concept, as is currently
being prototyped in I19e05b3424b07114cce6c439b05198ac47f7ac10.
The ORM update/delete BulkUD concept is also going to be rolled
onto this idea. So while the compiler-ready state object,
here called DMLState, looks a little thin, it's the
base of a bigger pattern that will allow for ORM functionality
to embed itself directly into the compiler, execution
context, and result set objects.
This change targets the DML objects, primarily focused on the
values() method which is the most complex process. The
work done by values() is minimized as much as possible
while still being able to create a cache key. Additional
computation is then offloaded to a new object ValuesState
that is handled by the compiler.
Architecturally, a big change here is that insert.values()
and update.values() will generate BindParameter objects for
the values now, which are then carefully received by crud.py
so that they generate the expected names. This is so that
the values() portion of these constructs is cacheable.
For the "multi-values" version of Insert, this is all skipped
and the plan right now is that a multi-values insert is
not worth caching (can always be revisited).
Using the
coercions system in values() also gets us nicer validation
for free: we can remove the NotAClauseElement thing from
schema, and we also now require that scalar_subquery() be called
for an insert/update that uses a SELECT as a column value;
a 1.x deprecation path is added.
The traversal system is then applied to the DML objects
including tests so that they have traversal, cloning, and
cache key support. Cloning is not a use case for DML; however,
having it present allows better validation of the structure
within the tests.
Special per-dialect DML is explicitly not cacheable at the moment,
more as a proof of concept that third party DML constructs can
exist as gracefully not-cacheable rather than producing an
incomplete cache key.
A few selected performance improvements have been added as well,
simplifying the immutabledict.union() method and adding
a new SQLCompiler function that can generate delimiter-separated
clauses like WHERE and ORDER BY without having to build
a ClauseList object at all. The use of ClauseList will
be removed from Select in an upcoming commit. Overall,
ClauseList is unnecessary for internal use and only adds
overhead to statement construction and will likely be removed
as much as possible except for explicit use of conjunctions like
and_() and or_().
Change-Id: I408e0b8be91fddd77cf279da97f55020871f75a9
Diffstat (limited to 'lib/sqlalchemy/sql/traversals.py')
| -rw-r--r-- | lib/sqlalchemy/sql/traversals.py | 218 |
1 files changed, 213 insertions, 5 deletions
diff --git a/lib/sqlalchemy/sql/traversals.py b/lib/sqlalchemy/sql/traversals.py index 03ff7c439..c29a04ee0 100644 --- a/lib/sqlalchemy/sql/traversals.py +++ b/lib/sqlalchemy/sql/traversals.py @@ -200,6 +200,9 @@ class _CacheKey(ExtendedInternalTraversal): attrname, inspect(obj), parent, anon_map, bindparams ) + def visit_string_list(self, attrname, obj, parent, anon_map, bindparams): + return tuple(obj) + def visit_multi(self, attrname, obj, parent, anon_map, bindparams): return ( attrname, @@ -336,6 +339,25 @@ class _CacheKey(ExtendedInternalTraversal): def visit_plain_dict(self, attrname, obj, parent, anon_map, bindparams): return (attrname, tuple([(key, obj[key]) for key in sorted(obj)])) + def visit_dialect_options( + self, attrname, obj, parent, anon_map, bindparams + ): + return ( + attrname, + tuple( + ( + dialect_name, + tuple( + [ + (key, obj[dialect_name][key]) + for key in sorted(obj[dialect_name]) + ] + ), + ) + for dialect_name in sorted(obj) + ), + ) + def visit_string_clauseelement_dict( self, attrname, obj, parent, anon_map, bindparams ): @@ -366,9 +388,13 @@ class _CacheKey(ExtendedInternalTraversal): def visit_fromclause_canonical_column_collection( self, attrname, obj, parent, anon_map, bindparams ): + # inlining into the internals of ColumnCollection return ( attrname, - tuple(col._gen_cache_key(anon_map, bindparams) for col in obj), + tuple( + col._gen_cache_key(anon_map, bindparams) + for k, col in obj._collection + ), ) def visit_unknown_structure( @@ -377,6 +403,48 @@ class _CacheKey(ExtendedInternalTraversal): anon_map[NO_CACHE] = True return () + def visit_dml_ordered_values( + self, attrname, obj, parent, anon_map, bindparams + ): + return ( + attrname, + tuple( + ( + key._gen_cache_key(anon_map, bindparams) + if hasattr(key, "__clause_element__") + else key, + value._gen_cache_key(anon_map, bindparams), + ) + for key, value in obj + ), + ) + + def visit_dml_values(self, attrname, obj, parent, anon_map, bindparams): + + expr_values = {k 
for k in obj if hasattr(k, "__clause_element__")} + if expr_values: + # expr values can't be sorted deterministically right now, + # so no cache + anon_map[NO_CACHE] = True + return () + + str_values = expr_values.symmetric_difference(obj) + + return ( + attrname, + tuple( + (k, obj[k]._gen_cache_key(anon_map, bindparams)) + for k in sorted(str_values) + ), + ) + + def visit_dml_multi_values( + self, attrname, obj, parent, anon_map, bindparams + ): + # multivalues are simply not cacheable right now + anon_map[NO_CACHE] = True + return () + _cache_key_traversal_visitor = _CacheKey() @@ -404,6 +472,70 @@ class _CopyInternals(InternalTraversal): (key, clone(value, **kw)) for key, value in element.items() ) + def visit_dml_ordered_values(self, parent, element, clone=_clone, **kw): + # sequence of 2-tuples + return [ + ( + clone(key, **kw) + if hasattr(key, "__clause_element__") + else key, + clone(value, **kw), + ) + for key, value in element + ] + + def visit_dml_values(self, parent, element, clone=_clone, **kw): + # sequence of dictionaries + return [ + { + ( + clone(key, **kw) + if hasattr(key, "__clause_element__") + else key + ): clone(value, **kw) + for key, value in sub_element.items() + } + for sub_element in element + ] + + def visit_dml_multi_values(self, parent, element, clone=_clone, **kw): + # sequence of sequences, each sequence contains a list/dict/tuple + + def copy(elem): + if isinstance(elem, (list, tuple)): + return [ + ( + clone(key, **kw) + if hasattr(key, "__clause_element__") + else key, + clone(value, **kw) + if hasattr(value, "__clause_element__") + else value, + ) + for key, value in elem + ] + elif isinstance(elem, dict): + return { + ( + clone(key, **kw) + if hasattr(key, "__clause_element__") + else key + ): ( + clone(value, **kw) + if hasattr(value, "__clause_element__") + else value + ) + for key, value in elem + } + else: + # TODO: use abc classes + assert False + + return [ + [copy(sub_element) for sub_element in sequence] + for 
sequence in element + ] + _copy_internals = _CopyInternals() @@ -442,6 +574,25 @@ class _GetChildren(InternalTraversal): def visit_clauseelement_unordered_set(self, element, **kw): return tuple(element) + def visit_dml_ordered_values(self, element, **kw): + for k, v in element: + if hasattr(k, "__clause_element__"): + yield k + yield v + + def visit_dml_values(self, element, **kw): + expr_values = {k for k in element if hasattr(k, "__clause_element__")} + str_values = expr_values.symmetric_difference(element) + + for k in sorted(str_values): + yield element[k] + for k in expr_values: + yield k + yield element[k] + + def visit_dml_multi_values(self, element, **kw): + return () + _get_children = _GetChildren() @@ -644,6 +795,9 @@ class TraversalComparatorStrategy(InternalTraversal, util.MemoizedSlots): def visit_string(self, left_parent, left, right_parent, right, **kw): return left == right + def visit_string_list(self, left_parent, left, right_parent, right, **kw): + return left == right + def visit_anon_name(self, left_parent, left, right_parent, right, **kw): return _resolve_name_for_compare( left_parent, left, self.anon_map[0], **kw @@ -663,6 +817,11 @@ class TraversalComparatorStrategy(InternalTraversal, util.MemoizedSlots): def visit_plain_dict(self, left_parent, left, right_parent, right, **kw): return left == right + def visit_dialect_options( + self, left_parent, left, right_parent, right, **kw + ): + return left == right + def visit_plain_obj(self, left_parent, left, right_parent, right, **kw): return left == right @@ -713,6 +872,55 @@ class TraversalComparatorStrategy(InternalTraversal, util.MemoizedSlots): ): raise NotImplementedError() + def visit_dml_ordered_values( + self, left_parent, left, right_parent, right, **kw + ): + # sequence of tuple pairs + + for (lk, lv), (rk, rv) in util.zip_longest( + left, right, fillvalue=(None, None) + ): + lkce = hasattr(lk, "__clause_element__") + rkce = hasattr(rk, "__clause_element__") + if lkce != rkce: + return 
COMPARE_FAILED + elif lkce and not self.compare_inner(lk, rk, **kw): + return COMPARE_FAILED + elif not lkce and lk != rk: + return COMPARE_FAILED + elif not self.compare_inner(lv, rv, **kw): + return COMPARE_FAILED + + def visit_dml_values(self, left_parent, left, right_parent, right, **kw): + if left is None or right is None or len(left) != len(right): + return COMPARE_FAILED + + for lk in left: + lv = left[lk] + + if lk not in right: + return COMPARE_FAILED + rv = right[lk] + + if not self.compare_inner(lv, rv, **kw): + return COMPARE_FAILED + + def visit_dml_multi_values( + self, left_parent, left, right_parent, right, **kw + ): + for lseq, rseq in util.zip_longest(left, right, fillvalue=None): + if lseq is None or rseq is None: + return COMPARE_FAILED + + for ld, rd in util.zip_longest(lseq, rseq, fillvalue=None): + if ( + self.visit_dml_values( + left_parent, ld, right_parent, rd, **kw + ) + is COMPARE_FAILED + ): + return COMPARE_FAILED + def compare_clauselist(self, left, right, **kw): if left.operator is right.operator: if operators.is_associative(left.operator): @@ -731,11 +939,11 @@ class TraversalComparatorStrategy(InternalTraversal, util.MemoizedSlots): if left.operator == right.operator: if operators.is_commutative(left.operator): if ( - compare(left.left, right.left, **kw) - and compare(left.right, right.right, **kw) + self.compare_inner(left.left, right.left, **kw) + and self.compare_inner(left.right, right.right, **kw) ) or ( - compare(left.left, right.right, **kw) - and compare(left.right, right.left, **kw) + self.compare_inner(left.left, right.right, **kw) + and self.compare_inner(left.right, right.left, **kw) ): return ["operator", "negate", "left", "right"] else: |
