summaryrefslogtreecommitdiff
path: root/lib/sqlalchemy/sql/traversals.py
diff options
context:
space:
mode:
authorMike Bayer <mike_mp@zzzcomputing.com>2020-02-23 13:37:18 -0500
committerMike Bayer <mike_mp@zzzcomputing.com>2020-03-06 11:01:51 -0500
commit851fb8f5a661c66ee76308181118369c8c4df9e0 (patch)
treeb6c786e78e090752f5c0922d1f09d277ab94e365 /lib/sqlalchemy/sql/traversals.py
parentd72bda5ed23a46bcbf31d40684200dcb79012a33 (diff)
downloadsqlalchemy-851fb8f5a661c66ee76308181118369c8c4df9e0.tar.gz
Decouple compiler state from DML objects; make cacheable
Targeting select / insert / update / delete, the goal is to minimize overhead of construction and generative methods so that only the raw arguments passed are handled. An interim stage that converts the raw state into more compiler-ready state is added, which is analogous to the ORM QueryContext which will also be rolled in to be a similar concept, as is currently being prototyped in I19e05b3424b07114cce6c439b05198ac47f7ac10. The ORM update/delete BulkUD concept is also going to be rolled onto this idea. So while the compiler-ready state object, here called DMLState, looks a little thin, it's the base of a bigger pattern that will allow for ORM functionality to embed itself directly into the compiler, execution context, and result set objects. This change targets the DML objects, primarily focused on the values() method which is the most complex process. The work done by values() is minimized as much as possible while still being able to create a cache key. Additional computation is then offloaded to a new object ValuesState that is handled by the compiler. Architecturally, a big change here is that insert.values() and update.values() will generate BindParameter objects for the values now, which are then carefully received by crud.py so that they generate the expected names. This is so that the values() portion of these constructs is cacheable. For the "multi-values" version of Insert, this is all skipped and the plan right now is that a multi-values insert is not worth caching (can always be revisited). Using the coercions system in values() also gets us nicer validation for free, we can remove the NotAClauseElement thing from schema, and we also now require that scalar_subquery() be called for an insert/update that uses a SELECT as a column value; a 1.x deprecation path is added. The traversal system is then applied to the DML objects including tests so that they have traversal, cloning, and cache key support.
Cloning is not a use case for DML; however, having it present allows better validation of the structure within the tests. Special per-dialect DML is explicitly not cacheable at the moment, more as a proof of concept that third party DML constructs can exist as gracefully not-cacheable rather than producing an incomplete cache key. A few selected performance improvements have been added as well, simplifying the immutabledict.union() method and adding a new SQLCompiler function that can generate delimiter-separated clauses like WHERE and ORDER BY without having to build a ClauseList object at all. The use of ClauseList will be removed from Select in an upcoming commit. Overall, ClauseList is unnecessary for internal use and only adds overhead to statement construction and will likely be removed as much as possible except for explicit use of conjunctions like and_() and or_(). Change-Id: I408e0b8be91fddd77cf279da97f55020871f75a9
Diffstat (limited to 'lib/sqlalchemy/sql/traversals.py')
-rw-r--r--lib/sqlalchemy/sql/traversals.py218
1 files changed, 213 insertions, 5 deletions
diff --git a/lib/sqlalchemy/sql/traversals.py b/lib/sqlalchemy/sql/traversals.py
index 03ff7c439..c29a04ee0 100644
--- a/lib/sqlalchemy/sql/traversals.py
+++ b/lib/sqlalchemy/sql/traversals.py
@@ -200,6 +200,9 @@ class _CacheKey(ExtendedInternalTraversal):
attrname, inspect(obj), parent, anon_map, bindparams
)
def visit_string_list(self, attrname, obj, parent, anon_map, bindparams):
    """Return the cache-key fragment for an iterable of strings.

    The items of ``obj`` are frozen into a tuple. ``attrname``,
    ``parent``, ``anon_map`` and ``bindparams`` are part of the visitor
    signature but are not consulted here.
    """
    frozen = tuple(obj)
    return frozen
+
def visit_multi(self, attrname, obj, parent, anon_map, bindparams):
return (
attrname,
@@ -336,6 +339,25 @@ class _CacheKey(ExtendedInternalTraversal):
def visit_plain_dict(self, attrname, obj, parent, anon_map, bindparams):
    """Return the cache-key fragment for a plain dictionary.

    The result is ``(attrname, pairs)`` where ``pairs`` is a tuple of
    ``(key, value)`` 2-tuples ordered by sorted key, so the fragment does
    not depend on the dictionary's insertion order.
    """
    ordered_keys = sorted(obj)
    pairs = tuple((name, obj[name]) for name in ordered_keys)
    return attrname, pairs
def visit_dialect_options(self, attrname, obj, parent, anon_map, bindparams):
    """Return the cache-key fragment for a two-level options mapping.

    ``obj`` maps a dialect name to a mapping of option name -> value.
    Both levels are walked in sorted key order, producing
    ``(attrname, ((dialect_name, ((option, value), ...)), ...))``.
    """
    per_dialect = []
    for dialect_name in sorted(obj):
        options = obj[dialect_name]
        option_pairs = tuple(
            (option, options[option]) for option in sorted(options)
        )
        per_dialect.append((dialect_name, option_pairs))
    return attrname, tuple(per_dialect)
+
def visit_string_clauseelement_dict(
self, attrname, obj, parent, anon_map, bindparams
):
@@ -366,9 +388,13 @@ class _CacheKey(ExtendedInternalTraversal):
def visit_fromclause_canonical_column_collection(
self, attrname, obj, parent, anon_map, bindparams
):
+ # inlining into the internals of ColumnCollection
return (
attrname,
- tuple(col._gen_cache_key(anon_map, bindparams) for col in obj),
+ tuple(
+ col._gen_cache_key(anon_map, bindparams)
+ for k, col in obj._collection
+ ),
)
def visit_unknown_structure(
@@ -377,6 +403,48 @@ class _CacheKey(ExtendedInternalTraversal):
anon_map[NO_CACHE] = True
return ()
def visit_dml_ordered_values(
    self, attrname, obj, parent, anon_map, bindparams
):
    """Return the cache-key fragment for an ordered sequence of pairs.

    ``obj`` is iterated as ``(key, value)`` pairs, in order. A key that
    exposes ``__clause_element__`` contributes its own
    ``_gen_cache_key()`` result; any other key is used as-is. Every value
    contributes its ``_gen_cache_key()`` result.
    """
    entries = []
    for key, value in obj:
        if hasattr(key, "__clause_element__"):
            key_part = key._gen_cache_key(anon_map, bindparams)
        else:
            key_part = key
        value_part = value._gen_cache_key(anon_map, bindparams)
        entries.append((key_part, value_part))
    return attrname, tuple(entries)
+
def visit_dml_values(self, attrname, obj, parent, anon_map, bindparams):
    """Return the cache-key fragment for a dictionary of DML values.

    If any key exposes ``__clause_element__`` the ``NO_CACHE`` flag is set
    in ``anon_map`` and an empty tuple is returned. Otherwise the keys are
    sorted and each value contributes its ``_gen_cache_key()`` result,
    giving ``(attrname, ((key, value_key), ...))``.
    """
    has_expr_key = any(hasattr(key, "__clause_element__") for key in obj)
    if has_expr_key:
        # expression keys can't be sorted deterministically right now,
        # so the statement is flagged as not cacheable
        anon_map[NO_CACHE] = True
        return ()

    fragments = []
    for name in sorted(set(obj)):
        value_key = obj[name]._gen_cache_key(anon_map, bindparams)
        fragments.append((name, value_key))
    return attrname, tuple(fragments)
+
def visit_dml_multi_values(self, attrname, obj, parent, anon_map, bindparams):
    """Flag a multi-values structure as not cacheable.

    Sets ``NO_CACHE`` in ``anon_map`` and returns an empty tuple; ``obj``
    itself is not inspected.
    """
    empty_key = ()
    # multivalues are simply not cacheable right now
    anon_map[NO_CACHE] = True
    return empty_key
+
_cache_key_traversal_visitor = _CacheKey()
@@ -404,6 +472,70 @@ class _CopyInternals(InternalTraversal):
(key, clone(value, **kw)) for key, value in element.items()
)
def visit_dml_ordered_values(self, parent, element, clone=_clone, **kw):
    """Copy an ordered sequence of ``(key, value)`` pairs.

    Returns a new list of 2-tuples. A key is passed through ``clone`` only
    when it exposes ``__clause_element__``; every value is always cloned.
    ``kw`` is forwarded to each ``clone`` call.
    """
    copied = []
    for key, value in element:
        if hasattr(key, "__clause_element__"):
            new_key = clone(key, **kw)
        else:
            new_key = key
        copied.append((new_key, clone(value, **kw)))
    return copied
+
def visit_dml_values(self, parent, element, clone=_clone, **kw):
    """Copy a dictionary of DML values.

    ``element`` is a single dictionary, which is how the cache-key,
    get-children and comparison visitors for this structure in this file
    treat it (they sort its keys and index it by key). The previous
    implementation iterated it as a sequence of dictionaries, which for a
    dictionary means calling ``.items()`` on each key.

    Returns a new dictionary. A key is passed through ``clone`` only when
    it exposes ``__clause_element__``; every value is always cloned.
    ``kw`` is forwarded to each ``clone`` call.
    """
    copied = {}
    for key, value in element.items():
        if hasattr(key, "__clause_element__"):
            new_key = clone(key, **kw)
        else:
            new_key = key
        copied[new_key] = clone(value, **kw)
    return copied
+
def visit_dml_multi_values(self, parent, element, clone=_clone, **kw):
    """Copy the multi-values structure of a DML construct.

    ``element`` is a sequence of sequences; each inner item is either a
    list/tuple of ``(key, value)`` pairs or a dictionary. Keys and values
    are passed through ``clone`` only when they expose
    ``__clause_element__``; anything else is carried over unchanged.
    Pair sequences come back as lists of 2-tuples and dictionaries as new
    dictionaries, all wrapped in a list of lists.
    """

    def copy_if_expr(obj):
        # only objects exposing __clause_element__ are cloned
        if hasattr(obj, "__clause_element__"):
            return clone(obj, **kw)
        return obj

    def copy(elem):
        if isinstance(elem, (list, tuple)):
            return [
                (copy_if_expr(key), copy_if_expr(value))
                for key, value in elem
            ]
        elif isinstance(elem, dict):
            # iterate items(): iterating the dict itself yields only
            # keys, which cannot be unpacked into (key, value)
            return {
                copy_if_expr(key): copy_if_expr(value)
                for key, value in elem.items()
            }
        else:
            # TODO: use abc classes
            assert False

    return [
        [copy(sub_element) for sub_element in sequence]
        for sequence in element
    ]
+
_copy_internals = _CopyInternals()
@@ -442,6 +574,25 @@ class _GetChildren(InternalTraversal):
def visit_clauseelement_unordered_set(self, element, **kw):
    """Return the members of ``element`` as a tuple of children."""
    children = tuple(element)
    return children
def visit_dml_ordered_values(self, element, **kw):
    """Yield the children of an ordered sequence of ``(key, value)`` pairs.

    For each pair, the key is yielded first when it exposes
    ``__clause_element__``; the value is always yielded.
    """
    for pair in element:
        key, value = pair
        key_is_expr = hasattr(key, "__clause_element__")
        if key_is_expr:
            yield key
        yield value
+
def visit_dml_values(self, element, **kw):
    """Yield the children of a dictionary of DML values.

    Values stored under keys without ``__clause_element__`` come first,
    in sorted key order. Then, for every key that does expose
    ``__clause_element__``, the key and its value are yielded; those keys
    are visited in set iteration order.
    """
    expr_keys = set(
        key for key in element if hasattr(key, "__clause_element__")
    )
    # expr_keys is a subset of the keys, so the plain difference gives the
    # same result as a symmetric difference
    plain_keys = set(element) - expr_keys

    for name in sorted(plain_keys):
        yield element[name]
    for expr in expr_keys:
        yield expr
        yield element[expr]
+
def visit_dml_multi_values(self, element, **kw):
    """Report no children for a multi-values structure.

    ``element`` is not inspected; an empty tuple is always returned.
    """
    no_children = ()
    return no_children
+
_get_children = _GetChildren()
@@ -644,6 +795,9 @@ class TraversalComparatorStrategy(InternalTraversal, util.MemoizedSlots):
def visit_string(self, left_parent, left, right_parent, right, **kw):
    """Compare two string attributes with ``==`` and return the result."""
    same = left == right
    return same
def visit_string_list(self, left_parent, left, right_parent, right, **kw):
    """Compare two string-list attributes with ``==``.

    Returns the result of ``left == right`` directly; the parents and
    ``kw`` are not consulted.
    """
    same = left == right
    return same
+
def visit_anon_name(self, left_parent, left, right_parent, right, **kw):
return _resolve_name_for_compare(
left_parent, left, self.anon_map[0], **kw
@@ -663,6 +817,11 @@ class TraversalComparatorStrategy(InternalTraversal, util.MemoizedSlots):
def visit_plain_dict(self, left_parent, left, right_parent, right, **kw):
    """Compare two plain dictionaries with ``==`` and return the result."""
    same = left == right
    return same
def visit_dialect_options(self, left_parent, left, right_parent, right, **kw):
    """Compare two dialect-options structures with ``==``.

    Returns the result of ``left == right`` directly; the parents and
    ``kw`` are not consulted.
    """
    same = left == right
    return same
+
def visit_plain_obj(self, left_parent, left, right_parent, right, **kw):
    """Compare two plain objects with ``==`` and return the result."""
    same = left == right
    return same
@@ -713,6 +872,55 @@ class TraversalComparatorStrategy(InternalTraversal, util.MemoizedSlots):
):
raise NotImplementedError()
def visit_dml_ordered_values(
    self, left_parent, left, right_parent, right, **kw
):
    """Compare two ordered sequences of ``(key, value)`` pairs.

    The sequences are walked in lockstep; a shorter one is padded with
    ``(None, None)``. ``COMPARE_FAILED`` is returned at the first
    mismatch: one key exposing ``__clause_element__`` while the other
    does not, expression keys that fail ``compare_inner``, other keys
    that differ under ``!=``, or values that fail ``compare_inner``.
    Falls through (returning ``None``) when nothing mismatches.
    """
    pairs = util.zip_longest(left, right, fillvalue=(None, None))
    for left_pair, right_pair in pairs:
        lk, lv = left_pair
        rk, rv = right_pair

        left_is_expr = hasattr(lk, "__clause_element__")
        right_is_expr = hasattr(rk, "__clause_element__")
        if left_is_expr != right_is_expr:
            return COMPARE_FAILED

        if left_is_expr:
            if not self.compare_inner(lk, rk, **kw):
                return COMPARE_FAILED
        elif lk != rk:
            return COMPARE_FAILED

        if not self.compare_inner(lv, rv, **kw):
            return COMPARE_FAILED
+
def visit_dml_values(self, left_parent, left, right_parent, right, **kw):
    """Compare two dictionaries of DML values.

    Returns ``COMPARE_FAILED`` when either side is ``None``, when the
    lengths differ, when a key of ``left`` is missing from ``right``, or
    when the values under a shared key fail ``compare_inner``. Falls
    through (returning ``None``) when nothing mismatches.
    """
    if left is None or right is None:
        return COMPARE_FAILED
    if len(left) != len(right):
        return COMPARE_FAILED

    for key in left:
        left_value = left[key]
        if key not in right:
            return COMPARE_FAILED
        if not self.compare_inner(left_value, right[key], **kw):
            return COMPARE_FAILED
+
def visit_dml_multi_values(
    self, left_parent, left, right_parent, right, **kw
):
    """Compare two multi-values structures (sequences of sequences).

    Outer and inner sequences are walked in lockstep with ``None`` as
    padding. ``COMPARE_FAILED`` is returned when one outer sequence runs
    out before the other, or when ``visit_dml_values`` reports a failure
    for any pair of inner items. Falls through (returning ``None``) when
    nothing mismatches.
    """
    outer = util.zip_longest(left, right, fillvalue=None)
    for left_seq, right_seq in outer:
        if left_seq is None or right_seq is None:
            return COMPARE_FAILED

        inner = util.zip_longest(left_seq, right_seq, fillvalue=None)
        for left_item, right_item in inner:
            outcome = self.visit_dml_values(
                left_parent, left_item, right_parent, right_item, **kw
            )
            if outcome is COMPARE_FAILED:
                return COMPARE_FAILED
+
def compare_clauselist(self, left, right, **kw):
if left.operator is right.operator:
if operators.is_associative(left.operator):
@@ -731,11 +939,11 @@ class TraversalComparatorStrategy(InternalTraversal, util.MemoizedSlots):
if left.operator == right.operator:
if operators.is_commutative(left.operator):
if (
- compare(left.left, right.left, **kw)
- and compare(left.right, right.right, **kw)
+ self.compare_inner(left.left, right.left, **kw)
+ and self.compare_inner(left.right, right.right, **kw)
) or (
- compare(left.left, right.right, **kw)
- and compare(left.right, right.left, **kw)
+ self.compare_inner(left.left, right.right, **kw)
+ and self.compare_inner(left.right, right.left, **kw)
):
return ["operator", "negate", "left", "right"]
else: