From cf6872d3bdf1a8a9613e853694acc2b1e6f06f51 Mon Sep 17 00:00:00 2001 From: Mike Bayer Date: Wed, 5 Apr 2023 11:58:52 -0400 Subject: add deterministic imv returning ordering using sentinel columns Repaired a major shortcoming which was identified in the :ref:`engine_insertmanyvalues` performance optimization feature first introduced in the 2.0 series. This was a continuation of the change in 2.0.9 which disabled the SQL Server version of the feature due to a reliance in the ORM on apparent row ordering that is not guaranteed to take place. The fix applies new logic to all "insertmanyvalues" operations, which takes effect when a new parameter :paramref:`_dml.Insert.returning.sort_by_parameter_order` on the :meth:`_dml.Insert.returning` or :meth:`_dml.UpdateBase.return_defaults` methods, that through a combination of alternate SQL forms, direct correspondence of client side parameters, and in some cases downgrading to running row-at-a-time, will apply sorting to each batch of returned rows using correspondence to primary key or other unique values in each row which can be correlated to the input data. Performance impact is expected to be minimal as nearly all common primary key scenarios are suitable for parameter-ordered batching to be achieved for all backends other than SQLite, while "row-at-a-time" mode operates with a bare minimum of Python overhead compared to the very heavyweight approaches used in the 1.x series. For SQLite, there is no difference in performance when "row-at-a-time" mode is used. It's anticipated that with an efficient "row-at-a-time" INSERT with RETURNING batching capability, the "insertmanyvalues" feature can be later be more easily generalized to third party backends that include RETURNING support but not necessarily easy ways to guarantee a correspondence with parameter order. Fixes: #9618 References: #9603 Change-Id: I1d79353f5f19638f752936ba1c35e4dc235a8b7c --- lib/sqlalchemy/testing/config.py | 14 +++++++- lib/sqlalchemy/testing/fixtures.py | 53 ++++++++++++++++++++++++++++++ lib/sqlalchemy/testing/provision.py | 10 +++--- lib/sqlalchemy/testing/suite/test_types.py | 25 ++++++++++++++ 4 files changed, 97 insertions(+), 5 deletions(-) (limited to 'lib/sqlalchemy/testing') diff --git a/lib/sqlalchemy/testing/config.py b/lib/sqlalchemy/testing/config.py index 6c5ebd3dd..855918365 100644 --- a/lib/sqlalchemy/testing/config.py +++ b/lib/sqlalchemy/testing/config.py @@ -10,6 +10,7 @@ from __future__ import annotations import collections +import inspect import typing from typing import Any from typing import Callable @@ -185,7 +186,7 @@ class Variation: return [typ(casename, argname, case_names) for casename in case_names] -def variation(argname, cases): +def variation(argname_or_fn, cases=None): """a helper around testing.combinations that provides a single namespace that can be used as a switch. @@ -221,6 +222,17 @@ def variation(argname, cases): """ + if inspect.isfunction(argname_or_fn): + argname = argname_or_fn.__name__ + cases = argname_or_fn(None) + + @variation_fixture(argname, cases) + def go(self, request): + yield request.param + + return go + else: + argname = argname_or_fn cases_plus_limitations = [ entry if (isinstance(entry, tuple) and len(entry) == 2) diff --git a/lib/sqlalchemy/testing/fixtures.py b/lib/sqlalchemy/testing/fixtures.py index a8bc6c50a..fc1fa1483 100644 --- a/lib/sqlalchemy/testing/fixtures.py +++ b/lib/sqlalchemy/testing/fixtures.py @@ -10,12 +10,14 @@ from __future__ import annotations import itertools +import random import re import sys import sqlalchemy as sa from . import assertions from . import config +from . import mock from . import schema from .assertions import eq_ from .assertions import ne_ @@ -1003,3 +1005,54 @@ class CacheKeyFixture: range(len(case_a)), 2 ): self._compare_equal(case_a[a], case_b[b], compare_values) + + +def insertmanyvalues_fixture( + connection, randomize_rows=False, warn_on_downgraded=False +): + + dialect = connection.dialect + orig_dialect = dialect._deliver_insertmanyvalues_batches + orig_conn = connection._exec_insertmany_context + + class RandomCursor: + __slots__ = ("cursor",) + + def __init__(self, cursor): + self.cursor = cursor + + # only this method is called by the deliver method. + # by not having the other methods we assert that those aren't being + # used + + def fetchall(self): + rows = self.cursor.fetchall() + rows = list(rows) + random.shuffle(rows) + return rows + + def _deliver_insertmanyvalues_batches( + cursor, statement, parameters, generic_setinputsizes, context + ): + if randomize_rows: + cursor = RandomCursor(cursor) + for batch in orig_dialect( + cursor, statement, parameters, generic_setinputsizes, context + ): + if warn_on_downgraded and batch.is_downgraded: + util.warn("Batches were downgraded for sorted INSERT") + + yield batch + + def _exec_insertmany_context( + dialect, + context, + ): + with mock.patch.object( + dialect, + "_deliver_insertmanyvalues_batches", + new=_deliver_insertmanyvalues_batches, + ): + return orig_conn(dialect, context) + + connection._exec_insertmany_context = _exec_insertmany_context diff --git a/lib/sqlalchemy/testing/provision.py b/lib/sqlalchemy/testing/provision.py index 43821854c..66ccb3ee8 100644 --- a/lib/sqlalchemy/testing/provision.py +++ b/lib/sqlalchemy/testing/provision.py @@ -43,7 +43,7 @@ class register: return decorate - def __call__(self, cfg, *arg): + def __call__(self, cfg, *arg, **kw): if isinstance(cfg, str): url = sa_url.make_url(cfg) elif isinstance(cfg, sa_url.URL): @@ -52,9 +52,9 @@ class register: url = cfg.db.url backend = url.get_backend_name() if backend in self.fns: - return self.fns[backend](cfg, *arg) + return self.fns[backend](cfg, *arg, **kw) else: - return self.fns["*"](cfg, *arg) + return self.fns["*"](cfg, *arg, **kw) def create_follower_db(follower_ident): @@ -462,7 +462,9 @@ def set_default_schema_on_connection(cfg, dbapi_connection, schema_name): @register.init -def upsert(cfg, table, returning, set_lambda=None): +def upsert( + cfg, table, returning, *, set_lambda=None, sort_by_parameter_order=False +): """return the backends insert..on conflict / on dupe etc. construct. while we should add a backend-neutral upsert construct as well, such as diff --git a/lib/sqlalchemy/testing/suite/test_types.py b/lib/sqlalchemy/testing/suite/test_types.py index 44d174086..3ba7f8e89 100644 --- a/lib/sqlalchemy/testing/suite/test_types.py +++ b/lib/sqlalchemy/testing/suite/test_types.py @@ -1891,6 +1891,31 @@ class UuidTest(_LiteralRoundTripFixture, fixtures.TablesTest): filter_=lambda x: x.lower(), ) + @testing.requires.insert_returning + def test_uuid_returning(self, connection): + data = uuid.uuid4() + str_data = str(data) + uuid_table = self.tables.uuid_table + + result = connection.execute( + uuid_table.insert().returning( + uuid_table.c.uuid_data, + uuid_table.c.uuid_text_data, + uuid_table.c.uuid_data_nonnative, + uuid_table.c.uuid_text_data_nonnative, + ), + { + "id": 1, + "uuid_data": data, + "uuid_text_data": str_data, + "uuid_data_nonnative": data, + "uuid_text_data_nonnative": str_data, + }, + ) + row = result.first() + + eq_(row, (data, str_data, data, str_data)) + class NativeUUIDTest(UuidTest): __requires__ = ("uuid_data_type",) -- cgit v1.2.1