add deterministic imv returning ordering using sentinel columns

Repaired a major shortcoming which was identified in the :ref:`engine_insertmanyvalues` performance optimization feature first introduced in the 2.0 series. This was a continuation of the change in 2.0.9 which disabled the SQL Server version of the feature due to a reliance in the ORM on apparent row ordering that is not guaranteed to take place. The fix applies new logic to all "insertmanyvalues" operations, which takes effect when a new parameter :paramref:`_dml.Insert.returning.sort_by_parameter_order` on the :meth:`_dml.Insert.returning` or :meth:`_dml.UpdateBase.return_defaults` methods, that through a combination of alternate SQL forms, direct correspondence of client side parameters, and in some cases downgrading to running row-at-a-time, will apply sorting to each batch of returned rows using correspondence to primary key or other unique values in each row which can be correlated to the input data. Performance impact is expected to be minimal as nearly all common primary key scenarios are suitable for parameter-ordered batching to be achieved for all backends other than SQLite, while "row-at-a-time" mode operates with a bare minimum of Python overhead compared to the very heavyweight approaches used in the 1.x series. For SQLite, there is no difference in performance when "row-at-a-time" mode is used. It's anticipated that with an efficient "row-at-a-time" INSERT with RETURNING batching capability, the "insertmanyvalues" feature can be later be more easily generalized to third party backends that include RETURNING support but not necessarily easy ways to guarantee a correspondence with parameter order. Fixes: #9618 References: #9603 Change-Id: I1d79353f5f19638f752936ba1c35e4dc235a8b7c
author: Mike Bayer <mike_mp@zzzcomputing.com> 2023-04-05 11:58:52 -0400
committer: Mike Bayer <mike_mp@zzzcomputing.com> 2023-04-21 11:30:40 -0400
commit: cf6872d3bdf1a8a9613e853694acc2b1e6f06f51 (patch)
tree: 3a4ee41ab8b48aea7ac1e275c2f553763ec28dad /lib/sqlalchemy/dialects
parent: 63f51491c5f0cb22883c800a065d7c4b4c54774e (diff)
download: sqlalchemy-cf6872d3bdf1a8a9613e853694acc2b1e6f06f51.tar.gz
9 files changed, 139 insertions, 58 deletions
diff --git a/lib/sqlalchemy/dialects/mssql/base.py b/lib/sqlalchemy/dialects/mssql/base.py
index f32665792..4a7e48ab8 100644
--- a/lib/sqlalchemy/dialects/mssql/base.py
+++ b/lib/sqlalchemy/dialects/mssql/base.py
@@ -251,12 +251,16 @@ The process for fetching this value has several variants:
 
     INSERT INTO t (x) OUTPUT inserted.id VALUES (?)
 
-  .. note::  SQLAlchemy 2.0 introduced the :ref:`engine_insertmanyvalues`
-     feature for SQL Server, which is used by default to optimize many-row
-     INSERT statements; however as of SQLAlchemy 2.0.9 this feature is
-     temporarily disabled for SQL Server, until adjustments can be made
-     so that the ORM unit of work does not rely upon the ordering of returned
-     rows.
+  As of SQLAlchemy 2.0, the :ref:`engine_insertmanyvalues` feature is also
+  used by default to optimize many-row INSERT statements; for SQL Server
+  the feature takes place for both RETURNING and-non RETURNING
+  INSERT statements.
+
+  .. versionchanged:: 2.0.10 The :ref:`engine_insertmanyvalues` feature for
+     SQL Server was temporarily disabled for SQLAlchemy version 2.0.9 due to
+     issues with row ordering. As of 2.0.10 the feature is re-enabled, with
+     special case handling for the unit of work's requirement for RETURNING to
+     be ordered.
 
 * When RETURNING is not available or has been disabled via
   ``implicit_returning=False``, either the ``scope_identity()`` function or
@@ -936,6 +940,7 @@ from ...sql import roles
 from ...sql import sqltypes
 from ...sql import util as sql_util
 from ...sql._typing import is_sql_compiler
+from ...sql.compiler import InsertmanyvaluesSentinelOpts
 from ...types import BIGINT
 from ...types import BINARY
 from ...types import CHAR
@@ -1514,6 +1519,39 @@ class MSUUid(sqltypes.Uuid):
 
                 return process
 
+    def _sentinel_value_resolver(self, dialect):
+        """Return a callable that will receive the uuid object or string
+        as it is normally passed to the DB in the parameter set, after
+        bind_processor() is called.  Convert this value to match
+        what it would be as coming back from an INSERT..OUTPUT inserted.
+
+        for the UUID type, there are four varieties of settings so here
+        we seek to convert to the string or UUID representation that comes
+        back from the driver.
+
+        """
+        character_based_uuid = (
+            not dialect.supports_native_uuid or not self.native_uuid
+        )
+
+        if character_based_uuid:
+            if self.native_uuid:
+                # for pyodbc, uuid.uuid() objects are accepted for incoming
+                # data, as well as strings. but the driver will always return
+                # uppercase strings in result sets.
+                def process(value):
+                    return str(value).upper()
+
+            else:
+
+                def process(value):
+                    return str(value)
+
+            return process
+        else:
+            # for pymssql, we get uuid.uuid() objects back.
+            return None
+
 
 class UNIQUEIDENTIFIER(sqltypes.Uuid[sqltypes._UUID_RETURN]):
     __visit_name__ = "UNIQUEIDENTIFIER"
@@ -2995,13 +3033,18 @@ class MSDialect(default.DefaultDialect):
     # may be changed at server inspection time for older SQL server versions
     supports_multivalues_insert = True
 
-    # disabled due to #9603
-    use_insertmanyvalues = False
+    use_insertmanyvalues = True
 
     # note pyodbc will set this to False if fast_executemany is set,
     # as of SQLAlchemy 2.0.9
     use_insertmanyvalues_wo_returning = True
 
+    insertmanyvalues_implicit_sentinel = (
+        InsertmanyvaluesSentinelOpts.AUTOINCREMENT
+        | InsertmanyvaluesSentinelOpts.IDENTITY
+        | InsertmanyvaluesSentinelOpts.USE_INSERT_FROM_SELECT
+    )
+
     # "The incoming request has too many parameters. The server supports a "
     # "maximum of 2100 parameters."
     # in fact you can have 2099 parameters.
@@ -3064,14 +3107,6 @@ class MSDialect(default.DefaultDialect):
 
         super().__init__(**opts)
 
-        if self.use_insertmanyvalues:
-            raise exc.ArgumentError(
-                "The use_insertmanyvalues feature on SQL Server is currently "
-                "not safe to use, as returned result rows may be returned in "
-                "random order.  Ensure use_insertmanyvalues is left at its "
-                "default of False (this setting changed in SQLAlchemy 2.0.9)"
-            )
-
         self._json_serializer = json_serializer
         self._json_deserializer = json_deserializer
 
diff --git a/lib/sqlalchemy/dialects/mssql/pyodbc.py b/lib/sqlalchemy/dialects/mssql/pyodbc.py
index 08c6bc48f..6af527e73 100644
--- a/lib/sqlalchemy/dialects/mssql/pyodbc.py
+++ b/lib/sqlalchemy/dialects/mssql/pyodbc.py
@@ -290,19 +290,6 @@ Pyodbc have been resolved as of SQLAlchemy 2.0.5. See the notes at
 Fast Executemany Mode
 ---------------------
 
-  .. note::  SQLAlchemy 2.0 introduced the :ref:`engine_insertmanyvalues`
-     feature for SQL Server, which is used by default to optimize many-row
-     INSERT statements; however as of SQLAlchemy 2.0.9 this feature had
-     to be turned off for SQL Server as the database does not support
-     deterministic RETURNING of INSERT rows for a multi-row INSERT statement.
-
-.. versionchanged:: 2.0.9 - ``fast_executemany`` executions will be used
-   for INSERT statements that don't include RETURNING, when
-   ``fast_executemany`` is set. Previously, ``use_insertmanyvalues`` would
-   cause ``fast_executemany`` to not be used in most cases.
-
-   ``use_insertmanyvalues`` is disabled for SQL Server overall as of 2.0.9.
-
 The PyODBC driver includes support for a "fast executemany" mode of execution
 which greatly reduces round trips for a DBAPI ``executemany()`` call when using
 Microsoft ODBC drivers, for **limited size batches that fit in memory**.  The
@@ -316,6 +303,12 @@ Server dialect supports this parameter by passing the
         "mssql+pyodbc://scott:tiger@mssql2017:1433/test?driver=ODBC+Driver+17+for+SQL+Server",
         fast_executemany=True)
 
+.. versionchanged:: 2.0.9 - the ``fast_executemany`` parameter now has its
+   intended effect of this PyODBC feature taking effect for all INSERT
+   statements that are executed with multiple parameter sets, which don't
+   include RETURNING.  Previously, SQLAlchemy 2.0's :term:`insertmanyvalues`
+   feature would cause ``fast_executemany`` to not be used in most cases
+   even if specified.
 
 .. versionadded:: 1.3
 
diff --git a/lib/sqlalchemy/dialects/mysql/base.py b/lib/sqlalchemy/dialects/mysql/base.py
index b5a5b2ca4..eb9ccc606 100644
--- a/lib/sqlalchemy/dialects/mysql/base.py
+++ b/lib/sqlalchemy/dialects/mysql/base.py
@@ -1062,6 +1062,7 @@ from ...sql import operators
 from ...sql import roles
 from ...sql import sqltypes
 from ...sql import util as sql_util
+from ...sql.compiler import InsertmanyvaluesSentinelOpts
 from ...types import BINARY
 from ...types import BLOB
 from ...types import BOOLEAN
@@ -2414,6 +2415,9 @@ class MySQLDialect(default.DefaultDialect):
     supports_default_metavalue = True
 
     use_insertmanyvalues: bool = True
+    insertmanyvalues_implicit_sentinel = (
+        InsertmanyvaluesSentinelOpts.ANY_AUTOINCREMENT
+    )
 
     supports_sane_rowcount = True
     supports_sane_multi_rowcount = False
diff --git a/lib/sqlalchemy/dialects/mysql/provision.py b/lib/sqlalchemy/dialects/mysql/provision.py
index 36a5e9f54..b7faf7712 100644
--- a/lib/sqlalchemy/dialects/mysql/provision.py
+++ b/lib/sqlalchemy/dialects/mysql/provision.py
@@ -82,7 +82,9 @@ def _mysql_temp_table_keyword_args(cfg, eng):
 
 
 @upsert.for_db("mariadb")
-def _upsert(cfg, table, returning, set_lambda=None):
+def _upsert(
+    cfg, table, returning, *, set_lambda=None, sort_by_parameter_order=False
+):
     from sqlalchemy.dialects.mysql import insert
 
     stmt = insert(table)
@@ -93,5 +95,7 @@ def _upsert(cfg, table, returning, set_lambda=None):
         pk1 = table.primary_key.c[0]
         stmt = stmt.on_duplicate_key_update({pk1.key: pk1})
 
-    stmt = stmt.returning(*returning)
+    stmt = stmt.returning(
+        *returning, sort_by_parameter_order=sort_by_parameter_order
+    )
     return stmt
diff --git a/lib/sqlalchemy/dialects/oracle/cx_oracle.py b/lib/sqlalchemy/dialects/oracle/cx_oracle.py
index 0fb6295fa..f6f10c476 100644
--- a/lib/sqlalchemy/dialects/oracle/cx_oracle.py
+++ b/lib/sqlalchemy/dialects/oracle/cx_oracle.py
@@ -543,6 +543,11 @@ class _OracleNumeric(sqltypes.Numeric):
         return handler
 
 
+class _OracleUUID(sqltypes.Uuid):
+    def get_dbapi_type(self, dbapi):
+        return dbapi.STRING
+
+
 class _OracleBinaryFloat(_OracleNumeric):
     def get_dbapi_type(self, dbapi):
         return dbapi.NATIVE_FLOAT
@@ -878,29 +883,9 @@ class OracleExecutionContext_cx_oracle(OracleExecutionContext):
             and is_sql_compiler(self.compiled)
             and self.compiled._oracle_returning
         ):
-            # create a fake cursor result from the out parameters. unlike
-            # get_out_parameter_values(), the result-row handlers here will be
-            # applied at the Result level
-
-            numcols = len(self.out_parameters)
-
-            # [stmt_result for stmt_result in outparam.values] == each
-            # statement in executemany
-            # [val for val in stmt_result] == each row for a particular
-            # statement
-            initial_buffer = list(
-                zip(
-                    *[
-                        [
-                            val
-                            for stmt_result in self.out_parameters[
-                                f"ret_{j}"
-                            ].values
-                            for val in stmt_result
-                        ]
-                        for j in range(numcols)
-                    ]
-                )
+
+            initial_buffer = self.fetchall_for_returning(
+                self.cursor, _internal=True
             )
 
             fetch_strategy = _cursor.FullyBufferedCursorFetchStrategy(
@@ -921,6 +906,43 @@ class OracleExecutionContext_cx_oracle(OracleExecutionContext):
 
         return c
 
+    def fetchall_for_returning(self, cursor, *, _internal=False):
+        compiled = self.compiled
+        if (
+            not _internal
+            and compiled is None
+            or not is_sql_compiler(compiled)
+            or not compiled._oracle_returning
+        ):
+            raise NotImplementedError(
+                "execution context was not prepared for Oracle RETURNING"
+            )
+
+        # create a fake cursor result from the out parameters. unlike
+        # get_out_parameter_values(), the result-row handlers here will be
+        # applied at the Result level
+
+        numcols = len(self.out_parameters)
+
+        # [stmt_result for stmt_result in outparam.values] == each
+        # statement in executemany
+        # [val for val in stmt_result] == each row for a particular
+        # statement
+        return list(
+            zip(
+                *[
+                    [
+                        val
+                        for stmt_result in self.out_parameters[
+                            f"ret_{j}"
+                        ].values
+                        for val in (stmt_result or ())
+                    ]
+                    for j in range(numcols)
+                ]
+            )
+        )
+
     def get_out_parameter_values(self, out_param_names):
         # this method should not be called when the compiler has
         # RETURNING as we've turned the has_out_parameters flag set to
@@ -942,6 +964,7 @@ class OracleDialect_cx_oracle(OracleDialect):
     supports_sane_multi_rowcount = True
 
     insert_executemany_returning = True
+    insert_executemany_returning_sort_by_parameter_order = True
     update_executemany_returning = True
     delete_executemany_returning = True
 
@@ -974,6 +997,7 @@ class OracleDialect_cx_oracle(OracleDialect):
             oracle.RAW: _OracleRaw,
             sqltypes.Unicode: _OracleUnicodeStringCHAR,
             sqltypes.NVARCHAR: _OracleUnicodeStringNCHAR,
+            sqltypes.Uuid: _OracleUUID,
             oracle.NCLOB: _OracleUnicodeTextNCLOB,
             oracle.ROWID: _OracleRowid,
         }
diff --git a/lib/sqlalchemy/dialects/postgresql/asyncpg.py b/lib/sqlalchemy/dialects/postgresql/asyncpg.py
index 2acc5fea3..d1a52afd6 100644
--- a/lib/sqlalchemy/dialects/postgresql/asyncpg.py
+++ b/lib/sqlalchemy/dialects/postgresql/asyncpg.py
@@ -131,6 +131,7 @@ from typing import TYPE_CHECKING
 
 from . import json
 from . import ranges
+from .array import ARRAY as PGARRAY
 from .base import _DECIMAL_TYPES
 from .base import _FLOAT_TYPES
 from .base import _INT_TYPES
@@ -157,6 +158,10 @@ if TYPE_CHECKING:
     from typing import Iterable
 
 
+class AsyncpgARRAY(PGARRAY):
+    render_bind_cast = True
+
+
 class AsyncpgString(sqltypes.String):
     render_bind_cast = True
 
@@ -904,6 +909,7 @@ class PGDialect_asyncpg(PGDialect):
         PGDialect.colspecs,
         {
             sqltypes.String: AsyncpgString,
+            sqltypes.ARRAY: AsyncpgARRAY,
             REGCONFIG: AsyncpgREGCONFIG,
             sqltypes.Time: AsyncpgTime,
             sqltypes.Date: AsyncpgDate,
diff --git a/lib/sqlalchemy/dialects/postgresql/base.py b/lib/sqlalchemy/dialects/postgresql/base.py
index 1ce5600e1..ad5e346b7 100644
--- a/lib/sqlalchemy/dialects/postgresql/base.py
+++ b/lib/sqlalchemy/dialects/postgresql/base.py
@@ -1468,6 +1468,7 @@ from ...sql import expression
 from ...sql import roles
 from ...sql import sqltypes
 from ...sql import util as sql_util
+from ...sql.compiler import InsertmanyvaluesSentinelOpts
 from ...sql.visitors import InternalTraversal
 from ...types import BIGINT
 from ...types import BOOLEAN
@@ -2911,6 +2912,12 @@ class PGDialect(default.DefaultDialect):
     postfetch_lastrowid = False
     use_insertmanyvalues = True
 
+    insertmanyvalues_implicit_sentinel = (
+        InsertmanyvaluesSentinelOpts.ANY_AUTOINCREMENT
+        | InsertmanyvaluesSentinelOpts.USE_INSERT_FROM_SELECT
+        | InsertmanyvaluesSentinelOpts.RENDER_SELECT_COL_CASTS
+    )
+
     supports_comments = True
     supports_constraint_comments = True
     supports_default_values = True
diff --git a/lib/sqlalchemy/dialects/postgresql/provision.py b/lib/sqlalchemy/dialects/postgresql/provision.py
index 582157604..87f1c9a4c 100644
--- a/lib/sqlalchemy/dialects/postgresql/provision.py
+++ b/lib/sqlalchemy/dialects/postgresql/provision.py
@@ -130,7 +130,9 @@ def prepare_for_drop_tables(config, connection):
 
 
 @upsert.for_db("postgresql")
-def _upsert(cfg, table, returning, set_lambda=None):
+def _upsert(
+    cfg, table, returning, *, set_lambda=None, sort_by_parameter_order=False
+):
     from sqlalchemy.dialects.postgresql import insert
 
     stmt = insert(table)
@@ -144,7 +146,9 @@ def _upsert(cfg, table, returning, set_lambda=None):
     else:
         stmt = stmt.on_conflict_do_nothing()
 
-    stmt = stmt.returning(*returning)
+    stmt = stmt.returning(
+        *returning, sort_by_parameter_order=sort_by_parameter_order
+    )
     return stmt
 
 
diff --git a/lib/sqlalchemy/dialects/sqlite/provision.py b/lib/sqlalchemy/dialects/sqlite/provision.py
index 3f86d5a60..2ed8253ab 100644
--- a/lib/sqlalchemy/dialects/sqlite/provision.py
+++ b/lib/sqlalchemy/dialects/sqlite/provision.py
@@ -174,7 +174,9 @@ def _reap_sqlite_dbs(url, idents):
 
 
 @upsert.for_db("sqlite")
-def _upsert(cfg, table, returning, set_lambda=None):
+def _upsert(
+    cfg, table, returning, *, set_lambda=None, sort_by_parameter_order=False
+):
     from sqlalchemy.dialects.sqlite import insert
 
     stmt = insert(table)
@@ -184,5 +186,7 @@ def _upsert(cfg, table, returning, set_lambda=None):
     else:
         stmt = stmt.on_conflict_do_nothing()
 
-    stmt = stmt.returning(*returning)
+    stmt = stmt.returning(
+        *returning, sort_by_parameter_order=sort_by_parameter_order
+    )
     return stmt
author	Mike Bayer <mike_mp@zzzcomputing.com>	2023-04-05 11:58:52 -0400
committer	Mike Bayer <mike_mp@zzzcomputing.com>	2023-04-21 11:30:40 -0400
commit	cf6872d3bdf1a8a9613e853694acc2b1e6f06f51 (patch)
tree	3a4ee41ab8b48aea7ac1e275c2f553763ec28dad /lib/sqlalchemy/dialects
parent	63f51491c5f0cb22883c800a065d7c4b4c54774e (diff)
download	sqlalchemy-cf6872d3bdf1a8a9613e853694acc2b1e6f06f51.tar.gz