add deterministic imv returning ordering using sentinel columns

Repaired a major shortcoming which was identified in the :ref:`engine_insertmanyvalues` performance optimization feature first introduced in the 2.0 series. This was a continuation of the change in 2.0.9 which disabled the SQL Server version of the feature due to a reliance in the ORM on apparent row ordering that is not guaranteed to take place. The fix applies new logic to all "insertmanyvalues" operations, which takes effect when a new parameter :paramref:`_dml.Insert.returning.sort_by_parameter_order` on the :meth:`_dml.Insert.returning` or :meth:`_dml.UpdateBase.return_defaults` methods, that through a combination of alternate SQL forms, direct correspondence of client side parameters, and in some cases downgrading to running row-at-a-time, will apply sorting to each batch of returned rows using correspondence to primary key or other unique values in each row which can be correlated to the input data. Performance impact is expected to be minimal as nearly all common primary key scenarios are suitable for parameter-ordered batching to be achieved for all backends other than SQLite, while "row-at-a-time" mode operates with a bare minimum of Python overhead compared to the very heavyweight approaches used in the 1.x series. For SQLite, there is no difference in performance when "row-at-a-time" mode is used. It's anticipated that with an efficient "row-at-a-time" INSERT with RETURNING batching capability, the "insertmanyvalues" feature can be later be more easily generalized to third party backends that include RETURNING support but not necessarily easy ways to guarantee a correspondence with parameter order. Fixes: #9618 References: #9603 Change-Id: I1d79353f5f19638f752936ba1c35e4dc235a8b7c
author: Mike Bayer <mike_mp@zzzcomputing.com> 2023-04-05 11:58:52 -0400
committer: Mike Bayer <mike_mp@zzzcomputing.com> 2023-04-21 11:30:40 -0400
commit: cf6872d3bdf1a8a9613e853694acc2b1e6f06f51 (patch)
tree: 3a4ee41ab8b48aea7ac1e275c2f553763ec28dad /lib/sqlalchemy/orm
parent: 63f51491c5f0cb22883c800a065d7c4b4c54774e (diff)
download: sqlalchemy-cf6872d3bdf1a8a9613e853694acc2b1e6f06f51.tar.gz
8 files changed, 129 insertions, 32 deletions
diff --git a/lib/sqlalchemy/orm/__init__.py b/lib/sqlalchemy/orm/__init__.py
index 69cd7f598..7d70d3c7f 100644
--- a/lib/sqlalchemy/orm/__init__.py
+++ b/lib/sqlalchemy/orm/__init__.py
@@ -32,6 +32,7 @@ from ._orm_constructors import deferred as deferred
 from ._orm_constructors import dynamic_loader as dynamic_loader
 from ._orm_constructors import join as join
 from ._orm_constructors import mapped_column as mapped_column
+from ._orm_constructors import orm_insert_sentinel as orm_insert_sentinel
 from ._orm_constructors import outerjoin as outerjoin
 from ._orm_constructors import query_expression as query_expression
 from ._orm_constructors import relationship as relationship
diff --git a/lib/sqlalchemy/orm/_orm_constructors.py b/lib/sqlalchemy/orm/_orm_constructors.py
index a0fa4da79..563fef3c5 100644
--- a/lib/sqlalchemy/orm/_orm_constructors.py
+++ b/lib/sqlalchemy/orm/_orm_constructors.py
@@ -42,6 +42,7 @@ from ..exc import InvalidRequestError
 from ..sql._typing import _no_kw
 from ..sql.base import _NoArg
 from ..sql.base import SchemaEventTarget
+from ..sql.schema import _InsertSentinelColumnDefault
 from ..sql.schema import SchemaConst
 from ..sql.selectable import FromClause
 from ..util.typing import Annotated
@@ -68,7 +69,7 @@ if TYPE_CHECKING:
     from ..sql._typing import _OnClauseArgument
     from ..sql._typing import _TypeEngineArgument
     from ..sql.elements import ColumnElement
-    from ..sql.schema import _ServerDefaultType
+    from ..sql.schema import _ServerDefaultArgument
     from ..sql.schema import FetchedValue
     from ..sql.selectable import Alias
     from ..sql.selectable import Subquery
@@ -124,7 +125,7 @@ def mapped_column(
     info: Optional[_InfoType] = None,
     onupdate: Optional[Any] = None,
     insert_default: Optional[Any] = _NoArg.NO_ARG,
-    server_default: Optional[_ServerDefaultType] = None,
+    server_default: Optional[_ServerDefaultArgument] = None,
     server_onupdate: Optional[FetchedValue] = None,
     active_history: bool = False,
     quote: Optional[bool] = None,
@@ -334,6 +335,56 @@ def mapped_column(
     )
 
 
+def orm_insert_sentinel(
+    name: Optional[str] = None,
+    type_: Optional[_TypeEngineArgument[Any]] = None,
+    *,
+    default: Optional[Any] = None,
+    omit_from_statements: bool = True,
+) -> MappedColumn[Any]:
+    """Provides a surrogate :func:`_orm.mapped_column` that generates
+    a so-called :term:`sentinel` column, allowing efficient bulk
+    inserts with deterministic RETURNING sorting for tables that don't
+    otherwise have qualifying primary key configurations.
+
+    Use of :func:`_orm.orm_insert_sentinel` is analogous to the use of the
+    :func:`_schema.insert_sentinel` construct within a Core
+    :class:`_schema.Table` construct.
+
+    Guidelines for adding this construct to a Declarative mapped class
+    are the same as that of the :func:`_schema.insert_sentinel` construct;
+    the database table itself also needs to have a column with this name
+    present.
+
+    For background on how this object is used, see the section
+    :ref:`engine_insertmanyvalues_sentinel_columns` as part of the
+    section :ref:`engine_insertmanyvalues`.
+
+    .. seealso::
+
+        :func:`_schema.insert_sentinel`
+
+        :ref:`engine_insertmanyvalues`
+
+        :ref:`engine_insertmanyvalues_sentinel_columns`
+
+
+    .. versionadded:: 2.0.10
+
+    """
+
+    return mapped_column(
+        name=name,
+        default=default
+        if default is not None
+        else _InsertSentinelColumnDefault(),
+        _omit_from_statements=omit_from_statements,
+        insert_sentinel=True,
+        use_existing_column=True,
+        nullable=True,
+    )
+
+
 @util.deprecated_params(
     **{
         arg: (
diff --git a/lib/sqlalchemy/orm/bulk_persistence.py b/lib/sqlalchemy/orm/bulk_persistence.py
index c096dc3e5..8388d3980 100644
--- a/lib/sqlalchemy/orm/bulk_persistence.py
+++ b/lib/sqlalchemy/orm/bulk_persistence.py
@@ -131,19 +131,24 @@ def _bulk_insert(
 
     return_result: Optional[cursor.CursorResult[Any]] = None
 
-    for table, super_mapper in base_mapper._sorted_tables.items():
-        if not mapper.isa(super_mapper) or table not in mapper._pks_by_table:
-            continue
+    mappers_to_run = [
+        (table, mp)
+        for table, mp in base_mapper._sorted_tables.items()
+        if table in mapper._pks_by_table
+    ]
+
+    if return_defaults:
+        # not used by new-style bulk inserts, only used for legacy
+        bookkeeping = True
+    elif len(mappers_to_run) > 1:
+        # if we have more than one table, mapper to run where we will be
+        # either horizontally splicing, or copying values between tables,
+        # we need the "bookkeeping" / deterministic returning order
+        bookkeeping = True
+    else:
+        bookkeeping = False
 
-        is_joined_inh_supertable = super_mapper is not mapper
-        bookkeeping = (
-            is_joined_inh_supertable
-            or return_defaults
-            or (
-                use_orm_insert_stmt is not None
-                and bool(use_orm_insert_stmt._returning)
-            )
-        )
+    for table, super_mapper in mappers_to_run:
 
         records = (
             (
@@ -173,6 +178,7 @@ def _bulk_insert(
                 render_nulls=render_nulls,
             )
         )
+
         result = persistence._emit_insert_statements(
             base_mapper,
             None,
@@ -187,6 +193,7 @@ def _bulk_insert(
             if not use_orm_insert_stmt._returning or return_result is None:
                 return_result = result
             elif result.returns_rows:
+                assert bookkeeping
                 return_result = return_result.splice_horizontally(result)
 
     if return_defaults and isstates:
@@ -507,9 +514,11 @@ class ORMDMLState(AbstractORMCompileState):
                 dml_level_statement = dml_level_statement.return_defaults(
                     # this is a little weird looking, but by passing
                     # primary key as the main list of cols, this tells
-                    # return_defaults to omit server-default cols.  Since
-                    # we have cols_to_return, just return what we asked for
-                    # (plus primary key, which ORM persistence needs since
+                    # return_defaults to omit server-default cols (and
+                    # actually all cols, due to some weird thing we should
+                    # clean up in crud.py).
+                    # Since we have cols_to_return, just return what we asked
+                    # for (plus primary key, which ORM persistence needs since
                     # we likely set bookkeeping=True here, which is another
                     # whole thing...).   We dont want to clutter the
                     # statement up with lots of other cols the user didn't
diff --git a/lib/sqlalchemy/orm/decl_api.py b/lib/sqlalchemy/orm/decl_api.py
index ed001023b..2f8289acf 100644
--- a/lib/sqlalchemy/orm/decl_api.py
+++ b/lib/sqlalchemy/orm/decl_api.py
@@ -241,6 +241,7 @@ class _declared_attr_common:
         self,
         fn: Callable[..., Any],
         cascading: bool = False,
+        quiet: bool = False,
     ):
         # suppport
         # @declared_attr
@@ -254,6 +255,7 @@ class _declared_attr_common:
 
         self.fget = fn
         self._cascading = cascading
+        self._quiet = quiet
         self.__doc__ = fn.__doc__
 
     def _collect_return_annotation(self) -> Optional[Type[Any]]:
diff --git a/lib/sqlalchemy/orm/decl_base.py b/lib/sqlalchemy/orm/decl_base.py
index beede0ddb..b7d6dd8cf 100644
--- a/lib/sqlalchemy/orm/decl_base.py
+++ b/lib/sqlalchemy/orm/decl_base.py
@@ -459,6 +459,7 @@ class _ClassScanMapperConfig(_MapperConfig):
         "mapper_args",
         "mapper_args_fn",
         "inherits",
+        "single",
         "allow_dataclass_fields",
         "dataclass_setup_arguments",
         "is_dataclass_prior_to_mapping",
@@ -483,6 +484,7 @@ class _ClassScanMapperConfig(_MapperConfig):
     table_args: Optional[_TableArgsType]
     mapper_args_fn: Optional[Callable[[], Dict[str, Any]]]
     inherits: Optional[Type[Any]]
+    single: bool
 
     is_dataclass_prior_to_mapping: bool
     allow_unmapped_annotations: bool
@@ -527,7 +529,7 @@ class _ClassScanMapperConfig(_MapperConfig):
         self.declared_columns = util.OrderedSet()
         self.column_ordering = {}
         self.column_copies = {}
-
+        self.single = False
         self.dataclass_setup_arguments = dca = getattr(
             self.cls, "_sa_apply_dc_transforms", None
         )
@@ -866,7 +868,7 @@ class _ClassScanMapperConfig(_MapperConfig):
                         # should only be __table__
                         continue
                 elif class_mapped:
-                    if _is_declarative_props(obj):
+                    if _is_declarative_props(obj) and not obj._quiet:
                         util.warn(
                             "Regular (i.e. not __special__) "
                             "attribute '%s.%s' uses @declared_attr, "
@@ -1783,6 +1785,10 @@ class _ClassScanMapperConfig(_MapperConfig):
 
         self.inherits = inherits
 
+        clsdict_view = self.clsdict_view
+        if "__table__" not in clsdict_view and self.tablename is None:
+            self.single = True
+
     def _setup_inheriting_columns(self, mapper_kw: _MapperKwArgs) -> None:
         table = self.local_table
         cls = self.cls
@@ -1809,6 +1815,7 @@ class _ClassScanMapperConfig(_MapperConfig):
             )
 
             if table is None:
+
                 # single table inheritance.
                 # ensure no table args
                 if table_args:
diff --git a/lib/sqlalchemy/orm/mapper.py b/lib/sqlalchemy/orm/mapper.py
index 81b66de03..731983ff4 100644
--- a/lib/sqlalchemy/orm/mapper.py
+++ b/lib/sqlalchemy/orm/mapper.py
@@ -3273,6 +3273,9 @@ class Mapper(
 
         """
 
+        if column is not None and sql_base._never_select_column(column):
+            return True
+
         # check for class-bound attributes and/or descriptors,
         # either local or from an inherited class
         # ignore dataclass field default values
diff --git a/lib/sqlalchemy/orm/persistence.py b/lib/sqlalchemy/orm/persistence.py
index a12156eb5..1af55df00 100644
--- a/lib/sqlalchemy/orm/persistence.py
+++ b/lib/sqlalchemy/orm/persistence.py
@@ -955,8 +955,13 @@ def _emit_insert_statements(
         # if a user query with RETURNING was passed, we definitely need
         # to use RETURNING.
         returning_is_required_anyway = bool(use_orm_insert_stmt._returning)
+        deterministic_results_reqd = (
+            returning_is_required_anyway
+            and use_orm_insert_stmt._sort_by_parameter_order
+        ) or bookkeeping
     else:
         returning_is_required_anyway = False
+        deterministic_results_reqd = bookkeeping
         cached_stmt = base_mapper._memo(("insert", table), table.insert)
         exec_opt = {"compiled_cache": base_mapper._compiled_cache}
 
@@ -1057,22 +1062,31 @@ def _emit_insert_statements(
             # know that we are using RETURNING in any case
 
             records = list(records)
-            if (
-                not hasvalue
-                and connection.dialect.insert_executemany_returning
-                and len(records) > 1
+
+            if returning_is_required_anyway or (
+                not hasvalue and len(records) > 1
             ):
-                do_executemany = True
-            elif returning_is_required_anyway:
-                if connection.dialect.insert_executemany_returning:
+                if (
+                    deterministic_results_reqd
+                    and connection.dialect.insert_executemany_returning_sort_by_parameter_order  # noqa: E501
+                ) or (
+                    not deterministic_results_reqd
+                    and connection.dialect.insert_executemany_returning
+                ):
                     do_executemany = True
-                else:
+                elif returning_is_required_anyway:
+                    if deterministic_results_reqd:
+                        dt = " with RETURNING and sort by parameter order"
+                    else:
+                        dt = " with RETURNING"
                     raise sa_exc.InvalidRequestError(
                         f"Can't use explicit RETURNING for bulk INSERT "
                         f"operation with "
                         f"{connection.dialect.dialect_description} backend; "
-                        f"executemany is not supported with RETURNING"
+                        f"executemany{dt} is not enabled for this dialect."
                     )
+                else:
+                    do_executemany = False
             else:
                 do_executemany = False
 
@@ -1084,13 +1098,19 @@ def _emit_insert_statements(
                     )
                 ):
                     statement = statement.return_defaults(
-                        *mapper._server_default_cols[table]
+                        *mapper._server_default_cols[table],
+                        sort_by_parameter_order=bookkeeping,
                     )
 
             if mapper.version_id_col is not None:
-                statement = statement.return_defaults(mapper.version_id_col)
+                statement = statement.return_defaults(
+                    mapper.version_id_col,
+                    sort_by_parameter_order=bookkeeping,
+                )
             elif do_executemany:
-                statement = statement.return_defaults(*table.primary_key)
+                statement = statement.return_defaults(
+                    *table.primary_key, sort_by_parameter_order=bookkeeping
+                )
 
             if do_executemany:
                 multiparams = [rec[2] for rec in records]
diff --git a/lib/sqlalchemy/orm/properties.py b/lib/sqlalchemy/orm/properties.py
index f00775874..916b9d901 100644
--- a/lib/sqlalchemy/orm/properties.py
+++ b/lib/sqlalchemy/orm/properties.py
@@ -665,7 +665,11 @@ class MappedColumn(
     ) -> None:
         column = self.column
 
-        if self._use_existing_column and decl_scan.inherits:
+        if (
+            self._use_existing_column
+            and decl_scan.inherits
+            and decl_scan.single
+        ):
             if decl_scan.is_deferred:
                 raise sa_exc.ArgumentError(
                     "Can't use use_existing_column with deferred mappers"
author	Mike Bayer <mike_mp@zzzcomputing.com>	2023-04-05 11:58:52 -0400
committer	Mike Bayer <mike_mp@zzzcomputing.com>	2023-04-21 11:30:40 -0400
commit	cf6872d3bdf1a8a9613e853694acc2b1e6f06f51 (patch)
tree	3a4ee41ab8b48aea7ac1e275c2f553763ec28dad /lib/sqlalchemy/orm
parent	63f51491c5f0cb22883c800a065d7c4b4c54774e (diff)
download	sqlalchemy-cf6872d3bdf1a8a9613e853694acc2b1e6f06f51.tar.gz