diff options
| author | Mike Bayer <mike_mp@zzzcomputing.com> | 2023-04-05 11:58:52 -0400 |
|---|---|---|
| committer | Mike Bayer <mike_mp@zzzcomputing.com> | 2023-04-21 11:30:40 -0400 |
| commit | cf6872d3bdf1a8a9613e853694acc2b1e6f06f51 (patch) | |
| tree | 3a4ee41ab8b48aea7ac1e275c2f553763ec28dad /lib/sqlalchemy/orm | |
| parent | 63f51491c5f0cb22883c800a065d7c4b4c54774e (diff) | |
| download | sqlalchemy-cf6872d3bdf1a8a9613e853694acc2b1e6f06f51.tar.gz | |
add deterministic imv returning ordering using sentinel columns
Repaired a major shortcoming which was identified in the
:ref:`engine_insertmanyvalues` performance optimization feature first
introduced in the 2.0 series. This was a continuation of the change in
2.0.9 which disabled the SQL Server version of the feature due to a
reliance in the ORM on apparent row ordering that is not guaranteed to take
place. The fix applies new logic to all "insertmanyvalues" operations,
which takes effect when a new parameter
:paramref:`_dml.Insert.returning.sort_by_parameter_order` on the
:meth:`_dml.Insert.returning` or :meth:`_dml.UpdateBase.return_defaults`
methods, that through a combination of alternate SQL forms, direct
correspondence of client side parameters, and in some cases downgrading to
running row-at-a-time, will apply sorting to each batch of returned rows
using correspondence to primary key or other unique values in each row
which can be correlated to the input data.
Performance impact is expected to be minimal as nearly all common primary
key scenarios are suitable for parameter-ordered batching to be
achieved for all backends other than SQLite, while "row-at-a-time"
mode operates with a bare minimum of Python overhead compared to the very
heavyweight approaches used in the 1.x series. For SQLite, there is no
difference in performance when "row-at-a-time" mode is used.
It's anticipated that with an efficient "row-at-a-time" INSERT with
RETURNING batching capability, the "insertmanyvalues" feature can be later
be more easily generalized to third party backends that include RETURNING
support but not necessarily easy ways to guarantee a correspondence
with parameter order.
Fixes: #9618
References: #9603
Change-Id: I1d79353f5f19638f752936ba1c35e4dc235a8b7c
Diffstat (limited to 'lib/sqlalchemy/orm')
| -rw-r--r-- | lib/sqlalchemy/orm/__init__.py | 1 | ||||
| -rw-r--r-- | lib/sqlalchemy/orm/_orm_constructors.py | 55 | ||||
| -rw-r--r-- | lib/sqlalchemy/orm/bulk_persistence.py | 39 | ||||
| -rw-r--r-- | lib/sqlalchemy/orm/decl_api.py | 2 | ||||
| -rw-r--r-- | lib/sqlalchemy/orm/decl_base.py | 11 | ||||
| -rw-r--r-- | lib/sqlalchemy/orm/mapper.py | 3 | ||||
| -rw-r--r-- | lib/sqlalchemy/orm/persistence.py | 44 | ||||
| -rw-r--r-- | lib/sqlalchemy/orm/properties.py | 6 |
8 files changed, 129 insertions, 32 deletions
diff --git a/lib/sqlalchemy/orm/__init__.py b/lib/sqlalchemy/orm/__init__.py index 69cd7f598..7d70d3c7f 100644 --- a/lib/sqlalchemy/orm/__init__.py +++ b/lib/sqlalchemy/orm/__init__.py @@ -32,6 +32,7 @@ from ._orm_constructors import deferred as deferred from ._orm_constructors import dynamic_loader as dynamic_loader from ._orm_constructors import join as join from ._orm_constructors import mapped_column as mapped_column +from ._orm_constructors import orm_insert_sentinel as orm_insert_sentinel from ._orm_constructors import outerjoin as outerjoin from ._orm_constructors import query_expression as query_expression from ._orm_constructors import relationship as relationship diff --git a/lib/sqlalchemy/orm/_orm_constructors.py b/lib/sqlalchemy/orm/_orm_constructors.py index a0fa4da79..563fef3c5 100644 --- a/lib/sqlalchemy/orm/_orm_constructors.py +++ b/lib/sqlalchemy/orm/_orm_constructors.py @@ -42,6 +42,7 @@ from ..exc import InvalidRequestError from ..sql._typing import _no_kw from ..sql.base import _NoArg from ..sql.base import SchemaEventTarget +from ..sql.schema import _InsertSentinelColumnDefault from ..sql.schema import SchemaConst from ..sql.selectable import FromClause from ..util.typing import Annotated @@ -68,7 +69,7 @@ if TYPE_CHECKING: from ..sql._typing import _OnClauseArgument from ..sql._typing import _TypeEngineArgument from ..sql.elements import ColumnElement - from ..sql.schema import _ServerDefaultType + from ..sql.schema import _ServerDefaultArgument from ..sql.schema import FetchedValue from ..sql.selectable import Alias from ..sql.selectable import Subquery @@ -124,7 +125,7 @@ def mapped_column( info: Optional[_InfoType] = None, onupdate: Optional[Any] = None, insert_default: Optional[Any] = _NoArg.NO_ARG, - server_default: Optional[_ServerDefaultType] = None, + server_default: Optional[_ServerDefaultArgument] = None, server_onupdate: Optional[FetchedValue] = None, active_history: bool = False, quote: Optional[bool] = None, @@ -334,6 +335,56 @@ def mapped_column( ) +def orm_insert_sentinel( + name: Optional[str] = None, + type_: Optional[_TypeEngineArgument[Any]] = None, + *, + default: Optional[Any] = None, + omit_from_statements: bool = True, +) -> MappedColumn[Any]: + """Provides a surrogate :func:`_orm.mapped_column` that generates + a so-called :term:`sentinel` column, allowing efficient bulk + inserts with deterministic RETURNING sorting for tables that don't + otherwise have qualifying primary key configurations. + + Use of :func:`_orm.orm_insert_sentinel` is analogous to the use of the + :func:`_schema.insert_sentinel` construct within a Core + :class:`_schema.Table` construct. + + Guidelines for adding this construct to a Declarative mapped class + are the same as that of the :func:`_schema.insert_sentinel` construct; + the database table itself also needs to have a column with this name + present. + + For background on how this object is used, see the section + :ref:`engine_insertmanyvalues_sentinel_columns` as part of the + section :ref:`engine_insertmanyvalues`. + + .. seealso:: + + :func:`_schema.insert_sentinel` + + :ref:`engine_insertmanyvalues` + + :ref:`engine_insertmanyvalues_sentinel_columns` + + + .. versionadded:: 2.0.10 + + """ + + return mapped_column( + name=name, + default=default + if default is not None + else _InsertSentinelColumnDefault(), + _omit_from_statements=omit_from_statements, + insert_sentinel=True, + use_existing_column=True, + nullable=True, + ) + + @util.deprecated_params( **{ arg: ( diff --git a/lib/sqlalchemy/orm/bulk_persistence.py b/lib/sqlalchemy/orm/bulk_persistence.py index c096dc3e5..8388d3980 100644 --- a/lib/sqlalchemy/orm/bulk_persistence.py +++ b/lib/sqlalchemy/orm/bulk_persistence.py @@ -131,19 +131,24 @@ def _bulk_insert( return_result: Optional[cursor.CursorResult[Any]] = None - for table, super_mapper in base_mapper._sorted_tables.items(): - if not mapper.isa(super_mapper) or table not in mapper._pks_by_table: - continue + mappers_to_run = [ + (table, mp) + for table, mp in base_mapper._sorted_tables.items() + if table in mapper._pks_by_table + ] + + if return_defaults: + # not used by new-style bulk inserts, only used for legacy + bookkeeping = True + elif len(mappers_to_run) > 1: + # if we have more than one table, mapper to run where we will be + # either horizontally splicing, or copying values between tables, + # we need the "bookkeeping" / deterministic returning order + bookkeeping = True + else: + bookkeeping = False - is_joined_inh_supertable = super_mapper is not mapper - bookkeeping = ( - is_joined_inh_supertable - or return_defaults - or ( - use_orm_insert_stmt is not None - and bool(use_orm_insert_stmt._returning) - ) - ) + for table, super_mapper in mappers_to_run: records = ( ( @@ -173,6 +178,7 @@ def _bulk_insert( render_nulls=render_nulls, ) ) + result = persistence._emit_insert_statements( base_mapper, None, @@ -187,6 +193,7 @@ def _bulk_insert( if not use_orm_insert_stmt._returning or return_result is None: return_result = result elif result.returns_rows: + assert bookkeeping return_result = return_result.splice_horizontally(result) if return_defaults and isstates: @@ -507,9 +514,11 @@ class ORMDMLState(AbstractORMCompileState): dml_level_statement = dml_level_statement.return_defaults( # this is a little weird looking, but by passing # primary key as the main list of cols, this tells - # return_defaults to omit server-default cols. Since - # we have cols_to_return, just return what we asked for - # (plus primary key, which ORM persistence needs since + # return_defaults to omit server-default cols (and + # actually all cols, due to some weird thing we should + # clean up in crud.py). + # Since we have cols_to_return, just return what we asked + # for (plus primary key, which ORM persistence needs since # we likely set bookkeeping=True here, which is another # whole thing...). We dont want to clutter the # statement up with lots of other cols the user didn't diff --git a/lib/sqlalchemy/orm/decl_api.py b/lib/sqlalchemy/orm/decl_api.py index ed001023b..2f8289acf 100644 --- a/lib/sqlalchemy/orm/decl_api.py +++ b/lib/sqlalchemy/orm/decl_api.py @@ -241,6 +241,7 @@ class _declared_attr_common: self, fn: Callable[..., Any], cascading: bool = False, + quiet: bool = False, ): # suppport # @declared_attr @@ -254,6 +255,7 @@ class _declared_attr_common: self.fget = fn self._cascading = cascading + self._quiet = quiet self.__doc__ = fn.__doc__ def _collect_return_annotation(self) -> Optional[Type[Any]]: diff --git a/lib/sqlalchemy/orm/decl_base.py b/lib/sqlalchemy/orm/decl_base.py index beede0ddb..b7d6dd8cf 100644 --- a/lib/sqlalchemy/orm/decl_base.py +++ b/lib/sqlalchemy/orm/decl_base.py @@ -459,6 +459,7 @@ class _ClassScanMapperConfig(_MapperConfig): "mapper_args", "mapper_args_fn", "inherits", + "single", "allow_dataclass_fields", "dataclass_setup_arguments", "is_dataclass_prior_to_mapping", @@ -483,6 +484,7 @@ class _ClassScanMapperConfig(_MapperConfig): table_args: Optional[_TableArgsType] mapper_args_fn: Optional[Callable[[], Dict[str, Any]]] inherits: Optional[Type[Any]] + single: bool is_dataclass_prior_to_mapping: bool allow_unmapped_annotations: bool @@ -527,7 +529,7 @@ class _ClassScanMapperConfig(_MapperConfig): self.declared_columns = util.OrderedSet() self.column_ordering = {} self.column_copies = {} - + self.single = False self.dataclass_setup_arguments = dca = getattr( self.cls, "_sa_apply_dc_transforms", None ) @@ -866,7 +868,7 @@ class _ClassScanMapperConfig(_MapperConfig): # should only be __table__ continue elif class_mapped: - if _is_declarative_props(obj): + if _is_declarative_props(obj) and not obj._quiet: util.warn( "Regular (i.e. not __special__) " "attribute '%s.%s' uses @declared_attr, " @@ -1783,6 +1785,10 @@ class _ClassScanMapperConfig(_MapperConfig): self.inherits = inherits + clsdict_view = self.clsdict_view + if "__table__" not in clsdict_view and self.tablename is None: + self.single = True + def _setup_inheriting_columns(self, mapper_kw: _MapperKwArgs) -> None: table = self.local_table cls = self.cls @@ -1809,6 +1815,7 @@ class _ClassScanMapperConfig(_MapperConfig): ) if table is None: + # single table inheritance. # ensure no table args if table_args: diff --git a/lib/sqlalchemy/orm/mapper.py b/lib/sqlalchemy/orm/mapper.py index 81b66de03..731983ff4 100644 --- a/lib/sqlalchemy/orm/mapper.py +++ b/lib/sqlalchemy/orm/mapper.py @@ -3273,6 +3273,9 @@ class Mapper( """ + if column is not None and sql_base._never_select_column(column): + return True + # check for class-bound attributes and/or descriptors, # either local or from an inherited class # ignore dataclass field default values diff --git a/lib/sqlalchemy/orm/persistence.py b/lib/sqlalchemy/orm/persistence.py index a12156eb5..1af55df00 100644 --- a/lib/sqlalchemy/orm/persistence.py +++ b/lib/sqlalchemy/orm/persistence.py @@ -955,8 +955,13 @@ def _emit_insert_statements( # if a user query with RETURNING was passed, we definitely need # to use RETURNING. returning_is_required_anyway = bool(use_orm_insert_stmt._returning) + deterministic_results_reqd = ( + returning_is_required_anyway + and use_orm_insert_stmt._sort_by_parameter_order + ) or bookkeeping else: returning_is_required_anyway = False + deterministic_results_reqd = bookkeeping cached_stmt = base_mapper._memo(("insert", table), table.insert) exec_opt = {"compiled_cache": base_mapper._compiled_cache} @@ -1057,22 +1062,31 @@ def _emit_insert_statements( # know that we are using RETURNING in any case records = list(records) - if ( - not hasvalue - and connection.dialect.insert_executemany_returning - and len(records) > 1 + + if returning_is_required_anyway or ( + not hasvalue and len(records) > 1 ): - do_executemany = True - elif returning_is_required_anyway: - if connection.dialect.insert_executemany_returning: + if ( + deterministic_results_reqd + and connection.dialect.insert_executemany_returning_sort_by_parameter_order # noqa: E501 + ) or ( + not deterministic_results_reqd + and connection.dialect.insert_executemany_returning + ): do_executemany = True - else: + elif returning_is_required_anyway: + if deterministic_results_reqd: + dt = " with RETURNING and sort by parameter order" + else: + dt = " with RETURNING" raise sa_exc.InvalidRequestError( f"Can't use explicit RETURNING for bulk INSERT " f"operation with " f"{connection.dialect.dialect_description} backend; " - f"executemany is not supported with RETURNING" + f"executemany{dt} is not enabled for this dialect." ) + else: + do_executemany = False else: do_executemany = False @@ -1084,13 +1098,19 @@ def _emit_insert_statements( ) ): statement = statement.return_defaults( - *mapper._server_default_cols[table] + *mapper._server_default_cols[table], + sort_by_parameter_order=bookkeeping, ) if mapper.version_id_col is not None: - statement = statement.return_defaults(mapper.version_id_col) + statement = statement.return_defaults( + mapper.version_id_col, + sort_by_parameter_order=bookkeeping, + ) elif do_executemany: - statement = statement.return_defaults(*table.primary_key) + statement = statement.return_defaults( + *table.primary_key, sort_by_parameter_order=bookkeeping + ) if do_executemany: multiparams = [rec[2] for rec in records] diff --git a/lib/sqlalchemy/orm/properties.py b/lib/sqlalchemy/orm/properties.py index f00775874..916b9d901 100644 --- a/lib/sqlalchemy/orm/properties.py +++ b/lib/sqlalchemy/orm/properties.py @@ -665,7 +665,11 @@ class MappedColumn( ) -> None: column = self.column - if self._use_existing_column and decl_scan.inherits: + if ( + self._use_existing_column + and decl_scan.inherits + and decl_scan.single + ): if decl_scan.is_deferred: raise sa_exc.ArgumentError( "Can't use use_existing_column with deferred mappers" |
