summaryrefslogtreecommitdiff
path: root/lib/sqlalchemy/orm
diff options
context:
space:
mode:
authorMike Bayer <mike_mp@zzzcomputing.com>2023-04-05 11:58:52 -0400
committerMike Bayer <mike_mp@zzzcomputing.com>2023-04-21 11:30:40 -0400
commitcf6872d3bdf1a8a9613e853694acc2b1e6f06f51 (patch)
tree3a4ee41ab8b48aea7ac1e275c2f553763ec28dad /lib/sqlalchemy/orm
parent63f51491c5f0cb22883c800a065d7c4b4c54774e (diff)
downloadsqlalchemy-cf6872d3bdf1a8a9613e853694acc2b1e6f06f51.tar.gz
add deterministic imv returning ordering using sentinel columns
Repaired a major shortcoming which was identified in the :ref:`engine_insertmanyvalues` performance optimization feature first introduced in the 2.0 series. This was a continuation of the change in 2.0.9 which disabled the SQL Server version of the feature due to a reliance in the ORM on apparent row ordering that is not guaranteed to take place. The fix applies new logic to all "insertmanyvalues" operations, which takes effect when a new parameter :paramref:`_dml.Insert.returning.sort_by_parameter_order` on the :meth:`_dml.Insert.returning` or :meth:`_dml.UpdateBase.return_defaults` methods, that through a combination of alternate SQL forms, direct correspondence of client side parameters, and in some cases downgrading to running row-at-a-time, will apply sorting to each batch of returned rows using correspondence to primary key or other unique values in each row which can be correlated to the input data. Performance impact is expected to be minimal as nearly all common primary key scenarios are suitable for parameter-ordered batching to be achieved for all backends other than SQLite, while "row-at-a-time" mode operates with a bare minimum of Python overhead compared to the very heavyweight approaches used in the 1.x series. For SQLite, there is no difference in performance when "row-at-a-time" mode is used. It's anticipated that with an efficient "row-at-a-time" INSERT with RETURNING batching capability, the "insertmanyvalues" feature can be later be more easily generalized to third party backends that include RETURNING support but not necessarily easy ways to guarantee a correspondence with parameter order. Fixes: #9618 References: #9603 Change-Id: I1d79353f5f19638f752936ba1c35e4dc235a8b7c
Diffstat (limited to 'lib/sqlalchemy/orm')
-rw-r--r--lib/sqlalchemy/orm/__init__.py1
-rw-r--r--lib/sqlalchemy/orm/_orm_constructors.py55
-rw-r--r--lib/sqlalchemy/orm/bulk_persistence.py39
-rw-r--r--lib/sqlalchemy/orm/decl_api.py2
-rw-r--r--lib/sqlalchemy/orm/decl_base.py11
-rw-r--r--lib/sqlalchemy/orm/mapper.py3
-rw-r--r--lib/sqlalchemy/orm/persistence.py44
-rw-r--r--lib/sqlalchemy/orm/properties.py6
8 files changed, 129 insertions, 32 deletions
diff --git a/lib/sqlalchemy/orm/__init__.py b/lib/sqlalchemy/orm/__init__.py
index 69cd7f598..7d70d3c7f 100644
--- a/lib/sqlalchemy/orm/__init__.py
+++ b/lib/sqlalchemy/orm/__init__.py
@@ -32,6 +32,7 @@ from ._orm_constructors import deferred as deferred
from ._orm_constructors import dynamic_loader as dynamic_loader
from ._orm_constructors import join as join
from ._orm_constructors import mapped_column as mapped_column
+from ._orm_constructors import orm_insert_sentinel as orm_insert_sentinel
from ._orm_constructors import outerjoin as outerjoin
from ._orm_constructors import query_expression as query_expression
from ._orm_constructors import relationship as relationship
diff --git a/lib/sqlalchemy/orm/_orm_constructors.py b/lib/sqlalchemy/orm/_orm_constructors.py
index a0fa4da79..563fef3c5 100644
--- a/lib/sqlalchemy/orm/_orm_constructors.py
+++ b/lib/sqlalchemy/orm/_orm_constructors.py
@@ -42,6 +42,7 @@ from ..exc import InvalidRequestError
from ..sql._typing import _no_kw
from ..sql.base import _NoArg
from ..sql.base import SchemaEventTarget
+from ..sql.schema import _InsertSentinelColumnDefault
from ..sql.schema import SchemaConst
from ..sql.selectable import FromClause
from ..util.typing import Annotated
@@ -68,7 +69,7 @@ if TYPE_CHECKING:
from ..sql._typing import _OnClauseArgument
from ..sql._typing import _TypeEngineArgument
from ..sql.elements import ColumnElement
- from ..sql.schema import _ServerDefaultType
+ from ..sql.schema import _ServerDefaultArgument
from ..sql.schema import FetchedValue
from ..sql.selectable import Alias
from ..sql.selectable import Subquery
@@ -124,7 +125,7 @@ def mapped_column(
info: Optional[_InfoType] = None,
onupdate: Optional[Any] = None,
insert_default: Optional[Any] = _NoArg.NO_ARG,
- server_default: Optional[_ServerDefaultType] = None,
+ server_default: Optional[_ServerDefaultArgument] = None,
server_onupdate: Optional[FetchedValue] = None,
active_history: bool = False,
quote: Optional[bool] = None,
@@ -334,6 +335,56 @@ def mapped_column(
)
+def orm_insert_sentinel(
+ name: Optional[str] = None,
+ type_: Optional[_TypeEngineArgument[Any]] = None,
+ *,
+ default: Optional[Any] = None,
+ omit_from_statements: bool = True,
+) -> MappedColumn[Any]:
+ """Provides a surrogate :func:`_orm.mapped_column` that generates
+ a so-called :term:`sentinel` column, allowing efficient bulk
+ inserts with deterministic RETURNING sorting for tables that don't
+ otherwise have qualifying primary key configurations.
+
+ Use of :func:`_orm.orm_insert_sentinel` is analogous to the use of the
+ :func:`_schema.insert_sentinel` construct within a Core
+ :class:`_schema.Table` construct.
+
+ Guidelines for adding this construct to a Declarative mapped class
+ are the same as that of the :func:`_schema.insert_sentinel` construct;
+ the database table itself also needs to have a column with this name
+ present.
+
+ For background on how this object is used, see the section
+ :ref:`engine_insertmanyvalues_sentinel_columns` as part of the
+ section :ref:`engine_insertmanyvalues`.
+
+ .. seealso::
+
+ :func:`_schema.insert_sentinel`
+
+ :ref:`engine_insertmanyvalues`
+
+ :ref:`engine_insertmanyvalues_sentinel_columns`
+
+
+ .. versionadded:: 2.0.10
+
+ """
+
+ return mapped_column(
+ name=name,
+ default=default
+ if default is not None
+ else _InsertSentinelColumnDefault(),
+ _omit_from_statements=omit_from_statements,
+ insert_sentinel=True,
+ use_existing_column=True,
+ nullable=True,
+ )
+
+
@util.deprecated_params(
**{
arg: (
diff --git a/lib/sqlalchemy/orm/bulk_persistence.py b/lib/sqlalchemy/orm/bulk_persistence.py
index c096dc3e5..8388d3980 100644
--- a/lib/sqlalchemy/orm/bulk_persistence.py
+++ b/lib/sqlalchemy/orm/bulk_persistence.py
@@ -131,19 +131,24 @@ def _bulk_insert(
return_result: Optional[cursor.CursorResult[Any]] = None
- for table, super_mapper in base_mapper._sorted_tables.items():
- if not mapper.isa(super_mapper) or table not in mapper._pks_by_table:
- continue
+ mappers_to_run = [
+ (table, mp)
+ for table, mp in base_mapper._sorted_tables.items()
+ if table in mapper._pks_by_table
+ ]
+
+ if return_defaults:
+ # not used by new-style bulk inserts, only used for legacy
+ bookkeeping = True
+ elif len(mappers_to_run) > 1:
+ # if we have more than one table, mapper to run where we will be
+ # either horizontally splicing, or copying values between tables,
+ # we need the "bookkeeping" / deterministic returning order
+ bookkeeping = True
+ else:
+ bookkeeping = False
- is_joined_inh_supertable = super_mapper is not mapper
- bookkeeping = (
- is_joined_inh_supertable
- or return_defaults
- or (
- use_orm_insert_stmt is not None
- and bool(use_orm_insert_stmt._returning)
- )
- )
+ for table, super_mapper in mappers_to_run:
records = (
(
@@ -173,6 +178,7 @@ def _bulk_insert(
render_nulls=render_nulls,
)
)
+
result = persistence._emit_insert_statements(
base_mapper,
None,
@@ -187,6 +193,7 @@ def _bulk_insert(
if not use_orm_insert_stmt._returning or return_result is None:
return_result = result
elif result.returns_rows:
+ assert bookkeeping
return_result = return_result.splice_horizontally(result)
if return_defaults and isstates:
@@ -507,9 +514,11 @@ class ORMDMLState(AbstractORMCompileState):
dml_level_statement = dml_level_statement.return_defaults(
# this is a little weird looking, but by passing
# primary key as the main list of cols, this tells
- # return_defaults to omit server-default cols. Since
- # we have cols_to_return, just return what we asked for
- # (plus primary key, which ORM persistence needs since
+ # return_defaults to omit server-default cols (and
+ # actually all cols, due to some weird thing we should
+ # clean up in crud.py).
+ # Since we have cols_to_return, just return what we asked
+ # for (plus primary key, which ORM persistence needs since
# we likely set bookkeeping=True here, which is another
# whole thing...). We dont want to clutter the
# statement up with lots of other cols the user didn't
diff --git a/lib/sqlalchemy/orm/decl_api.py b/lib/sqlalchemy/orm/decl_api.py
index ed001023b..2f8289acf 100644
--- a/lib/sqlalchemy/orm/decl_api.py
+++ b/lib/sqlalchemy/orm/decl_api.py
@@ -241,6 +241,7 @@ class _declared_attr_common:
self,
fn: Callable[..., Any],
cascading: bool = False,
+ quiet: bool = False,
):
# suppport
# @declared_attr
@@ -254,6 +255,7 @@ class _declared_attr_common:
self.fget = fn
self._cascading = cascading
+ self._quiet = quiet
self.__doc__ = fn.__doc__
def _collect_return_annotation(self) -> Optional[Type[Any]]:
diff --git a/lib/sqlalchemy/orm/decl_base.py b/lib/sqlalchemy/orm/decl_base.py
index beede0ddb..b7d6dd8cf 100644
--- a/lib/sqlalchemy/orm/decl_base.py
+++ b/lib/sqlalchemy/orm/decl_base.py
@@ -459,6 +459,7 @@ class _ClassScanMapperConfig(_MapperConfig):
"mapper_args",
"mapper_args_fn",
"inherits",
+ "single",
"allow_dataclass_fields",
"dataclass_setup_arguments",
"is_dataclass_prior_to_mapping",
@@ -483,6 +484,7 @@ class _ClassScanMapperConfig(_MapperConfig):
table_args: Optional[_TableArgsType]
mapper_args_fn: Optional[Callable[[], Dict[str, Any]]]
inherits: Optional[Type[Any]]
+ single: bool
is_dataclass_prior_to_mapping: bool
allow_unmapped_annotations: bool
@@ -527,7 +529,7 @@ class _ClassScanMapperConfig(_MapperConfig):
self.declared_columns = util.OrderedSet()
self.column_ordering = {}
self.column_copies = {}
-
+ self.single = False
self.dataclass_setup_arguments = dca = getattr(
self.cls, "_sa_apply_dc_transforms", None
)
@@ -866,7 +868,7 @@ class _ClassScanMapperConfig(_MapperConfig):
# should only be __table__
continue
elif class_mapped:
- if _is_declarative_props(obj):
+ if _is_declarative_props(obj) and not obj._quiet:
util.warn(
"Regular (i.e. not __special__) "
"attribute '%s.%s' uses @declared_attr, "
@@ -1783,6 +1785,10 @@ class _ClassScanMapperConfig(_MapperConfig):
self.inherits = inherits
+ clsdict_view = self.clsdict_view
+ if "__table__" not in clsdict_view and self.tablename is None:
+ self.single = True
+
def _setup_inheriting_columns(self, mapper_kw: _MapperKwArgs) -> None:
table = self.local_table
cls = self.cls
@@ -1809,6 +1815,7 @@ class _ClassScanMapperConfig(_MapperConfig):
)
if table is None:
+
# single table inheritance.
# ensure no table args
if table_args:
diff --git a/lib/sqlalchemy/orm/mapper.py b/lib/sqlalchemy/orm/mapper.py
index 81b66de03..731983ff4 100644
--- a/lib/sqlalchemy/orm/mapper.py
+++ b/lib/sqlalchemy/orm/mapper.py
@@ -3273,6 +3273,9 @@ class Mapper(
"""
+ if column is not None and sql_base._never_select_column(column):
+ return True
+
# check for class-bound attributes and/or descriptors,
# either local or from an inherited class
# ignore dataclass field default values
diff --git a/lib/sqlalchemy/orm/persistence.py b/lib/sqlalchemy/orm/persistence.py
index a12156eb5..1af55df00 100644
--- a/lib/sqlalchemy/orm/persistence.py
+++ b/lib/sqlalchemy/orm/persistence.py
@@ -955,8 +955,13 @@ def _emit_insert_statements(
# if a user query with RETURNING was passed, we definitely need
# to use RETURNING.
returning_is_required_anyway = bool(use_orm_insert_stmt._returning)
+ deterministic_results_reqd = (
+ returning_is_required_anyway
+ and use_orm_insert_stmt._sort_by_parameter_order
+ ) or bookkeeping
else:
returning_is_required_anyway = False
+ deterministic_results_reqd = bookkeeping
cached_stmt = base_mapper._memo(("insert", table), table.insert)
exec_opt = {"compiled_cache": base_mapper._compiled_cache}
@@ -1057,22 +1062,31 @@ def _emit_insert_statements(
# know that we are using RETURNING in any case
records = list(records)
- if (
- not hasvalue
- and connection.dialect.insert_executemany_returning
- and len(records) > 1
+
+ if returning_is_required_anyway or (
+ not hasvalue and len(records) > 1
):
- do_executemany = True
- elif returning_is_required_anyway:
- if connection.dialect.insert_executemany_returning:
+ if (
+ deterministic_results_reqd
+ and connection.dialect.insert_executemany_returning_sort_by_parameter_order # noqa: E501
+ ) or (
+ not deterministic_results_reqd
+ and connection.dialect.insert_executemany_returning
+ ):
do_executemany = True
- else:
+ elif returning_is_required_anyway:
+ if deterministic_results_reqd:
+ dt = " with RETURNING and sort by parameter order"
+ else:
+ dt = " with RETURNING"
raise sa_exc.InvalidRequestError(
f"Can't use explicit RETURNING for bulk INSERT "
f"operation with "
f"{connection.dialect.dialect_description} backend; "
- f"executemany is not supported with RETURNING"
+ f"executemany{dt} is not enabled for this dialect."
)
+ else:
+ do_executemany = False
else:
do_executemany = False
@@ -1084,13 +1098,19 @@ def _emit_insert_statements(
)
):
statement = statement.return_defaults(
- *mapper._server_default_cols[table]
+ *mapper._server_default_cols[table],
+ sort_by_parameter_order=bookkeeping,
)
if mapper.version_id_col is not None:
- statement = statement.return_defaults(mapper.version_id_col)
+ statement = statement.return_defaults(
+ mapper.version_id_col,
+ sort_by_parameter_order=bookkeeping,
+ )
elif do_executemany:
- statement = statement.return_defaults(*table.primary_key)
+ statement = statement.return_defaults(
+ *table.primary_key, sort_by_parameter_order=bookkeeping
+ )
if do_executemany:
multiparams = [rec[2] for rec in records]
diff --git a/lib/sqlalchemy/orm/properties.py b/lib/sqlalchemy/orm/properties.py
index f00775874..916b9d901 100644
--- a/lib/sqlalchemy/orm/properties.py
+++ b/lib/sqlalchemy/orm/properties.py
@@ -665,7 +665,11 @@ class MappedColumn(
) -> None:
column = self.column
- if self._use_existing_column and decl_scan.inherits:
+ if (
+ self._use_existing_column
+ and decl_scan.inherits
+ and decl_scan.single
+ ):
if decl_scan.is_deferred:
raise sa_exc.ArgumentError(
"Can't use use_existing_column with deferred mappers"