diff options
author | Federico Caselli <cfederico87@gmail.com> | 2023-04-25 13:47:04 +0200 |
---|---|---|
committer | Federico Caselli <cfederico87@gmail.com> | 2023-04-28 20:38:24 +0200 |
commit | f45f4a3afc3c260d50773c647eb7b1c270bb8e00 (patch) | |
tree | 5486c2a26cc7e868161c97a326cd20291f305a18 | |
parent | 0596adcc27e4e14c4692a58cd32d39df6f48b09a (diff) | |
download | sqlalchemy-f45f4a3afc3c260d50773c647eb7b1c270bb8e00.tar.gz |
Improve Oracle index reflection
Added reflection support in the Oracle dialect for expression-based indexes
and the ordering direction of index expressions.
Fixes: #9597
Change-Id: I40e163496789774e9930f46823d2208c35eab6f8
-rw-r--r-- | doc/build/changelog/unreleased_20/9597.rst | 14 | ||||
-rw-r--r-- | lib/sqlalchemy/dialects/oracle/base.py | 66 | ||||
-rw-r--r-- | lib/sqlalchemy/dialects/oracle/dictionary.py | 11 | ||||
-rw-r--r-- | lib/sqlalchemy/engine/interfaces.py | 5 | ||||
-rw-r--r-- | lib/sqlalchemy/sql/base.py | 20 | ||||
-rw-r--r-- | lib/sqlalchemy/testing/requirements.py | 6 | ||||
-rw-r--r-- | lib/sqlalchemy/testing/suite/test_reflection.py | 30 | ||||
-rw-r--r-- | test/dialect/oracle/test_reflection.py | 112 | ||||
-rw-r--r-- | test/dialect/postgresql/test_reflection.py | 3 | ||||
-rw-r--r-- | test/perf/many_table_reflection.py | 68 | ||||
-rw-r--r-- | test/requirements.py | 12 |
11 files changed, 270 insertions, 77 deletions
diff --git a/doc/build/changelog/unreleased_20/9597.rst b/doc/build/changelog/unreleased_20/9597.rst new file mode 100644 index 000000000..f8a69d92b --- /dev/null +++ b/doc/build/changelog/unreleased_20/9597.rst @@ -0,0 +1,14 @@ +.. change:: + :tags: oracle, reflection + :tickets: 9597 + + Added reflection support in the Oracle dialect to expression based indexes + and the ordering direction of index expressions. + +.. change:: + :tags: performance, schema + :tickets: 9597 + + Improved how table columns are added, avoiding unnecessary allocations, + significantly speeding up the creation of many table, like when reflecting + entire schemas. diff --git a/lib/sqlalchemy/dialects/oracle/base.py b/lib/sqlalchemy/dialects/oracle/base.py index a3e724cbe..d20175b0a 100644 --- a/lib/sqlalchemy/dialects/oracle/base.py +++ b/lib/sqlalchemy/dialects/oracle/base.py @@ -2304,6 +2304,8 @@ class OracleDialect(default.DefaultDialect): else: return value + remove_size = re.compile(r"\(\d+\)") + for row_dict in result: table_name = self.normalize_name(row_dict["table_name"]) orig_colname = row_dict["column_name"] @@ -2339,7 +2341,7 @@ class OracleDialect(default.DefaultDialect): elif "WITH LOCAL TIME ZONE" in coltype: coltype = TIMESTAMP(local_timezone=True) else: - coltype = re.sub(r"\(\d+\)", "", coltype) + coltype = re.sub(remove_size, "", coltype) try: coltype = self.ischema_names[coltype] except KeyError: @@ -2557,6 +2559,8 @@ class OracleDialect(default.DefaultDialect): dictionary.all_indexes.c.uniqueness, dictionary.all_indexes.c.compression, dictionary.all_indexes.c.prefix_length, + dictionary.all_ind_columns.c.descend, + dictionary.all_ind_expressions.c.column_expression, ) .select_from(dictionary.all_ind_columns) .join( @@ -2564,17 +2568,30 @@ class OracleDialect(default.DefaultDialect): sql.and_( dictionary.all_ind_columns.c.index_name == dictionary.all_indexes.c.index_name, - dictionary.all_ind_columns.c.table_owner - == dictionary.all_indexes.c.table_owner, - # NOTE: 
this condition on table_name is not required - # but it improves the query performance noticeably - dictionary.all_ind_columns.c.table_name - == dictionary.all_indexes.c.table_name, + dictionary.all_ind_columns.c.index_owner + == dictionary.all_indexes.c.owner, + ), + ) + .outerjoin( + # NOTE: this adds about 20% to the query time. Using a + # case expression with a scalar subquery only when needed + # with the assumption that most indexes are not expression + # would be faster but oracle does not like that with + # LONG datatype. It errors with: + # ORA-00997: illegal use of LONG datatype + dictionary.all_ind_expressions, + sql.and_( + dictionary.all_ind_expressions.c.index_name + == dictionary.all_ind_columns.c.index_name, + dictionary.all_ind_expressions.c.index_owner + == dictionary.all_ind_columns.c.index_owner, + dictionary.all_ind_expressions.c.column_position + == dictionary.all_ind_columns.c.column_position, ), ) .where( - dictionary.all_ind_columns.c.table_owner == owner, - dictionary.all_ind_columns.c.table_name.in_( + dictionary.all_indexes.c.table_owner == owner, + dictionary.all_indexes.c.table_name.in_( bindparam("all_objects") ), ) @@ -2604,11 +2621,12 @@ class OracleDialect(default.DefaultDialect): if row_dict["constraint_type"] == "P" } + # all_ind_expressions.column_expression is LONG result = self._run_batches( connection, query, dblink, - returns_long=False, + returns_long=True, mappings=True, all_objects=all_objects, ) @@ -2642,8 +2660,6 @@ class OracleDialect(default.DefaultDialect): enabled = {"DISABLED": False, "ENABLED": True} is_bitmap = {"BITMAP", "FUNCTION-BASED BITMAP"} - oracle_sys_col = re.compile(r"SYS_NC\d+\$", re.IGNORECASE) - indexes = defaultdict(dict) for row_dict in self._get_indexes_rows( @@ -2669,13 +2685,25 @@ class OracleDialect(default.DefaultDialect): else: index_dict = table_indexes[index_name] - # filter out Oracle SYS_NC names. 
could also do an outer join - # to the all_tab_columns table and check for real col names - # there. - if not oracle_sys_col.match(row_dict["column_name"]): - index_dict["column_names"].append( - self.normalize_name(row_dict["column_name"]) - ) + expr = row_dict["column_expression"] + if expr is not None: + index_dict["column_names"].append(None) + if "expressions" in index_dict: + index_dict["expressions"].append(expr) + else: + index_dict["expressions"] = index_dict["column_names"][:-1] + index_dict["expressions"].append(expr) + + if row_dict["descend"].lower() != "asc": + assert row_dict["descend"].lower() == "desc" + cs = index_dict.setdefault("column_sorting", {}) + cs[expr] = ("desc",) + else: + assert row_dict["descend"].lower() == "asc" + cn = self.normalize_name(row_dict["column_name"]) + index_dict["column_names"].append(cn) + if "expressions" in index_dict: + index_dict["expressions"].append(cn) default = ReflectionDefaults.indexes diff --git a/lib/sqlalchemy/dialects/oracle/dictionary.py b/lib/sqlalchemy/dialects/oracle/dictionary.py index f79501072..fdf47ef31 100644 --- a/lib/sqlalchemy/dialects/oracle/dictionary.py +++ b/lib/sqlalchemy/dialects/oracle/dictionary.py @@ -393,6 +393,17 @@ all_indexes = Table( Column("auto", VARCHAR2(3)), ).alias("a_indexes") +all_ind_expressions = Table( + "all_ind_expressions" + DB_LINK_PLACEHOLDER, + dictionary_meta, + Column("index_owner", VARCHAR2(128), nullable=False), + Column("index_name", VARCHAR2(128), nullable=False), + Column("table_owner", VARCHAR2(128), nullable=False), + Column("table_name", VARCHAR2(128), nullable=False), + Column("column_expression", LONG), + Column("column_position", NUMBER, nullable=False), +).alias("a_ind_expressions") + all_constraints = Table( "all_constraints" + DB_LINK_PLACEHOLDER, dictionary_meta, diff --git a/lib/sqlalchemy/engine/interfaces.py b/lib/sqlalchemy/engine/interfaces.py index e4914551c..e9e1d8ced 100644 --- a/lib/sqlalchemy/engine/interfaces.py +++ 
b/lib/sqlalchemy/engine/interfaces.py @@ -549,8 +549,9 @@ class ReflectedIndex(TypedDict): """ column_sorting: NotRequired[Dict[str, Tuple[str]]] - """optional dict mapping column names to tuple of sort keywords, - which may include ``asc``, ``desc``, ``nulls_first``, ``nulls_last``. + """optional dict mapping column names or expressions to tuple of sort + keywords, which may include ``asc``, ``desc``, ``nulls_first``, + ``nulls_last``. .. versionadded:: 1.3.5 """ diff --git a/lib/sqlalchemy/sql/base.py b/lib/sqlalchemy/sql/base.py index 309555338..ee80b0514 100644 --- a/lib/sqlalchemy/sql/base.py +++ b/lib/sqlalchemy/sql/base.py @@ -1962,13 +1962,16 @@ class DedupeColumnCollection(ColumnCollection[str, _NAMEDCOL]): # in a _make_proxy operation util.memoized_property.reset(named_column, "proxy_set") else: - l = len(self._collection) - self._collection.append( - (key, named_column, _ColumnMetrics(self, named_column)) - ) - self._colset.add(named_column._deannotate()) - self._index[l] = (key, named_column) - self._index[key] = (key, named_column) + self._append_new_column(key, named_column) + + def _append_new_column(self, key: str, named_column: _NAMEDCOL) -> None: + l = len(self._collection) + self._collection.append( + (key, named_column, _ColumnMetrics(self, named_column)) + ) + self._colset.add(named_column._deannotate()) + self._index[l] = (key, named_column) + self._index[key] = (key, named_column) def _populate_separate_keys( self, iter_: Iterable[Tuple[str, _NAMEDCOL]] @@ -2057,6 +2060,9 @@ class DedupeColumnCollection(ColumnCollection[str, _NAMEDCOL]): if column.key in self._index: remove_col.add(self._index[column.key][1]) + if not remove_col: + self._append_new_column(column.key, column) + return new_cols: List[Tuple[str, _NAMEDCOL, _ColumnMetrics[_NAMEDCOL]]] = [] replaced = False for k, col, metrics in self._collection: diff --git a/lib/sqlalchemy/testing/requirements.py b/lib/sqlalchemy/testing/requirements.py index b59cce374..7286cd81b 100644 --- 
a/lib/sqlalchemy/testing/requirements.py +++ b/lib/sqlalchemy/testing/requirements.py @@ -763,6 +763,12 @@ class SuiteRequirements(Requirements): return exclusions.open() @property + def reflect_indexes_with_ascdesc_as_expression(self): + """target database supports reflecting INDEX with per-column + ASC/DESC but reflects them as expressions (like oracle).""" + return exclusions.closed() + + @property def indexes_with_expressions(self): """target database supports CREATE INDEX against SQL expressions.""" return exclusions.closed() diff --git a/lib/sqlalchemy/testing/suite/test_reflection.py b/lib/sqlalchemy/testing/suite/test_reflection.py index 5927df065..8c26c265b 100644 --- a/lib/sqlalchemy/testing/suite/test_reflection.py +++ b/lib/sqlalchemy/testing/suite/test_reflection.py @@ -1109,6 +1109,10 @@ class ComponentReflectionTest(ComparesTables, OneConnectionTablesTest): ): fk_req = testing.requires.foreign_keys_reflect_as_index dup_req = testing.requires.unique_constraints_reflect_as_index + sorting_expression = ( + testing.requires.reflect_indexes_with_ascdesc_as_expression + ) + if (fk and not fk_req.enabled) or ( duplicates and not dup_req.enabled ): @@ -1121,7 +1125,13 @@ class ComponentReflectionTest(ComparesTables, OneConnectionTablesTest): "include_columns": [], } if column_sorting: - res["column_sorting"] = {"q": ("desc",)} + res["column_sorting"] = column_sorting + if sorting_expression.enabled: + res["expressions"] = orig = res["column_names"] + res["column_names"] = [ + None if c in column_sorting else c for c in orig + ] + if duplicates: res["duplicates_constraint"] = name return [res] @@ -2065,6 +2075,15 @@ class ComponentReflectionTest(ComparesTables, OneConnectionTablesTest): insp.clear_cache() eq_(insp.get_multi_table_comment(**kw), exp) + def _check_expressions(self, result, exp, err_msg): + def _clean(text: str): + return re.sub(r"['\" ]", "", text).lower() + + if isinstance(exp, dict): + eq_({_clean(e): v for e, v in result.items()}, exp, 
err_msg) + else: + eq_([_clean(e) for e in result], exp, err_msg) + def _check_list(self, result, exp, req_keys=None, msg=None): if req_keys is None: eq_(result, exp, msg) @@ -2073,7 +2092,11 @@ class ComponentReflectionTest(ComparesTables, OneConnectionTablesTest): for r, e in zip(result, exp): for k in set(r) | set(e): if k in req_keys or (k in r and k in e): - eq_(r[k], e[k], f"{msg} - {k} - {r}") + err_msg = f"{msg} - {k} - {r}" + if k in ("expressions", "column_sorting"): + self._check_expressions(r[k], e[k], err_msg) + else: + eq_(r[k], e[k], err_msg) def _check_table_dict(self, result, exp, req_keys=None, make_lists=False): eq_(set(result.keys()), set(exp.keys())) @@ -2427,8 +2450,9 @@ class ComponentReflectionTestExtra(ComparesIndexes, fixtures.TestBase): class lower_index_str(str): def __eq__(self, other): + ol = other.lower() # test that lower and x or y are in the string - return "lower" in other and ("x" in other or "y" in other) + return "lower" in ol and ("x" in ol or "y" in ol) class coalesce_index_str(str): def __eq__(self, other): diff --git a/test/dialect/oracle/test_reflection.py b/test/dialect/oracle/test_reflection.py index ac58d3694..6c6f1f21f 100644 --- a/test/dialect/oracle/test_reflection.py +++ b/test/dialect/oracle/test_reflection.py @@ -5,7 +5,6 @@ from sqlalchemy import FLOAT from sqlalchemy import Float from sqlalchemy import ForeignKey from sqlalchemy import ForeignKeyConstraint -from sqlalchemy import func from sqlalchemy import Identity from sqlalchemy import Index from sqlalchemy import inspect @@ -982,36 +981,109 @@ class RoundTripIndexTest(fixtures.TestBase): ) def test_reflect_fn_index(self, metadata, connection): - """test reflection of a functional index. + """test reflection of a functional index.""" - it appears this emitted a warning at some point but does not right now. - the returned data is not exactly correct, but this is what it's - likely been doing for many years. 
+ Table( + "sometable", + metadata, + Column("group", Unicode(255)), + Column("col", Unicode(255)), + Column("other", Unicode(255), index=True), + ) + metadata.create_all(connection) + connection.exec_driver_sql( + """create index idx3 on sometable( + lower("group"), other, upper(other))""" + ) + connection.exec_driver_sql( + """create index idx1 on sometable + (("group" || col), col || other desc)""" + ) + connection.exec_driver_sql( + """ + create unique index idx2 on sometable + (col desc, lower(other), "group" asc) + """ + ) - """ + expected = [ + { + "name": "idx1", + "column_names": [None, None], + "expressions": ['"group"||"COL"', '"COL"||"OTHER"'], + "unique": False, + "dialect_options": {}, + "column_sorting": {'"COL"||"OTHER"': ("desc",)}, + }, + { + "name": "idx2", + "column_names": [None, None, "group"], + "expressions": ['"COL"', 'LOWER("OTHER")', "group"], + "unique": True, + "column_sorting": {'"COL"': ("desc",)}, + "dialect_options": {}, + }, + { + "name": "idx3", + "column_names": [None, "other", None], + "expressions": [ + 'LOWER("group")', + "other", + 'UPPER("OTHER")', + ], + "unique": False, + "dialect_options": {}, + }, + { + "name": "ix_sometable_other", + "column_names": ["other"], + "unique": False, + "dialect_options": {}, + }, + ] + + eq_(inspect(connection).get_indexes("sometable"), expected) + def test_indexes_asc_desc(self, metadata, connection): s_table = Table( "sometable", metadata, - Column("group", Unicode(255), primary_key=True), + Column("a", Unicode(255), primary_key=True), + Column("b", Unicode(255)), + Column("group", Unicode(255)), Column("col", Unicode(255)), ) - - Index("data_idx", func.upper(s_table.c.col)) + Index("id1", s_table.c.b.asc()) + Index("id2", s_table.c.col.desc()) + Index("id3", s_table.c.b.asc(), s_table.c.group.desc()) metadata.create_all(connection) - eq_( - inspect(connection).get_indexes("sometable"), - [ - { - "column_names": [], - "dialect_options": {}, - "name": "data_idx", - "unique": False, - } - ], 
- ) + expected = [ + { + "name": "id1", + "column_names": ["b"], + "unique": False, + "dialect_options": {}, + }, + { + "name": "id2", + "column_names": [None], + "expressions": ['"COL"'], + "unique": False, + "column_sorting": {'"COL"': ("desc",)}, + "dialect_options": {}, + }, + { + "name": "id3", + "column_names": ["b", None], + "expressions": ["b", '"group"'], + "unique": False, + "column_sorting": {'"group"': ("desc",)}, + "dialect_options": {}, + }, + ] + eq_(inspect(connection).get_indexes("sometable"), expected) def test_basic(self, metadata, connection): diff --git a/test/dialect/postgresql/test_reflection.py b/test/dialect/postgresql/test_reflection.py index 4fb1f8e70..eacb4b149 100644 --- a/test/dialect/postgresql/test_reflection.py +++ b/test/dialect/postgresql/test_reflection.py @@ -1166,7 +1166,7 @@ class ReflectionTest( connection.exec_driver_sql( """ create index idx3 on party - (lower(name::text), other, lower(aname::text)) + (lower(name::text), other, lower(aname::text) desc) """ ) connection.exec_driver_sql( @@ -1216,6 +1216,7 @@ class ReflectionTest( "unique": False, "include_columns": [], "dialect_options": {"postgresql_include": []}, + "column_sorting": {"lower(aname::text)": ("desc",)}, }, { "name": "idx4", diff --git a/test/perf/many_table_reflection.py b/test/perf/many_table_reflection.py index 804419f28..8fb654bbe 100644 --- a/test/perf/many_table_reflection.py +++ b/test/perf/many_table_reflection.py @@ -27,16 +27,15 @@ def generate_table(meta: sa.MetaData, min_cols, max_cols, dialect_name): cols = [] for i in range(col_number - (0 if is_mssql else add_identity)): args = [] - if random.random() < 0.95 or table_num == 0: + if random.random() < 0.99 or table_num == 0: if is_mssql and add_identity and i == 0: args.append(sa.Integer) args.append(identity) else: args.append(random.choice(types)) else: - args.append( - sa.ForeignKey(f"table_{table_num-1}.table_{table_num-1}_col_1") - ) + target = random.randint(0, table_num - 1) + 
args.append(sa.ForeignKey(f"table_{target}.table_{target}_col_1")) cols.append( sa.Column( f"table_{table_num}_col_{i+1}", @@ -45,8 +44,8 @@ def generate_table(meta: sa.MetaData, min_cols, max_cols, dialect_name): comment=f"primary key of table_{table_num}" if i == 0 else None, - index=random.random() > 0.9 and i > 0, - unique=random.random() > 0.95 and i > 0, + index=random.random() > 0.97 and i > 0, + unique=random.random() > 0.97 and i > 0, ) ) if add_identity and not is_mssql: @@ -131,6 +130,19 @@ def create_tables(engine, meta): meta.create_all(engine, tables[i : i + 500]) +def _drop_ddl(name, schema_name, dialect_name): + if dialect_name.startswith("postgres"): + suffix = "CASCADE" + elif dialect_name.startswith("oracle"): + suffix = "CASCADE CONSTRAINTS PURGE" + else: + suffix = "" + if schema_name: + return sa.schema.DDL(f"DROP TABLE {schema_name}.{name} {suffix}") + else: + return sa.schema.DDL(f"DROP TABLE {name} {suffix}") + + @log def drop_tables(engine, meta, schema_name, table_names: list): tables = list(meta.tables.values())[::-1] @@ -138,10 +150,6 @@ def drop_tables(engine, meta, schema_name, table_names: list): meta.drop_all(engine, tables[i : i + 500]) remaining = sa.inspect(engine).get_table_names(schema=schema_name) - suffix = "" - if engine.dialect.name.startswith("postgres"): - suffix = "CASCADE" - remaining = sorted( remaining, key=lambda tn: int(tn.partition("_")[2]), reverse=True ) @@ -151,14 +159,7 @@ def drop_tables(engine, meta, schema_name, table_names: list): name = engine.dialect.denormalize_name(tn) else: name = tn - if schema_name: - conn.execute( - sa.schema.DDL( - f'DROP TABLE {schema_name}."{name}" {suffix}' - ) - ) - else: - conn.execute(sa.schema.DDL(f'DROP TABLE "{name}" {suffix}')) + conn.execute(_drop_ddl(name, schema_name, engine.dialect.name)) if i % 500 == 0: conn.commit() conn.commit() @@ -454,6 +455,9 @@ def main(db, schema_name, table_number, min_cols, max_cols, args): else: engine = sa.create_engine(db, 
echo=args.echo, future=True) + if args.drop_all: + return drop_all(engine, schema_name) + if engine.name == "oracle": # clear out oracle caches so that we get the real-world time the # queries would normally take for scripts that aren't run repeatedly @@ -537,6 +541,25 @@ def timer(): return track_time +def drop_all(engine, schema_name): + with engine.connect() as conn: + table_names = engine.dialect.get_table_names(conn, schema=schema_name) + print(f"Dropping {len(table_names)} tables") + dn = engine.dialect.name + i = 0 + while table_names: + name = table_names.pop() + try: + conn.execute(_drop_ddl(name, schema_name, dn)) + conn.commit() + except Exception: + conn.rollback() + table_names.insert(0, name) + i += 1 + if i % 25 == 0: + print(f"Still running. Tables left {len(table_names)}") + + if __name__ == "__main__": parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter) parser.add_argument( @@ -572,7 +595,9 @@ if __name__ == "__main__": parser.add_argument( "--no-drop", help="Do not run drop tables", action="store_true" ) - parser.add_argument("--reflect", help="Run reflect", action="store_true") + parser.add_argument( + "--reflect", help="Run metadata reflect", action="store_true" + ) parser.add_argument( "--test", help="Run these tests. 'all' runs all tests", @@ -608,6 +633,11 @@ if __name__ == "__main__": "using single reflections. 
Mainly for sqlite.", ) parser.add_argument("--pool-class", help="The pool class to use") + parser.add_argument( + "--drop-all", + action="store_true", + help="Drop all tables, do nothing else", + ) args = parser.parse_args() min_cols = args.min_cols diff --git a/test/requirements.py b/test/requirements.py index 3c72cd07d..2023428b8 100644 --- a/test/requirements.py +++ b/test/requirements.py @@ -96,10 +96,6 @@ class DefaultRequirements(SuiteRequirements): ) @property - def reflect_indexes_with_ascdesc(self): - return fails_if(["oracle"]) - - @property def table_ddl_if_exists(self): """target platform supports IF NOT EXISTS / IF EXISTS for tables.""" @@ -603,11 +599,15 @@ class DefaultRequirements(SuiteRequirements): @property def indexes_with_expressions(self): - return only_on(["postgresql", "sqlite>=3.9.0"]) + return only_on(["postgresql", "sqlite>=3.9.0", "oracle"]) @property def reflect_indexes_with_expressions(self): - return only_on(["postgresql"]) + return only_on(["postgresql", "oracle"]) + + @property + def reflect_indexes_with_ascdesc_as_expression(self): + return only_on(["oracle"]) @property def temp_table_names(self): |