diff options
author | Zuul <zuul@review.opendev.org> | 2021-08-31 20:14:46 +0000 |
---|---|---|
committer | Gerrit Code Review <review@openstack.org> | 2021-08-31 20:14:46 +0000 |
commit | 1c1e963bc96468aaccd4907e0fe2c3ec9dbd17c0 (patch) | |
tree | 5272c88d8b28332f05f8916e8d2c299de02f544d | |
parent | d024333b80c83c490ffda2d8b412baffb7ace637 (diff) | |
parent | 8a858eca55544f25427c3762f14523b1507261ee (diff) | |
download | nova-1c1e963bc96468aaccd4907e0fe2c3ec9dbd17c0.tar.gz |
Merge "Dynamically archive FK related records in archive_deleted_rows" into stable/ussuri
-rw-r--r-- | nova/db/sqlalchemy/api.py | 221 | ||||
-rw-r--r-- | nova/tests/functional/db/test_archive.py | 8 | ||||
-rw-r--r-- | nova/tests/unit/db/test_db_api.py | 36 |
3 files changed, 170 insertions, 95 deletions
diff --git a/nova/db/sqlalchemy/api.py b/nova/db/sqlalchemy/api.py index af1c7c3d6a..1fa7f5a322 100644 --- a/nova/db/sqlalchemy/api.py +++ b/nova/db/sqlalchemy/api.py @@ -4080,64 +4080,132 @@ def task_log_end_task(context, task_name, period_beginning, period_ending, ################## -def _archive_if_instance_deleted(table, shadow_table, instances, conn, - max_rows, before): - """Look for records that pertain to deleted instances, but may not be - deleted themselves. This catches cases where we delete an instance, - but leave some residue because of a failure in a cleanup path or - similar. - - Logic is: if I have a column called instance_uuid, and that instance - is deleted, then I can be deleted. - """ - - # NOTE(jake): handle instance_actions_events differently as it relies on - # instance_actions.id not instances.uuid - if table.name == "instance_actions_events": - instance_actions = models.BASE.metadata.tables["instance_actions"] - query_select = sql.select( - [table], - and_(instances.c.deleted != instances.c.deleted.default.arg, - instances.c.uuid == instance_actions.c.instance_uuid, - instance_actions.c.id == table.c.action_id)) +def _get_tables_with_fk_to_table(table): + """Get a list of tables that refer to the given table by foreign key (FK). - else: - query_select = sql.select( - [table], - and_(instances.c.deleted != instances.c.deleted.default.arg, - instances.c.uuid == table.c.instance_uuid)) - - if before: - query_select = query_select.where(instances.c.deleted_at < before) + :param table: Table object (parent) for which to find references by FK - query_select = query_select.order_by(table.c.id).limit(max_rows) - - query_insert = shadow_table.insert(inline=True).\ - from_select([c.name for c in table.c], query_select) - - delete_statement = DeleteFromSelect(table, query_select, - table.c.id) + :returns: A list of Table objects that refer to the specified table by FK + """ + tables = [] + for t in models.BASE.metadata.tables.values(): + for fk in t.foreign_keys: + if fk.references(table): + tables.append(t) + return tables + + +def _get_fk_stmts(metadata, conn, table, column, records): + """Find records related to this table by foreign key (FK) and create and + return insert/delete statements for them. + + Logic is: find the tables that reference the table passed to this method + and walk the tree of references by FK. As child records are found, prepend + them to deques to execute later in a single database transaction (to avoid + orphaning related records if any one insert/delete fails or the archive + process is otherwise interrupted). + + :param metadata: Metadata object to use to construct a shadow Table object + :param conn: Connection object to use to select records related by FK + :param table: Table object (parent) for which to find references by FK + :param column: Column object (parent) to use to select records related by + FK + :param records: A list of records (column values) to use to select records + related by FK + + :returns: tuple of (insert statements, delete statements) for records + related by FK to insert into shadow tables and delete from main tables + """ + inserts = collections.deque() + deletes = collections.deque() + fk_tables = _get_tables_with_fk_to_table(table) + for fk_table in fk_tables: + # Create the shadow table for the referencing table. + fk_shadow_tablename = _SHADOW_TABLE_PREFIX + fk_table.name + try: + fk_shadow_table = Table(fk_shadow_tablename, metadata, + autoload=True) + except NoSuchTableError: + # No corresponding shadow table; skip it. + continue - try: - with conn.begin(): - conn.execute(query_insert) - result_delete = conn.execute(delete_statement) - return result_delete.rowcount - except db_exc.DBReferenceError as ex: - LOG.warning('Failed to archive %(table)s: %(error)s', - {'table': table.name, - 'error': six.text_type(ex)}) - return 0 + # TODO(stephenfin): Drop this when we drop the table + if fk_table.name == "dns_domains": + # We have one table (dns_domains) where the key is called + # "domain" rather than "id" + fk_column = fk_table.c.domain + else: + fk_column = fk_table.c.id + + for fk in fk_table.foreign_keys: + # We need to find the records in the referring (child) table that + # correspond to the records in our (parent) table so we can archive + # them. + + # First, select the column in the parent referenced by the child + # table that corresponds to the parent table records that were + # passed in. + # Example: table = 'instances' and fk_table = 'instance_extra' + # fk.parent = instance_extra.instance_uuid + # fk.column = instances.uuid + # SELECT instances.uuid FROM instances, instance_extra + # WHERE instance_extra.instance_uuid = instances.uuid + # AND instance.id IN (<ids>) + # We need the instance uuids for the <ids> in order to + # look up the matching instance_extra records. + select = sql.select([fk.column]).where( + sql.and_(fk.parent == fk.column, column.in_(records))) + rows = conn.execute(select).fetchall() + p_records = [r[0] for r in rows] + # Then, select rows in the child table that correspond to the + # parent table records that were passed in. + # Example: table = 'instances' and fk_table = 'instance_extra' + # fk.parent = instance_extra.instance_uuid + # fk.column = instances.uuid + # SELECT instance_extra.id FROM instance_extra, instances + # WHERE instance_extra.instance_uuid = instances.uuid + # AND instances.uuid IN (<uuids>) + # We will get the instance_extra ids we need to archive + # them. + fk_select = sql.select([fk_column]).where( + sql.and_(fk.parent == fk.column, fk.column.in_(p_records))) + fk_rows = conn.execute(fk_select).fetchall() + fk_records = [r[0] for r in fk_rows] + if fk_records: + # If we found any records in the child table, create shadow + # table insert statements for them and prepend them to the + # deque. + fk_columns = [c.name for c in fk_table.c] + fk_insert = fk_shadow_table.insert(inline=True).\ + from_select(fk_columns, sql.select([fk_table], + fk_column.in_(fk_records))) + inserts.appendleft(fk_insert) + # Create main table delete statements and prepend them to the + # deque. + fk_delete = fk_table.delete().where(fk_column.in_(fk_records)) + deletes.appendleft(fk_delete) + # Repeat for any possible nested child tables. + i, d = _get_fk_stmts(metadata, conn, fk_table, fk_column, fk_records) + inserts.extendleft(i) + deletes.extendleft(d) + + return inserts, deletes def _archive_deleted_rows_for_table(metadata, tablename, max_rows, before): """Move up to max_rows rows from one tables to the corresponding shadow table. - :returns: 2-item tuple: + Will also follow FK constraints and archive all referring rows. + Example: archving a record from the 'instances' table will also archive + the 'instance_extra' record before archiving the 'instances' record. + + :returns: 3-item tuple: - number of rows archived - list of UUIDs of instances that were archived + - number of extra rows archived (due to FK constraints) + dict of {tablename: rows_archived} """ conn = metadata.bind.connect() # NOTE(tdurakov): table metadata should be received @@ -4153,7 +4221,7 @@ def _archive_deleted_rows_for_table(metadata, tablename, max_rows, before): shadow_table = Table(shadow_tablename, metadata, autoload=True) except NoSuchTableError: # No corresponding shadow table; skip it. - return rows_archived, deleted_instance_uuids + return rows_archived, deleted_instance_uuids, {} # TODO(stephenfin): Drop this when we drop the table if tablename == "dns_domains": @@ -4176,10 +4244,29 @@ def _archive_deleted_rows_for_table(metadata, tablename, max_rows, before): rows = conn.execute(select).fetchall() records = [r[0] for r in rows] + # We will archive deleted rows for this table and also generate insert and + # delete statements for extra rows we may archive by following FK + # relationships. Because we are iterating over the sorted_tables (list of + # Table objects sorted in order of foreign key dependency), new inserts and + # deletes ("leaves") will be added to the fronts of the deques created in + # _get_fk_stmts. This way, we make sure we delete child table records + # before we delete their parent table records. + + # Keep track of any extra tablenames to number of rows that we archive by + # following FK relationships. + # {tablename: extra_rows_archived} + extras = collections.defaultdict(int) if records: insert = shadow_table.insert(inline=True).\ from_select(columns, sql.select([table], column.in_(records))) delete = table.delete().where(column.in_(records)) + # Walk FK relationships and add insert/delete statements for rows that + # refer to this table via FK constraints. fk_inserts and fk_deletes + # will be prepended to by _get_fk_stmts if referring rows are found by + # FK constraints. + fk_inserts, fk_deletes = _get_fk_stmts( + metadata, conn, table, column, records) + # NOTE(tssurya): In order to facilitate the deletion of records from # instance_mappings, request_specs and instance_group_member tables in # the nova_api DB, the rows of deleted instances from the instances @@ -4193,9 +4280,14 @@ def _archive_deleted_rows_for_table(metadata, tablename, max_rows, before): try: # Group the insert and delete in a transaction. with conn.begin(): + for fk_insert in fk_inserts: + conn.execute(fk_insert) + for fk_delete in fk_deletes: + result_fk_delete = conn.execute(fk_delete) + extras[fk_delete.table.name] += result_fk_delete.rowcount conn.execute(insert) result_delete = conn.execute(delete) - rows_archived = result_delete.rowcount + rows_archived += result_delete.rowcount except db_exc.DBReferenceError as ex: # A foreign key constraint keeps us from deleting some of # these rows until we clean up a dependent table. Just @@ -4204,22 +4296,7 @@ def _archive_deleted_rows_for_table(metadata, tablename, max_rows, before): "%(tablename)s: %(error)s", {'tablename': tablename, 'error': six.text_type(ex)}) - # NOTE(jake): instance_actions_events doesn't have a instance_uuid column - # but still needs to be archived as it is a FK constraint - if ((max_rows is None or rows_archived < max_rows) and - # NOTE(melwitt): The pci_devices table uses the 'instance_uuid' - # column to track the allocated association of a PCI device and its - # records are not tied to the lifecycles of instance records. - (tablename != 'pci_devices' and - 'instance_uuid' in columns or - tablename == 'instance_actions_events')): - instances = models.BASE.metadata.tables['instances'] - limit = max_rows - rows_archived if max_rows is not None else None - extra = _archive_if_instance_deleted(table, shadow_table, instances, - conn, limit, before) - rows_archived += extra - - return rows_archived, deleted_instance_uuids + return rows_archived, deleted_instance_uuids, extras def archive_deleted_rows(context=None, max_rows=None, before=None): @@ -4243,13 +4320,18 @@ def archive_deleted_rows(context=None, max_rows=None, before=None): - list of UUIDs of instances that were archived - total number of rows that were archived """ - table_to_rows_archived = {} + table_to_rows_archived = collections.defaultdict(int) deleted_instance_uuids = [] total_rows_archived = 0 meta = MetaData(get_engine(use_slave=True, context=context)) meta.reflect() - # Reverse sort the tables so we get the leaf nodes first for processing. - for table in reversed(meta.sorted_tables): + # Get the sorted list of tables in order of foreign key dependency. + # Process the parent tables and find their dependent records in order to + # archive the related records in a single database transactions. The goal + # is to avoid a situation where, for example, an 'instances' table record + # is missing its corresponding 'instance_extra' record due to running the + # archive_deleted_rows command with max_rows. + for table in meta.sorted_tables: tablename = table.name rows_archived = 0 # skip the special sqlalchemy-migrate migrate_version table and any @@ -4257,7 +4339,7 @@ def archive_deleted_rows(context=None, max_rows=None, before=None): if (tablename == 'migrate_version' or tablename.startswith(_SHADOW_TABLE_PREFIX)): continue - rows_archived, _deleted_instance_uuids = ( + rows_archived, _deleted_instance_uuids, extras = ( _archive_deleted_rows_for_table( meta, tablename, max_rows=max_rows - total_rows_archived, @@ -4268,6 +4350,9 @@ def archive_deleted_rows(context=None, max_rows=None, before=None): # Only report results for tables that had updates. if rows_archived: table_to_rows_archived[tablename] = rows_archived + for tablename, extra_rows_archived in extras.items(): + table_to_rows_archived[tablename] += extra_rows_archived + total_rows_archived += extra_rows_archived if total_rows_archived >= max_rows: break return table_to_rows_archived, deleted_instance_uuids, total_rows_archived diff --git a/nova/tests/functional/db/test_archive.py b/nova/tests/functional/db/test_archive.py index e4d703cd2a..7a5e55963d 100644 --- a/nova/tests/functional/db/test_archive.py +++ b/nova/tests/functional/db/test_archive.py @@ -167,13 +167,7 @@ class TestDatabaseArchive(test_servers.ServersTestBase): exceptions.append(ex) if archived == 0: break - # FIXME(melwitt): OrphanedObjectError is raised because of the bug. - self.assertTrue(exceptions) - for ex in exceptions: - self.assertEqual(500, ex.response.status_code) - self.assertIn('OrphanedObjectError', str(ex)) - # FIXME(melwitt): Uncomment when the bug is fixed. - # self.assertFalse(exceptions) + self.assertFalse(exceptions) def _get_table_counts(self): engine = sqlalchemy_api.get_engine() diff --git a/nova/tests/unit/db/test_db_api.py b/nova/tests/unit/db/test_db_api.py index 352125b7a6..6b80f21661 100644 --- a/nova/tests/unit/db/test_db_api.py +++ b/nova/tests/unit/db/test_db_api.py @@ -6295,17 +6295,24 @@ class ArchiveTestCase(test.TestCase, ModelsObjectComparatorMixin): instance_actions_events=1) self._assertEqualObjects(expected, results[0]) - # Archive 1 row deleted before 2017-01-03. instance_action_events - # should be the table with row deleted due to FK contraints + # Archive 1 row deleted before 2017-01-03 + # Because the instances table will be processed first, tables that + # refer to it (instance_actions and instance_action_events) will be + # visited and archived in the same transaction as the instance, to + # avoid orphaning the instance record (archive dependent records in one + # transaction) before_date = dateutil_parser.parse('2017-01-03', fuzzy=True) results = db.archive_deleted_rows(max_rows=1, before=before_date) - expected = dict(instance_actions_events=1) + expected = dict(instances=1, + instance_actions=1, + instance_actions_events=1) self._assertEqualObjects(expected, results[0]) - # Archive all other rows deleted before 2017-01-03. This should - # delete row in instance_actions, then row in instances due to FK - # constraints + # Try to archive all other rows deleted before 2017-01-03. This should + # not archive anything because the instances table and tables that + # refer to it (instance_actions and instance_action_events) were all + # archived in the last run. results = db.archive_deleted_rows(max_rows=100, before=before_date) - expected = dict(instances=1, instance_actions=1) + expected = {} self._assertEqualObjects(expected, results[0]) # Verify we have 4 left in main @@ -6453,19 +6460,8 @@ class ArchiveTestCase(test.TestCase, ModelsObjectComparatorMixin): ins_stmt = self.migrations.insert().values(instance_uuid=instance_uuid, deleted=0) self.conn.execute(ins_stmt) - # The first try to archive instances should fail, due to FK. - num = sqlalchemy_api._archive_deleted_rows_for_table(self.metadata, - "instances", - max_rows=None, - before=None) - self.assertEqual(0, num[0]) - # Then archiving migrations should work. - num = sqlalchemy_api._archive_deleted_rows_for_table(self.metadata, - "migrations", - max_rows=None, - before=None) - self.assertEqual(1, num[0]) - # Then archiving instances should work. + # Archiving instances should result in migrations related to the + # instances also being archived. num = sqlalchemy_api._archive_deleted_rows_for_table(self.metadata, "instances", max_rows=None, |