summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLuyaoZhong <luyao.zhong@intel.com>2020-09-09 03:35:41 +0000
committerLuyaoZhong <luyao.zhong@intel.com>2020-09-10 05:30:39 +0000
commit255b3f2f918843ca5dd9b99e109ecd2189b6b749 (patch)
tree6d42aaf19405a933f0cbf438972cd631d6e713f6
parentb0c529a73d1856cec18184ca500bb095dff6b5c0 (diff)
downloadnova-255b3f2f918843ca5dd9b99e109ecd2189b6b749.tar.gz
Track error migrations in resource tracker
If rollback_live_migration failed, the migration status is set to 'error', and there might be some resources not cleaned up, like vpmem, since rollback is not completed. So we propose to track those 'error' migrations in the resource tracker until they are cleaned up by the periodic task '_cleanup_incomplete_migrations'. So if rollback_live_migration succeeds, we need to set the migration status to 'failed', which will not be tracked in the resource tracker. The 'failed' status is already used for resize to indicate a migration finishing the cleanup. '_cleanup_incomplete_migrations' will also handle failed rollback_live_migration cleanup except for failed resize/revert-resize. Besides, we introduce a new 'cleanup_lingering_instance_resources' virt driver interface to handle lingering instance resources cleanup including vpmem cleanup and whatever we add in the future. Change-Id: I422a907056543f9bf95acbffdd2658438febf801 Partially-Implements: blueprint vpmem-enhancement
-rw-r--r--nova/compute/manager.py58
-rw-r--r--nova/compute/resource_tracker.py11
-rw-r--r--nova/db/api.py8
-rw-r--r--nova/db/sqlalchemy/api.py18
-rw-r--r--nova/objects/instance.py9
-rw-r--r--nova/objects/migration.py12
-rw-r--r--nova/tests/functional/libvirt/test_numa_live_migration.py4
-rw-r--r--nova/tests/functional/regressions/test_bug_1889108.py2
-rw-r--r--nova/tests/functional/test_servers.py6
-rw-r--r--nova/tests/unit/compute/test_compute.py4
-rw-r--r--nova/tests/unit/compute/test_compute_mgr.py9
-rw-r--r--nova/tests/unit/compute/test_resource_tracker.py154
-rw-r--r--nova/tests/unit/objects/test_instance.py7
-rw-r--r--nova/tests/unit/objects/test_objects.py2
-rw-r--r--nova/virt/driver.py11
-rw-r--r--nova/virt/libvirt/driver.py7
16 files changed, 236 insertions, 86 deletions
diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index 4c782862da..be30dd36a6 100644
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -8716,7 +8716,7 @@ class ComputeManager(manager.Manager):
@wrap_instance_fault
def _rollback_live_migration(self, context, instance,
dest, migrate_data=None,
- migration_status='error',
+ migration_status='failed',
source_bdms=None):
"""Recovers Instance/volume state from migrating -> running.
@@ -8857,9 +8857,8 @@ class ComputeManager(manager.Manager):
phase=fields.NotificationPhase.END,
bdms=bdms)
- # TODO(luyao): set migration status to 'failed' but not 'error'
- # which means rollback_live_migration is done, we have successfully
- # cleaned up and returned instance back to normal status.
+ # NOTE(luyao): we have cleaned up everything and got the instance
+ # back to normal status, now set the migration status to 'failed'
self._set_migration_status(migration, migration_status)
@wrap_exception()
@@ -10377,11 +10376,14 @@ class ComputeManager(manager.Manager):
@periodic_task.periodic_task(spacing=CONF.instance_delete_interval)
def _cleanup_incomplete_migrations(self, context):
- """Delete instance files on failed resize/revert-resize operation
-
- During resize/revert-resize operation, if that instance gets deleted
- in-between then instance files might remain either on source or
- destination compute node because of race condition.
+ """Cleanup on failed resize/revert-resize operation and
+ failed rollback live migration operation.
+
+ During resize/revert-resize operation, or after a failed rollback
+ live migration operation, if that instance gets deleted then instance
+ files might remain either on source or destination compute node and
+ other specific resources might not be cleaned up because of the race
+ condition.
"""
LOG.debug('Cleaning up deleted instances with incomplete migration ')
migration_filters = {'host': CONF.host,
@@ -10403,21 +10405,29 @@ class ComputeManager(manager.Manager):
context, inst_filters, expected_attrs=attrs, use_slave=True)
for instance in instances:
- if instance.host != CONF.host:
- for migration in migrations:
- if instance.uuid == migration.instance_uuid:
- # Delete instance files if not cleanup properly either
- # from the source or destination compute nodes when
- # the instance is deleted during resizing.
- self.driver.delete_instance_files(instance)
- try:
- migration.status = 'failed'
- migration.save()
- except exception.MigrationNotFound:
- LOG.warning("Migration %s is not found.",
- migration.id,
- instance=instance)
- break
+ if instance.host == CONF.host:
+ continue
+ for migration in migrations:
+ if instance.uuid != migration.instance_uuid:
+ continue
+ self.driver.delete_instance_files(instance)
+ # We are not sure whether the migration_context was applied
+ # during the incomplete migration, so we need to apply/revert
+ # the migration_context to get the instance object content
+ # matching the current host.
+ revert = (True if migration.source_compute == CONF.host
+ else False)
+ with instance.mutated_migration_context(revert=revert):
+ self.driver.cleanup_lingering_instance_resources(instance)
+
+ try:
+ migration.status = 'failed'
+ migration.save()
+ except exception.MigrationNotFound:
+ LOG.warning("Migration %s is not found.",
+ migration.id,
+ instance=instance)
+ break
@messaging.expected_exceptions(exception.InstanceQuiesceNotSupported,
exception.QemuGuestAgentNotEnabled,
diff --git a/nova/compute/resource_tracker.py b/nova/compute/resource_tracker.py
index 1074879738..484c509a4b 100644
--- a/nova/compute/resource_tracker.py
+++ b/nova/compute/resource_tracker.py
@@ -897,9 +897,9 @@ class ResourceTracker(object):
instance_by_uuid = self._update_usage_from_instances(
context, instances, nodename)
- # Grab all in-progress migrations:
- migrations = objects.MigrationList.get_in_progress_by_host_and_node(
- context, self.host, nodename)
+ # Grab all in-progress migrations and error migrations:
+ migrations = objects.MigrationList.get_in_progress_and_error(
+ context, self.host, nodename)
self._pair_instances_to_migrations(migrations, instance_by_uuid)
self._update_usage_from_migrations(context, migrations, nodename)
@@ -1338,6 +1338,11 @@ class ResourceTracker(object):
try:
if uuid not in instances:
+ # Track the migrating instance even if it is deleted but
+ # still has a database record. This kind of instance might
+ # have been deleted during an unfinished migration but still
+ # exist on the hypervisor.
+ migration._context = context.elevated(read_deleted='yes')
instances[uuid] = migration.instance
except exception.InstanceNotFound as e:
# migration referencing deleted instance
diff --git a/nova/db/api.py b/nova/db/api.py
index b9bdc7e845..14578c1efd 100644
--- a/nova/db/api.py
+++ b/nova/db/api.py
@@ -470,6 +470,14 @@ def migration_get_by_sort_filters(context, sort_keys, sort_dirs, values):
values)
+def migration_get_in_progress_and_error_by_host_and_node(context, host, node):
+ """Finds all in progress migrations and error migrations for the given
+ host + node.
+ """
+ return IMPL.migration_get_in_progress_and_error_by_host_and_node(
+ context, host, node)
+
+
####################
diff --git a/nova/db/sqlalchemy/api.py b/nova/db/sqlalchemy/api.py
index a7f500dc90..973622a8f2 100644
--- a/nova/db/sqlalchemy/api.py
+++ b/nova/db/sqlalchemy/api.py
@@ -3204,6 +3204,10 @@ def migration_get_all_by_filters(context, filters,
elif "source_compute" in filters:
host = filters['source_compute']
query = query.filter(models.Migration.source_compute == host)
+ if "node" in filters:
+ node = filters['node']
+ query = query.filter(or_(models.Migration.source_node == node,
+ models.Migration.dest_node == node))
if "migration_type" in filters:
migtype = filters["migration_type"]
query = query.filter(models.Migration.migration_type == migtype)
@@ -3272,6 +3276,20 @@ def migration_migrate_to_uuid(context, count):
return done, done
+@pick_context_manager_reader
+def migration_get_in_progress_and_error_by_host_and_node(context, host, node):
+ return model_query(context, models.Migration).\
+ filter(or_(and_(models.Migration.source_compute == host,
+ models.Migration.source_node == node),
+ and_(models.Migration.dest_compute == host,
+ models.Migration.dest_node == node))).\
+ filter(~models.Migration.status.in_(['confirmed', 'reverted',
+ 'failed', 'completed',
+ 'cancelled', 'done'])).\
+ options(_joinedload_all('instance.system_metadata')).\
+ all()
+
+
########################
# User-provided metadata
diff --git a/nova/objects/instance.py b/nova/objects/instance.py
index 9821d57d77..69d69e6e5f 100644
--- a/nova/objects/instance.py
+++ b/nova/objects/instance.py
@@ -1047,8 +1047,8 @@ class Instance(base.NovaPersistentObject, base.NovaObject,
setattr(self, inst_attr_name, attr_value)
@contextlib.contextmanager
- def mutated_migration_context(self):
- """Context manager to temporarily apply the migration context.
+ def mutated_migration_context(self, revert=False):
+ """Context manager to temporarily apply/revert the migration context.
Calling .save() from within the context manager means that the mutated
context will be saved which can cause incorrect resource tracking, and
@@ -1063,7 +1063,10 @@ class Instance(base.NovaPersistentObject, base.NovaObject,
current_values = {}
for attr_name in _MIGRATION_CONTEXT_ATTRS:
current_values[attr_name] = getattr(self, attr_name)
- self.apply_migration_context()
+ if revert:
+ self.revert_migration_context()
+ else:
+ self.apply_migration_context()
try:
yield
finally:
diff --git a/nova/objects/migration.py b/nova/objects/migration.py
index 5c185f45e2..9a195086cc 100644
--- a/nova/objects/migration.py
+++ b/nova/objects/migration.py
@@ -222,7 +222,9 @@ class MigrationList(base.ObjectListBase, base.NovaObject):
# for an instance.
# Version 1.4: Added sort_keys, sort_dirs, limit, marker kwargs to
# get_by_filters for migrations pagination support.
- VERSION = '1.4'
+ # Version 1.5: Added a new function to get in progress migrations
+ # and error migrations for a given host + node.
+ VERSION = '1.5'
fields = {
'objects': fields.ListOfObjectsField('Migration'),
@@ -266,3 +268,11 @@ class MigrationList(base.ObjectListBase, base.NovaObject):
context, instance_uuid, migration_type)
return base.obj_make_list(context, cls(context), objects.Migration,
db_migrations)
+
+ @base.remotable_classmethod
+ def get_in_progress_and_error(cls, context, host, node):
+ db_migrations = \
+ db.migration_get_in_progress_and_error_by_host_and_node(
+ context, host, node)
+ return base.obj_make_list(context, cls(context), objects.Migration,
+ db_migrations)
diff --git a/nova/tests/functional/libvirt/test_numa_live_migration.py b/nova/tests/functional/libvirt/test_numa_live_migration.py
index 427ba0edbe..428fd5feb1 100644
--- a/nova/tests/functional/libvirt/test_numa_live_migration.py
+++ b/nova/tests/functional/libvirt/test_numa_live_migration.py
@@ -351,7 +351,7 @@ class NUMALiveMigrationRollbackTests(NUMALiveMigrationPositiveBase):
# rollback test, so server_a is expected to remain on host_a.
if pin_dest:
self._rpc_pin_host('host_b')
- self._live_migrate(self.server_a, 'error')
+ self._live_migrate(self.server_a, 'failed')
self.assertEqual('host_a', self.get_host(self.server_a['id']))
self.assertIsNone(self._get_migration_context(self.server_a['id']))
@@ -442,7 +442,7 @@ class NUMALiveMigrationLegacyBase(NUMALiveMigrationPositiveBase):
['completed'])
else:
final_host = 'source'
- self._wait_for_migration_status(self.migrating_server, ['error'])
+ self._wait_for_migration_status(self.migrating_server, ['failed'])
self.assertEqual(final_host,
self.get_host(self.migrating_server['id']))
self.assertTrue(self.migrate_stub_ran)
diff --git a/nova/tests/functional/regressions/test_bug_1889108.py b/nova/tests/functional/regressions/test_bug_1889108.py
index 6c6d1a86fd..0e847e81ab 100644
--- a/nova/tests/functional/regressions/test_bug_1889108.py
+++ b/nova/tests/functional/regressions/test_bug_1889108.py
@@ -82,7 +82,7 @@ class TestVolAttachmentsDuringPreLiveMigration(
# Migrate the instance and wait until the migration errors out thanks
# to our mocked version of pre_live_migration raising
# test.TestingException
- self._live_migrate(server, 'error')
+ self._live_migrate(server, 'failed')
# Assert that we called the fake pre_live_migration method
mock_plm.assert_called_once()
diff --git a/nova/tests/functional/test_servers.py b/nova/tests/functional/test_servers.py
index 01e702b718..dd84547308 100644
--- a/nova/tests/functional/test_servers.py
+++ b/nova/tests/functional/test_servers.py
@@ -2870,8 +2870,10 @@ class ServerMovingTests(integrated_helpers.ProviderUsageBaseTestCase):
}
self.api.post_server_action(server['id'], post)
# The compute manager will put the migration record into error status
- # when pre_live_migration fails, so wait for that to happen.
- migration = self._wait_for_migration_status(server, ['error'])
+ # when start _cleanup_pre_live_migration, then set it to failed status
+ # at the end of _rollback_live_migration
+ migration = self._wait_for_migration_status(
+ server, ['error', 'failed'])
# The _rollback_live_migration method in the compute manager will reset
# the task_state on the instance, so wait for that to happen.
server = self._wait_for_server_parameter(
diff --git a/nova/tests/unit/compute/test_compute.py b/nova/tests/unit/compute/test_compute.py
index e16ebcd973..510734c7d5 100644
--- a/nova/tests/unit/compute/test_compute.py
+++ b/nova/tests/unit/compute/test_compute.py
@@ -6251,7 +6251,7 @@ class ComputeTestCase(BaseTestCase,
self.assertEqual('src_host', instance.host)
self.assertEqual(vm_states.ACTIVE, instance.vm_state)
self.assertIsNone(instance.task_state)
- self.assertEqual('error', migration.status)
+ self.assertEqual('failed', migration.status)
mock_get_disk.assert_called()
mock_pre.assert_called_once_with(c,
instance, True, mock_get_disk.return_value, dest_host,
@@ -6651,7 +6651,7 @@ class ComputeTestCase(BaseTestCase,
_test()
- self.assertEqual(migration_status or 'error', migration.status)
+ self.assertEqual(migration_status or 'failed', migration.status)
self.assertEqual(0, instance.progress)
def test_rollback_live_migration_set_migration_status(self):
diff --git a/nova/tests/unit/compute/test_compute_mgr.py b/nova/tests/unit/compute/test_compute_mgr.py
index 35f544f690..babdbb3edc 100644
--- a/nova/tests/unit/compute/test_compute_mgr.py
+++ b/nova/tests/unit/compute/test_compute_mgr.py
@@ -2463,13 +2463,14 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
mock_delete.assert_has_calls([mock.call(mock.ANY),
mock.call(mock.ANY)])
+ @mock.patch.object(objects.Instance, 'mutated_migration_context')
@mock.patch.object(objects.Migration, 'save')
@mock.patch.object(objects.MigrationList, 'get_by_filters')
@mock.patch.object(objects.InstanceList, 'get_by_filters')
def _test_cleanup_incomplete_migrations(self, inst_host,
mock_inst_get_by_filters,
mock_migration_get_by_filters,
- mock_save):
+ mock_save, mock_mutated_mgr_ctxt):
def fake_inst(context, uuid, host):
inst = objects.Instance(context)
inst.uuid = uuid
@@ -2498,7 +2499,11 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
mock_migration_get_by_filters.return_value = fake_migrations
mock_inst_get_by_filters.return_value = fake_instances
- with mock.patch.object(self.compute.driver, 'delete_instance_files'):
+ with test.nested(
+ mock.patch.object(self.compute.driver, 'delete_instance_files'),
+ mock.patch.object(self.compute.driver,
+ 'cleanup_lingering_instance_resources')
+ ) as (mock_delete_files, mock_cleanup_resources):
self.compute._cleanup_incomplete_migrations(self.context)
# Ensure that migration status is set to 'failed' after instance
diff --git a/nova/tests/unit/compute/test_resource_tracker.py b/nova/tests/unit/compute/test_resource_tracker.py
index 62bf0b8ead..09c7e9b598 100644
--- a/nova/tests/unit/compute/test_resource_tracker.py
+++ b/nova/tests/unit/compute/test_resource_tracker.py
@@ -585,7 +585,7 @@ class TestUpdateAvailableResources(BaseTestCase):
@mock.patch('nova.objects.PciDeviceList.get_by_compute_node',
return_value=objects.PciDeviceList())
@mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename')
- @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node')
+ @mock.patch('nova.objects.MigrationList.get_in_progress_and_error')
@mock.patch('nova.objects.InstanceList.get_by_host_and_node')
def test_disabled(self, get_mock, migr_mock, get_cn_mock, pci_mock,
instance_pci_mock):
@@ -618,7 +618,7 @@ class TestUpdateAvailableResources(BaseTestCase):
@mock.patch('nova.objects.PciDeviceList.get_by_compute_node',
return_value=objects.PciDeviceList())
@mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename')
- @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node')
+ @mock.patch('nova.objects.MigrationList.get_in_progress_and_error')
@mock.patch('nova.objects.InstanceList.get_by_host_and_node')
def test_no_instances_no_migrations_no_reserved(self, get_mock, migr_mock,
get_cn_mock, pci_mock,
@@ -670,7 +670,7 @@ class TestUpdateAvailableResources(BaseTestCase):
@mock.patch('nova.objects.PciDeviceList.get_by_compute_node',
return_value=objects.PciDeviceList())
@mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename')
- @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node')
+ @mock.patch('nova.objects.MigrationList.get_in_progress_and_error')
@mock.patch('nova.objects.InstanceList.get_by_host_and_node')
@mock.patch('nova.compute.resource_tracker.ResourceTracker.'
'_remove_deleted_instances_allocations')
@@ -699,7 +699,7 @@ class TestUpdateAvailableResources(BaseTestCase):
return_value=objects.PciDeviceList())
@mock.patch('nova.compute.resource_tracker.ResourceTracker.'
'_init_compute_node', return_value=True)
- @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node')
+ @mock.patch('nova.objects.MigrationList.get_in_progress_and_error')
@mock.patch('nova.objects.InstanceList.get_by_host_and_node')
@mock.patch('nova.compute.resource_tracker.ResourceTracker.'
'_remove_deleted_instances_allocations')
@@ -729,7 +729,7 @@ class TestUpdateAvailableResources(BaseTestCase):
@mock.patch('nova.objects.PciDeviceList.get_by_compute_node',
return_value=objects.PciDeviceList())
@mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename')
- @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node')
+ @mock.patch('nova.objects.MigrationList.get_in_progress_and_error')
@mock.patch('nova.objects.InstanceList.get_by_host_and_node')
def test_no_instances_no_migrations_reserved_disk_ram_and_cpu(
self, get_mock, migr_mock, get_cn_mock, pci_mock,
@@ -771,7 +771,7 @@ class TestUpdateAvailableResources(BaseTestCase):
@mock.patch('nova.objects.PciDeviceList.get_by_compute_node',
return_value=objects.PciDeviceList())
@mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename')
- @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node')
+ @mock.patch('nova.objects.MigrationList.get_in_progress_and_error')
@mock.patch('nova.objects.InstanceList.get_by_host_and_node')
def test_some_instances_no_migrations(self, get_mock, migr_mock,
get_cn_mock, pci_mock,
@@ -820,7 +820,7 @@ class TestUpdateAvailableResources(BaseTestCase):
@mock.patch('nova.objects.PciDeviceList.get_by_compute_node',
return_value=objects.PciDeviceList())
@mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename')
- @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node')
+ @mock.patch('nova.objects.MigrationList.get_in_progress_and_error')
@mock.patch('nova.objects.InstanceList.get_by_host_and_node')
def test_orphaned_instances_no_migrations(self, get_mock, migr_mock,
get_cn_mock, pci_mock,
@@ -886,7 +886,7 @@ class TestUpdateAvailableResources(BaseTestCase):
@mock.patch('nova.objects.PciDeviceList.get_by_compute_node',
return_value=objects.PciDeviceList())
@mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename')
- @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node')
+ @mock.patch('nova.objects.MigrationList.get_in_progress_and_error')
@mock.patch('nova.objects.Instance.get_by_uuid')
@mock.patch('nova.objects.InstanceList.get_by_host_and_node')
def test_no_instances_source_migration(self, get_mock, get_inst_mock,
@@ -952,7 +952,7 @@ class TestUpdateAvailableResources(BaseTestCase):
@mock.patch('nova.objects.PciDeviceList.get_by_compute_node',
return_value=objects.PciDeviceList())
@mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename')
- @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node')
+ @mock.patch('nova.objects.MigrationList.get_in_progress_and_error')
@mock.patch('nova.objects.Instance.get_by_uuid')
@mock.patch('nova.objects.InstanceList.get_by_host_and_node')
def test_no_instances_dest_migration(self, get_mock, get_inst_mock,
@@ -1015,7 +1015,7 @@ class TestUpdateAvailableResources(BaseTestCase):
@mock.patch('nova.objects.PciDeviceList.get_by_compute_node',
return_value=objects.PciDeviceList())
@mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename')
- @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node')
+ @mock.patch('nova.objects.MigrationList.get_in_progress_and_error')
@mock.patch('nova.objects.Instance.get_by_uuid')
@mock.patch('nova.objects.InstanceList.get_by_host_and_node')
def test_no_instances_dest_evacuation(self, get_mock, get_inst_mock,
@@ -1076,7 +1076,7 @@ class TestUpdateAvailableResources(BaseTestCase):
@mock.patch('nova.objects.MigrationContext.get_by_instance_uuid',
return_value=None)
@mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename')
- @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node')
+ @mock.patch('nova.objects.MigrationList.get_in_progress_and_error')
@mock.patch('nova.objects.Instance.get_by_uuid')
@mock.patch('nova.objects.InstanceList.get_by_host_and_node')
def test_some_instances_source_and_dest_migration(self, get_mock,
@@ -1141,13 +1141,76 @@ class TestUpdateAvailableResources(BaseTestCase):
update_mock.assert_called_once()
@mock.patch('nova.compute.utils.is_volume_backed_instance',
+ return_value=False)
+ @mock.patch('nova.objects.InstancePCIRequests.get_by_instance',
+ return_value=objects.InstancePCIRequests(requests=[]))
+ @mock.patch('nova.objects.PciDeviceList.get_by_compute_node',
+ return_value=objects.PciDeviceList())
+ @mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename')
+ @mock.patch('nova.objects.MigrationList.get_in_progress_and_error')
+ @mock.patch('nova.objects.Instance.get_by_uuid')
+ @mock.patch('nova.objects.InstanceList.get_by_host_and_node')
+ def test_no_instances_err_migration(
+ self, get_mock, get_inst_mock, migr_mock, get_cn_mock, pci_mock,
+ instance_pci_mock, mock_is_volume_backed_instance
+ ):
+ # We test the behavior of update_available_resource() when
+ # there is an error migration that involves this compute node
+ # as the destination host not the source host, and the resource
+ # tracker does not yet have any instances assigned to it. This is
+ # the case when a migration to this compute host from another host
+ # is in error, the resources claimed on destination host might not
+ # be cleaned up, so the resource tracker must reserve the resources
+ # for the error migration.
+
+ # Setup virt resources to match used resources to number
+ # of defined instances on the hypervisor
+ virt_resources = copy.deepcopy(_VIRT_DRIVER_AVAIL_RESOURCES)
+ virt_resources.update(
+ vcpus_used=2, memory_mb_used=256, local_gb_used=5)
+ self._setup_rt(virt_resources=virt_resources)
+
+ get_mock.return_value = []
+ migr_obj = _MIGRATION_FIXTURES['dest-only']
+ migr_obj.status = 'error'
+ # in-progress and error migrations
+ migr_mock.return_value = [migr_obj]
+ inst_uuid = migr_obj.instance_uuid
+ instance = _MIGRATION_INSTANCE_FIXTURES[inst_uuid].obj_clone()
+ get_inst_mock.return_value = instance
+ get_cn_mock.return_value = _COMPUTE_NODE_FIXTURES[0]
+ instance.migration_context = _MIGRATION_CONTEXT_FIXTURES[inst_uuid]
+
+ update_mock = self._update_available_resources()
+
+ get_cn_mock.assert_called_once_with(mock.ANY, _HOSTNAME, _NODENAME)
+ expected_resources = copy.deepcopy(_COMPUTE_NODE_FIXTURES[0])
+ vals = {
+ 'free_disk_gb': 1,
+ 'local_gb': 6,
+ 'free_ram_mb': 256, # 512 total - 256 for possible confirm of new
+ 'memory_mb_used': 256, # 256 possible confirmed amount
+ 'vcpus_used': 2,
+ 'local_gb_used': 5,
+ 'memory_mb': 512,
+ 'current_workload': 0,
+ 'vcpus': 4,
+ 'running_vms': 0
+ }
+ _update_compute_node(expected_resources, **vals)
+ actual_resources = update_mock.call_args[0][1]
+ self.assertTrue(
+ obj_base.obj_equal_prims(expected_resources, actual_resources))
+ update_mock.assert_called_once()
+
+ @mock.patch('nova.compute.utils.is_volume_backed_instance',
new=mock.Mock(return_value=False))
@mock.patch('nova.objects.PciDeviceList.get_by_compute_node',
new=mock.Mock(return_value=objects.PciDeviceList()))
@mock.patch('nova.objects.MigrationContext.get_by_instance_uuid',
new=mock.Mock(return_value=None))
@mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename')
- @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node')
+ @mock.patch('nova.objects.MigrationList.get_in_progress_and_error')
@mock.patch('nova.objects.Instance.get_by_uuid')
@mock.patch('nova.objects.InstanceList.get_by_host_and_node')
def test_populate_assigned_resources(self, mock_get_instances,
@@ -1199,7 +1262,7 @@ class TestUpdateAvailableResources(BaseTestCase):
@mock.patch('nova.objects.MigrationContext.get_by_instance_uuid',
new=mock.Mock(return_value=None))
@mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename')
- @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node')
+ @mock.patch('nova.objects.MigrationList.get_in_progress_and_error')
@mock.patch('nova.objects.Instance.get_by_uuid')
@mock.patch('nova.objects.InstanceList.get_by_host_and_node')
def test_check_resources_startup_success(self, mock_get_instances,
@@ -1240,7 +1303,7 @@ class TestUpdateAvailableResources(BaseTestCase):
@mock.patch('nova.objects.PciDeviceList.get_by_compute_node',
new=mock.Mock(return_value=objects.PciDeviceList()))
@mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename')
- @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node')
+ @mock.patch('nova.objects.MigrationList.get_in_progress_and_error')
@mock.patch('nova.objects.InstanceList.get_by_host_and_node')
def test_check_resources_startup_fail(self, mock_get_instances,
mock_get_migrations,
@@ -1949,7 +2012,7 @@ class TestInstanceClaim(BaseTestCase):
self.assertIsInstance(claim, claims.NopClaim)
@mock.patch('nova.compute.utils.is_volume_backed_instance')
- @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node')
+ @mock.patch('nova.objects.MigrationList.get_in_progress_and_error')
def test_update_usage_with_claim(self, migr_mock, check_bfv_mock):
# Test that RT.update_usage() only changes the compute node
# resources if there has been a claim first.
@@ -1989,7 +2052,7 @@ class TestInstanceClaim(BaseTestCase):
self.assertTrue(obj_base.obj_equal_prims(expected, cn))
@mock.patch('nova.compute.utils.is_volume_backed_instance')
- @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node')
+ @mock.patch('nova.objects.MigrationList.get_in_progress_and_error')
def test_update_usage_removed(self, migr_mock, check_bfv_mock):
# Test that RT.update_usage() removes the instance when update is
# called in a removed state
@@ -2065,7 +2128,7 @@ class TestInstanceClaim(BaseTestCase):
self.assertEqual(0, len(self.rt.assigned_resources[cn.uuid][rc]))
@mock.patch('nova.compute.utils.is_volume_backed_instance')
- @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node')
+ @mock.patch('nova.objects.MigrationList.get_in_progress_and_error')
def test_claim(self, migr_mock, check_bfv_mock):
self.instance.pci_requests = objects.InstancePCIRequests(requests=[])
check_bfv_mock.return_value = False
@@ -2105,7 +2168,7 @@ class TestInstanceClaim(BaseTestCase):
@mock.patch('nova.compute.utils.is_volume_backed_instance')
@mock.patch('nova.pci.stats.PciDeviceStats.support_requests',
return_value=True)
- @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node')
+ @mock.patch('nova.objects.MigrationList.get_in_progress_and_error')
def test_claim_with_pci(self, migr_mock, pci_stats_mock,
check_bfv_mock):
# Test that a claim involving PCI requests correctly claims
@@ -2238,7 +2301,7 @@ class TestInstanceClaim(BaseTestCase):
@mock.patch('nova.compute.resource_tracker.ResourceTracker.'
'_sync_compute_service_disabled_trait', new=mock.Mock())
@mock.patch('nova.compute.utils.is_volume_backed_instance')
- @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node')
+ @mock.patch('nova.objects.MigrationList.get_in_progress_and_error')
@mock.patch('nova.objects.ComputeNode.save')
def test_claim_abort_context_manager(self, save_mock, migr_mock,
check_bfv_mock):
@@ -2282,7 +2345,7 @@ class TestInstanceClaim(BaseTestCase):
@mock.patch('nova.compute.resource_tracker.ResourceTracker.'
'_sync_compute_service_disabled_trait', new=mock.Mock())
@mock.patch('nova.compute.utils.is_volume_backed_instance')
- @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node')
+ @mock.patch('nova.objects.MigrationList.get_in_progress_and_error')
@mock.patch('nova.objects.ComputeNode.save')
def test_claim_abort(self, save_mock, migr_mock, check_bfv_mock):
self.instance.pci_requests = objects.InstancePCIRequests(requests=[])
@@ -2324,7 +2387,7 @@ class TestInstanceClaim(BaseTestCase):
self.assertEqual(0, cn.running_vms)
@mock.patch('nova.compute.utils.is_volume_backed_instance')
- @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node')
+ @mock.patch('nova.objects.MigrationList.get_in_progress_and_error')
@mock.patch('nova.objects.ComputeNode.save')
def test_claim_numa(self, save_mock, migr_mock, check_bfv_mock):
self.instance.pci_requests = objects.InstancePCIRequests(requests=[])
@@ -2360,7 +2423,7 @@ class TestResize(BaseTestCase):
@mock.patch('nova.objects.PciDeviceList.get_by_compute_node',
return_value=objects.PciDeviceList())
@mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename')
- @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node')
+ @mock.patch('nova.objects.MigrationList.get_in_progress_and_error')
@mock.patch('nova.objects.InstanceList.get_by_host_and_node')
@mock.patch('nova.objects.ComputeNode.save')
def test_resize_claim_same_host(self, save_mock, get_mock, migr_mock,
@@ -2465,7 +2528,7 @@ class TestResize(BaseTestCase):
return_value=objects.PciDeviceList())
@mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename',
return_value=_COMPUTE_NODE_FIXTURES[0])
- @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node',
+ @mock.patch('nova.objects.MigrationList.get_in_progress_and_error',
return_value=[])
@mock.patch('nova.objects.InstanceList.get_by_host_and_node',
return_value=[])
@@ -2473,7 +2536,7 @@ class TestResize(BaseTestCase):
def _test_instance_build_resize(self,
save_mock,
get_by_host_and_node_mock,
- get_in_progress_by_host_and_node_mock,
+ get_in_progress_and_error_mock,
get_by_host_and_nodename_mock,
pci_get_by_compute_node_mock,
pci_get_by_instance_mock,
@@ -2634,7 +2697,7 @@ class TestResize(BaseTestCase):
@mock.patch('nova.pci.request.get_pci_requests_from_flavor')
@mock.patch('nova.objects.PciDeviceList.get_by_compute_node')
@mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename')
- @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node')
+ @mock.patch('nova.objects.MigrationList.get_in_progress_and_error')
@mock.patch('nova.objects.InstanceList.get_by_host_and_node')
@mock.patch('nova.objects.ComputeNode.save')
def test_resize_claim_dest_host_with_pci(self, save_mock, get_mock,
@@ -2790,7 +2853,7 @@ class TestResize(BaseTestCase):
@mock.patch('nova.objects.PciDeviceList.get_by_compute_node',
return_value=objects.PciDeviceList())
@mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename')
- @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node')
+ @mock.patch('nova.objects.MigrationList.get_in_progress_and_error')
@mock.patch('nova.objects.InstanceList.get_by_host_and_node')
@mock.patch('nova.objects.ComputeNode.save')
def test_resize_claim_two_instances(self, save_mock, get_mock, migr_mock,
@@ -2922,7 +2985,7 @@ class TestRebuild(BaseTestCase):
@mock.patch('nova.objects.PciDeviceList.get_by_compute_node',
return_value=objects.PciDeviceList())
@mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename')
- @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node')
+ @mock.patch('nova.objects.MigrationList.get_in_progress_and_error')
@mock.patch('nova.objects.InstanceList.get_by_host_and_node')
@mock.patch('nova.objects.ComputeNode.save')
def test_rebuild_claim(self, save_mock, get_mock, migr_mock, get_cn_mock,
@@ -3095,13 +3158,13 @@ class TestUpdateUsageFromMigrations(BaseTestCase):
instance_uuid='some_uuid',
)
self._setup_rt()
- self.rt._update_usage_from_migrations(mock.sentinel.ctx, [migration],
+ ctx = mock.MagicMock()
+ self.rt._update_usage_from_migrations(ctx, [migration],
_NODENAME)
- mock_get_instance.assert_called_once_with(mock.sentinel.ctx,
- 'some_uuid',
- expected_attrs=[
- 'migration_context',
- 'flavor'])
+ mock_get_instance.assert_called_once_with(
+ ctx.elevated.return_value, 'some_uuid',
+ expected_attrs=['migration_context', 'flavor'])
+ ctx.elevated.assert_called_once_with(read_deleted='yes')
self.assertFalse(mock_update_usage.called)
@mock.patch('nova.compute.resource_tracker.ResourceTracker.'
@@ -3149,17 +3212,16 @@ class TestUpdateUsageFromMigrations(BaseTestCase):
mig_list = objects.MigrationList(objects=migrations)
instance.migration_context = objects.MigrationContext(
migration_id=mig2.id)
- self.rt._update_usage_from_migrations(mock.sentinel.ctx, mig_list,
- _NODENAME)
- upd_mock.assert_called_once_with(mock.sentinel.ctx, instance, mig2,
- _NODENAME)
+ ctxt = mock.MagicMock()
+ self.rt._update_usage_from_migrations(ctxt, mig_list, _NODENAME)
+ upd_mock.assert_called_once_with(ctxt, instance, mig2, _NODENAME)
upd_mock.reset_mock()
mig2.updated_at = None
instance.migration_context.migration_id = mig1.id
- self.rt._update_usage_from_migrations(mock.sentinel.ctx, mig_list,
+ self.rt._update_usage_from_migrations(ctxt, mig_list,
_NODENAME)
- upd_mock.assert_called_once_with(mock.sentinel.ctx, instance, mig1,
+ upd_mock.assert_called_once_with(ctxt, instance, mig1,
_NODENAME)
@mock.patch('nova.objects.migration.Migration.save')
@@ -3188,8 +3250,8 @@ class TestUpdateUsageFromMigrations(BaseTestCase):
id=1,
instance=instance)
mig_list = objects.MigrationList(objects=[mig1])
- self.rt._update_usage_from_migrations(mock.sentinel.ctx, mig_list,
- _NODENAME)
+ ctxt = mock.MagicMock()
+ self.rt._update_usage_from_migrations(ctxt, mig_list, _NODENAME)
self.assertFalse(upd_mock.called)
self.assertEqual(mig1.status, "error")
@@ -3208,10 +3270,11 @@ class TestUpdateUsageFromMigrations(BaseTestCase):
instance_uuid=uuids.instance, id=1, instance=instance)
for state in task_states.rebuild_states + task_states.resizing_states:
instance.task_state = state
+ ctxt = mock.MagicMock()
self.rt._update_usage_from_migrations(
- mock.sentinel.ctx, [migration], _NODENAME)
+ ctxt, [migration], _NODENAME)
mock_update_usage.assert_called_once_with(
- mock.sentinel.ctx, instance, migration, _NODENAME)
+ ctxt, instance, migration, _NODENAME)
mock_update_usage.reset_mock()
@mock.patch('nova.objects.migration.Migration.save')
@@ -3228,10 +3291,11 @@ class TestUpdateUsageFromMigrations(BaseTestCase):
dest_compute=_HOSTNAME, dest_node=_NODENAME,
instance_uuid=uuids.instance, id=1, instance=instance,
migration_type='live-migration')
+ ctxt = mock.MagicMock()
self.rt._update_usage_from_migrations(
- mock.sentinel.ctx, [migration], _NODENAME)
+ ctxt, [migration], _NODENAME)
mock_update_usage.assert_called_once_with(
- mock.sentinel.ctx, instance, migration, _NODENAME)
+ ctxt, instance, migration, _NODENAME)
class TestUpdateUsageFromInstance(BaseTestCase):
diff --git a/nova/tests/unit/objects/test_instance.py b/nova/tests/unit/objects/test_instance.py
index a97f0cfb14..1ea97ef07a 100644
--- a/nova/tests/unit/objects/test_instance.py
+++ b/nova/tests/unit/objects/test_instance.py
@@ -1441,6 +1441,13 @@ class _TestInstanceObject(object):
getattr(inst.migration_context, 'new_' + attr_name))
self.assertIs(inst_value, migration_context_value)
+ with inst.mutated_migration_context(revert=True):
+ for attr_name in instance._MIGRATION_CONTEXT_ATTRS:
+ inst_value = getattr(inst, attr_name)
+ migration_context_value = (
+ getattr(inst.migration_context, 'old_' + attr_name))
+ self.assertIs(inst_value, migration_context_value)
+
for attr_name in instance._MIGRATION_CONTEXT_ATTRS:
inst_value = getattr(inst, attr_name)
self.assertIs(expected_objs[attr_name], inst_value)
diff --git a/nova/tests/unit/objects/test_objects.py b/nova/tests/unit/objects/test_objects.py
index 60eb3fc08a..14818c204c 100644
--- a/nova/tests/unit/objects/test_objects.py
+++ b/nova/tests/unit/objects/test_objects.py
@@ -1105,7 +1105,7 @@ object_data = {
'MemoryDiagnostics': '1.0-2c995ae0f2223bb0f8e523c5cc0b83da',
'Migration': '1.7-bd45b232fd7c95cd79ae9187e10ef582',
'MigrationContext': '1.2-89f10a83999f852a489962ae37d8a026',
- 'MigrationList': '1.4-983a9c29d4f1e747ce719dc9063b729b',
+ 'MigrationList': '1.5-36793f8d65bae421bd5564d09a4de7be',
'MonitorMetric': '1.1-53b1db7c4ae2c531db79761e7acc52ba',
'MonitorMetricList': '1.1-15ecf022a68ddbb8c2a6739cfc9f8f5e',
'NUMACell': '1.4-7695303e820fa855d76954be2eb2680e',
diff --git a/nova/virt/driver.py b/nova/virt/driver.py
index 05d2dabf80..2863054a5b 100644
--- a/nova/virt/driver.py
+++ b/nova/virt/driver.py
@@ -1856,6 +1856,17 @@ class ComputeDriver(object):
"""
return False
+ def cleanup_lingering_instance_resources(self, instance):
+ """Clean up resources occupied by a lingering instance.
+
+ For example, clean up vpmem resources, or whatever other
+ resources we add in the future.
+
+ :param instance: nova.objects.instance.Instance
+ :returns: True if the cleanup is successful, else False.
+ """
+ return True
+
def load_compute_driver(virtapi, compute_driver=None):
"""Load a compute driver module.
diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py
index 2a4a4be5ec..b6ebf7e07c 100644
--- a/nova/virt/libvirt/driver.py
+++ b/nova/virt/libvirt/driver.py
@@ -1482,6 +1482,13 @@ class LibvirtDriver(driver.ComputeDriver):
self._undefine_domain(instance)
+ def cleanup_lingering_instance_resources(self, instance):
+ # Zero the data on the backend pmem device; if this
+ # fails, an exception will be raised.
+ vpmems = self._get_vpmems(instance)
+ if vpmems:
+ self._cleanup_vpmems(vpmems)
+
def _cleanup_vpmems(self, vpmems):
for vpmem in vpmems:
try: