summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSean Mooney <work@seanmooney.info>2021-05-13 12:48:21 +0100
committerAmit Uniyal <auniyal@redhat.com>2022-11-28 17:51:23 +0000
commit3885f983c358e5a5f0b10f603633193ac335a45f (patch)
treecb6d1e12607d156e09393df5c8798b32665fe122
parent5e955b62fa63b72816369a21af283a2b64f4af27 (diff)
downloadnova-3885f983c358e5a5f0b10f603633193ac335a45f.tar.gz
[compute] always set instance.host in post_livemigration
This change add a new _post_live_migration_update_host function that wraps _post_live_migration and just ensures that if we exit due to an exception instance.host is set to the destination host. when we are in _post_live_migration the guest has already started running on the destination host and we cannot revert. Sometimes admins or users will hard reboot the instance expecting that to fix everything when the vm enters the error state after the failed migrations. Previously this would end up recreating the instance on the source node leading to possible data corruption if the instance used shared storage. Change-Id: Ibc4bc7edf1c8d1e841c72c9188a0a62836e9f153 Partial-Bug: #1628606 (cherry picked from commit 8449b7caefa4a5c0728e11380a088525f15ad6f5) (cherry picked from commit 643b0c7d35752b214eee19b8d7298a19a8493f6b) (cherry picked from commit 17ae907569e45cc0f5c7da9511bb668a877b7b2e) (cherry picked from commit 15502ddedc23e6591ace4e73fa8ce5b18b5644b0) (cherry picked from commit 43c0e40d288960760a6eaad05cb9670e01ef40d0) (cherry picked from commit 0ac64bba8b7aba2fb358e00e970e88b32d26ef7e)
-rw-r--r--nova/compute/manager.py43
-rw-r--r--nova/tests/functional/regressions/test_bug_1628606.py5
-rw-r--r--nova/tests/unit/compute/test_compute_mgr.py21
3 files changed, 63 insertions, 6 deletions
diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index 5421bc62e5..27e76963ef 100644
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -8229,8 +8229,9 @@ class ComputeManager(manager.Manager):
# host attachment. We fetch BDMs before that to retain connection_info
# and attachment_id relating to the source host for post migration
# cleanup.
- post_live_migration = functools.partial(self._post_live_migration,
- source_bdms=source_bdms)
+ post_live_migration = functools.partial(
+ self._post_live_migration_update_host, source_bdms=source_bdms
+ )
rollback_live_migration = functools.partial(
self._rollback_live_migration, source_bdms=source_bdms)
@@ -8479,6 +8480,42 @@ class ComputeManager(manager.Manager):
bdm.attachment_id, self.host,
six.text_type(e), instance=instance)
+ # TODO(sean-k-mooney): add typing
+ def _post_live_migration_update_host(
+ self, ctxt, instance, dest, block_migration=False,
+ migrate_data=None, source_bdms=None
+ ):
+ try:
+ self._post_live_migration(
+ ctxt, instance, dest, block_migration, migrate_data,
+ source_bdms)
+ except Exception:
+ # Restore the instance object
+ node_name = None
+ try:
+ # get node name of compute, where instance will be
+ # running after migration, that is destination host
+ compute_node = self._get_compute_info(ctxt, dest)
+ node_name = compute_node.hypervisor_hostname
+ except exception.ComputeHostNotFound:
+ LOG.exception('Failed to get compute_info for %s', dest)
+
+ # we can never rollback from post live migration and we can only
+ # get here if the instance is running on the dest so we ensure
+ # the instance.host is set correctly and reraise the original
+ # exception unmodified.
+ if instance.host != dest:
+ # apply saves the new fields while drop actually removes the
+ # migration context from the instance, so migration persists.
+ instance.apply_migration_context()
+ instance.drop_migration_context()
+ instance.host = dest
+ instance.task_state = None
+ instance.node = node_name
+ instance.progress = 0
+ instance.save()
+ raise
+
@wrap_exception()
@wrap_instance_fault
def _post_live_migration(self, ctxt, instance, dest,
@@ -8490,7 +8527,7 @@ class ComputeManager(manager.Manager):
and mainly updating database record.
:param ctxt: security context
- :param instance: instance dict
+ :param instance: instance object
:param dest: destination host
:param block_migration: if true, prepare for block migration
:param migrate_data: if not None, it is a dict which has data
diff --git a/nova/tests/functional/regressions/test_bug_1628606.py b/nova/tests/functional/regressions/test_bug_1628606.py
index 22bb219c1b..c922279b86 100644
--- a/nova/tests/functional/regressions/test_bug_1628606.py
+++ b/nova/tests/functional/regressions/test_bug_1628606.py
@@ -57,6 +57,5 @@ class PostLiveMigrationFail(
server = self._live_migrate(
server, migration_expected_state='error',
server_expected_state='ERROR')
- # FIXME(amit): this should point to the dest as after migration
- # but does not because of bug 1628606
- self.assertEqual(self.src.host, server['OS-EXT-SRV-ATTR:host'])
+
+ self.assertEqual(self.dest.host, server['OS-EXT-SRV-ATTR:host'])
diff --git a/nova/tests/unit/compute/test_compute_mgr.py b/nova/tests/unit/compute/test_compute_mgr.py
index 7d777f7725..728f433084 100644
--- a/nova/tests/unit/compute/test_compute_mgr.py
+++ b/nova/tests/unit/compute/test_compute_mgr.py
@@ -9741,6 +9741,27 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase,
self.instance,
migration)
+ def test_post_live_migration_update_host(self):
+ @mock.patch.object(self.compute, '_get_compute_info')
+ def _test_post_live_migration(_get_compute_info):
+ dest_host = 'dest'
+ cn = objects.ComputeNode(hypervisor_hostname=dest_host)
+ _get_compute_info.return_value = cn
+ instance = fake_instance.fake_instance_obj(self.context,
+ node='src',
+ uuid=uuids.instance)
+ with mock.patch.object(self.compute, "_post_live_migration"
+ ) as plm, mock.patch.object(instance, "save") as save:
+ error = ValueError("some failure")
+ plm.side_effect = error
+ self.assertRaises(
+ ValueError, self.compute._post_live_migration_update_host,
+ self.context, instance, dest_host)
+ save.assert_called_once()
+ self.assertEqual(instance.host, dest_host)
+
+ _test_post_live_migration()
+
def test_post_live_migration_cinder_pre_344_api(self):
# Because live migration has
# succeeded,_post_live_migration_remove_source_vol_connections()