summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorZuul <zuul@review.opendev.org>2019-08-08 17:02:18 +0000
committerGerrit Code Review <review@openstack.org>2019-08-08 17:02:18 +0000
commit069bda35c112e3689e4ef8d3d3a152a1cbf1754e (patch)
treed03ca7c43f118350231ee008e69c1266848ec5f7
parent372832163d8d3e5f9ffd76215f51080d1fce68d4 (diff)
parentcbf6a46d8fcd3236bff44439c8d143a77167dfe2 (diff)
downloadnova-069bda35c112e3689e4ef8d3d3a152a1cbf1754e.tar.gz
Merge "Drop source node allocations if finish_resize fails" into stable/rocky
-rw-r--r--nova/compute/manager.py40
-rw-r--r--nova/tests/functional/regressions/test_bug_1825537.py37
-rw-r--r--nova/tests/functional/test_servers.py11
-rw-r--r--nova/tests/unit/compute/test_compute_mgr.py12
4 files changed, 82 insertions, 18 deletions
diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index 6a763cd00e..f69685d94e 100644
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -4727,12 +4727,50 @@ class ComputeManager(manager.Manager):
new host machine.
"""
+ # _finish_resize sets instance.old_flavor to instance.flavor and
+ # changes instance.flavor to instance.new_flavor (if doing a resize
+ # rather than a cold migration). We save off the old_flavor here in
+ # case we need it for error handling below.
+ old_flavor = instance.flavor
try:
self._finish_resize_helper(context, disk_info, image, instance,
migration)
except Exception:
with excutils.save_and_reraise_exception():
- self._revert_allocation(context, instance, migration)
+ # At this point, resize_instance (which runs on the source) has
+ # already updated the instance host/node values to point to
+ # this (the dest) compute, so we need to leave the allocations
+ # against the dest node resource provider intact and drop the
+ # allocations against the source node resource provider. If the
+ # user tries to recover the server by hard rebooting it, it
+ # will happen on this host so that's where the allocations
+ # should go.
+ LOG.info('Deleting allocations for old flavor on source node '
+ '%s after finish_resize failure. You may be able to '
+ 'recover the instance by hard rebooting it.',
+ migration.source_compute, instance=instance)
+ # NOTE(mriedem): We can't use _delete_allocation_after_move
+ # because it relies on the resource tracker to look up the
+ # node uuid and since we are on the dest host, passing the
+ # source nodename won't work since the RT isn't tracking that
+ # node here. So we just try to remove the migration-based
+ # allocations directly and handle the case they don't exist.
+ if not self.reportclient.delete_allocation_for_instance(
+ context, migration.uuid):
+ # No migration-based allocation. Try to cleanup directly.
+ cn = objects.ComputeNode.get_by_host_and_nodename(
+ context, migration.source_compute,
+ migration.source_node)
+ if not scheduler_utils.remove_allocation_from_compute(
+ context, instance, cn.uuid, self.reportclient,
+ flavor=old_flavor):
+ LOG.error('Failed to delete allocations for old '
+ 'flavor %s against source node %s. The '
+ 'instance is now on the dest node %s. The '
+ 'allocations against the source node need '
+ 'to be manually cleaned up in Placement.',
+ old_flavor.flavorid, migration.source_node,
+ migration.dest_node, instance=instance)
def _finish_resize_helper(self, context, disk_info, image, instance,
migration):
diff --git a/nova/tests/functional/regressions/test_bug_1825537.py b/nova/tests/functional/regressions/test_bug_1825537.py
index b12f7aab80..828089692e 100644
--- a/nova/tests/functional/regressions/test_bug_1825537.py
+++ b/nova/tests/functional/regressions/test_bug_1825537.py
@@ -24,6 +24,13 @@ class FinishResizeErrorAllocationCleanupTestCase(
compute_driver = 'fake.FakeFinishMigrationFailDriver'
+ # ProviderUsageBaseTestCase uses the AllServicesCurrent fixture which
+ # means we'll use migration-based allocations by default. This flag allows
+ # us to control the logic in conductor to handle legacy allocations where
+ # the source (old flavor) and dest (new flavor) node allocations are
+ # doubled up on the instance.
+ migration_based_allocations = True
+
def setUp(self):
super(FinishResizeErrorAllocationCleanupTestCase, self).setUp()
# Get the flavors we're going to use.
@@ -31,6 +38,10 @@ class FinishResizeErrorAllocationCleanupTestCase(
self.flavor1 = flavors[0]
self.flavor2 = flavors[1]
+ self.stub_out('nova.conductor.tasks.migrate.'
+ 'should_do_migration_allocation',
+ lambda *args, **kwargs: self.migration_based_allocations)
+
def _resize_and_assert_error(self, server, dest_host):
# Now resize the server and wait for it to go to ERROR status because
# the finish_migration virt driver method in host2 should fail.
@@ -67,16 +78,20 @@ class FinishResizeErrorAllocationCleanupTestCase(
# allocations should still exist with the new flavor.
source_rp_uuid = self._get_provider_uuid_by_host('host1')
dest_rp_uuid = self._get_provider_uuid_by_host('host2')
- # FIXME(mriedem): This is bug 1825537 where the allocations are
- # reverted when finish_resize fails so the dest node resource provider
- # does not have any allocations and the instance allocations are for
- # the old flavor on the source node resource provider even though the
- # instance is not running on the source host nor pointed at the source
- # host in the DB.
- # self.assertFlavorMatchesAllocation(
- # self.flavor2, server['id'], dest_rp_uuid)
dest_rp_usages = self._get_provider_usages(dest_rp_uuid)
+ self.assertFlavorMatchesAllocation(self.flavor2, dest_rp_usages)
+ # And the source node provider should not have any usage.
+ source_rp_usages = self._get_provider_usages(source_rp_uuid)
no_usage = {'VCPU': 0, 'MEMORY_MB': 0, 'DISK_GB': 0}
- self.assertEqual(no_usage, dest_rp_usages)
- source_usages = self._get_provider_usages(source_rp_uuid)
- self.assertFlavorMatchesAllocation(self.flavor1, source_usages)
+ self.assertEqual(no_usage, source_rp_usages)
+
+
+class FinishResizeErrorAllocationCleanupLegacyTestCase(
+ FinishResizeErrorAllocationCleanupTestCase):
+ """Variant of FinishResizeErrorAllocationCleanupTestCase which does not
+ use migration-based allocations, e.g. tests the scenario that there are
+ older computes in the deployment so the source and dest node allocations
+ are doubled up on the instance consumer record rather than the migration
+ record.
+ """
+ migration_based_allocations = False
diff --git a/nova/tests/functional/test_servers.py b/nova/tests/functional/test_servers.py
index 21137683c4..81d7fb385a 100644
--- a/nova/tests/functional/test_servers.py
+++ b/nova/tests/functional/test_servers.py
@@ -3088,10 +3088,13 @@ class ServerMovingTests(integrated_helpers.ProviderUsageBaseTestCase):
# Ensure the allocation records still exist on the host.
source_rp_uuid = self._get_provider_uuid_by_host(hostname)
source_usages = self._get_provider_usages(source_rp_uuid)
- # FIXME(mriedem): This is wrong for the _finish_resize case.
- # The new_flavor should have been subtracted from the doubled
- # allocation which just leaves us with the original flavor.
- self.assertFlavorMatchesAllocation(self.flavor1, source_usages)
+ if failing_method == '_finish_resize':
+ # finish_resize will drop the old flavor allocations.
+ self.assertFlavorMatchesAllocation(self.flavor2, source_usages)
+ else:
+ # The new_flavor should have been subtracted from the doubled
+ # allocation which just leaves us with the original flavor.
+ self.assertFlavorMatchesAllocation(self.flavor1, source_usages)
def test_resize_to_same_host_prep_resize_fails(self):
self._test_resize_to_same_host_instance_fails(
diff --git a/nova/tests/unit/compute/test_compute_mgr.py b/nova/tests/unit/compute/test_compute_mgr.py
index d1a2177311..5e51262198 100644
--- a/nova/tests/unit/compute/test_compute_mgr.py
+++ b/nova/tests/unit/compute/test_compute_mgr.py
@@ -6766,7 +6766,9 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase):
test_instance_fault.fake_faults['fake-uuid'][0])
yield _finish_resize
- def test_finish_resize_failure(self):
+ @mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
+ 'delete_allocation_for_instance')
+ def test_finish_resize_failure(self, mock_del_allocs):
self.migration.status = 'post-migrating'
with self._mock_finish_resize() as _finish_resize:
@@ -6780,10 +6782,14 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase):
# Assert that we set the migration to an error state
self.assertEqual("error", self.migration.status)
+ mock_del_allocs.assert_called_once_with(
+ self.context, self.migration.uuid)
+ @mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
+ 'delete_allocation_for_instance')
@mock.patch('nova.compute.manager.ComputeManager.'
'_notify_about_instance_usage')
- def test_finish_resize_notify_failure(self, notify):
+ def test_finish_resize_notify_failure(self, notify, mock_del_allocs):
self.migration.status = 'post-migrating'
with self._mock_finish_resize():
@@ -6797,6 +6803,8 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase):
# Assert that we did not set the migration to an error state
self.assertEqual('post-migrating', self.migration.status)
+ mock_del_allocs.assert_called_once_with(
+ self.context, self.migration.uuid)
@contextlib.contextmanager
def _mock_resize_instance(self):