diff options
author | Zuul <zuul@review.opendev.org> | 2023-04-26 17:01:42 +0000 |
---|---|---|
committer | Gerrit Code Review <review@openstack.org> | 2023-04-26 17:01:42 +0000 |
commit | 66f085f946752f7f84804efa22f739a7119a6efc (patch) | |
tree | 35543b6d2c1da86c07d553d5bf09d4a4c7148768 /nova | |
parent | 316e3e0a3b24fcb0597fd3193ec4d8f6fc0abc44 (diff) | |
parent | 82deb0ce4be588d675882a28ef7cacb97d8ffd1b (diff) | |
download | nova-66f085f946752f7f84804efa22f739a7119a6efc.tar.gz |
Merge "Stop ignoring missing compute nodes in claims"
Diffstat (limited to 'nova')
-rw-r--r-- | nova/compute/resource_tracker.py | 38 | ||||
-rw-r--r-- | nova/tests/unit/compute/test_compute_mgr.py | 43 | ||||
-rw-r--r-- | nova/tests/unit/compute/test_resource_tracker.py | 19 | ||||
-rw-r--r-- | nova/tests/unit/compute/test_shelve.py | 4 |
4 files changed, 66 insertions, 38 deletions
diff --git a/nova/compute/resource_tracker.py b/nova/compute/resource_tracker.py index 3f911f3708..9ee6670c17 100644 --- a/nova/compute/resource_tracker.py +++ b/nova/compute/resource_tracker.py @@ -146,16 +146,20 @@ class ResourceTracker(object): during the instance build. """ if self.disabled(nodename): - # instance_claim() was called before update_available_resource() - # (which ensures that a compute node exists for nodename). We - # shouldn't get here but in case we do, just set the instance's - # host and nodename attribute (probably incorrect) and return a - # NoopClaim. - # TODO(jaypipes): Remove all the disabled junk from the resource - # tracker. Servicegroup API-level active-checking belongs in the - # nova-compute manager. - self._set_instance_host_and_node(instance, nodename) - return claims.NopClaim() + # If we get here, it means we are trying to claim for an instance + # that was scheduled to a node that we do not have in our list, + # or is in some other way unmanageable by this node. This would + # mean that we are unable to account for resources, create + # allocations in placement, or do any of the other accounting + # necessary for this to work. In the past, this situation was + # effectively ignored silently, but in a world where we track + # resources with placement and instance assignment to compute nodes + # by service, we can no longer be leaky. + raise exception.ComputeResourcesUnavailable( + ('Attempt to claim resources for instance %(inst)s ' + 'on unknown node %(node)s failed') % { + 'inst': instance.uuid, + 'node': nodename}) # sanity checks: if instance.host: @@ -280,9 +284,17 @@ class ResourceTracker(object): context, instance, new_flavor, nodename, move_type) if self.disabled(nodename): - # compute_driver doesn't support resource tracking, just - # generate the migration record and continue the resize: - return claims.NopClaim(migration=migration) + # This means we were asked to accept an incoming migration to a + # node that we do not own or track. We really should not get here, + # but if we do, we must refuse to continue with the migration + # process, since we cannot account for those resources, create + # allocations in placement, etc. This has been a silent resource + # leak in the past, but it must be a hard failure now. + raise exception.ComputeResourcesUnavailable( + ('Attempt to claim move resources for instance %(inst)s on ' + 'unknown node %(node)s failed') % { + 'inst': instance.uuid, + 'node': 'nodename'}) cn = self.compute_nodes[nodename] diff --git a/nova/tests/unit/compute/test_compute_mgr.py b/nova/tests/unit/compute/test_compute_mgr.py index 1c69cd8f1c..73c9d32197 100644 --- a/nova/tests/unit/compute/test_compute_mgr.py +++ b/nova/tests/unit/compute/test_compute_mgr.py @@ -2560,10 +2560,11 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase, self.assertFalse(mock_get_info.called) self.assertFalse(mock_sync_power_state.called) + @mock.patch('nova.compute.resource_tracker.ResourceTracker.instance_claim') @mock.patch('nova.compute.manager.ComputeManager.' '_sync_instance_power_state') def test_query_driver_power_state_and_sync_not_found_driver( - self, mock_sync_power_state): + self, mock_sync_power_state, mock_claim): error = exception.InstanceNotFound(instance_id=1) with mock.patch.object(self.compute.driver, 'get_info', side_effect=error) as mock_get_info: @@ -6568,6 +6569,8 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase): fake_rt = fake_resource_tracker.FakeResourceTracker(self.compute.host, self.compute.driver) self.compute.rt = fake_rt + self.compute.driver._set_nodes([self.node]) + self.compute.rt.compute_nodes = {self.node: objects.ComputeNode()} self.allocations = { uuids.provider1: { @@ -6857,6 +6860,7 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase): mock_get_arqs.assert_called_once_with( self.instance.uuid, only_resolved=True) + @mock.patch('nova.compute.resource_tracker.ResourceTracker.instance_claim') @mock.patch.object(fake_driver.FakeDriver, 'spawn') @mock.patch('nova.objects.Instance.save') @mock.patch('nova.scheduler.client.report.SchedulerReportClient.' @@ -6868,7 +6872,7 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase): @mock.patch.object(manager.ComputeManager, '_notify_about_instance_usage') def test_spawn_called_with_accel_info(self, mock_ins_usage, mock_ins_create, mock_dev_tag, mock_certs, mock_req_group_map, - mock_get_allocations, mock_ins_save, mock_spawn): + mock_get_allocations, mock_ins_save, mock_spawn, mock_claim): accel_info = [{'k1': 'v1', 'k2': 'v2'}] @@ -7142,13 +7146,15 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase): self.security_groups, self.block_device_mapping, request_spec={}, host_lists=[fake_host_list]) + @mock.patch('nova.compute.resource_tracker.ResourceTracker.instance_claim') @mock.patch.object(manager.ComputeManager, '_shutdown_instance') @mock.patch.object(manager.ComputeManager, '_build_networks_for_instance') @mock.patch.object(fake_driver.FakeDriver, 'spawn') @mock.patch.object(objects.Instance, 'save') @mock.patch.object(manager.ComputeManager, '_notify_about_instance_usage') def test_rescheduled_exception_with_non_ascii_exception(self, - mock_notify, mock_save, mock_spawn, mock_build, mock_shutdown): + mock_notify, mock_save, mock_spawn, mock_build, mock_shutdown, + mock_claim): exc = exception.NovaException(u's\xe9quence') mock_build.return_value = self.network_info @@ -7164,7 +7170,6 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase): self.accel_uuids) mock_save.assert_has_calls([ mock.call(), - mock.call(), mock.call(expected_task_state='block_device_mapping'), ]) mock_notify.assert_has_calls([ @@ -7670,6 +7675,7 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase): self.assertEqual(10, mock_failed.call_count) mock_succeeded.assert_not_called() + @mock.patch('nova.compute.resource_tracker.ResourceTracker.instance_claim') @mock.patch.object(manager.ComputeManager, '_shutdown_instance') @mock.patch.object(manager.ComputeManager, '_build_networks_for_instance') @mock.patch.object(fake_driver.FakeDriver, 'spawn') @@ -7677,7 +7683,7 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase): @mock.patch.object(manager.ComputeManager, '_notify_about_instance_usage') def _test_instance_exception(self, exc, raised_exc, mock_notify, mock_save, mock_spawn, - mock_build, mock_shutdown): + mock_build, mock_shutdown, mock_claim): """This method test the instance related InstanceNotFound and reschedule on exception errors. The test cases get from arguments. @@ -7700,7 +7706,6 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase): mock_save.assert_has_calls([ mock.call(), - mock.call(), mock.call(expected_task_state='block_device_mapping')]) mock_notify.assert_has_calls([ mock.call(self.context, self.instance, 'create.start', @@ -7811,11 +7816,12 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase): '_shutdown_instance'), mock.patch.object(self.compute, '_validate_instance_group_policy'), + mock.patch.object(self.compute.rt, 'instance_claim'), mock.patch('nova.compute.utils.notify_about_instance_create') ) as (spawn, save, _build_networks_for_instance, _notify_about_instance_usage, _shutdown_instance, _validate_instance_group_policy, - mock_notify): + mock_claim, mock_notify): self.assertRaises(exception.BuildAbortException, self.compute._build_and_run_instance, self.context, @@ -7846,7 +7852,6 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase): save.assert_has_calls([ mock.call(), - mock.call(), mock.call( expected_task_state=task_states.BLOCK_DEVICE_MAPPING)]) @@ -7908,11 +7913,12 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase): request_spec={}, host_lists=[fake_host_list]) mock_nil.assert_called_once_with(self.instance) + @mock.patch('nova.compute.resource_tracker.ResourceTracker.instance_claim') @mock.patch.object(manager.ComputeManager, '_build_resources') @mock.patch.object(objects.Instance, 'save') @mock.patch.object(manager.ComputeManager, '_notify_about_instance_usage') def test_build_resources_buildabort_reraise(self, mock_notify, mock_save, - mock_build): + mock_build, mock_claim): exc = exception.BuildAbortException( instance_uuid=self.instance.uuid, reason='') mock_build.side_effect = exc @@ -7926,7 +7932,6 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase): self.node, self.limits, self.filter_properties, request_spec=[], accel_uuids=self.accel_uuids) - mock_save.assert_called_once_with() mock_notify.assert_has_calls([ mock.call(self.context, self.instance, 'create.start', extra_usage_info={'image_name': self.image.get('name')}), @@ -8581,10 +8586,11 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase): ctxt, instance, req_networks) warning_mock.assert_not_called() + @mock.patch('nova.compute.resource_tracker.ResourceTracker.instance_claim') @mock.patch('nova.compute.utils.notify_about_instance_create') @mock.patch.object(manager.ComputeManager, '_instance_update') def test_launched_at_in_create_end_notification(self, - mock_instance_update, mock_notify_instance_create): + mock_instance_update, mock_notify_instance_create, mock_claim): def fake_notify(*args, **kwargs): if args[2] == 'create.end': @@ -8624,6 +8630,7 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase): self.flags(default_access_ip_network_name='test1') instance = fake_instance.fake_db_instance() + @mock.patch.object(self.compute.rt, 'instance_claim') @mock.patch.object(db, 'instance_update_and_get_original', return_value=({}, instance)) @mock.patch.object(self.compute.driver, 'spawn') @@ -8632,7 +8639,7 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase): @mock.patch.object(db, 'instance_extra_update_by_uuid') @mock.patch.object(self.compute, '_notify_about_instance_usage') def _check_access_ip(mock_notify, mock_extra, mock_networks, - mock_spawn, mock_db_update): + mock_spawn, mock_db_update, mock_claim): self.compute._build_and_run_instance(self.context, self.instance, self.image, self.injected_files, self.admin_pass, self.requested_networks, self.security_groups, @@ -8653,8 +8660,10 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase): _check_access_ip() + @mock.patch('nova.compute.resource_tracker.ResourceTracker.instance_claim') @mock.patch.object(manager.ComputeManager, '_instance_update') - def test_create_error_on_instance_delete(self, mock_instance_update): + def test_create_error_on_instance_delete(self, mock_instance_update, + mock_claim): def fake_notify(*args, **kwargs): if args[2] == 'create.error': @@ -8668,7 +8677,7 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase): mock.patch.object(self.compute, '_build_networks_for_instance', return_value=[]), mock.patch.object(self.instance, 'save', - side_effect=[None, None, None, exc]), + side_effect=[None, None, exc]), mock.patch.object(self.compute, '_notify_about_instance_usage', side_effect=fake_notify) ) as (mock_spawn, mock_networks, mock_save, mock_notify): @@ -8697,7 +8706,8 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase): mock.patch.object( self.compute, '_build_networks_for_instance', return_value=[]), mock.patch.object(self.instance, 'save'), - ) as (mock_spawn, mock_networks, mock_save): + mock.patch.object(self.compute.rt, 'instance_claim'), + ) as (mock_spawn, mock_networks, mock_save, mock_claim): self.compute._build_and_run_instance( self.context, self.instance, self.image, self.injected_files, @@ -8747,7 +8757,8 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase): mock.patch.object(self.instance, 'save'), mock.patch('nova.scheduler.client.report.' 'SchedulerReportClient._get_resource_provider'), - ) as (mock_spawn, mock_networks, mock_save, mock_get_rp): + mock.patch.object(self.compute.rt, 'instance_claim'), + ) as (mock_spawn, mock_networks, mock_save, mock_get_rp, mock_claim): mock_get_rp.return_value = { 'uuid': uuids.rp1, 'name': 'compute1:sriov-agent:ens3' diff --git a/nova/tests/unit/compute/test_resource_tracker.py b/nova/tests/unit/compute/test_resource_tracker.py index cd36b8987f..b6770cb5b8 100644 --- a/nova/tests/unit/compute/test_resource_tracker.py +++ b/nova/tests/unit/compute/test_resource_tracker.py @@ -2231,14 +2231,19 @@ class TestInstanceClaim(BaseTestCase): self.rt.compute_nodes = {} self.assertTrue(self.rt.disabled(_NODENAME)) - with mock.patch.object(self.instance, 'save'): - claim = self.rt.instance_claim(mock.sentinel.ctx, self.instance, - _NODENAME, self.allocations, None) + # Reset all changes to the instance to make sure that we can detect + # any manipulation after the failure. + self.instance.obj_reset_changes(recursive=True) - self.assertEqual(self.rt.host, self.instance.host) - self.assertEqual(self.rt.host, self.instance.launched_on) - self.assertEqual(_NODENAME, self.instance.node) - self.assertIsInstance(claim, claims.NopClaim) + with mock.patch.object(self.instance, 'save') as mock_save: + self.assertRaises(exc.ComputeResourcesUnavailable, + self.rt.instance_claim, + mock.sentinel.ctx, self.instance, + _NODENAME, self.allocations, None) + mock_save.assert_not_called() + + # Make sure the instance was not touched by the failed claim process + self.assertEqual(set(), self.instance.obj_what_changed()) @mock.patch('nova.compute.utils.is_volume_backed_instance') @mock.patch('nova.objects.MigrationList.get_in_progress_and_error') diff --git a/nova/tests/unit/compute/test_shelve.py b/nova/tests/unit/compute/test_shelve.py index 0a1e3f54fc..62321bddec 100644 --- a/nova/tests/unit/compute/test_shelve.py +++ b/nova/tests/unit/compute/test_shelve.py @@ -646,7 +646,7 @@ class ShelveComputeManagerTestCase(test_compute.BaseTestCase): self.compute.unshelve_instance( self.context, instance, image=None, - filter_properties={}, node='fake-node', request_spec=request_spec, + filter_properties={}, node='fakenode2', request_spec=request_spec, accel_uuids=[]) mock_update_pci.assert_called_once_with( @@ -700,7 +700,7 @@ class ShelveComputeManagerTestCase(test_compute.BaseTestCase): self.assertRaises(test.TestingException, self.compute.unshelve_instance, self.context, instance, image=shelved_image, filter_properties={}, - node='fake-node', request_spec=fake_spec, accel_uuids=[]) + node='fakenode2', request_spec=fake_spec, accel_uuids=[]) self.assertEqual(instance.image_ref, initial_image_ref) @mock.patch.object(objects.InstanceList, 'get_by_filters') |