From e549fec76fd2015e6e21ee5138bf06142a71e71a Mon Sep 17 00:00:00 2001 From: Balazs Gibizer Date: Mon, 6 Dec 2021 16:36:41 +0100 Subject: Reproduce bug 1953359 This patch adds a functional test that reproduces a race between incoming migration and the update_available_resource periodic task. Fixes: - Added more memory to mock 'host_info', since the default would not fit the instance. Default was changed in later releases. Change-Id: I4be429c56aaa15ee12f448978c38214e741eae63 Related-Bug: #1953359 (cherry picked from commit c59224d715a21998f40f72cf4e37efdc990e4d7e) (cherry picked from commit f0a6d946aaa6c30f826cfced75c2fb06fdb379a8) (cherry picked from commit d8859e4f95f5abb20c844d914f2716cba047630e) --- nova/tests/functional/integrated_helpers.py | 10 ++- nova/tests/functional/libvirt/test_numa_servers.py | 82 ++++++++++++++++++++++ 2 files changed, 91 insertions(+), 1 deletion(-) diff --git a/nova/tests/functional/integrated_helpers.py b/nova/tests/functional/integrated_helpers.py index f0e7e148eb..fcbbdce9d4 100644 --- a/nova/tests/functional/integrated_helpers.py +++ b/nova/tests/functional/integrated_helpers.py @@ -385,7 +385,15 @@ class InstanceHelperMixin: """ # if forcing the server onto a host, we have to use the admin API if not api: - api = self.api if not az else getattr(self, 'admin_api', self.api) + api = self.api if not az and not host else getattr( + self, 'admin_api', self.api) + + if host and not api.microversion: + api.microversion = '2.74' + # with 2.74 networks param needs to use 'none' instead of None + # if no network is needed + if networks is None: + networks = 'none' body = self._build_server( name, image_uuid, flavor_id, networks, az, host) diff --git a/nova/tests/functional/libvirt/test_numa_servers.py b/nova/tests/functional/libvirt/test_numa_servers.py index 144bad33c8..bc8d91e862 100644 --- a/nova/tests/functional/libvirt/test_numa_servers.py +++ b/nova/tests/functional/libvirt/test_numa_servers.py @@ -818,6 +818,88 @@ class 
NUMAServersTest(NUMAServersTestBase): self._assert_pinned_cpus(src_host, 2) self._assert_pinned_cpus(dst_host, 0) + def test_resize_dedicated_policy_race_on_dest_bug_1953359(self): + + self.flags(cpu_dedicated_set='0-2', cpu_shared_set=None, + group='compute') + self.flags(vcpu_pin_set=None) + + host_info = fakelibvirt.HostInfo(cpu_nodes=1, cpu_sockets=1, + cpu_cores=2, cpu_threads=1, + kB_mem=15740000) + self.start_compute(host_info=host_info, hostname='compute1') + + extra_spec = { + 'hw:cpu_policy': 'dedicated', + } + flavor_id = self._create_flavor(vcpu=1, extra_spec=extra_spec) + expected_usage = {'DISK_GB': 20, 'MEMORY_MB': 2048, 'PCPU': 1} + + server = self._run_build_test(flavor_id, expected_usage=expected_usage) + + inst = objects.Instance.get_by_uuid(self.ctxt, server['id']) + self.assertEqual(1, len(inst.numa_topology.cells)) + # assert that the pcpu 0 is used on compute1 + self.assertEqual({'0': 0}, inst.numa_topology.cells[0].cpu_pinning_raw) + + # start another compute with the same config + self.start_compute(host_info=host_info, hostname='compute2') + + # boot another instance but now on compute2 so that it occupies the + # pcpu 0 on compute2 + # NOTE(gibi): _run_build_test cannot be used here as it assumes only + # compute1 exists + server2 = self._create_server( + flavor_id=flavor_id, + host='compute2', + ) + inst2 = objects.Instance.get_by_uuid(self.ctxt, server2['id']) + self.assertEqual(1, len(inst2.numa_topology.cells)) + # assert that the pcpu 0 is used + self.assertEqual( + {'0': 0}, inst2.numa_topology.cells[0].cpu_pinning_raw) + + # migrate the first instance from compute1 to compute2 but stop + # migrating at the start of finish_resize. Then start a racing periodic + # update_available_resources. 
+ + def fake_finish_resize(*args, **kwargs): + # start a racing update_available_resource periodic + self._run_periodics() + # we expect it that CPU pinning fails on the destination node + # as the resource_tracker will use the source node numa_topology + # and that does not fit to the dest node as pcpu 0 in the dest + # is already occupied. + + # TODO(stephenfin): The mock of 'migrate_disk_and_power_off' should + # probably be less...dumb + with mock.patch('nova.virt.libvirt.driver.LibvirtDriver' + '.migrate_disk_and_power_off', return_value='{}'): + with mock.patch( + 'nova.compute.manager.ComputeManager.finish_resize' + ) as mock_finish_resize: + mock_finish_resize.side_effect = fake_finish_resize + post = {'migrate': None} + self.admin_api.post_server_action(server['id'], post) + + log = self.stdlog.logger.output + # The resize_claim correctly calculates that the inst1 should be pinned + # to pcpu id 1 instead of 0 + self.assertIn( + 'Computed NUMA topology CPU pinning: usable pCPUs: [[1]], ' + 'vCPUs mapping: [(0, 1)]', + log, + ) + # But the periodic fails as it tries to apply the source topology on + # the dest. This is bug 1953359. + log = self.stdlog.logger.output + self.assertIn('Error updating resources for node compute2', log) + self.assertIn( + 'nova.exception.CPUPinningInvalid: CPU set to pin [0] must be ' + 'a subset of free CPU set [1]', + log, + ) + class NUMAServerTestWithCountingQuotaFromPlacement(NUMAServersTest): -- cgit v1.2.1