summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStephen Finucane <stephenfin@redhat.com>2020-08-05 14:11:59 +0100
committerLee Yarwood <lyarwood@redhat.com>2021-02-04 17:06:53 +0000
commit9f789caded517b5503e6728c3c5859fb5525ad39 (patch)
treed19a2086655bae7839cc3bdcd8cc3fffeb33bda2
parent29ee5984f88b23383d680454c5a106d9c52721bb (diff)
downloadnova-9f789caded517b5503e6728c3c5859fb5525ad39.tar.gz
tests: Add reproducer for bug #1879878
When one resizes a pinned instance, the instance claims host CPUs for pinning purposes on the destination. However, the host CPUs on the source are not immediately relinquished. Rather, they are held by the migration record, to handle the event that the resize is reverted. It is only when one confirms this resize that the old cores are finally relinquished. It appears there is a potential race between the resource tracker's periodic task and the freeing of these resources, resulting in attempts to unpin host cores that have already been unpinned. This test highlights that bug pending a fix. Changes: nova/tests/functional/libvirt/test_numa_servers.py NOTE(stephenfin): We don't yet have the '_create_server' helper or the more sensible '_wait_for_state_change' behavior on 'stable/train', so we have to revert to '_build_server' and checking for the state before the one we want. Change-Id: Ie092628ac71eb87c9dfa7220255a2953ada9e04d Signed-off-by: Stephen Finucane <stephenfin@redhat.com> Related-Bug: #1879878 (cherry picked from commit 10f0a42de162c90c701f70c9c28dc31bfada87db) (cherry picked from commit 8ffaac493288c73badfa4f1ec6021ecb4f3137b7)
-rw-r--r--nova/tests/functional/libvirt/test_numa_servers.py86
1 files changed, 86 insertions, 0 deletions
diff --git a/nova/tests/functional/libvirt/test_numa_servers.py b/nova/tests/functional/libvirt/test_numa_servers.py
index 1e9a78822b..d73bfc4503 100644
--- a/nova/tests/functional/libvirt/test_numa_servers.py
+++ b/nova/tests/functional/libvirt/test_numa_servers.py
@@ -20,6 +20,7 @@ import testtools
from oslo_config import cfg
from oslo_log import log as logging
+import nova
from nova.conf import neutron as neutron_conf
from nova import context as nova_context
from nova import objects
@@ -647,6 +648,91 @@ class NUMAServersTest(NUMAServersTestBase):
'usages']
self.assertEqual(expected_usage, compute_usage)
+ def test_resize_bug_1879878(self):
+        """Resize an instance with a NUMA topology when confirm takes time.
+
+ Bug 1879878 describes a race between the periodic tasks of the resource
+ tracker and the libvirt virt driver. The virt driver expects to be the
+ one doing the unpinning of instances, however, the resource tracker is
+ stepping on the virt driver's toes.
+ """
+ self.flags(
+ cpu_dedicated_set='0-3', cpu_shared_set='4-7', group='compute')
+ self.flags(vcpu_pin_set=None)
+
+ orig_confirm = nova.virt.libvirt.driver.LibvirtDriver.confirm_migration
+
+ def fake_confirm_migration(*args, **kwargs):
+ # run periodics before finally running the confirm_resize routine,
+ # simulating a race between the resource tracker and the virt
+ # driver
+ self._run_periodics()
+
+ # then inspect the ComputeNode objects for our two hosts
+ src_numa_topology = objects.NUMATopology.obj_from_db_obj(
+ objects.ComputeNode.get_by_nodename(
+ self.ctxt, src_host,
+ ).numa_topology,
+ )
+ dst_numa_topology = objects.NUMATopology.obj_from_db_obj(
+ objects.ComputeNode.get_by_nodename(
+ self.ctxt, dst_host,
+ ).numa_topology,
+ )
+ # FIXME(stephenfin): There should still be two pinned cores here
+ self.assertEqual(0, len(src_numa_topology.cells[0].pinned_cpus))
+ self.assertEqual(2, len(dst_numa_topology.cells[0].pinned_cpus))
+
+        # before continuing with the actual confirm process
+ return orig_confirm(*args, **kwargs)
+
+ self.stub_out(
+ 'nova.virt.libvirt.driver.LibvirtDriver.confirm_migration',
+ fake_confirm_migration,
+ )
+
+ # start services
+ self.start_computes(save_rp_uuids=True)
+
+ # create server
+ flavor_a_id = self._create_flavor(
+ vcpu=2, extra_spec={'hw:cpu_policy': 'dedicated'})
+ server = self.api.post_server(
+ {'server': self._build_server(flavor_a_id)}
+ )
+ server = self._wait_for_state_change(server, 'BUILD')
+
+ src_host = server['OS-EXT-SRV-ATTR:host']
+
+ # we don't really care what the new flavor is, so long as the old
+ # flavor is using pinning. We use a similar flavor for simplicity.
+ flavor_b_id = self._create_flavor(
+ vcpu=2, extra_spec={'hw:cpu_policy': 'dedicated'})
+
+ # TODO(stephenfin): The mock of 'migrate_disk_and_power_off' should
+ # probably be less...dumb
+ with mock.patch(
+ 'nova.virt.libvirt.driver.LibvirtDriver'
+ '.migrate_disk_and_power_off', return_value='{}',
+ ):
+ # TODO(stephenfin): Replace with a helper
+ post = {'resize': {'flavorRef': flavor_b_id}}
+ self.api.post_server_action(server['id'], post)
+ server = self._wait_for_state_change(server, 'VERIFY_RESIZE')
+
+ dst_host = server['OS-EXT-SRV-ATTR:host']
+
+ # Now confirm the resize
+
+ # FIXME(stephenfin): This should be successful, but it's failing with a
+ # HTTP 500 due to bug #1879878
+ post = {'confirmResize': None}
+ exc = self.assertRaises(
+ client.OpenStackApiException,
+ self.api.post_server_action, server['id'], post)
+ self.assertEqual(500, exc.response.status_code)
+ self.assertIn('CPUUnpinningInvalid', str(exc))
+
class NUMAServerTestWithCountingQuotaFromPlacement(NUMAServersTest):