author     Balazs Gibizer <gibi@redhat.com>    2022-07-20 12:03:45 +0200
committer  Sean Mooney <work@seanmooney.info>  2022-11-17 15:15:44 +0000
commit     f98858aa77e4443164fc09fae3667fb0f66edfbf (patch)
tree       afb59c1794cff8c503060ba62142dec94de87a68
parent     69667a817cb65c3efbe4e3ada0e8c69c0a106087 (diff)
download   nova-f98858aa77e4443164fc09fae3667fb0f66edfbf.tar.gz
Add compute restart capability for libvirt func tests
The existing generic restart_compute_service() call in the nova test base
class is not appropriate for the libvirt functional tests that need to
reconfigure the libvirt connection, as it is not aware of the libvirt
specific mocking needed when a compute service is started. So this patch
adds a specific restart_compute_service() call to
nova.tests.functional.libvirt.base.ServersTestBase. This will be used by a
later patch testing [pci]device_spec reconfiguration scenarios.

This change showed that some of the existing libvirt functional tests used
the incomplete restart_compute_service from the base class. Others used
local mocking to inject a new pci config at restart. I moved all of these
to the new function and removed the local mocking.

Change-Id: Ic717dc42ac6b6cace59d344acaf12f9d1ee35564
(cherry picked from commit 57c253a609e859fa21ba05b264f0ba4d0ade7b8b)
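For illustration, a minimal sketch (not part of this commit) of how a libvirt
functional test could use the new helper; the test class name, the fakelibvirt
import path and the device counts below are assumptions based on the call
patterns changed in this patch:

    from nova.tests.fixtures import libvirt as fakelibvirt
    from nova.tests.functional.libvirt import base


    class ExampleRestartTest(base.ServersTestBase):
        """Hypothetical test illustrating the new restart helper."""

        def test_restart_with_new_pci_config(self):
            # Start a compute backed by a fake libvirt connection that
            # exposes one PF and no VFs.
            self.start_compute(
                'compute1',
                pci_info=fakelibvirt.HostPCIDevicesInfo(
                    num_pci=0, num_pfs=1, num_vfs=0))

            # Simulate a host reconfiguration: restart the same compute with
            # a different PCI layout. Passing pci_info requires
            # keep_hypervisor_state=False so that a new fake connection is
            # built from the given parameters instead of reusing the old one.
            self.restart_compute_service(
                'compute1',
                pci_info=fakelibvirt.HostPCIDevicesInfo(
                    num_pci=0, num_pfs=1, num_vfs=2),
                keep_hypervisor_state=False)

            # A plain restart (keep_hypervisor_state=True by default) reuses
            # the existing fake connection, so the hypervisor state survives
            # the service restart.
            self.restart_compute_service('compute1')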
-rw-r--r--  nova/tests/functional/libvirt/base.py                         100
-rw-r--r--  nova/tests/functional/libvirt/test_device_bus_migration.py      8
-rw-r--r--  nova/tests/functional/libvirt/test_numa_live_migration.py      12
-rw-r--r--  nova/tests/functional/libvirt/test_numa_servers.py               6
-rw-r--r--  nova/tests/functional/libvirt/test_pci_sriov_servers.py         24
-rw-r--r--  nova/tests/functional/libvirt/test_reshape.py                    6
-rw-r--r--  nova/tests/functional/libvirt/test_vgpu.py                      25
7 files changed, 127 insertions, 54 deletions
diff --git a/nova/tests/functional/libvirt/base.py b/nova/tests/functional/libvirt/base.py
index b2f0095f81..68c6e294c1 100644
--- a/nova/tests/functional/libvirt/base.py
+++ b/nova/tests/functional/libvirt/base.py
@@ -114,7 +114,7 @@ class ServersTestBase(integrated_helpers._IntegratedTestBase):
def start_compute(
self, hostname='compute1', host_info=None, pci_info=None,
mdev_info=None, vdpa_info=None, libvirt_version=None,
- qemu_version=None,
+ qemu_version=None, cell_name=None, connection=None
):
"""Start a compute service.
@@ -124,16 +124,35 @@ class ServersTestBase(integrated_helpers._IntegratedTestBase):
:param host_info: A fakelibvirt.HostInfo object for the host. Defaults
to a HostInfo with 2 NUMA nodes, 2 cores per node, 2 threads per
core, and 16GB of RAM.
+        :param connection: A fake libvirt connection. You should not provide
+            it directly; it is used by restart_compute_service() to implement
+            a restart without losing the hypervisor state.
:returns: The hostname of the created service, which can be used to
            look up the created service and UUID of the associated resource
provider.
"""
+ if connection and (
+ host_info or
+ pci_info or
+ mdev_info or
+ vdpa_info or
+ libvirt_version or
+ qemu_version
+ ):
+ raise ValueError(
+ "Either an existing connection instance can be provided or a "
+ "list of parameters for a new connection"
+ )
def _start_compute(hostname, host_info):
- fake_connection = self._get_connection(
- host_info, pci_info, mdev_info, vdpa_info, libvirt_version,
- qemu_version, hostname,
- )
+ if connection:
+ fake_connection = connection
+ else:
+ fake_connection = self._get_connection(
+ host_info, pci_info, mdev_info, vdpa_info, libvirt_version,
+ qemu_version, hostname,
+ )
+
# If the compute is configured with PCI devices then we need to
# make sure that the stubs around sysfs has the MAC address
# information for the PCI PF devices
@@ -144,7 +163,8 @@ class ServersTestBase(integrated_helpers._IntegratedTestBase):
# actually start the service.
orig_con = self.mock_conn.return_value
self.mock_conn.return_value = fake_connection
- compute = self.start_service('compute', host=hostname)
+ compute = self.start_service(
+ 'compute', host=hostname, cell_name=cell_name)
# Once that's done, we need to tweak the compute "service" to
# make sure it returns unique objects.
compute.driver._host.get_connection = lambda: fake_connection
@@ -165,6 +185,74 @@ class ServersTestBase(integrated_helpers._IntegratedTestBase):
return hostname
+ def restart_compute_service(
+ self,
+ hostname,
+ host_info=None,
+ pci_info=None,
+ mdev_info=None,
+ vdpa_info=None,
+ libvirt_version=None,
+ qemu_version=None,
+ keep_hypervisor_state=True,
+ ):
+ """Stops the service and starts a new one to have realistic restart
+
+ :param hostname: the hostname of the nova-compute service to be
+ restarted
+ :param keep_hypervisor_state: If True then we reuse the fake connection
+ from the existing driver. If False a new connection will be created
+ based on the other parameters provided
+ """
+        # We are intentionally not calling super() here. Nova's base test
+        # class defines starting and restarting the compute service with
+        # very different signatures, and those calls cannot be made aware of
+        # the intricacies of the libvirt fixture. So we simply hide that
+        # implementation.
+
+ if keep_hypervisor_state and (
+ host_info or
+ pci_info or
+ mdev_info or
+ vdpa_info or
+ libvirt_version or
+ qemu_version
+ ):
+ raise ValueError(
+ "Either keep_hypervisor_state=True or a list of libvirt "
+ "parameters can be provided but not both"
+ )
+
+ compute = self.computes.pop(hostname)
+ self.compute_rp_uuids.pop(hostname)
+
+        # NOTE(gibi): The service interface cannot be used to simulate a real
+        # service restart as the manager object will not be recreated after a
+        # service.stop() and service.start(), therefore the manager state
+        # will survive. For example the resource tracker will not be
+        # recreated after a stop / start. The service.kill() call cannot help
+        # either as it deletes the service from the DB, which is unrealistic
+        # and causes some operations that refer to the killed host (e.g.
+        # evacuate) to fail. So this helper method stops the original service
+        # and then starts a brand new compute service for the same host and
+        # node. This way a new ComputeManager instance will be created and
+        # initialized during the service startup.
+ compute.stop()
+
+ # this service was running previously, so we have to make sure that
+ # we restart it in the same cell
+ cell_name = self.host_mappings[compute.host].cell_mapping.name
+
+ old_connection = compute.manager.driver._get_connection()
+
+ self.start_compute(
+ hostname, host_info, pci_info, mdev_info, vdpa_info,
+ libvirt_version, qemu_version, cell_name,
+ old_connection if keep_hypervisor_state else None
+ )
+
+ return self.computes[hostname]
+
class LibvirtMigrationMixin(object):
"""A simple mixin to facilliate successful libvirt live migrations
diff --git a/nova/tests/functional/libvirt/test_device_bus_migration.py b/nova/tests/functional/libvirt/test_device_bus_migration.py
index 82a0d4556e..3852e31c68 100644
--- a/nova/tests/functional/libvirt/test_device_bus_migration.py
+++ b/nova/tests/functional/libvirt/test_device_bus_migration.py
@@ -51,7 +51,7 @@ class LibvirtDeviceBusMigration(base.ServersTestBase):
def _assert_stashed_image_properties_persist(self, server, properties):
# Assert the stashed properties persist across a host reboot
- self.restart_compute_service(self.compute)
+ self.restart_compute_service(self.compute_hostname)
self._assert_stashed_image_properties(server['id'], properties)
# Assert the stashed properties persist across a guest reboot
@@ -173,7 +173,7 @@ class LibvirtDeviceBusMigration(base.ServersTestBase):
self.flags(pointer_model='ps2mouse')
# Restart compute to pick up ps2 setting, which means the guest will
# not get a prescribed pointer device
- self.restart_compute_service(self.compute)
+ self.restart_compute_service(self.compute_hostname)
# Create a server with default image properties
default_image_properties1 = {
@@ -187,7 +187,7 @@ class LibvirtDeviceBusMigration(base.ServersTestBase):
# Assert the defaults persist across a host flag change
self.flags(pointer_model='usbtablet')
# Restart compute to pick up usb setting
- self.restart_compute_service(self.compute)
+ self.restart_compute_service(self.compute_hostname)
self._assert_stashed_image_properties(
server1['id'], default_image_properties1)
@@ -216,7 +216,7 @@ class LibvirtDeviceBusMigration(base.ServersTestBase):
# https://bugs.launchpad.net/nova/+bug/1866106
self.flags(pointer_model=None)
# Restart compute to pick up None setting
- self.restart_compute_service(self.compute)
+ self.restart_compute_service(self.compute_hostname)
self._assert_stashed_image_properties(
server1['id'], default_image_properties1)
self._assert_stashed_image_properties(
diff --git a/nova/tests/functional/libvirt/test_numa_live_migration.py b/nova/tests/functional/libvirt/test_numa_live_migration.py
index 2f3897d6b2..0e504d2df2 100644
--- a/nova/tests/functional/libvirt/test_numa_live_migration.py
+++ b/nova/tests/functional/libvirt/test_numa_live_migration.py
@@ -206,10 +206,8 @@ class NUMALiveMigrationPositiveTests(NUMALiveMigrationPositiveBase):
# Increase cpu_dedicated_set to 0-3, expecting the live migrated server
# to end up on 2,3.
self.flags(cpu_dedicated_set='0-3', group='compute')
- self.computes['host_a'] = self.restart_compute_service(
- self.computes['host_a'])
- self.computes['host_b'] = self.restart_compute_service(
- self.computes['host_b'])
+ self.restart_compute_service('host_a')
+ self.restart_compute_service('host_b')
# Live migrate, RPC-pinning the destination host if asked
if pin_dest:
@@ -333,10 +331,8 @@ class NUMALiveMigrationRollbackTests(NUMALiveMigrationPositiveBase):
# Increase cpu_dedicated_set to 0-3, expecting the live migrated server
# to end up on 2,3.
self.flags(cpu_dedicated_set='0-3', group='compute')
- self.computes['host_a'] = self.restart_compute_service(
- self.computes['host_a'])
- self.computes['host_b'] = self.restart_compute_service(
- self.computes['host_b'])
+ self.restart_compute_service('host_a')
+ self.restart_compute_service('host_b')
# Live migrate, RPC-pinning the destination host if asked. This is a
# rollback test, so server_a is expected to remain on host_a.
diff --git a/nova/tests/functional/libvirt/test_numa_servers.py b/nova/tests/functional/libvirt/test_numa_servers.py
index fd09a11e20..8fd9729404 100644
--- a/nova/tests/functional/libvirt/test_numa_servers.py
+++ b/nova/tests/functional/libvirt/test_numa_servers.py
@@ -1187,10 +1187,8 @@ class ReshapeForPCPUsTest(NUMAServersTestBase):
self.flags(cpu_dedicated_set='0-7', group='compute')
self.flags(vcpu_pin_set=None)
- computes = {}
- for host, compute in self.computes.items():
- computes[host] = self.restart_compute_service(compute)
- self.computes = computes
+ for host in list(self.computes.keys()):
+ self.restart_compute_service(host)
# verify that the inventory, usages and allocation are correct after
# the reshape
diff --git a/nova/tests/functional/libvirt/test_pci_sriov_servers.py b/nova/tests/functional/libvirt/test_pci_sriov_servers.py
index c9d277f498..49be70aa7b 100644
--- a/nova/tests/functional/libvirt/test_pci_sriov_servers.py
+++ b/nova/tests/functional/libvirt/test_pci_sriov_servers.py
@@ -914,11 +914,8 @@ class SRIOVServersTest(_PCIServersWithMigrationTestBase):
# Disable SRIOV capabilities in PF and delete the VFs
self._disable_sriov_in_pf(pci_info_no_sriov)
- fake_connection = self._get_connection(pci_info=pci_info_no_sriov,
- hostname='test_compute0')
- self.mock_conn.return_value = fake_connection
-
- self.compute = self.start_service('compute', host='test_compute0')
+ self.start_compute('test_compute0', pci_info=pci_info_no_sriov)
+ self.compute = self.computes['test_compute0']
ctxt = context.get_admin_context()
pci_devices = objects.PciDeviceList.get_by_compute_node(
@@ -930,13 +927,9 @@ class SRIOVServersTest(_PCIServersWithMigrationTestBase):
self.assertEqual(1, len(pci_devices))
self.assertEqual('type-PCI', pci_devices[0].dev_type)
- # Update connection with original pci info with sriov PFs
- fake_connection = self._get_connection(pci_info=pci_info,
- hostname='test_compute0')
- self.mock_conn.return_value = fake_connection
-
- # Restart the compute service
- self.restart_compute_service(self.compute)
+ # Restart the compute service with sriov PFs
+ self.restart_compute_service(
+ self.compute.host, pci_info=pci_info, keep_hypervisor_state=False)
# Verify if PCI devices are of type type-PF or type-VF
pci_devices = objects.PciDeviceList.get_by_compute_node(
@@ -1021,10 +1014,9 @@ class SRIOVAttachDetachTest(_PCIServersTestBase):
host_info = fakelibvirt.HostInfo(cpu_nodes=2, cpu_sockets=1,
cpu_cores=2, cpu_threads=2)
pci_info = fakelibvirt.HostPCIDevicesInfo(num_pfs=1, num_vfs=1)
- fake_connection = self._get_connection(host_info, pci_info)
- self.mock_conn.return_value = fake_connection
-
- self.compute = self.start_service('compute', host='test_compute0')
+ self.start_compute(
+ 'test_compute0', host_info=host_info, pci_info=pci_info)
+ self.compute = self.computes['test_compute0']
# Create server with a port
server = self._create_server(networks=[{'port': first_port_id}])
diff --git a/nova/tests/functional/libvirt/test_reshape.py b/nova/tests/functional/libvirt/test_reshape.py
index 8249100111..d0102f1247 100644
--- a/nova/tests/functional/libvirt/test_reshape.py
+++ b/nova/tests/functional/libvirt/test_reshape.py
@@ -72,11 +72,11 @@ class VGPUReshapeTests(base.ServersTestBase):
# ignore the content of the above HostMdevDeviceInfo
self.flags(enabled_mdev_types='', group='devices')
- hostname = self.start_compute(
+ self.hostname = self.start_compute(
hostname='compute1',
mdev_info=fakelibvirt.HostMdevDevicesInfo(devices=mdevs),
)
- self.compute = self.computes[hostname]
+ self.compute = self.computes[self.hostname]
# create the VGPU resource in placement manually
compute_rp_uuid = self.placement.get(
@@ -158,7 +158,7 @@ class VGPUReshapeTests(base.ServersTestBase):
allocations[compute_rp_uuid]['resources'])
# restart compute which will trigger a reshape
- self.compute = self.restart_compute_service(self.compute)
+ self.compute = self.restart_compute_service(self.hostname)
# verify that the inventory, usages and allocation are correct after
# the reshape
diff --git a/nova/tests/functional/libvirt/test_vgpu.py b/nova/tests/functional/libvirt/test_vgpu.py
index e111f50de0..686582120a 100644
--- a/nova/tests/functional/libvirt/test_vgpu.py
+++ b/nova/tests/functional/libvirt/test_vgpu.py
@@ -113,8 +113,8 @@ class VGPUTestBase(base.ServersTestBase):
parent=libvirt_parent)})
return uuid
- def start_compute(self, hostname):
- hostname = super().start_compute(
+ def start_compute_with_vgpu(self, hostname):
+ hostname = self.start_compute(
pci_info=fakelibvirt.HostPCIDevicesInfo(
num_pci=0, num_pfs=0, num_vfs=0, num_mdevcap=2,
),
@@ -197,7 +197,7 @@ class VGPUTests(VGPUTestBase):
enabled_mdev_types=fakelibvirt.NVIDIA_11_VGPU_TYPE,
group='devices')
- self.compute1 = self.start_compute('host1')
+ self.compute1 = self.start_compute_with_vgpu('host1')
def assert_vgpu_usage_for_compute(self, compute, expected):
self.assert_mdev_usage(compute, expected_amount=expected)
@@ -211,7 +211,7 @@ class VGPUTests(VGPUTestBase):
def test_resize_servers_with_vgpu(self):
# Add another compute for the sake of resizing
- self.compute2 = self.start_compute('host2')
+ self.compute2 = self.start_compute_with_vgpu('host2')
server = self._create_server(
image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6',
flavor_id=self.flavor, host=self.compute1.host,
@@ -337,7 +337,7 @@ class VGPUMultipleTypesTests(VGPUTestBase):
# Prepare traits for later on
self._create_trait('CUSTOM_NVIDIA_11')
self._create_trait('CUSTOM_NVIDIA_12')
- self.compute1 = self.start_compute('host1')
+ self.compute1 = self.start_compute_with_vgpu('host1')
def test_create_servers_with_vgpu(self):
self._create_server(
@@ -369,13 +369,12 @@ class VGPUMultipleTypesTests(VGPUTestBase):
def test_create_servers_with_specific_type(self):
# Regenerate the PCI addresses so both pGPUs now support nvidia-12
- connection = self.computes[
- self.compute1.host].driver._host.get_connection()
- connection.pci_info = fakelibvirt.HostPCIDevicesInfo(
+ pci_info = fakelibvirt.HostPCIDevicesInfo(
num_pci=0, num_pfs=0, num_vfs=0, num_mdevcap=2,
multiple_gpu_types=True)
# Make a restart to update the Resource Providers
- self.compute1 = self.restart_compute_service(self.compute1)
+ self.compute1 = self.restart_compute_service(
+ self.compute1.host, pci_info=pci_info, keep_hypervisor_state=False)
pgpu1_rp_uuid = self._get_provider_uuid_by_name(
self.compute1.host + '_' + fakelibvirt.MDEVCAP_DEV1_PCI_ADDR)
pgpu2_rp_uuid = self._get_provider_uuid_by_name(
@@ -451,7 +450,7 @@ class DifferentMdevClassesTests(VGPUTestBase):
group='mdev_nvidia-12')
self.flags(mdev_class='CUSTOM_NOTVGPU', group='mdev_mlx5_core')
- self.compute1 = self.start_compute('host1')
+ self.compute1 = self.start_compute_with_vgpu('host1')
# Regenerate the PCI addresses so they can support both mlx5 and
# nvidia-12 types
connection = self.computes[
@@ -460,7 +459,7 @@ class DifferentMdevClassesTests(VGPUTestBase):
num_pci=0, num_pfs=0, num_vfs=0, num_mdevcap=2,
generic_types=True)
# Make a restart to update the Resource Providers
- self.compute1 = self.restart_compute_service(self.compute1)
+ self.compute1 = self.restart_compute_service('host1')
def test_create_servers_with_different_mdev_classes(self):
physdev1_rp_uuid = self._get_provider_uuid_by_name(
@@ -498,7 +497,7 @@ class DifferentMdevClassesTests(VGPUTestBase):
def test_resize_servers_with_mlx5(self):
# Add another compute for the sake of resizing
- self.compute2 = self.start_compute('host2')
+ self.compute2 = self.start_compute_with_vgpu('host2')
# Regenerate the PCI addresses so they can support both mlx5 and
# nvidia-12 types
connection = self.computes[
@@ -507,7 +506,7 @@ class DifferentMdevClassesTests(VGPUTestBase):
num_pci=0, num_pfs=0, num_vfs=0, num_mdevcap=2,
generic_types=True)
# Make a restart to update the Resource Providers
- self.compute2 = self.restart_compute_service(self.compute2)
+ self.compute2 = self.restart_compute_service('host2')
# Use the new flavor for booting
server = self._create_server(