summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorZuul <zuul@review.opendev.org>2022-09-01 18:21:45 +0000
committerGerrit Code Review <review@openstack.org>2022-09-01 18:21:45 +0000
commit82498dfe4b516bd3a8a0de58670223053370ad9b (patch)
treed943d5c223fa0f4d7e9d1b2429c63b0cf0cb69f4
parent2925fd1427315ec5ccf80c56d3325c41ab90f11f (diff)
parent9268bc36a3671f946eeb1021bb101de8fab7a438 (diff)
downloadnova-82498dfe4b516bd3a8a0de58670223053370ad9b.tar.gz
Merge "Handle PCI dev reconf with allocations"
-rw-r--r--doc/source/admin/pci-passthrough.rst16
-rw-r--r--nova/compute/pci_placement_translator.py30
-rw-r--r--nova/compute/resource_tracker.py24
-rw-r--r--nova/scheduler/client/report.py1
-rw-r--r--nova/tests/functional/libvirt/test_pci_in_placement.py222
-rw-r--r--nova/tests/unit/compute/test_pci_placement_translator.py1
-rw-r--r--nova/tests/unit/compute/test_resource_tracker.py29
7 files changed, 281 insertions, 42 deletions
diff --git a/doc/source/admin/pci-passthrough.rst b/doc/source/admin/pci-passthrough.rst
index 44e5362303..3fd5f7826e 100644
--- a/doc/source/admin/pci-passthrough.rst
+++ b/doc/source/admin/pci-passthrough.rst
@@ -404,4 +404,20 @@ is upgraded. If there is an in-progress migration, then the PCI allocation on
the source host of the migration will not be healed. The placement view will be
consistent after such migration is completed or reverted.
+Reconfiguring the PCI devices on the hypervisor or changing the
+:oslo.config:option:`pci.device_spec` configuration option and restarting the
+nova-compute service is supported in the following cases:
+
+* new devices are added
+* devices without allocation is removed
+
+Removing a device that has allocations is not supported. If a device having any
+allocation is removed then the nova-compute service will keep the device and
+the allocation exists in the nova DB and in placement and logs a warning. If
+a device with any allocation is reconfigured in a way that an allocated PF is
+removed and VFs from the same PF is configured (or vice versa) then
+nova-compute will refuse to start as it would create a situation where both
+the PF and its VFs are made available for consumption.
+
+
For deeper technical details please read the `nova specification. <https://specs.openstack.org/openstack/nova-specs/specs/zed/approved/pci-device-tracking-in-placement.html>`_
diff --git a/nova/compute/pci_placement_translator.py b/nova/compute/pci_placement_translator.py
index c6ccbf27ae..c7669f5203 100644
--- a/nova/compute/pci_placement_translator.py
+++ b/nova/compute/pci_placement_translator.py
@@ -443,15 +443,33 @@ class PlacementView:
fields.PciDeviceStatus.DELETED,
fields.PciDeviceStatus.REMOVED,
):
+ # If the PCI tracker marked the device DELETED or REMOVED then
+ # such device is not allocated, so we are free to drop it from
+ # placement too.
self._remove_dev(dev)
else:
if not dev_spec:
- LOG.warning(
- "Device spec is not found for device %s in "
- "[pci]device_spec. Ignoring device in Placement resource "
- "view. This should not happen. Please file a bug.",
- dev.address
- )
+ if dev.instance_uuid:
+ LOG.warning(
+ "Device spec is not found for device %s in "
+ "[pci]device_spec. We are skipping this devices "
+ "during Placement update. The device is allocated by "
+ "%s. You should not remove an allocated device from "
+ "the configuration. Please restore the configuration "
+ "or cold migrate the instance to resolve the "
+ "inconsistency.",
+ dev.address,
+ dev.instance_uuid
+ )
+ else:
+ LOG.warning(
+ "Device spec is not found for device %s in "
+ "[pci]device_spec. Ignoring device in Placement "
+ "resource view. This should not happen. Please file a "
+ "bug.",
+ dev.address
+ )
+
return
self._add_dev(dev, dev_spec.get_tags())
diff --git a/nova/compute/resource_tracker.py b/nova/compute/resource_tracker.py
index f311b11442..ffbc7ed03f 100644
--- a/nova/compute/resource_tracker.py
+++ b/nova/compute/resource_tracker.py
@@ -1295,14 +1295,22 @@ class ResourceTracker(object):
# This merges in changes from the provider config files loaded in init
self._merge_provider_configs(self.provider_configs, prov_tree)
- # Flush any changes. If we either processed ReshapeNeeded above or
- # update_provider_tree_for_pci did reshape, then we need to pass allocs
- # to update_from_provider_tree to hit placement's POST /reshaper route.
- self.reportclient.update_from_provider_tree(
- context,
- prov_tree,
- allocations=allocs if driver_reshaped or pci_reshaped else None
- )
+ try:
+ # Flush any changes. If we either processed ReshapeNeeded above or
+ # update_provider_tree_for_pci did reshape, then we need to pass
+ # allocs to update_from_provider_tree to hit placement's POST
+ # /reshaper route.
+ self.reportclient.update_from_provider_tree(
+ context,
+ prov_tree,
+ allocations=allocs if driver_reshaped or pci_reshaped else None
+ )
+ except exception.InventoryInUse as e:
+ # This means an inventory reconfiguration (e.g.: removing a parent
+ # PF and adding a VF under that parent) was not possible due to
+ # existing allocations. Translate the exception to prevent the
+ # compute service to start
+ raise exception.PlacementPciException(error=str(e))
def _update(self, context, compute_node, startup=False):
"""Update partial stats locally and populate them to Scheduler."""
diff --git a/nova/scheduler/client/report.py b/nova/scheduler/client/report.py
index cb04711bd0..2896a07f13 100644
--- a/nova/scheduler/client/report.py
+++ b/nova/scheduler/client/report.py
@@ -1373,7 +1373,6 @@ class SchedulerReportClient(object):
# can inherit.
helper_exceptions = (
exception.InvalidResourceClass,
- exception.InventoryInUse,
exception.ResourceProviderAggregateRetrievalFailed,
exception.ResourceProviderDeletionFailed,
exception.ResourceProviderInUse,
diff --git a/nova/tests/functional/libvirt/test_pci_in_placement.py b/nova/tests/functional/libvirt/test_pci_in_placement.py
index 233941cd56..c61ebda1eb 100644
--- a/nova/tests/functional/libvirt/test_pci_in_placement.py
+++ b/nova/tests/functional/libvirt/test_pci_in_placement.py
@@ -36,7 +36,28 @@ class PlacementPCIReportingTests(test_pci_sriov_servers._PCIServersTestBase):
# Just placeholders to satisfy the base class. The real value will be
# redefined by the tests
PCI_DEVICE_SPEC = []
- PCI_ALIAS = None
+ PCI_ALIAS = [
+ jsonutils.dumps(x)
+ for x in (
+ {
+ "vendor_id": fakelibvirt.PCI_VEND_ID,
+ "product_id": fakelibvirt.PCI_PROD_ID,
+ "name": "a-pci-dev",
+ },
+ {
+ "vendor_id": fakelibvirt.PCI_VEND_ID,
+ "product_id": fakelibvirt.PF_PROD_ID,
+ "device_type": "type-PF",
+ "name": "a-pf",
+ },
+ {
+ "vendor_id": fakelibvirt.PCI_VEND_ID,
+ "product_id": fakelibvirt.VF_PROD_ID,
+ "device_type": "type-VF",
+ "name": "a-vf",
+ },
+ )
+ ]
def setUp(self):
super().setUp()
@@ -681,6 +702,179 @@ class PlacementPCIInventoryReportingTests(PlacementPCIReportingTests):
},
)
+ def _create_one_compute_with_a_pf_consumed_by_an_instance(self):
+ # The fake libvirt will emulate on the host:
+ # * two type-PFs in slot 0, with one type-VF
+ pci_info = fakelibvirt.HostPCIDevicesInfo(
+ num_pci=0, num_pfs=1, num_vfs=1)
+ # we match the PF only and ignore the VF
+ device_spec = self._to_device_spec_conf(
+ [
+ {
+ "vendor_id": fakelibvirt.PCI_VEND_ID,
+ "product_id": fakelibvirt.PF_PROD_ID,
+ "address": "0000:81:00.0",
+ },
+ ]
+ )
+ self.flags(group='pci', device_spec=device_spec)
+ self.mock_pci_report_in_placement.return_value = True
+ self.start_compute(hostname="compute1", pci_info=pci_info)
+
+ self.assertPCIDeviceCounts("compute1", total=1, free=1)
+ compute1_expected_placement_view = {
+ "inventories": {
+ "0000:81:00.0": {self.PF_RC: 1},
+ },
+ "traits": {
+ "0000:81:00.0": [],
+ },
+ "usages": {
+ "0000:81:00.0": {self.PF_RC: 0},
+ },
+ "allocations": {},
+ }
+ self.assert_placement_pci_view(
+ "compute1", **compute1_expected_placement_view)
+
+ # Create an instance consuming the PF
+ extra_spec = {"pci_passthrough:alias": "a-pf:1"}
+ flavor_id = self._create_flavor(extra_spec=extra_spec)
+ server = self._create_server(flavor_id=flavor_id, networks=[])
+
+ self.assertPCIDeviceCounts("compute1", total=1, free=0)
+ compute1_expected_placement_view["usages"] = {
+ "0000:81:00.0": {self.PF_RC: 1},
+ }
+ compute1_expected_placement_view["allocations"][server["id"]] = {
+ "0000:81:00.0": {self.PF_RC: 1},
+ }
+ self.assert_placement_pci_view(
+ "compute1", **compute1_expected_placement_view)
+ self._run_periodics()
+ self.assert_placement_pci_view(
+ "compute1", **compute1_expected_placement_view)
+
+ return server, compute1_expected_placement_view
+
+ def test_device_reconfiguration_with_allocations_config_change_warn(self):
+ server, compute1_expected_placement_view = (
+ self._create_one_compute_with_a_pf_consumed_by_an_instance())
+
+ # remove 0000:81:00.0 from the device spec and restart the compute
+ device_spec = self._to_device_spec_conf([])
+ self.flags(group='pci', device_spec=device_spec)
+ # The PF is used but removed from the config. The PciTracker warns
+ # but keeps the device so the placement logic mimic this and only warns
+ # but keeps the RP and the allocation in placement intact.
+ self.restart_compute_service(hostname="compute1")
+ self.assert_placement_pci_view(
+ "compute1", **compute1_expected_placement_view)
+ self._run_periodics()
+ self.assert_placement_pci_view(
+ "compute1", **compute1_expected_placement_view)
+ # the warning from the PciTracker
+ self.assertIn(
+ "WARNING [nova.pci.manager] Unable to remove device with status "
+ "'allocated' and ownership %s because of PCI device "
+ "1:0000:81:00.0 is allocated instead of ['available', "
+ "'unavailable', 'unclaimable']. Check your [pci]device_spec "
+ "configuration to make sure this allocated device is whitelisted. "
+ "If you have removed the device from the whitelist intentionally "
+ "or the device is no longer available on the host you will need "
+ "to delete the server or migrate it to another host to silence "
+ "this warning."
+ % server['id'],
+ self.stdlog.logger.output,
+ )
+ # the warning from the placement PCI tracking logic
+ self.assertIn(
+ "WARNING [nova.compute.pci_placement_translator] Device spec is "
+ "not found for device 0000:81:00.0 in [pci]device_spec. We are "
+ "skipping this devices during Placement update. The device is "
+ "allocated by %s. You should not remove an allocated device from "
+ "the configuration. Please restore the configuration or cold "
+ "migrate the instance to resolve the inconsistency."
+ % server['id'],
+ self.stdlog.logger.output,
+ )
+
+ def test_device_reconfiguration_with_allocations_config_change_stop(self):
+ self._create_one_compute_with_a_pf_consumed_by_an_instance()
+
+ # switch 0000:81:00.0 PF to 0000:81:00.1 VF
+ # in the config, then restart the compute service
+
+ # only match the VF now
+ device_spec = self._to_device_spec_conf(
+ [
+ {
+ "vendor_id": fakelibvirt.PCI_VEND_ID,
+ "product_id": fakelibvirt.VF_PROD_ID,
+ "address": "0000:81:00.1",
+ },
+ ]
+ )
+ self.flags(group='pci', device_spec=device_spec)
+ # The compute fails to start as the new config would mean that the PF
+ # inventory is removed from the 0000:81:00.0 RP and the PF inventory is
+ # added instead there, but the VF inventory has allocations. Keeping
+ # the old inventory as in
+ # test_device_reconfiguration_with_allocations_config_change_warn is
+ # not an option as it would result in two resource class on the same RP
+ # one for the PF and one for the VF. That would allow consuming
+ # the same physical device twice. Such dependent device configuration
+ # is intentionally not supported so we are stopping the compute
+ # service.
+ ex = self.assertRaises(
+ exception.PlacementPciException,
+ self.restart_compute_service,
+ hostname="compute1"
+ )
+ self.assertRegex(
+ str(ex),
+ "Failed to gather or report PCI resources to Placement: There was "
+ "a conflict when trying to complete your request.\n\n "
+ "update conflict: Inventory for 'CUSTOM_PCI_8086_1528' on "
+ "resource provider '.*' in use.",
+ )
+
+ def test_device_reconfiguration_with_allocations_hyp_change(self):
+ server, compute1_expected_placement_view = (
+ self._create_one_compute_with_a_pf_consumed_by_an_instance())
+
+ # restart the compute but simulate that the device 0000:81:00.0 is
+ # removed from the hypervisor while the device spec config left
+ # intact. The PciTracker will notice this and log a warning. The
+ # placement tracking logic simply keeps the allocation intact in
+ # placement as both the PciDevice and the DeviceSpec is available.
+ pci_info = fakelibvirt.HostPCIDevicesInfo(
+ num_pci=0, num_pfs=0, num_vfs=0)
+ self.restart_compute_service(
+ hostname="compute1",
+ pci_info=pci_info,
+ keep_hypervisor_state=False
+ )
+ self.assert_placement_pci_view(
+ "compute1", **compute1_expected_placement_view)
+ self._run_periodics()
+ self.assert_placement_pci_view(
+ "compute1", **compute1_expected_placement_view)
+ # the warning from the PciTracker
+ self.assertIn(
+ "WARNING [nova.pci.manager] Unable to remove device with status "
+ "'allocated' and ownership %s because of PCI device "
+ "1:0000:81:00.0 is allocated instead of ['available', "
+ "'unavailable', 'unclaimable']. Check your [pci]device_spec "
+ "configuration to make sure this allocated device is whitelisted. "
+ "If you have removed the device from the whitelist intentionally "
+ "or the device is no longer available on the host you will need "
+ "to delete the server or migrate it to another host to silence "
+ "this warning."
+ % server['id'],
+ self.stdlog.logger.output,
+ )
+
def test_reporting_disabled_nothing_is_reported(self):
# The fake libvirt will emulate on the host:
# * one type-PCI in slot 0
@@ -765,32 +959,6 @@ class PlacementPCIAllocationHealingTests(PlacementPCIReportingTests):
)
)
- # Pre-configure a PCI alias to consume our devs
- alias_pci = {
- "vendor_id": fakelibvirt.PCI_VEND_ID,
- "product_id": fakelibvirt.PCI_PROD_ID,
- "name": "a-pci-dev",
- }
- alias_pf = {
- "vendor_id": fakelibvirt.PCI_VEND_ID,
- "product_id": fakelibvirt.PF_PROD_ID,
- "device_type": "type-PF",
- "name": "a-pf",
- }
- alias_vf = {
- "vendor_id": fakelibvirt.PCI_VEND_ID,
- "product_id": fakelibvirt.VF_PROD_ID,
- "device_type": "type-VF",
- "name": "a-vf",
- }
- self.flags(
- group='pci',
- alias=self._to_pci_alias_conf([alias_pci, alias_pf, alias_vf]))
-
- @staticmethod
- def _to_pci_alias_conf(alias_list):
- return [jsonutils.dumps(x) for x in alias_list]
-
@staticmethod
def _move_allocation(allocations, from_uuid, to_uuid):
allocations[to_uuid] = allocations[from_uuid]
diff --git a/nova/tests/unit/compute/test_pci_placement_translator.py b/nova/tests/unit/compute/test_pci_placement_translator.py
index 23fa7e1b7e..ee6a0469ac 100644
--- a/nova/tests/unit/compute/test_pci_placement_translator.py
+++ b/nova/tests/unit/compute/test_pci_placement_translator.py
@@ -50,6 +50,7 @@ class TestTranslator(test.NoDBTestCase):
pci_device.PciDevice(
address="0000:81:00.0",
status=fields.PciDeviceStatus.AVAILABLE,
+ instance_uuid=None,
)
]
)
diff --git a/nova/tests/unit/compute/test_resource_tracker.py b/nova/tests/unit/compute/test_resource_tracker.py
index d31ec97f33..a3b005b80f 100644
--- a/nova/tests/unit/compute/test_resource_tracker.py
+++ b/nova/tests/unit/compute/test_resource_tracker.py
@@ -1960,6 +1960,35 @@ class TestUpdateComputeNode(BaseTestCase):
upt = self.rt.reportclient.update_from_provider_tree
upt.assert_called_once_with(mock.sentinel.ctx, ptree, allocations=None)
+ @mock.patch(
+ 'nova.compute.resource_tracker.ResourceTracker.'
+ '_sync_compute_service_disabled_trait',
+ new=mock.Mock()
+ )
+ @mock.patch(
+ 'nova.compute.resource_tracker.ResourceTracker._resource_change',
+ new=mock.Mock(return_value=False)
+ )
+ def test_update_pci_reporting_allocation_in_use_error_propagated(self):
+ """Assert that if the pci placement reporting code tries to remove
+ inventory with allocation from placement due to invalid hypervisor
+ or [pci]device_spec reconfiguration then the InventoryInUse error from
+ placement is propagated and makes the compute startup to fail.
+ """
+ compute_obj = _COMPUTE_NODE_FIXTURES[0].obj_clone()
+ self._setup_rt()
+ self.rt.reportclient.update_from_provider_tree.side_effect = (
+ exc.InventoryInUse(
+ resource_class="FOO", resource_provider="bar"))
+
+ self.assertRaises(
+ exc.PlacementPciException,
+ self.rt._update,
+ mock.sentinel.ctx,
+ compute_obj,
+ startup=True,
+ )
+
@mock.patch('nova.objects.Service.get_by_compute_host',
return_value=objects.Service(disabled=True))
def test_sync_compute_service_disabled_trait_add(self, mock_get_by_host):