summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBilly Olsen <billy.olsen@gmail.com>2022-04-21 19:42:27 -0700
committerSylvain Bauza <sbauza@redhat.com>2023-04-05 12:22:27 +0000
commitc3800f7863116412e90029381085cd01ebeae220 (patch)
treec3711a8fc3e6f28c9fb0a87f5fd96ac84b710fec
parentfdd4e7dc51ad34d3bc4741e7e40a10e8f3a7d138 (diff)
downloadnova-c3800f7863116412e90029381085cd01ebeae220.tar.gz
Handle mdev devices in libvirt 7.7+
Libvirt 7.7 changed the mdev device naming to include the parent PCI device when listing node devices. The domain, however, will still only see the UUID and not see the parent PCI device. Changing the parsing to simply drop the PCI identifier is not enough as the device cannot be found when attempting to lookup the new ID. Modify the Libvirt Driver's _get_mediated_device_information to tolerate different formats of the mdev name. This first uses the legacy behavior by trying to lookup the device name that is passed in (typically mdev_<uuid> format) and if that is not found, iterates the list of mdev node devices until the right UUID is found and selects that one. Note that the lookup of the mdev device by UUID are needed in order to keep the ability to recreate assigned mediated devices on a reboot of the compute node. Additionally, the libvirt utils parsing method mdev_name2uuid, has been updated to tolerate both mdev_<uuid> and mdev_<uuid>_<pciid> formats. Closes-Bug: 1951656 Change-Id: Ifed0fa16053228990a6a8df8d4c666521db7e329 (cherry picked from commit a28b907c4f0dbba6e141a8fbea807e6cb0438977) (cherry picked from commit 98d8c9eaa3c415cc234193e6a9115db887751363) (cherry picked from commit 28053917200e3e242148672efda0e1a2b043dc48)
-rw-r--r--nova/tests/functional/regressions/test_bug_1951656.py22
-rw-r--r--nova/tests/unit/virt/libvirt/test_config.py26
-rw-r--r--nova/virt/libvirt/config.py3
-rw-r--r--nova/virt/libvirt/driver.py43
-rw-r--r--nova/virt/libvirt/host.py2
-rw-r--r--nova/virt/libvirt/utils.py28
6 files changed, 97 insertions, 27 deletions
diff --git a/nova/tests/functional/regressions/test_bug_1951656.py b/nova/tests/functional/regressions/test_bug_1951656.py
index 9aad191072..d705ff6fe3 100644
--- a/nova/tests/functional/regressions/test_bug_1951656.py
+++ b/nova/tests/functional/regressions/test_bug_1951656.py
@@ -63,21 +63,11 @@ class VGPUTestsLibvirt7_7(test_vgpu.VGPUTestBase):
flavor_id=self.flavor, host=self.compute1.host,
networks='auto', expected_state='ACTIVE')
- # TODO(sbauza): Modify this once bug #1851656 is fixed.
- # mdev_name2uuid() raises a badly formed hexadecimal UUID string error
- self.assertRaises(ValueError,
- self.assert_mdev_usage,
- self.compute1, expected_amount=1)
-
- # Now, the problem is that we can't create new instances with VGPUs
- # from this host.
- server = self._create_server(
+ self.assert_mdev_usage(self.compute1, expected_amount=1)
+
+ self._create_server(
image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6',
flavor_id=self.flavor, host=self.compute1.host,
- networks='auto', expected_state='ERROR')
- # The error is due to a bad mdev name parsing
- self.assertIn('fault', server)
- # since we only have one host, we have a RescheduledException as this
- # service was creating an exception and we can't use another one.
- self.assertIn('Exceeded maximum number of retries',
- server['fault']['message'])
+ networks='auto', expected_state='ACTIVE')
+
+ self.assert_mdev_usage(self.compute1, expected_amount=2)
diff --git a/nova/tests/unit/virt/libvirt/test_config.py b/nova/tests/unit/virt/libvirt/test_config.py
index 2d690e5dfc..96f0d5f3e5 100644
--- a/nova/tests/unit/virt/libvirt/test_config.py
+++ b/nova/tests/unit/virt/libvirt/test_config.py
@@ -3135,6 +3135,32 @@ class LibvirtConfigNodeDeviceTest(LibvirtConfigBaseTest):
config.LibvirtConfigNodeDeviceMdevInformation)
self.assertEqual("nvidia-11", obj.mdev_information.type)
self.assertEqual(12, obj.mdev_information.iommu_group)
+ self.assertIsNone(obj.mdev_information.uuid)
+
+ def test_config_mdev_device_uuid(self):
+ xmlin = """
+ <device>
+ <name>mdev_b2107403_110c_45b0_af87_32cc91597b8a_0000_41_00_0</name>
+ <path>/sys/devices/pci0000:40/0000:40:03.1/0000:41:00.0/b2107403-110c-45b0-af87-32cc91597b8a</path>
+ <parent>pci_0000_41_00_0</parent>
+ <driver>
+ <name>vfio_mdev</name>
+ </driver>
+ <capability type='mdev'>
+ <type id='nvidia-442'/>
+ <uuid>b2107403-110c-45b0-af87-32cc91597b8a</uuid>
+ <iommuGroup number='57'/>
+ </capability>
+ </device>"""
+
+ obj = config.LibvirtConfigNodeDevice()
+ obj.parse_str(xmlin)
+ self.assertIsInstance(obj.mdev_information,
+ config.LibvirtConfigNodeDeviceMdevInformation)
+ self.assertEqual("nvidia-442", obj.mdev_information.type)
+ self.assertEqual(57, obj.mdev_information.iommu_group)
+ self.assertEqual("b2107403-110c-45b0-af87-32cc91597b8a",
+ obj.mdev_information.uuid)
def test_config_vdpa_device(self):
xmlin = """
diff --git a/nova/virt/libvirt/config.py b/nova/virt/libvirt/config.py
index de35247770..09b82b01ca 100644
--- a/nova/virt/libvirt/config.py
+++ b/nova/virt/libvirt/config.py
@@ -3289,6 +3289,7 @@ class LibvirtConfigNodeDeviceMdevInformation(LibvirtConfigObject):
root_name="capability", **kwargs)
self.type = None
self.iommu_group = None
+ self.uuid = None
def parse_dom(self, xmldoc):
super(LibvirtConfigNodeDeviceMdevInformation,
@@ -3298,6 +3299,8 @@ class LibvirtConfigNodeDeviceMdevInformation(LibvirtConfigObject):
self.type = c.get('id')
if c.tag == "iommuGroup":
self.iommu_group = int(c.get('number'))
+ if c.tag == "uuid":
+ self.uuid = c.text
class LibvirtConfigGuestRng(LibvirtConfigGuestDevice):
diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py
index c9b40dcda5..5248fb852c 100644
--- a/nova/virt/libvirt/driver.py
+++ b/nova/virt/libvirt/driver.py
@@ -7790,15 +7790,52 @@ class LibvirtDriver(driver.ComputeDriver):
def _get_mediated_device_information(self, devname):
"""Returns a dict of a mediated device."""
- virtdev = self._host.device_lookup_by_name(devname)
+ # LP #1951656 - In Libvirt 7.7, the mdev name now includes the PCI
+ # address of the parent device (e.g. mdev_<uuid>_<pci_address>) due to
+ # the mdevctl allowing for multiple mediated devs having the same UUID
+ # defined (only one can be active at a time). Since the guest
+ # information doesn't have the parent ID, try to lookup which
+ # mediated device is available that matches the UUID. If multiple
+ # devices are found that match the UUID, then this is an error
+ # condition.
+ try:
+ virtdev = self._host.device_lookup_by_name(devname)
+ except libvirt.libvirtError as ex:
+ if ex.get_error_code() != libvirt.VIR_ERR_NO_NODE_DEVICE:
+ raise
+ mdevs = [dev for dev in self._host.list_mediated_devices()
+ if dev.startswith(devname)]
+ # If no matching devices are found, simply raise the original
+ # exception indicating that no devices are found.
+ if not mdevs:
+ raise
+ elif len(mdevs) > 1:
+ msg = ("The mediated device name %(devname)s refers to a UUID "
+ "that is present in multiple libvirt mediated devices. "
+ "Matching libvirt mediated devices are %(devices)s. "
+ "Mediated device UUIDs must be unique for Nova." %
+ {'devname': devname,
+ 'devices': ', '.join(mdevs)})
+ raise exception.InvalidLibvirtMdevConfig(reason=msg)
+
+ LOG.debug('Found requested device %s as %s. Using that.',
+ devname, mdevs[0])
+ virtdev = self._host.device_lookup_by_name(mdevs[0])
xmlstr = virtdev.XMLDesc(0)
cfgdev = vconfig.LibvirtConfigNodeDevice()
cfgdev.parse_str(xmlstr)
+ # Starting with Libvirt 7.3, the uuid information is available in the
+ # node device information. If its there, use that. Otherwise,
+ # fall back to the previous behavior of parsing the uuid from the
+ # devname.
+ if cfgdev.mdev_information.uuid:
+ mdev_uuid = cfgdev.mdev_information.uuid
+ else:
+ mdev_uuid = libvirt_utils.mdev_name2uuid(cfgdev.name)
device = {
"dev_id": cfgdev.name,
- # name is like mdev_00ead764_fdc0_46b6_8db9_2963f5c815b4
- "uuid": libvirt_utils.mdev_name2uuid(cfgdev.name),
+ "uuid": mdev_uuid,
# the physical GPU PCI device
"parent": cfgdev.parent,
"type": cfgdev.mdev_information.type,
diff --git a/nova/virt/libvirt/host.py b/nova/virt/libvirt/host.py
index 604e25c49c..6a10ae723f 100644
--- a/nova/virt/libvirt/host.py
+++ b/nova/virt/libvirt/host.py
@@ -1416,7 +1416,7 @@ class Host(object):
def list_mediated_devices(self, flags=0):
"""Lookup mediated devices.
- :returns: a list of virNodeDevice instance
+ :returns: a list of strings with the name of the instance
"""
return self._list_devices("mdev", flags=flags)
diff --git a/nova/virt/libvirt/utils.py b/nova/virt/libvirt/utils.py
index da2a6e8b8a..8716ca1736 100644
--- a/nova/virt/libvirt/utils.py
+++ b/nova/virt/libvirt/utils.py
@@ -577,17 +577,31 @@ def get_default_machine_type(arch: str) -> ty.Optional[str]:
def mdev_name2uuid(mdev_name: str) -> str:
- """Convert an mdev name (of the form mdev_<uuid_with_underscores>) to a
- uuid (of the form 8-4-4-4-12).
+ """Convert an mdev name (of the form mdev_<uuid_with_underscores> or
+ mdev_<uuid_with_underscores>_<pciaddress>) to a uuid
+ (of the form 8-4-4-4-12).
+
+ :param mdev_name: the name of the mdev to parse the UUID from
+ :returns: string containing the uuid
"""
- return str(uuid.UUID(mdev_name[5:].replace('_', '-')))
+ mdev_uuid = mdev_name[5:].replace('_', '-')
+ # Unconditionnally remove the PCI address from the name
+ mdev_uuid = mdev_uuid[:36]
+ return str(uuid.UUID(mdev_uuid))
+
+def mdev_uuid2name(mdev_uuid: str, parent: str = None) -> str:
+ """Convert an mdev uuid (of the form 8-4-4-4-12) and optionally its parent
+ device to a name (of the form mdev_<uuid_with_underscores>[_<pciid>]).
-def mdev_uuid2name(mdev_uuid: str) -> str:
- """Convert an mdev uuid (of the form 8-4-4-4-12) to a name (of the form
- mdev_<uuid_with_underscores>).
+ :param mdev_uuid: the uuid of the mediated device
+ :param parent: the parent device id for the mediated device
+ :returns: name of the mdev to reference in libvirt
"""
- return "mdev_" + mdev_uuid.replace('-', '_')
+ name = "mdev_" + mdev_uuid.replace('-', '_')
+ if parent and parent.startswith('pci_'):
+ name = name + parent[4:]
+ return name
def get_flags_by_flavor_specs(flavor: 'objects.Flavor') -> ty.Set[str]: