diff options
-rw-r--r-- | nova/compute/resource_tracker.py | 2 | ||||
-rw-r--r-- | nova/conf/compute.py | 2 | ||||
-rw-r--r-- | nova/conf/hyperv.py | 2 | ||||
-rw-r--r-- | nova/conf/libvirt.py | 4 | ||||
-rw-r--r-- | nova/conf/neutron.py | 2 | ||||
-rw-r--r-- | nova/conf/quota.py | 2 | ||||
-rw-r--r-- | nova/conf/scheduler.py | 4 | ||||
-rw-r--r-- | nova/network/neutron.py | 4 | ||||
-rw-r--r-- | nova/objects/migrate_data.py | 3 | ||||
-rw-r--r-- | nova/objects/pci_device.py | 61 | ||||
-rw-r--r-- | nova/pci/manager.py | 16 | ||||
-rw-r--r-- | nova/pci/stats.py | 6 | ||||
-rw-r--r-- | nova/tests/unit/compute/test_resource_tracker.py | 2 | ||||
-rw-r--r-- | nova/tests/unit/network/test_neutron.py | 18 | ||||
-rw-r--r-- | nova/tests/unit/objects/test_migrate_data.py | 4 | ||||
-rw-r--r-- | nova/tests/unit/objects/test_pci_device.py | 10 | ||||
-rw-r--r-- | nova/tests/unit/pci/test_manager.py | 302 | ||||
-rw-r--r-- | releasenotes/notes/bug-1970383-segment-scheduling-permissions-92ba907b10a9eb1c.yaml | 7 |
18 files changed, 390 insertions, 61 deletions
diff --git a/nova/compute/resource_tracker.py b/nova/compute/resource_tracker.py index 8497bbcd89..0b801f7ddf 100644 --- a/nova/compute/resource_tracker.py +++ b/nova/compute/resource_tracker.py @@ -1856,7 +1856,7 @@ class ResourceTracker(object): raise ValueError(_( "Provider config '%(source_file_name)s' attempts " "to define a trait that is owned by the " - "virt driver or specified via the placment api. " + "virt driver or specified via the placement api. " "Invalid traits '%(invalid)s' must be removed " "from '%(source_file_name)s'.") % { 'source_file_name': source_file_name, diff --git a/nova/conf/compute.py b/nova/conf/compute.py index 263d777586..5abe7694f8 100644 --- a/nova/conf/compute.py +++ b/nova/conf/compute.py @@ -200,7 +200,7 @@ The template will be rendered using Jinja2 template engine, and receive a top-level key called ``interfaces``. This key will contain a list of dictionaries, one for each interface. -Refer to the cloudinit documentaion for more information: +Refer to the cloudinit documentation for more information: https://cloudinit.readthedocs.io/en/latest/topics/datasources.html diff --git a/nova/conf/hyperv.py b/nova/conf/hyperv.py index caa7a8702b..cce3cdc3e2 100644 --- a/nova/conf/hyperv.py +++ b/nova/conf/hyperv.py @@ -320,7 +320,7 @@ configured to claim such devices. cfg.ListOpt('iscsi_initiator_list', default=[], help=""" -List of iSCSI initiators that will be used for estabilishing iSCSI sessions. +List of iSCSI initiators that will be used for establishing iSCSI sessions. If none are specified, the Microsoft iSCSI initiator service will choose the initiator. diff --git a/nova/conf/libvirt.py b/nova/conf/libvirt.py index 7d9c837ba5..4ea37b8fe9 100644 --- a/nova/conf/libvirt.py +++ b/nova/conf/libvirt.py @@ -453,7 +453,7 @@ support built into QEMU. Prerequisite: TLS environment is configured correctly on all relevant Compute nodes. This means, Certificate Authority (CA), server, client -certificates, their corresponding keys, and their file permisssions are +certificates, their corresponding keys, and their file permissions are in place, and are validated. Notes: @@ -705,7 +705,7 @@ the source of entropy on the host. Since libvirt 1.3.4, any path (that returns random numbers when read) is accepted. The recommended source of entropy is ``/dev/urandom`` -- it is non-blocking, therefore relatively fast; and avoids the limitations of ``/dev/random``, which is -a legacy interface. For more details (and comparision between different +a legacy interface. For more details (and comparison between different RNG sources), refer to the "Usage" section in the Linux kernel API documentation for ``[u]random``: http://man7.org/linux/man-pages/man4/urandom.4.html and diff --git a/nova/conf/neutron.py b/nova/conf/neutron.py index dc391a268e..e6774ced55 100644 --- a/nova/conf/neutron.py +++ b/nova/conf/neutron.py @@ -46,7 +46,7 @@ Default name for the floating IP pool. Specifies the name of floating IP pool used for allocating floating IPs. This option is only used if Neutron does not specify the floating IP pool name in -port binding reponses. +port binding responses. """), cfg.IntOpt('extension_sync_interval', default=600, diff --git a/nova/conf/quota.py b/nova/conf/quota.py index 0d51129d50..e5b4b8dc73 100644 --- a/nova/conf/quota.py +++ b/nova/conf/quota.py @@ -147,7 +147,7 @@ Possible values: deprecated_group='DEFAULT', deprecated_name='quota_server_groups', help=""" -The maxiumum number of server groups per project. +The maximum number of server groups per project. Server groups are used to control the affinity and anti-affinity scheduling policy for a group of servers or instances. Reducing the quota will not affect diff --git a/nova/conf/scheduler.py b/nova/conf/scheduler.py index 8b3b616987..03e78fe701 100644 --- a/nova/conf/scheduler.py +++ b/nova/conf/scheduler.py @@ -780,7 +780,7 @@ follows: Possible values: -* An integer or float value, where the value corresponds to the multipler +* An integer or float value, where the value corresponds to the multiplier ratio for this weigher. Related options: @@ -857,7 +857,7 @@ of any actual metric value: Possible values: -* An integer or float value, where the value corresponds to the multipler +* An integer or float value, where the value corresponds to the multiplier ratio for this weigher. Related options: diff --git a/nova/network/neutron.py b/nova/network/neutron.py index 427fdedecd..2021bdb58f 100644 --- a/nova/network/neutron.py +++ b/nova/network/neutron.py @@ -3875,7 +3875,7 @@ class API: either Segment extension isn't enabled in Neutron or if the network isn't configured for routing. """ - client = get_client(context) + client = get_client(context, admin=True) if not self.has_segment_extension(client=client): return [] @@ -3906,7 +3906,7 @@ class API: extension isn't enabled in Neutron or the provided subnet doesn't have segments (if the related network isn't configured for routing) """ - client = get_client(context) + client = get_client(context, admin=True) if not self.has_segment_extension(client=client): return None diff --git a/nova/objects/migrate_data.py b/nova/objects/migrate_data.py index 06f30342e5..cf0e4bf9a3 100644 --- a/nova/objects/migrate_data.py +++ b/nova/objects/migrate_data.py @@ -279,6 +279,9 @@ class LibvirtLiveMigrateData(LiveMigrateData): if (target_version < (1, 10) and 'src_supports_numa_live_migration' in primitive): del primitive['src_supports_numa_live_migration'] + if (target_version < (1, 10) and + 'dst_supports_numa_live_migration' in primitive): + del primitive['dst_supports_numa_live_migration'] if target_version < (1, 10) and 'dst_numa_info' in primitive: del primitive['dst_numa_info'] if target_version < (1, 9) and 'vifs' in primitive: diff --git a/nova/objects/pci_device.py b/nova/objects/pci_device.py index 275d5da356..b0d5b75826 100644 --- a/nova/objects/pci_device.py +++ b/nova/objects/pci_device.py @@ -346,10 +346,40 @@ class PciDevice(base.NovaPersistentObject, base.NovaObject): # Update PF status to CLAIMED if all of it dependants are free # and set their status to UNCLAIMABLE vfs_list = self.child_devices - if not all([vf.is_available() for vf in vfs_list]): - raise exception.PciDeviceVFInvalidStatus( - compute_node_id=self.compute_node_id, - address=self.address) + non_free_dependants = [ + vf for vf in vfs_list if not vf.is_available()] + if non_free_dependants: + # NOTE(gibi): There should not be any dependent devices that + # are UNCLAIMABLE or UNAVAILABLE as the parent is AVAILABLE, + # but we got reports in bug 1969496 that this inconsistency + # can happen. So check if the only non-free devices are in + # state UNCLAIMABLE or UNAVAILABLE then we log a warning but + # allow to claim the parent. + actual_statuses = { + child.status for child in non_free_dependants} + allowed_non_free_statues = { + fields.PciDeviceStatus.UNCLAIMABLE, + fields.PciDeviceStatus.UNAVAILABLE, + } + if actual_statuses - allowed_non_free_statues == set(): + LOG.warning( + "Some child device of parent %s is in an inconsistent " + "state. If you can reproduce this warning then please " + "report a bug at " + "https://bugs.launchpad.net/nova/+filebug with " + "reproduction steps. Inconsistent children with " + "state: %s", + self.address, + ",".join( + "%s - %s" % (child.address, child.status) + for child in non_free_dependants + ), + ) + + else: + raise exception.PciDeviceVFInvalidStatus( + compute_node_id=self.compute_node_id, + address=self.address) self._bulk_update_status(vfs_list, fields.PciDeviceStatus.UNCLAIMABLE) @@ -447,11 +477,30 @@ class PciDevice(base.NovaPersistentObject, base.NovaObject): instance.pci_devices.objects.append(copy.copy(self)) def remove(self): - if self.status != fields.PciDeviceStatus.AVAILABLE: + # We allow removal of a device is if it is unused. It can be unused + # either by being in available state or being in a state that shows + # that the parent or child device blocks the consumption of this device + expected_states = [ + fields.PciDeviceStatus.AVAILABLE, + fields.PciDeviceStatus.UNAVAILABLE, + fields.PciDeviceStatus.UNCLAIMABLE, + ] + if self.status not in expected_states: raise exception.PciDeviceInvalidStatus( compute_node_id=self.compute_node_id, address=self.address, status=self.status, - hopestatus=[fields.PciDeviceStatus.AVAILABLE]) + hopestatus=expected_states) + # Just to be on the safe side, do not allow removal of device that has + # an owner even if the state of the device suggests that it is not + # owned. + if 'instance_uuid' in self and self.instance_uuid is not None: + raise exception.PciDeviceInvalidOwner( + compute_node_id=self.compute_node_id, + address=self.address, + owner=self.instance_uuid, + hopeowner=None, + ) + self.status = fields.PciDeviceStatus.REMOVED self.instance_uuid = None self.request_id = None diff --git a/nova/pci/manager.py b/nova/pci/manager.py index fc6a841724..78cfb05dbb 100644 --- a/nova/pci/manager.py +++ b/nova/pci/manager.py @@ -217,10 +217,13 @@ class PciDevTracker(object): # from the pci whitelist. try: existed.remove() - except exception.PciDeviceInvalidStatus as e: - LOG.warning("Unable to remove device with %(status)s " - "ownership %(instance_uuid)s because of " - "%(pci_exception)s. " + except ( + exception.PciDeviceInvalidStatus, + exception.PciDeviceInvalidOwner, + ) as e: + LOG.warning("Unable to remove device with status " + "'%(status)s' and ownership %(instance_uuid)s " + "because of %(pci_exception)s. " "Check your [pci]passthrough_whitelist " "configuration to make sure this allocated " "device is whitelisted. If you have removed " @@ -250,7 +253,10 @@ class PciDevTracker(object): else: # Note(yjiang5): no need to update stats if an assigned # device is hot removed. - self.stats.remove_device(existed) + # NOTE(gibi): only remove the device from the pools if it + # is not already removed + if existed in self.stats.get_free_devs(): + self.stats.remove_device(existed) else: # Update tracked devices. new_value: ty.Dict[str, ty.Any] diff --git a/nova/pci/stats.py b/nova/pci/stats.py index c8dda84d4b..6a53c43c78 100644 --- a/nova/pci/stats.py +++ b/nova/pci/stats.py @@ -279,8 +279,12 @@ class PciDeviceStats(object): if pci_dev.dev_type == fields.PciDeviceType.SRIOV_PF: vfs_list = pci_dev.child_devices if vfs_list: + free_devs = self.get_free_devs() for vf in vfs_list: - self.remove_device(vf) + # NOTE(gibi): do not try to remove a device that are + # already removed + if vf in free_devs: + self.remove_device(vf) elif pci_dev.dev_type in ( fields.PciDeviceType.SRIOV_VF, fields.PciDeviceType.VDPA, diff --git a/nova/tests/unit/compute/test_resource_tracker.py b/nova/tests/unit/compute/test_resource_tracker.py index 36236d58de..caa12cb754 100644 --- a/nova/tests/unit/compute/test_resource_tracker.py +++ b/nova/tests/unit/compute/test_resource_tracker.py @@ -4069,7 +4069,7 @@ class ProviderConfigTestCases(BaseTestCase): expected = ("Provider config 'test_provider_config.yaml' attempts to " "define a trait that is owned by the virt driver or " - "specified via the placment api. Invalid traits '" + + "specified via the placement api. Invalid traits '" + ex_trait + "' must be removed from " "'test_provider_config.yaml'.") diff --git a/nova/tests/unit/network/test_neutron.py b/nova/tests/unit/network/test_neutron.py index f18a548723..40137cef39 100644 --- a/nova/tests/unit/network/test_neutron.py +++ b/nova/tests/unit/network/test_neutron.py @@ -7070,13 +7070,17 @@ class TestAPI(TestAPIBase): req_lvl_params.same_subtree, ) - def test_get_segment_ids_for_network_no_segment_ext(self): + @mock.patch.object(neutronapi, 'get_client') + def test_get_segment_ids_for_network_no_segment_ext(self, mock_client): + mocked_client = mock.create_autospec(client.Client) + mock_client.return_value = mocked_client with mock.patch.object( self.api, 'has_segment_extension', return_value=False, ): self.assertEqual( [], self.api.get_segment_ids_for_network(self.context, uuids.network_id)) + mock_client.assert_called_once_with(self.context, admin=True) @mock.patch.object(neutronapi, 'get_client') def test_get_segment_ids_for_network_passes(self, mock_client): @@ -7090,6 +7094,7 @@ class TestAPI(TestAPIBase): res = self.api.get_segment_ids_for_network( self.context, uuids.network_id) self.assertEqual([uuids.segment_id], res) + mock_client.assert_called_once_with(self.context, admin=True) mocked_client.list_subnets.assert_called_once_with( network_id=uuids.network_id, fields='segment_id') @@ -7105,6 +7110,7 @@ class TestAPI(TestAPIBase): res = self.api.get_segment_ids_for_network( self.context, uuids.network_id) self.assertEqual([], res) + mock_client.assert_called_once_with(self.context, admin=True) mocked_client.list_subnets.assert_called_once_with( network_id=uuids.network_id, fields='segment_id') @@ -7120,14 +7126,19 @@ class TestAPI(TestAPIBase): self.assertRaises(exception.InvalidRoutedNetworkConfiguration, self.api.get_segment_ids_for_network, self.context, uuids.network_id) + mock_client.assert_called_once_with(self.context, admin=True) - def test_get_segment_id_for_subnet_no_segment_ext(self): + @mock.patch.object(neutronapi, 'get_client') + def test_get_segment_id_for_subnet_no_segment_ext(self, mock_client): + mocked_client = mock.create_autospec(client.Client) + mock_client.return_value = mocked_client with mock.patch.object( self.api, 'has_segment_extension', return_value=False, ): self.assertIsNone( self.api.get_segment_id_for_subnet(self.context, uuids.subnet_id)) + mock_client.assert_called_once_with(self.context, admin=True) @mock.patch.object(neutronapi, 'get_client') def test_get_segment_id_for_subnet_passes(self, mock_client): @@ -7141,6 +7152,7 @@ class TestAPI(TestAPIBase): res = self.api.get_segment_id_for_subnet( self.context, uuids.subnet_id) self.assertEqual(uuids.segment_id, res) + mock_client.assert_called_once_with(self.context, admin=True) mocked_client.show_subnet.assert_called_once_with(uuids.subnet_id) @mock.patch.object(neutronapi, 'get_client') @@ -7155,6 +7167,7 @@ class TestAPI(TestAPIBase): self.assertIsNone( self.api.get_segment_id_for_subnet(self.context, uuids.subnet_id)) + mock_client.assert_called_once_with(self.context, admin=True) @mock.patch.object(neutronapi, 'get_client') def test_get_segment_id_for_subnet_fails(self, mock_client): @@ -7168,6 +7181,7 @@ class TestAPI(TestAPIBase): self.assertRaises(exception.InvalidRoutedNetworkConfiguration, self.api.get_segment_id_for_subnet, self.context, uuids.subnet_id) + mock_client.assert_called_once_with(self.context, admin=True) @mock.patch.object(neutronapi.LOG, 'debug') def test_get_port_pci_dev(self, mock_debug): diff --git a/nova/tests/unit/objects/test_migrate_data.py b/nova/tests/unit/objects/test_migrate_data.py index bc04c5bd13..7f587c6906 100644 --- a/nova/tests/unit/objects/test_migrate_data.py +++ b/nova/tests/unit/objects/test_migrate_data.py @@ -94,8 +94,8 @@ class _TestLibvirtLiveMigrateData(object): target_connect_addr='127.0.0.1', dst_wants_file_backed_memory=False, file_backed_memory_discard=False, - src_supports_numa_live_migraton=True, - dst_supports_numa_live_migraton=True, + src_supports_numa_live_migration=True, + dst_supports_numa_live_migration=True, dst_numa_info=migrate_data.LibvirtLiveMigrateNUMAInfo()) manifest = ovo_base.obj_tree_get_versions(obj.obj_name()) diff --git a/nova/tests/unit/objects/test_pci_device.py b/nova/tests/unit/objects/test_pci_device.py index 4087b89800..91ec566c32 100644 --- a/nova/tests/unit/objects/test_pci_device.py +++ b/nova/tests/unit/objects/test_pci_device.py @@ -467,6 +467,16 @@ class _TestPciDeviceObject(object): devobj.claim(self.inst.uuid) self.assertRaises(exception.PciDeviceInvalidStatus, devobj.remove) + def test_remove_device_fail_owned_with_unavailable_state(self): + # This test creates an PCI device in an invalid state. This should + # not happen in any known scenario. But we want to be save not to allow + # removing a device that has an owner. See bug 1969496 for more details + self._create_fake_instance() + devobj = pci_device.PciDevice.create(None, dev_dict) + devobj.claim(self.inst.uuid) + devobj.status = fields.PciDeviceStatus.UNAVAILABLE + self.assertRaises(exception.PciDeviceInvalidOwner, devobj.remove) + class TestPciDeviceObject(test_objects._LocalTest, _TestPciDeviceObject): diff --git a/nova/tests/unit/pci/test_manager.py b/nova/tests/unit/pci/test_manager.py index 39d0b116bb..e9e4a45590 100644 --- a/nova/tests/unit/pci/test_manager.py +++ b/nova/tests/unit/pci/test_manager.py @@ -21,6 +21,7 @@ from oslo_utils.fixture import uuidsentinel from nova.compute import vm_states from nova import context +from nova import exception from nova import objects from nova.objects import fields from nova.pci import manager @@ -42,6 +43,8 @@ fake_pci_1 = dict(fake_pci, address='0000:00:00.2', product_id='p1', vendor_id='v1') fake_pci_2 = dict(fake_pci, address='0000:00:00.3') +fake_pci_devs = [fake_pci, fake_pci_1, fake_pci_2] + fake_pci_3 = dict(fake_pci, address='0000:00:01.1', dev_type=fields.PciDeviceType.SRIOV_PF, vendor_id='v2', product_id='p2', numa_node=None) @@ -53,6 +56,7 @@ fake_pci_5 = dict(fake_pci, address='0000:00:02.2', dev_type=fields.PciDeviceType.SRIOV_VF, parent_addr='0000:00:01.1', vendor_id='v2', product_id='p2', numa_node=None) +fake_pci_devs_tree = [fake_pci_3, fake_pci_4, fake_pci_5] fake_db_dev = { 'created_at': None, @@ -142,14 +146,14 @@ class PciDevTrackerTestCase(test.NoDBTestCase): requests=pci_reqs) def _create_tracker(self, fake_devs): - self.fake_devs = fake_devs + self.fake_devs = copy.deepcopy(fake_devs) self.tracker = manager.PciDevTracker( self.fake_context, objects.ComputeNode(id=1, numa_topology=None)) def setUp(self): super(PciDevTrackerTestCase, self).setUp() self.fake_context = context.get_admin_context() - self.fake_devs = fake_db_devs[:] + self.fake_devs = copy.deepcopy(fake_db_devs) self.stub_out('nova.db.main.api.pci_device_get_all_by_node', self._fake_get_pci_devices) # The fake_pci_whitelist must be called before creating the fake @@ -157,7 +161,7 @@ class PciDevTrackerTestCase(test.NoDBTestCase): patcher = pci_fakes.fake_pci_whitelist() self.addCleanup(patcher.stop) self._create_fake_instance() - self._create_tracker(fake_db_devs[:]) + self._create_tracker(fake_db_devs) def test_pcidev_tracker_create(self): self.assertEqual(len(self.tracker.pci_devs), 3) @@ -266,9 +270,8 @@ class PciDevTrackerTestCase(test.NoDBTestCase): def test_set_hvdev_new_dev(self): fake_pci_3 = dict(fake_pci, address='0000:00:00.4', vendor_id='v2') - fake_pci_devs = [copy.deepcopy(fake_pci), copy.deepcopy(fake_pci_1), - copy.deepcopy(fake_pci_2), copy.deepcopy(fake_pci_3)] - self.tracker._set_hvdevs(fake_pci_devs) + fake_pci_devs = [fake_pci, fake_pci_1, fake_pci_2, fake_pci_3] + self.tracker._set_hvdevs(copy.deepcopy(fake_pci_devs)) self.assertEqual(len(self.tracker.pci_devs), 4) self.assertEqual(set([dev.address for dev in self.tracker.pci_devs]), @@ -284,11 +287,8 @@ class PciDevTrackerTestCase(test.NoDBTestCase): self._create_tracker(fake_db_devs_tree) fake_new_device = dict(fake_pci_5, id=12, address='0000:00:02.3') - fake_pci_devs = [copy.deepcopy(fake_pci_3), - copy.deepcopy(fake_pci_4), - copy.deepcopy(fake_pci_5), - copy.deepcopy(fake_new_device)] - self.tracker._set_hvdevs(fake_pci_devs) + fake_pci_devs = [fake_pci_3, fake_pci_4, fake_pci_5, fake_new_device] + self.tracker._set_hvdevs(copy.deepcopy(fake_pci_devs)) self.assertEqual(len(self.tracker.pci_devs), 4) pf = [dev for dev in self.tracker.pci_devs @@ -304,15 +304,14 @@ class PciDevTrackerTestCase(test.NoDBTestCase): def test_set_hvdev_changed(self): fake_pci_v2 = dict(fake_pci, address='0000:00:00.2', vendor_id='v1') - fake_pci_devs = [copy.deepcopy(fake_pci), copy.deepcopy(fake_pci_2), - copy.deepcopy(fake_pci_v2)] - self.tracker._set_hvdevs(fake_pci_devs) + fake_pci_devs = [fake_pci, fake_pci_2, fake_pci_v2] + self.tracker._set_hvdevs(copy.deepcopy(fake_pci_devs)) self.assertEqual(set([dev.vendor_id for dev in self.tracker.pci_devs]), set(['v', 'v1'])) def test_set_hvdev_remove(self): - self.tracker._set_hvdevs([fake_pci]) + self.tracker._set_hvdevs(copy.deepcopy([fake_pci])) self.assertEqual( len([dev for dev in self.tracker.pci_devs if dev.status == fields.PciDeviceStatus.REMOVED]), @@ -324,8 +323,8 @@ class PciDevTrackerTestCase(test.NoDBTestCase): # from previous scans) self._create_tracker(fake_db_devs_tree) - fake_pci_devs = [copy.deepcopy(fake_pci_3), copy.deepcopy(fake_pci_4)] - self.tracker._set_hvdevs(fake_pci_devs) + fake_pci_devs = [fake_pci_3, fake_pci_4] + self.tracker._set_hvdevs(copy.deepcopy(fake_pci_devs)) self.assertEqual( 2, len([dev for dev in self.tracker.pci_devs @@ -344,8 +343,9 @@ class PciDevTrackerTestCase(test.NoDBTestCase): # Make sure the device tree is properly maintained when there are # devices removed from the system that are allocated to vms. - all_devs = fake_db_devs_tree[:] - self._create_tracker(all_devs) + all_db_devs = fake_db_devs_tree + all_pci_devs = fake_pci_devs_tree + self._create_tracker(all_db_devs) # we start with 3 devices self.assertEqual( 3, @@ -358,7 +358,7 @@ class PciDevTrackerTestCase(test.NoDBTestCase): claimed_dev = self.tracker.claim_instance( mock.sentinel.context, pci_requests_obj, None)[0] - self.tracker._set_hvdevs(all_devs) + self.tracker._set_hvdevs(copy.deepcopy(all_pci_devs)) # and assert that no devices were removed self.assertEqual( 0, @@ -366,10 +366,10 @@ class PciDevTrackerTestCase(test.NoDBTestCase): if dev.status == fields.PciDeviceStatus.REMOVED])) # we then try to remove the allocated device from the set reported # by the driver. - fake_pci_devs = [dev for dev in all_devs + fake_pci_devs = [dev for dev in all_pci_devs if dev['address'] != claimed_dev.address] with mock.patch("nova.pci.manager.LOG.warning") as log: - self.tracker._set_hvdevs(fake_pci_devs) + self.tracker._set_hvdevs(copy.deepcopy(fake_pci_devs)) log.assert_called_once() args = log.call_args_list[0][0] # args of first call self.assertIn('Unable to remove device with', args[0]) @@ -380,7 +380,7 @@ class PciDevTrackerTestCase(test.NoDBTestCase): if dev.status == fields.PciDeviceStatus.REMOVED])) # free the device that was allocated and update tracker again self.tracker._free_device(claimed_dev) - self.tracker._set_hvdevs(fake_pci_devs) + self.tracker._set_hvdevs(copy.deepcopy(fake_pci_devs)) # and assert that one device is removed from the tracker self.assertEqual( 1, @@ -393,12 +393,249 @@ class PciDevTrackerTestCase(test.NoDBTestCase): self.tracker.claim_instance(mock.sentinel.context, pci_requests_obj, None) fake_pci_3 = dict(fake_pci, address='0000:00:00.2', vendor_id='v2') - fake_pci_devs = [copy.deepcopy(fake_pci), copy.deepcopy(fake_pci_2), - copy.deepcopy(fake_pci_3)] - self.tracker._set_hvdevs(fake_pci_devs) + fake_pci_devs = [fake_pci, fake_pci_2, fake_pci_3] + self.tracker._set_hvdevs(copy.deepcopy(fake_pci_devs)) self.assertEqual(len(self.tracker.stale), 1) self.assertEqual(self.tracker.stale['0000:00:00.2']['vendor_id'], 'v2') + def _get_device_by_address(self, address): + devs = [dev for dev in self.tracker.pci_devs if dev.address == address] + if len(devs) == 1: + return devs[0] + if devs: + raise ValueError('ambiguous address', devs) + else: + raise ValueError('device not found', address) + + def test_set_hvdevs_unavailable_vf_removed(self): + # We start with a PF parent and two VF children + self._create_tracker([fake_db_dev_3, fake_db_dev_4, fake_db_dev_5]) + pci_requests_obj = self._create_pci_requests_object( + [ + { + 'count': 1, + 'spec': [{'dev_type': fields.PciDeviceType.SRIOV_PF}] + } + ], + instance_uuid=uuidsentinel.instance1, + ) + # then claim and allocate the PF that makes the VFs unavailable + self.tracker.claim_instance( + mock.sentinel.context, pci_requests_obj, None) + self.tracker.allocate_instance({'uuid': uuidsentinel.instance1}) + + dev3_pf = self._get_device_by_address(fake_db_dev_3['address']) + self.assertEqual('allocated', dev3_pf.status) + self.assertEqual(uuidsentinel.instance1, dev3_pf.instance_uuid) + dev4_vf = self._get_device_by_address(fake_db_dev_4['address']) + self.assertEqual('unavailable', dev4_vf.status) + dev5_vf = self._get_device_by_address(fake_db_dev_5['address']) + self.assertEqual('unavailable', dev5_vf.status) + + # now simulate that one VF (dev_5) is removed from the hypervisor and + # the compute is restarted. As the VF is not claimed or allocated we + # are free to remove it from the tracker. + self.tracker._set_hvdevs(copy.deepcopy([fake_pci_3, fake_pci_4])) + + dev3_pf = self._get_device_by_address(fake_db_dev_3['address']) + self.assertEqual('allocated', dev3_pf.status) + self.assertEqual(uuidsentinel.instance1, dev3_pf.instance_uuid) + dev4_vf = self._get_device_by_address(fake_db_dev_4['address']) + self.assertEqual('unavailable', dev4_vf.status) + dev5_vf = self._get_device_by_address(fake_db_dev_5['address']) + self.assertEqual('removed', dev5_vf.status) + + def test_set_hvdevs_unavailable_pf_removed(self): + # We start with one PF parent and one child VF + self._create_tracker([fake_db_dev_3, fake_db_dev_4]) + pci_requests_obj = self._create_pci_requests_object( + [ + { + 'count': 1, + 'spec': [{'dev_type': fields.PciDeviceType.SRIOV_VF}] + } + ], + instance_uuid=uuidsentinel.instance1, + ) + # Then we claim and allocate the VF that makes the PF unavailable + self.tracker.claim_instance( + mock.sentinel.context, pci_requests_obj, None) + self.tracker.allocate_instance({'uuid': uuidsentinel.instance1}) + + dev3_pf = self._get_device_by_address(fake_db_dev_3['address']) + self.assertEqual('unavailable', dev3_pf.status) + dev4_vf = self._get_device_by_address(fake_db_dev_4['address']) + self.assertEqual('allocated', dev4_vf.status) + self.assertEqual(uuidsentinel.instance1, dev4_vf.instance_uuid) + + # now simulate that the parent PF is removed from the hypervisor and + # the compute is restarted. As the PF is not claimed or allocated we + # are free to remove it from the tracker. + self.tracker._set_hvdevs(copy.deepcopy([fake_pci_4])) + + dev3_pf = self._get_device_by_address(fake_db_dev_3['address']) + self.assertEqual('removed', dev3_pf.status) + dev4_vf = self._get_device_by_address(fake_db_dev_4['address']) + self.assertEqual('allocated', dev4_vf.status) + self.assertEqual(uuidsentinel.instance1, dev4_vf.instance_uuid) + + def test_claim_available_pf_while_child_vf_is_unavailable(self): + # NOTE(gibi): this is bug 1969496. The state created here is + # inconsistent and should not happen. But it did happen in some cases + # where we were not able to track down the way how it happened. + + # We start with a PF parent and a VF child. The PF is available and + # the VF is unavailable. + pf = copy.deepcopy(fake_db_dev_3) + vf = copy.deepcopy(fake_db_dev_4) + vf['status'] = fields.PciDeviceStatus.UNAVAILABLE + self._create_tracker([pf, vf]) + + pf_dev = self._get_device_by_address(pf['address']) + self.assertEqual('available', pf_dev.status) + vf_dev = self._get_device_by_address(vf['address']) + self.assertEqual('unavailable', vf_dev.status) + + pci_requests_obj = self._create_pci_requests_object( + [ + { + 'count': 1, + 'spec': [{'dev_type': fields.PciDeviceType.SRIOV_PF}] + } + ], + instance_uuid=uuidsentinel.instance1, + ) + # now try to claim and allocate the PF. It should work as it is + # available + self.tracker.claim_instance( + mock.sentinel.context, pci_requests_obj, None) + self.tracker.allocate_instance({'uuid': uuidsentinel.instance1}) + + pf_dev = self._get_device_by_address(pf['address']) + self.assertEqual('allocated', pf_dev.status) + vf_dev = self._get_device_by_address(vf['address']) + self.assertEqual('unavailable', vf_dev.status) + + self.assertIn( + 'Some child device of parent 0000:00:01.1 is in an inconsistent ' + 'state. If you can reproduce this warning then please report a ' + 'bug at https://bugs.launchpad.net/nova/+filebug with ' + 'reproduction steps. Inconsistent children with state: ' + '0000:00:02.1 - unavailable', + self.stdlog.logger.output + ) + + # Ensure that the claim actually fixes the inconsistency so when the + # parent if freed the children become available too. + self.tracker.free_instance( + mock.sentinel.context, {'uuid': uuidsentinel.instance1}) + + pf_dev = self._get_device_by_address(pf['address']) + self.assertEqual('available', pf_dev.status) + vf_dev = self._get_device_by_address(vf['address']) + self.assertEqual('available', vf_dev.status) + + def test_claim_available_pf_while_children_vfs_are_in_mixed_state(self): + # We start with a PF parent and two VF children. The PF is available + # and one of the VF is unavailable while the other is available. + pf = copy.deepcopy(fake_db_dev_3) + vf1 = copy.deepcopy(fake_db_dev_4) + vf1['status'] = fields.PciDeviceStatus.UNAVAILABLE + vf2 = copy.deepcopy(fake_db_dev_5) + vf2['status'] = fields.PciDeviceStatus.AVAILABLE + self._create_tracker([pf, vf1, vf2]) + + pf_dev = self._get_device_by_address(pf['address']) + self.assertEqual('available', pf_dev.status) + vf1_dev = self._get_device_by_address(vf1['address']) + self.assertEqual('unavailable', vf1_dev.status) + vf2_dev = self._get_device_by_address(vf2['address']) + self.assertEqual('available', vf2_dev.status) + + pci_requests_obj = self._create_pci_requests_object( + [ + { + 'count': 1, + 'spec': [{'dev_type': fields.PciDeviceType.SRIOV_PF}] + } + ], + instance_uuid=uuidsentinel.instance1, + ) + # now try to claim and allocate the PF. It should work as it is + # available + self.tracker.claim_instance( + mock.sentinel.context, pci_requests_obj, None) + self.tracker.allocate_instance({'uuid': uuidsentinel.instance1}) + + pf_dev = self._get_device_by_address(pf['address']) + self.assertEqual('allocated', pf_dev.status) + vf1_dev = self._get_device_by_address(vf1['address']) + self.assertEqual('unavailable', vf1_dev.status) + vf2_dev = self._get_device_by_address(vf2['address']) + self.assertEqual('unavailable', vf2_dev.status) + + self.assertIn( + 'Some child device of parent 0000:00:01.1 is in an inconsistent ' + 'state. If you can reproduce this warning then please report a ' + 'bug at https://bugs.launchpad.net/nova/+filebug with ' + 'reproduction steps. Inconsistent children with state: ' + '0000:00:02.1 - unavailable', + self.stdlog.logger.output + ) + + # Ensure that the claim actually fixes the inconsistency so when the + # parent if freed the children become available too. + self.tracker.free_instance( + mock.sentinel.context, {'uuid': uuidsentinel.instance1}) + + pf_dev = self._get_device_by_address(pf['address']) + self.assertEqual('available', pf_dev.status) + vf1_dev = self._get_device_by_address(vf1['address']) + self.assertEqual('available', vf1_dev.status) + vf2_dev = self._get_device_by_address(vf2['address']) + self.assertEqual('available', vf2_dev.status) + + def test_claim_available_pf_while_a_child_is_used(self): + pf = copy.deepcopy(fake_db_dev_3) + vf1 = copy.deepcopy(fake_db_dev_4) + vf1['status'] = fields.PciDeviceStatus.UNAVAILABLE + vf2 = copy.deepcopy(fake_db_dev_5) + vf2['status'] = fields.PciDeviceStatus.CLAIMED + self._create_tracker([pf, vf1, vf2]) + + pf_dev = self._get_device_by_address(pf['address']) + self.assertEqual('available', pf_dev.status) + vf1_dev = self._get_device_by_address(vf1['address']) + self.assertEqual('unavailable', vf1_dev.status) + vf2_dev = self._get_device_by_address(vf2['address']) + self.assertEqual('claimed', vf2_dev.status) + + pci_requests_obj = self._create_pci_requests_object( + [ + { + 'count': 1, + 'spec': [{'dev_type': fields.PciDeviceType.SRIOV_PF}] + } + ], + instance_uuid=uuidsentinel.instance1, + ) + # now try to claim and allocate the PF. The claim should fail as on of + # the child is used. + self.assertRaises( + exception.PciDeviceVFInvalidStatus, + self.tracker.claim_instance, + mock.sentinel.context, + pci_requests_obj, + None, + ) + + pf_dev = self._get_device_by_address(pf['address']) + self.assertEqual('available', pf_dev.status) + vf1_dev = self._get_device_by_address(vf1['address']) + self.assertEqual('unavailable', vf1_dev.status) + vf2_dev = self._get_device_by_address(vf2['address']) + self.assertEqual('claimed', vf2_dev.status) + def test_update_pci_for_instance_active(self): pci_requests_obj = self._create_pci_requests_object(fake_pci_requests) self.tracker.claim_instance(mock.sentinel.context, @@ -424,13 +661,13 @@ class PciDevTrackerTestCase(test.NoDBTestCase): self.assertIsNone(devs) def test_pci_claim_instance_with_numa(self): - fake_db_dev_3 = dict(fake_db_dev_1, id=4, address='0000:00:00.4') - fake_devs_numa = copy.deepcopy(fake_db_devs) - fake_devs_numa.append(fake_db_dev_3) + fake_pci_3 = dict(fake_pci_1, address='0000:00:00.4') + fake_devs_numa = copy.deepcopy(fake_pci_devs) + fake_devs_numa.append(fake_pci_3) self.tracker = manager.PciDevTracker( mock.sentinel.context, objects.ComputeNode(id=1, numa_topology=None)) - self.tracker._set_hvdevs(fake_devs_numa) + self.tracker._set_hvdevs(copy.deepcopy(fake_devs_numa)) pci_requests = copy.deepcopy(fake_pci_requests)[:1] pci_requests[0]['count'] = 2 pci_requests_obj = self._create_pci_requests_object(pci_requests) @@ -477,9 +714,8 @@ class PciDevTrackerTestCase(test.NoDBTestCase): 'nova.db.main.api.pci_device_update', self._fake_pci_device_update) fake_pci_v3 = dict(fake_pci, address='0000:00:00.2', vendor_id='v3') - fake_pci_devs = [copy.deepcopy(fake_pci), copy.deepcopy(fake_pci_2), - copy.deepcopy(fake_pci_v3)] - self.tracker._set_hvdevs(fake_pci_devs) + fake_pci_devs = [fake_pci, fake_pci_2, fake_pci_v3] + self.tracker._set_hvdevs(copy.deepcopy(fake_pci_devs)) self.update_called = 0 self.tracker.save(self.fake_context) self.assertEqual(self.update_called, 3) diff --git a/releasenotes/notes/bug-1970383-segment-scheduling-permissions-92ba907b10a9eb1c.yaml b/releasenotes/notes/bug-1970383-segment-scheduling-permissions-92ba907b10a9eb1c.yaml new file mode 100644 index 0000000000..88495079e7 --- /dev/null +++ b/releasenotes/notes/bug-1970383-segment-scheduling-permissions-92ba907b10a9eb1c.yaml @@ -0,0 +1,7 @@ +--- +fixes: + - | + `Bug #1970383 <https://bugs.launchpad.net/nova/+bug/1970383>`_: Fixes a + permissions error when using the + 'query_placement_for_routed_network_aggregates' scheduler variable, which + caused a traceback on instance creation for non-admin users. |