diff options
author | Zuul <zuul@review.opendev.org> | 2022-02-11 15:22:27 +0000 |
---|---|---|
committer | Gerrit Code Review <review@openstack.org> | 2022-02-11 15:22:27 +0000 |
commit | 9b3d69c18525c4308ffd3dbb619c6ed8789eb9f0 (patch) | |
tree | d965f40b24d56c549235797a4e8c9595247eaa35 | |
parent | 69fafb93fc0f2fe81b9c124eed0929fe33c4e7b2 (diff) | |
parent | c531fdcc192afb5af628ac567cb0ff8aa3eab052 (diff) | |
download | nova-9b3d69c18525c4308ffd3dbb619c6ed8789eb9f0.tar.gz |
Merge "Add a WA flag waiting for vif-plugged event during reboot" into stable/victoria
-rw-r--r-- | .zuul.yaml | 6 | ||||
-rw-r--r-- | nova/conf/workarounds.py | 53 | ||||
-rw-r--r-- | nova/tests/unit/virt/libvirt/test_driver.py | 43 | ||||
-rw-r--r-- | nova/virt/libvirt/driver.py | 23 | ||||
-rw-r--r-- | releasenotes/notes/bug-1946729-wait-for-vif-plugged-event-during-hard-reboot-fb491f6a68370bab.yaml | 18 |
5 files changed, 141 insertions, 2 deletions
diff --git a/.zuul.yaml b/.zuul.yaml index c00865e504..aa371db06c 100644 --- a/.zuul.yaml +++ b/.zuul.yaml @@ -191,6 +191,12 @@ # reduce the number of placement calls in steady state. Added in # Stein. resource_provider_association_refresh: 0 + workarounds: + # This wa is an improvement on hard reboot that cannot be turned + # on unconditionally. But we know that ml2/ovs sends plug time + # events so we can enable this in this ovs job for vnic_type + # normal + wait_for_vif_plugged_event_during_hard_reboot: normal $NOVA_CONF: quota: # Added in Train. diff --git a/nova/conf/workarounds.py b/nova/conf/workarounds.py index 8eadc0b6ec..4e64d87578 100644 --- a/nova/conf/workarounds.py +++ b/nova/conf/workarounds.py @@ -346,6 +346,59 @@ Related options: * :oslo.config:option:`image_cache.subdirectory_name` * :oslo.config:option:`update_resources_interval` """), + cfg.ListOpt('wait_for_vif_plugged_event_during_hard_reboot', + item_type=cfg.types.String( + choices=[ + "normal", + "direct", + "macvtap", + "baremetal", + "direct-physical", + "virtio-forwarder", + "smart-nic", + ]), + default=[], + help=""" +The libvirt virt driver implements power on and hard reboot by tearing down +every vif of the instance being rebooted then plugging them again. By default +nova does not wait for the network-vif-plugged event from neutron before it +lets the instance run. This can cause the instance to request the IP via DHCP +before the neutron backend has a chance to set up the networking backend after +the vif plug. + +This flag defines which vifs nova expects network-vif-plugged events from +during hard reboot.
The possible values are neutron port vnic types: + +* normal +* direct +* macvtap +* baremetal +* direct-physical +* virtio-forwarder +* smart-nic + +Adding a ``vnic_type`` to this configuration makes Nova wait for a +network-vif-plugged event for each of the instance's vifs having the specific +``vnic_type`` before unpausing the instance, similarly to how new instance +creation works. + +Please note that not all neutron networking backends send plug time events for +certain ``vnic_type``, therefore this config is empty by default. + +The ml2/ovs and the networking-odl backends are known to send plug time events +for ports with ``normal`` ``vnic_type`` so it is safe to add ``normal`` to this +config if you are using only those backends in the compute host. + +The neutron in-tree SRIOV backend does not reliably send the +network-vif-plugged event during plug time for ports with ``direct`` vnic_type +and never sends that event for ports with ``direct-physical`` vnic_type during +plug time. For other ``vnic_type`` and backend pairs, please consult the +developers of the backend.
+ +Related options: + +* :oslo.config:option:`DEFAULT.vif_plugging_timeout` +"""), ] diff --git a/nova/tests/unit/virt/libvirt/test_driver.py b/nova/tests/unit/virt/libvirt/test_driver.py index 3a93eaa2a6..7576a6a60d 100644 --- a/nova/tests/unit/virt/libvirt/test_driver.py +++ b/nova/tests/unit/virt/libvirt/test_driver.py @@ -16288,7 +16288,48 @@ class LibvirtConnTestCase(test.NoDBTestCase, accel_info=accel_info) mock_create_guest_with_network.assert_called_once_with(self.context, dummyxml, instance, network_info, block_device_info, - vifs_already_plugged=True) + vifs_already_plugged=True, external_events=[]) + + @mock.patch('oslo_utils.fileutils.ensure_tree', new=mock.Mock()) + @mock.patch('nova.virt.libvirt.LibvirtDriver.get_info') + @mock.patch('nova.virt.libvirt.LibvirtDriver._create_guest_with_network') + @mock.patch('nova.virt.libvirt.LibvirtDriver._get_guest_xml') + @mock.patch('nova.virt.libvirt.LibvirtDriver.destroy', new=mock.Mock()) + @mock.patch( + 'nova.virt.libvirt.LibvirtDriver._get_all_assigned_mediated_devices', + new=mock.Mock(return_value={})) + def test_hard_reboot_wait_for_plug( + self, mock_get_guest_xml, mock_create_guest_with_network, mock_get_info + ): + self.flags( + group="workarounds", + wait_for_vif_plugged_event_during_hard_reboot=["normal"]) + self.context.auth_token = None + instance = objects.Instance(**self.test_instance) + network_info = _fake_network_info(self, num_networks=4) + network_info[0]["vnic_type"] = "normal" + network_info[1]["vnic_type"] = "direct" + network_info[2]["vnic_type"] = "normal" + network_info[3]["vnic_type"] = "direct-physical" + block_device_info = None + return_values = [hardware.InstanceInfo(state=power_state.SHUTDOWN), + hardware.InstanceInfo(state=power_state.RUNNING)] + mock_get_info.side_effect = return_values + mock_get_guest_xml.return_value = mock.sentinel.xml + + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) + drvr._hard_reboot( + self.context, instance, network_info, 
block_device_info) + + mock_create_guest_with_network.assert_called_once_with( + self.context, mock.sentinel.xml, instance, network_info, + block_device_info, + vifs_already_plugged=False, + external_events=[ + ('network-vif-plugged', uuids.vif1), + ('network-vif-plugged', uuids.vif3), + ] + ) @mock.patch('oslo_utils.fileutils.ensure_tree') @mock.patch('oslo_service.loopingcall.FixedIntervalLoopingCall') diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py index fbd033690a..2558a49f7d 100644 --- a/nova/virt/libvirt/driver.py +++ b/nova/virt/libvirt/driver.py @@ -3383,11 +3383,32 @@ class LibvirtDriver(driver.ComputeDriver): # on which vif type we're using and we are working with a stale network # info cache here, so won't rely on waiting for neutron plug events. # vifs_already_plugged=True means "do not wait for neutron plug events" + external_events = [] + vifs_already_plugged = True + event_expected_for_vnic_types = ( + CONF.workarounds.wait_for_vif_plugged_event_during_hard_reboot) + if event_expected_for_vnic_types: + # NOTE(gibi): We unplugged every vif during destroy above and we + # will replug them with _create_guest_with_network. As the + # workaround config has some vnic_types configured we expect + # vif-plugged events for every vif with those vnic_types. + # TODO(gibi): only wait for events if we know that the networking + # backend sends plug time events. For that we need to finish + # https://bugs.launchpad.net/neutron/+bug/1821058 first in Neutron + # then create a driver -> plug-time event mapping in nova. + external_events = [ + ('network-vif-plugged', vif['id']) + for vif in network_info + if vif['vnic_type'] in event_expected_for_vnic_types + ] + vifs_already_plugged = False + # NOTE(efried): The instance should already have a vtpm_secret_uuid # registered if appropriate. 
self._create_guest_with_network( context, xml, instance, network_info, block_device_info, - vifs_already_plugged=True) + vifs_already_plugged=vifs_already_plugged, + external_events=external_events) self._prepare_pci_devices_for_use( pci_manager.get_instance_pci_devs(instance, 'all')) diff --git a/releasenotes/notes/bug-1946729-wait-for-vif-plugged-event-during-hard-reboot-fb491f6a68370bab.yaml b/releasenotes/notes/bug-1946729-wait-for-vif-plugged-event-during-hard-reboot-fb491f6a68370bab.yaml new file mode 100644 index 0000000000..c3686a9978 --- /dev/null +++ b/releasenotes/notes/bug-1946729-wait-for-vif-plugged-event-during-hard-reboot-fb491f6a68370bab.yaml @@ -0,0 +1,18 @@ +--- +issues: + - | + The libvirt virt driver in Nova implements power on and hard reboot by + destroying the domain first and unplugging the vifs then recreating the + domain and replugging the vifs. However nova does not wait for the + network-vif-plugged event before unpausing the domain. This can cause + the domain to start running and requesting an IP via DHCP before the + networking backend has finished plugging the vifs. The config option + [workarounds]wait_for_vif_plugged_event_during_hard_reboot has been added, + defaulting to an empty list, that can be used to ensure that the libvirt + driver waits for the network-vif-plugged event for vifs with specific + ``vnic_type`` before it unpauses the domain during hard reboot. This should + only be used if the deployment uses a networking backend that sends such + events for the given ``vnic_type`` at vif plug time. The ml2/ovs and the + networking-odl Neutron backends are known to send plug time events for ports + with ``normal`` ``vnic_type``. For more information see + https://bugs.launchpad.net/nova/+bug/1946729 |