author    Zuul <zuul@review.opendev.org>            2022-02-11 15:22:27 +0000
committer Gerrit Code Review <review@openstack.org> 2022-02-11 15:22:27 +0000
commit    9b3d69c18525c4308ffd3dbb619c6ed8789eb9f0 (patch)
tree      d965f40b24d56c549235797a4e8c9595247eaa35
parent    69fafb93fc0f2fe81b9c124eed0929fe33c4e7b2 (diff)
parent    c531fdcc192afb5af628ac567cb0ff8aa3eab052 (diff)
download  nova-9b3d69c18525c4308ffd3dbb619c6ed8789eb9f0.tar.gz
Merge "Add a WA flag waiting for vif-plugged event during reboot" into stable/victoria
-rw-r--r--  .zuul.yaml                                   |  6
-rw-r--r--  nova/conf/workarounds.py                     | 53
-rw-r--r--  nova/tests/unit/virt/libvirt/test_driver.py  | 43
-rw-r--r--  nova/virt/libvirt/driver.py                  | 23
-rw-r--r--  releasenotes/notes/bug-1946729-wait-for-vif-plugged-event-during-hard-reboot-fb491f6a68370bab.yaml | 18
5 files changed, 141 insertions, 2 deletions
diff --git a/.zuul.yaml b/.zuul.yaml
index c00865e504..aa371db06c 100644
--- a/.zuul.yaml
+++ b/.zuul.yaml
@@ -191,6 +191,12 @@
# reduce the number of placement calls in steady state. Added in
# Stein.
resource_provider_association_refresh: 0
+ workarounds:
+ # This workaround is an improvement to hard reboot that cannot be
+ # turned on unconditionally. But we know that ml2/ovs sends plug-time
+ # events, so we can enable it in this ovs job for vnic_type
+ # normal.
+ wait_for_vif_plugged_event_during_hard_reboot: normal
$NOVA_CONF:
quota:
# Added in Train.
diff --git a/nova/conf/workarounds.py b/nova/conf/workarounds.py
index 8eadc0b6ec..4e64d87578 100644
--- a/nova/conf/workarounds.py
+++ b/nova/conf/workarounds.py
@@ -346,6 +346,59 @@ Related options:
* :oslo.config:option:`image_cache.subdirectory_name`
* :oslo.config:option:`update_resources_interval`
"""),
+ cfg.ListOpt('wait_for_vif_plugged_event_during_hard_reboot',
+ item_type=cfg.types.String(
+ choices=[
+ "normal",
+ "direct",
+ "macvtap",
+ "baremetal",
+ "direct-physical",
+ "virtio-forwarder",
+ "smart-nic",
+ ]),
+ default=[],
+ help="""
+The libvirt virt driver implements power on and hard reboot by tearing down
+every vif of the instance being rebooted and then plugging them again. By
+default nova does not wait for the network-vif-plugged event from neutron
+before it lets the instance run. This can cause the instance to request its IP
+via DHCP before the neutron backend has had a chance to set up the networking
+backend after the vif plug.
+
+This flag defines which vifs nova expects network-vif-plugged events from
+during hard reboot. The possible values are neutron port vnic types:
+
+* normal
+* direct
+* macvtap
+* baremetal
+* direct-physical
+* virtio-forwarder
+* smart-nic
+
+Adding a ``vnic_type`` to this configuration makes Nova wait for a
+network-vif-plugged event for each of the instance's vifs having the specific
+``vnic_type`` before unpausing the instance, similarly to how new instance
+creation works.
+
+Please note that not all neutron networking backends send plug time events for
+every ``vnic_type``, therefore this config is empty by default.
+
+The ml2/ovs and the networking-odl backends are known to send plug time events
+for ports with ``normal`` ``vnic_type`` so it is safe to add ``normal`` to this
+config if you are using only those backends on the compute host.
+
+The neutron in-tree SRIOV backend does not reliably send the
+network-vif-plugged event at plug time for ports with ``direct`` vnic_type and
+never sends that event for ports with ``direct-physical`` vnic_type at plug
+time. For other ``vnic_type`` and backend pairs, please consult the developers
+of the backend.
+
+Related options:
+
+* :oslo.config:option:`DEFAULT.vif_plugging_timeout`
+"""),
]
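
The new option is a plain oslo.config ListOpt whose items are restricted to the vnic types listed above, with an empty default so nothing changes unless an operator opts in. The following is a minimal standalone sketch, not nova's actual wiring: it registers an option with the same shape and shows how an operator-style setting (for example ``wait_for_vif_plugged_event_during_hard_reboot = normal`` in the ``[workarounds]`` section of nova.conf) would surface to code reading the option; the CONF object and the override call here are purely illustrative.

    # Standalone illustration only: the option definition mirrors the hunk
    # above, but this ConfigOpts instance and the override are not how nova
    # actually loads its configuration.
    from oslo_config import cfg

    CONF = cfg.ConfigOpts()
    CONF.register_opts(
        [
            cfg.ListOpt(
                'wait_for_vif_plugged_event_during_hard_reboot',
                item_type=cfg.types.String(
                    choices=[
                        'normal', 'direct', 'macvtap', 'baremetal',
                        'direct-physical', 'virtio-forwarder', 'smart-nic',
                    ]),
                default=[]),
        ],
        group='workarounds')
    CONF(args=[])

    # Default: empty list, so hard reboot does not wait for any plug events.
    print(CONF.workarounds.wait_for_vif_plugged_event_during_hard_reboot)  # []

    # An ml2/ovs deployment could opt in for ports with the 'normal'
    # vnic_type; emulated here with an in-process override instead of
    # editing nova.conf.
    CONF.set_override(
        'wait_for_vif_plugged_event_during_hard_reboot', ['normal'],
        group='workarounds')
    print(CONF.workarounds.wait_for_vif_plugged_event_during_hard_reboot)
    # ['normal']
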
diff --git a/nova/tests/unit/virt/libvirt/test_driver.py b/nova/tests/unit/virt/libvirt/test_driver.py
index 3a93eaa2a6..7576a6a60d 100644
--- a/nova/tests/unit/virt/libvirt/test_driver.py
+++ b/nova/tests/unit/virt/libvirt/test_driver.py
@@ -16288,7 +16288,48 @@ class LibvirtConnTestCase(test.NoDBTestCase,
accel_info=accel_info)
mock_create_guest_with_network.assert_called_once_with(self.context,
dummyxml, instance, network_info, block_device_info,
- vifs_already_plugged=True)
+ vifs_already_plugged=True, external_events=[])
+
+ @mock.patch('oslo_utils.fileutils.ensure_tree', new=mock.Mock())
+ @mock.patch('nova.virt.libvirt.LibvirtDriver.get_info')
+ @mock.patch('nova.virt.libvirt.LibvirtDriver._create_guest_with_network')
+ @mock.patch('nova.virt.libvirt.LibvirtDriver._get_guest_xml')
+ @mock.patch('nova.virt.libvirt.LibvirtDriver.destroy', new=mock.Mock())
+ @mock.patch(
+ 'nova.virt.libvirt.LibvirtDriver._get_all_assigned_mediated_devices',
+ new=mock.Mock(return_value={}))
+ def test_hard_reboot_wait_for_plug(
+ self, mock_get_guest_xml, mock_create_guest_with_network, mock_get_info
+ ):
+ self.flags(
+ group="workarounds",
+ wait_for_vif_plugged_event_during_hard_reboot=["normal"])
+ self.context.auth_token = None
+ instance = objects.Instance(**self.test_instance)
+ network_info = _fake_network_info(self, num_networks=4)
+ network_info[0]["vnic_type"] = "normal"
+ network_info[1]["vnic_type"] = "direct"
+ network_info[2]["vnic_type"] = "normal"
+ network_info[3]["vnic_type"] = "direct-physical"
+ block_device_info = None
+ return_values = [hardware.InstanceInfo(state=power_state.SHUTDOWN),
+ hardware.InstanceInfo(state=power_state.RUNNING)]
+ mock_get_info.side_effect = return_values
+ mock_get_guest_xml.return_value = mock.sentinel.xml
+
+ drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
+ drvr._hard_reboot(
+ self.context, instance, network_info, block_device_info)
+
+ mock_create_guest_with_network.assert_called_once_with(
+ self.context, mock.sentinel.xml, instance, network_info,
+ block_device_info,
+ vifs_already_plugged=False,
+ external_events=[
+ ('network-vif-plugged', uuids.vif1),
+ ('network-vif-plugged', uuids.vif3),
+ ]
+ )

@mock.patch('oslo_utils.fileutils.ensure_tree')
@mock.patch('oslo_service.loopingcall.FixedIntervalLoopingCall')
diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py
index fbd033690a..2558a49f7d 100644
--- a/nova/virt/libvirt/driver.py
+++ b/nova/virt/libvirt/driver.py
@@ -3383,11 +3383,32 @@ class LibvirtDriver(driver.ComputeDriver):
# on which vif type we're using and we are working with a stale network
# info cache here, so won't rely on waiting for neutron plug events.
# vifs_already_plugged=True means "do not wait for neutron plug events"
+ external_events = []
+ vifs_already_plugged = True
+ event_expected_for_vnic_types = (
+ CONF.workarounds.wait_for_vif_plugged_event_during_hard_reboot)
+ if event_expected_for_vnic_types:
+ # NOTE(gibi): We unplugged every vif during destroy above and we
+ # will replug them with _create_guest_with_network. As the
+ # workaround config has some vnic_types configured we expect
+ # vif-plugged events for every vif with those vnic_types.
+ # TODO(gibi): only wait for events if we know that the networking
+ # backend sends plug time events. For that we need to finish
+ # https://bugs.launchpad.net/neutron/+bug/1821058 first in Neutron
+ # then create a driver -> plug-time event mapping in nova.
+ external_events = [
+ ('network-vif-plugged', vif['id'])
+ for vif in network_info
+ if vif['vnic_type'] in event_expected_for_vnic_types
+ ]
+ vifs_already_plugged = False
+
# NOTE(efried): The instance should already have a vtpm_secret_uuid
# registered if appropriate.
self._create_guest_with_network(
context, xml, instance, network_info, block_device_info,
- vifs_already_plugged=True)
+ vifs_already_plugged=vifs_already_plugged,
+ external_events=external_events)
self._prepare_pci_devices_for_use(
pci_manager.get_instance_pci_devs(instance, 'all'))
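
To make the effect of the new branch concrete, here is a small self-contained Python sketch of the same selection logic using plain dicts in place of nova's network_info objects; the vif ids are invented for the example. With the workaround set to ['normal'] it yields events for the first and third vif only, and leaves vifs_already_plugged False, which is exactly what the unit test above asserts.

    # Illustrative only: plain dicts stand in for nova's network_info model.
    event_expected_for_vnic_types = ['normal']

    network_info = [
        {'id': 'vif1', 'vnic_type': 'normal'},
        {'id': 'vif2', 'vnic_type': 'direct'},
        {'id': 'vif3', 'vnic_type': 'normal'},
        {'id': 'vif4', 'vnic_type': 'direct-physical'},
    ]

    # Same comprehension as in _hard_reboot above: only vifs whose vnic_type
    # is listed in the workaround produce an expected event.
    external_events = [
        ('network-vif-plugged', vif['id'])
        for vif in network_info
        if vif['vnic_type'] in event_expected_for_vnic_types
    ]

    # As in the driver change, waiting is only skipped when the workaround
    # list is empty.
    vifs_already_plugged = not event_expected_for_vnic_types

    print(external_events)
    # [('network-vif-plugged', 'vif1'), ('network-vif-plugged', 'vif3')]
    print(vifs_already_plugged)  # False
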
diff --git a/releasenotes/notes/bug-1946729-wait-for-vif-plugged-event-during-hard-reboot-fb491f6a68370bab.yaml b/releasenotes/notes/bug-1946729-wait-for-vif-plugged-event-during-hard-reboot-fb491f6a68370bab.yaml
new file mode 100644
index 0000000000..c3686a9978
--- /dev/null
+++ b/releasenotes/notes/bug-1946729-wait-for-vif-plugged-event-during-hard-reboot-fb491f6a68370bab.yaml
@@ -0,0 +1,18 @@
+---
+issues:
+ - |
+ The libvirt virt driver in Nova implements power on and hard reboot by
+ destroying the domain first and unplugging the vifs, then recreating the
+ domain and replugging the vifs. However nova does not wait for the
+ network-vif-plugged event before unpausing the domain. This can cause
+ the domain to start running and request an IP via DHCP before the
+ networking backend has finished plugging the vifs. The config option
+ [workarounds]wait_for_vif_plugged_event_during_hard_reboot has been added,
+ defaulting to an empty list, which can be used to ensure that the libvirt
+ driver waits for the network-vif-plugged event for vifs with a specific
+ ``vnic_type`` before it unpauses the domain during hard reboot. This should
+ only be used if the deployment uses a networking backend that sends such
+ an event for the given ``vif_type`` at vif plug time. The ml2/ovs and the
+ networking-odl Neutron backends are known to send plug time events for
+ ports with ``normal`` ``vnic_type``. For more information see
+ https://bugs.launchpad.net/nova/+bug/1946729