summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBalazs Gibizer <gibi@redhat.com>2022-07-15 13:48:46 +0200
committerBalazs Gibizer <gibi@redhat.com>2023-01-25 15:42:14 +0100
commit1a98a1a650d065a8ab3e1c474f3b9fd537dc2206 (patch)
tree875901d48a11833f1fdba07c5ffdde8aa7716b78
parent0c87681135cfb3ce61d2a0392928c1dbc1fe5fde (diff)
downloadnova-1a98a1a650d065a8ab3e1c474f3b9fd537dc2206.tar.gz
Gracefully ERROR in _init_instance if vnic_type changed
If the vnic_type of a bound port changes from "direct" to "macvtap" and then the compute service is restarted then during _init_instance nova tries to plug the vif of the changed port. However as it now has macvtap vnic_type nova tries to look up the netdev of the parent VF. Still that VF is consumed by the instance so there is no such netdev on the host OS. This error killed the compute service at startup due to unhandled exception. This patch adds the exception handler, logs an ERROR and continue initializing other instances on the host. Also this patch adds a detailed ERROR log when nova detects that the vnic_type changed during _heal_instance_info_cache periodic. Closes-Bug: #1981813 Change-Id: I1719f8eda04e8d15a3b01f0612977164c4e55e85 (cherry picked from commit e43bf900dc8ca66578603bed333c56b215b1876e) (cherry picked from commit a28c82719545d5c8ee7f3ff1361b3a796e05095a)
-rw-r--r--nova/compute/manager.py14
-rw-r--r--nova/network/neutron.py34
-rw-r--r--nova/tests/functional/libvirt/test_pci_sriov_servers.py23
-rw-r--r--nova/tests/unit/compute/test_compute_mgr.py30
-rw-r--r--nova/tests/unit/network/test_neutron.py149
-rw-r--r--releasenotes/notes/bug-1981813-vnic-type-change-9f3e16fae885b57f.yaml9
6 files changed, 252 insertions, 7 deletions
diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index 537d27e5b1..b38875cbc7 100644
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -1151,6 +1151,20 @@ class ComputeManager(manager.Manager):
'updated.', instance=instance)
self._set_instance_obj_error_state(instance)
return
+ except exception.PciDeviceNotFoundById:
+ # This is bug 1981813 where the bound port vnic_type has changed
+ # from direct to macvtap. Nova does not support that and it
+ # already printed an ERROR when the change is detected during
+ # _heal_instance_info_cache. Now we print an ERROR again and skip
+ # plugging the vifs but let the service startup continue to init
+ # the other instances
+ LOG.exception(
+ 'Virtual interface plugging failed for instance. Probably the '
+ 'vnic_type of the bound port has been changed. Nova does not '
+ 'support such change.',
+ instance=instance
+ )
+ return
if instance.task_state == task_states.RESIZE_MIGRATING:
# We crashed during resize/migration, so roll back for safety
diff --git a/nova/network/neutron.py b/nova/network/neutron.py
index 8ea3f7e6bb..5b68d5ab10 100644
--- a/nova/network/neutron.py
+++ b/nova/network/neutron.py
@@ -3236,6 +3236,25 @@ class API:
delegate_create=True,
)
+ def _log_error_if_vnic_type_changed(
+ self, port_id, old_vnic_type, new_vnic_type, instance
+ ):
+ if old_vnic_type and old_vnic_type != new_vnic_type:
+ LOG.error(
+ 'The vnic_type of the bound port %s has '
+ 'been changed in neutron from "%s" to '
+ '"%s". Changing vnic_type of a bound port '
+ 'is not supported by Nova. To avoid '
+ 'breaking the connectivity of the instance '
+ 'please change the port vnic_type back to '
+ '"%s".',
+ port_id,
+ old_vnic_type,
+ new_vnic_type,
+ old_vnic_type,
+ instance=instance
+ )
+
def _build_network_info_model(self, context, instance, networks=None,
port_ids=None, admin_client=None,
preexisting_port_ids=None,
@@ -3309,6 +3328,12 @@ class API:
preexisting_port_ids)
for index, vif in enumerate(nw_info):
if vif['id'] == refresh_vif_id:
+ self._log_error_if_vnic_type_changed(
+ vif['id'],
+ vif['vnic_type'],
+ refreshed_vif['vnic_type'],
+ instance,
+ )
# Update the existing entry.
nw_info[index] = refreshed_vif
LOG.debug('Updated VIF entry in instance network '
@@ -3358,6 +3383,7 @@ class API:
networks, port_ids = self._gather_port_ids_and_networks(
context, instance, networks, port_ids, client)
+ old_nw_info = instance.get_network_info()
nw_info = network_model.NetworkInfo()
for port_id in port_ids:
current_neutron_port = current_neutron_port_map.get(port_id)
@@ -3365,6 +3391,14 @@ class API:
vif = self._build_vif_model(
context, client, current_neutron_port, networks,
preexisting_port_ids)
+ for old_vif in old_nw_info:
+ if old_vif['id'] == port_id:
+ self._log_error_if_vnic_type_changed(
+ port_id,
+ old_vif['vnic_type'],
+ vif['vnic_type'],
+ instance,
+ )
nw_info.append(vif)
elif nw_info_refresh:
LOG.info('Port %s from network info_cache is no '
diff --git a/nova/tests/functional/libvirt/test_pci_sriov_servers.py b/nova/tests/functional/libvirt/test_pci_sriov_servers.py
index 976015a1ac..9d8d0a6f8c 100644
--- a/nova/tests/functional/libvirt/test_pci_sriov_servers.py
+++ b/nova/tests/functional/libvirt/test_pci_sriov_servers.py
@@ -979,6 +979,14 @@ class SRIOVServersTest(_PCIServersWithMigrationTestBase):
self.host_mappings['compute1'].cell_mapping
) as cctxt:
compute.manager._heal_instance_info_cache(cctxt)
+ self.assertIn(
+ 'The vnic_type of the bound port %s has been changed in '
+ 'neutron from "direct" to "macvtap". Changing vnic_type of a '
+ 'bound port is not supported by Nova. To avoid breaking the '
+ 'connectivity of the instance please change the port '
+ 'vnic_type back to "direct".' % port['id'],
+ self.stdlog.logger.output,
+ )
def fake_get_ifname_by_pci_address(pci_addr: str, pf_interface=False):
# we want to fail the netdev lookup only if the pci_address is
@@ -1006,17 +1014,18 @@ class SRIOVServersTest(_PCIServersWithMigrationTestBase):
'nova.pci.utils.get_ifname_by_pci_address',
side_effect=fake_get_ifname_by_pci_address,
):
- # This is bug 1981813 as the compute service fails to start with an
- # exception.
# Nova cannot prevent the vnic_type change on a bound port. Neutron
# should prevent that instead. But the nova-compute should still
# be able to start up and only log an ERROR for this instance in
# inconsistent state.
- self.assertRaises(
- exception.PciDeviceNotFoundById,
- self.restart_compute_service,
- 'compute1',
- )
+ self.restart_compute_service('compute1')
+
+ self.assertIn(
+ 'Virtual interface plugging failed for instance. Probably the '
+ 'vnic_type of the bound port has been changed. Nova does not '
+ 'support such change.',
+ self.stdlog.logger.output,
+ )
class SRIOVAttachDetachTest(_PCIServersTestBase):
diff --git a/nova/tests/unit/compute/test_compute_mgr.py b/nova/tests/unit/compute/test_compute_mgr.py
index 152649a2e5..6601a2975d 100644
--- a/nova/tests/unit/compute/test_compute_mgr.py
+++ b/nova/tests/unit/compute/test_compute_mgr.py
@@ -1306,6 +1306,36 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
self.compute._init_instance(self.context, instance)
set_error_state.assert_called_once_with(instance)
+ def test_init_instance_vif_plug_fails_missing_pci(self):
+ instance = fake_instance.fake_instance_obj(
+ self.context,
+ uuid=uuids.instance,
+ info_cache=None,
+ power_state=power_state.RUNNING,
+ vm_state=vm_states.ACTIVE,
+ task_state=None,
+ host=self.compute.host,
+ expected_attrs=['info_cache'])
+
+ with test.nested(
+ mock.patch.object(context, 'get_admin_context',
+ return_value=self.context),
+ mock.patch.object(objects.Instance, 'get_network_info',
+ return_value=network_model.NetworkInfo()),
+ mock.patch.object(self.compute.driver, 'plug_vifs',
+ side_effect=exception.PciDeviceNotFoundById("pci-addr")),
+ mock.patch("nova.compute.manager.LOG.exception"),
+ ) as (get_admin_context, get_nw_info, plug_vifs, log_exception):
+ # as this does not raise, we are sure that the compute service
+ # continues initializing the rest of the instances
+ self.compute._init_instance(self.context, instance)
+ log_exception.assert_called_once_with(
+ "Virtual interface plugging failed for instance. Probably the "
+ "vnic_type of the bound port has been changed. Nova does not "
+ "support such change.",
+ instance=instance
+ )
+
def _test__validate_pinning_configuration(self, supports_pcpus=True):
instance_1 = fake_instance.fake_instance_obj(
self.context, uuid=uuids.instance_1)
diff --git a/nova/tests/unit/network/test_neutron.py b/nova/tests/unit/network/test_neutron.py
index 488deabfba..879f8a9ed6 100644
--- a/nova/tests/unit/network/test_neutron.py
+++ b/nova/tests/unit/network/test_neutron.py
@@ -3392,6 +3392,155 @@ class TestAPI(TestAPIBase):
mocked_client.list_ports.assert_called_once_with(
tenant_id=uuids.fake, device_id=uuids.instance)
+ @mock.patch.object(
+ neutronapi.API,
+ '_get_physnet_tunneled_info',
+ new=mock.Mock(return_value=(None, False)))
+ @mock.patch.object(
+ neutronapi.API,
+ '_get_preexisting_port_ids',
+ new=mock.Mock(return_value=[]))
+ @mock.patch.object(
+ neutronapi.API,
+ '_get_subnets_from_port',
+ new=mock.Mock(return_value=[model.Subnet(cidr='1.0.0.0/8')]))
+ @mock.patch.object(
+ neutronapi.API,
+ '_get_floating_ips_by_fixed_and_port',
+ new=mock.Mock(return_value=[{'floating_ip_address': '10.0.0.1'}]))
+ @mock.patch.object(neutronapi, 'get_client')
+ def test_build_network_info_model_full_vnic_type_change(
+ self, mock_get_client
+ ):
+ mocked_client = mock.create_autospec(client.Client)
+ mock_get_client.return_value = mocked_client
+ fake_inst = objects.Instance()
+ fake_inst.project_id = uuids.fake
+ fake_inst.uuid = uuids.instance
+ fake_ports = [
+ {
+ "id": "port1",
+ "network_id": "net-id",
+ "tenant_id": uuids.fake,
+ "admin_state_up": True,
+ "status": "ACTIVE",
+ "fixed_ips": [{"ip_address": "1.1.1.1"}],
+ "mac_address": "de:ad:be:ef:00:01",
+ "binding:vif_type": model.VIF_TYPE_BRIDGE,
+ "binding:vnic_type": model.VNIC_TYPE_DIRECT,
+ "binding:vif_details": {},
+ },
+ ]
+ mocked_client.list_ports.return_value = {'ports': fake_ports}
+ fake_inst.info_cache = objects.InstanceInfoCache.new(
+ self.context, uuids.instance)
+ fake_inst.info_cache.network_info = model.NetworkInfo.hydrate([])
+
+ # build the network info first
+ nw_infos = self.api._build_network_info_model(
+ self.context,
+ fake_inst,
+ force_refresh=True,
+ )
+
+ self.assertEqual(1, len(nw_infos))
+ fake_inst.info_cache.network_info = nw_infos
+
+ # change the vnic_type of the port and rebuild the network info
+ fake_ports[0]["binding:vnic_type"] = model.VNIC_TYPE_MACVTAP
+ with mock.patch(
+ "nova.network.neutron.API._log_error_if_vnic_type_changed"
+ ) as mock_log:
+ nw_infos = self.api._build_network_info_model(
+ self.context,
+ fake_inst,
+ force_refresh=True,
+ )
+
+ mock_log.assert_called_once_with(
+ fake_ports[0]["id"], "direct", "macvtap", fake_inst)
+ self.assertEqual(1, len(nw_infos))
+
+ @mock.patch.object(
+ neutronapi.API,
+ '_get_physnet_tunneled_info',
+ new=mock.Mock(return_value=(None, False)))
+ @mock.patch.object(
+ neutronapi.API,
+ '_get_preexisting_port_ids',
+ new=mock.Mock(return_value=[]))
+ @mock.patch.object(
+ neutronapi.API,
+ '_get_subnets_from_port',
+ new=mock.Mock(return_value=[model.Subnet(cidr='1.0.0.0/8')]))
+ @mock.patch.object(
+ neutronapi.API,
+ '_get_floating_ips_by_fixed_and_port',
+ new=mock.Mock(return_value=[{'floating_ip_address': '10.0.0.1'}]))
+ @mock.patch.object(neutronapi, 'get_client')
+ def test_build_network_info_model_single_vnic_type_change(
+ self, mock_get_client
+ ):
+ mocked_client = mock.create_autospec(client.Client)
+ mock_get_client.return_value = mocked_client
+ fake_inst = objects.Instance()
+ fake_inst.project_id = uuids.fake
+ fake_inst.uuid = uuids.instance
+ fake_ports = [
+ {
+ "id": "port1",
+ "network_id": "net-id",
+ "tenant_id": uuids.fake,
+ "admin_state_up": True,
+ "status": "ACTIVE",
+ "fixed_ips": [{"ip_address": "1.1.1.1"}],
+ "mac_address": "de:ad:be:ef:00:01",
+ "binding:vif_type": model.VIF_TYPE_BRIDGE,
+ "binding:vnic_type": model.VNIC_TYPE_DIRECT,
+ "binding:vif_details": {},
+ },
+ ]
+ fake_nets = [
+ {
+ "id": "net-id",
+ "name": "foo",
+ "tenant_id": uuids.fake,
+ }
+ ]
+ mocked_client.list_ports.return_value = {'ports': fake_ports}
+ fake_inst.info_cache = objects.InstanceInfoCache.new(
+ self.context, uuids.instance)
+ fake_inst.info_cache.network_info = model.NetworkInfo.hydrate([])
+
+ # build the network info first
+ nw_infos = self.api._build_network_info_model(
+ self.context,
+ fake_inst,
+ fake_nets,
+ [fake_ports[0]["id"]],
+ refresh_vif_id=fake_ports[0]["id"],
+ )
+
+ self.assertEqual(1, len(nw_infos))
+ fake_inst.info_cache.network_info = nw_infos
+
+ # change the vnic_type of the port and rebuild the network info
+ fake_ports[0]["binding:vnic_type"] = model.VNIC_TYPE_MACVTAP
+ with mock.patch(
+ "nova.network.neutron.API._log_error_if_vnic_type_changed"
+ ) as mock_log:
+ nw_infos = self.api._build_network_info_model(
+ self.context,
+ fake_inst,
+ fake_nets,
+ [fake_ports[0]["id"]],
+ refresh_vif_id=fake_ports[0]["id"],
+ )
+
+ mock_log.assert_called_once_with(
+ fake_ports[0]["id"], "direct", "macvtap", fake_inst)
+ self.assertEqual(1, len(nw_infos))
+
@mock.patch.object(neutronapi, 'get_client')
def test_get_subnets_from_port(self, mock_get_client):
mocked_client = mock.create_autospec(client.Client)
diff --git a/releasenotes/notes/bug-1981813-vnic-type-change-9f3e16fae885b57f.yaml b/releasenotes/notes/bug-1981813-vnic-type-change-9f3e16fae885b57f.yaml
new file mode 100644
index 0000000000..a5a3b7c8c2
--- /dev/null
+++ b/releasenotes/notes/bug-1981813-vnic-type-change-9f3e16fae885b57f.yaml
@@ -0,0 +1,9 @@
+---
+fixes:
+ - |
+ `Bug #1981813 <https://bugs.launchpad.net/nova/+bug/1981813>`_: Now nova
+ detects if the ``vnic_type`` of a bound port has been changed in neutron
+ and leaves an ERROR message in the compute service log as such change on a
+ bound port is not supported. Also the restart of the nova-compute service
+ will not crash any more after such port change. Nova will log an ERROR and
+ skip the initialization of the instance with such port during the startup.