46 files changed, 1468 insertions, 429 deletions
diff --git a/.zuul.yaml b/.zuul.yaml index b2d5cf6f8e..315332c7a7 100644 --- a/.zuul.yaml +++ b/.zuul.yaml @@ -208,7 +208,7 @@ # (1) take a long time and (2) are already covered in the # tempest-slow* job. If this regex gets more complicated use # tempest_test_blacklist. - tempest_black_regex: ^tempest.scenario.test_network + tempest_black_regex: ^tempest\.(scenario\.test_network|api\.compute\.servers\.test_device_tagging\.TaggedAttachmentsTest\.test_tagged_attachment) devstack_local_conf: post-config: $NOVA_CPU_CONF: @@ -222,6 +222,12 @@ # reduce the number of placement calls in steady state. Added in # Stein. resource_provider_association_refresh: 0 + workarounds: + # This workaround is an improvement on hard reboot that cannot be turned + # on unconditionally. But we know that ml2/ovs sends plug time + # events so we can enable this in this ovs job for vnic_type + # normal + wait_for_vif_plugged_event_during_hard_reboot: normal $NOVA_CONF: quota: # Added in Train. diff --git a/nova/api/openstack/compute/servers.py b/nova/api/openstack/compute/servers.py index 8a0db8a423..db52da8579 100644 --- a/nova/api/openstack/compute/servers.py +++ b/nova/api/openstack/compute/servers.py @@ -546,6 +546,84 @@ class ServersController(wsgi.Controller): create_kwargs['requested_networks'] = requested_networks @staticmethod + def _validate_host_availability_zone(context, availability_zone, host): + """Ensure the host belongs in the availability zone. + + This is slightly tricky and it's probably worth recapping how host + aggregates and availability zones are related before reading. Hosts can + belong to zero or more host aggregates, but they will always belong to + exactly one availability zone. If the user has set the availability + zone key on one of the host aggregates that the host is a member of + then the host will belong to this availability zone. If the user has + not set the availability zone key on any of the host aggregates that + the host is a member of or the host is not a member of any host + aggregates, then the host will belong to the default availability zone. + Setting the availability zone key on more than one of the host aggregates + that the host is a member of is an error and will be rejected by the + API. + + Given the above, our host-availability zone check needs to vary + behavior based on whether we're requesting the default availability + zone or not. If we are not, then we simply ask "does this host belong + to a host aggregate and, if so, do any of the host aggregates have the + requested availability zone metadata set". By comparison, if we *are* + requesting the default availability zone then we want to ask the + inverse, or "does this host not belong to a host aggregate or, if it + does, is the availability zone information unset (or, naughty naughty, + set to the default) for each of the host aggregates". In both cases, if + the answer is no then we warn about the mismatch and then use the + actual availability zone of the host to avoid mismatches.
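For illustration, a hypothetical create request using the same zone1:host1 value exercised by the new unit tests below would carry

    "availability_zone": "zone1:host1"

in the server body, forcing the build onto host1 while requesting zone1; this helper then compares the requested zone against the zone derived from host1's aggregates and, on a mismatch, logs a warning and returns None so the requested zone is ignored in favor of the host's actual availability zone.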
+ + :param context: The nova auth request context + :param availability_zone: The name of the requested availability zone + :param host: The name of the requested host + :returns: The availability zone that should actually be used for the + request + """ + aggregates = objects.AggregateList.get_by_host(context, host=host) + if not aggregates: + # a host is assigned to the default availability zone if it is not + # a member of any host aggregates + if availability_zone == CONF.default_availability_zone: + return availability_zone + + LOG.warning( + "Requested availability zone '%s' but forced host '%s' " + "does not belong to any availability zones; ignoring " + "requested availability zone to avoid bug #1934770", + availability_zone, host, + ) + return None + + # only one host aggregate will have the availability_zone field set so + # use the first non-null value + host_availability_zone = next( + (a.availability_zone for a in aggregates if a.availability_zone), + None, + ) + + if availability_zone == host_availability_zone: + # if there's an exact match, use what the user requested + return availability_zone + + if ( + availability_zone == CONF.default_availability_zone and + host_availability_zone is None + ): + # special case the default availability zone since this won't (or + # rather shouldn't) be explicitly stored on any host aggregate + return availability_zone + + # no match, so use the host's availability zone information, if any + LOG.warning( + "Requested availability zone '%s' but forced host '%s' " + "does not belong to this availability zone; overwriting " + "requested availability zone to avoid bug #1934770", + availability_zone, host, + ) + return None + + @staticmethod def _process_hosts_for_create( context, target, server_dict, create_kwargs, host, node): """Processes hosts request parameter for server create @@ -665,6 +743,8 @@ class ServersController(wsgi.Controller): if host or node: context.can(server_policies.SERVERS % 'create:forced_host', target=target) + availability_zone = self._validate_host_availability_zone( + context, availability_zone, host) if api_version_request.is_supported(req, min_version='2.74'): self._process_hosts_for_create(context, target, server_dict, diff --git a/nova/api/openstack/compute/services.py b/nova/api/openstack/compute/services.py index 5b4cfd0d77..00efcaff34 100644 --- a/nova/api/openstack/compute/services.py +++ b/nova/api/openstack/compute/services.py @@ -266,10 +266,25 @@ class ServiceController(wsgi.Controller): # service delete since we could orphan resource providers and # break the ability to do things like confirm/revert instances # in VERIFY_RESIZE status. - compute_nodes = objects.ComputeNodeList.get_all_by_host( - context, service.host) - self._assert_no_in_progress_migrations( - context, id, compute_nodes) + compute_nodes = [] + try: + compute_nodes = objects.ComputeNodeList.get_all_by_host( + context, service.host) + self._assert_no_in_progress_migrations( + context, id, compute_nodes) + except exception.ComputeHostNotFound: + # NOTE(artom) Consider the following situation: + # - Using the Ironic virt driver + # - Replacing (so removing and re-adding) all baremetal + # nodes associated with a single nova-compute service + # The update resources periodic will have destroyed the + # compute node records because they're no longer being + # reported by the virt driver. 
If we then attempt to + # manually delete the compute service record, + # get_all_by_host() above will raise, as there are no longer + # any compute node records for the host. Catch it here and + # continue to allow compute service deletion. + pass aggrs = self.aggregate_api.get_aggregates_by_host(context, service.host) diff --git a/nova/cmd/manage.py b/nova/cmd/manage.py index 73d2832c84..68f6ff2b6a 100644 --- a/nova/cmd/manage.py +++ b/nova/cmd/manage.py @@ -2541,7 +2541,7 @@ class PlacementCommands(object): """ url = '/resource_providers' if 'uuid' in kwargs: - url += '&uuid=%s' % kwargs['uuid'] + url += '?uuid=%s' % kwargs['uuid'] resp = placement.get(url, global_request_id=context.global_id, version='1.14') diff --git a/nova/compute/api.py b/nova/compute/api.py index f4d8900b83..7e7012954f 100644 --- a/nova/compute/api.py +++ b/nova/compute/api.py @@ -2036,17 +2036,15 @@ class API(base.Base): self._check_multiple_instances_with_neutron_ports( requested_networks) - if availability_zone: - available_zones = availability_zones.\ get_availability_zones(context.elevated(), self.host_api, get_only_available=True) - if forced_host is None and availability_zone not in \ available_zones: + if availability_zone and forced_host is None: + azs = availability_zones.get_availability_zones( + context.elevated(), self.host_api, get_only_available=True) + if availability_zone not in azs: msg = _('The requested availability zone is not available') raise exception.InvalidRequest(msg) filter_properties = scheduler_utils.build_filter_properties( - scheduler_hints, forced_host, forced_node, instance_type) + scheduler_hints, forced_host, forced_node, instance_type) return self._create_instance( context, instance_type, diff --git a/nova/compute/manager.py b/nova/compute/manager.py index 8bc33ce32d..c59fd31f21 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -5608,6 +5608,14 @@ class ComputeManager(manager.Manager): instance.host = migration.dest_compute instance.node = migration.dest_node + # NOTE(gibi): as the instance is now tracked on the destination we + # have to make sure that the source compute resource tracker can + # track this instance as a migration. For that the resource tracker + # needs to see the old_flavor set on the instance. The old_flavor + # setting used to be done on the destination host in finish_resize + # but that is racy with a source host update_available_resource + # periodic run + instance.old_flavor = instance.flavor instance.task_state = task_states.RESIZE_MIGRATED instance.save(expected_task_state=task_states.RESIZE_MIGRATING) @@ -5721,6 +5729,10 @@ class ComputeManager(manager.Manager): # to ACTIVE for backwards compatibility old_vm_state = instance.system_metadata.get('old_vm_state', vm_states.ACTIVE) + # NOTE(gibi): this is already set by the resize_instance on the source + # node before calling finish_resize on the destination but during upgrade + # it can be that the source node does not have the fix for bug 1944759 + # yet. This assignment can be removed in Z release.
instance.old_flavor = old_flavor if old_instance_type_id != new_instance_type_id: @@ -9992,6 +10004,8 @@ class ComputeManager(manager.Manager): use_slave=True, startup=startup) + self.rt.clean_compute_node_cache(compute_nodes_in_db) + # Delete orphan compute node not reported by driver but still in db for cn in compute_nodes_in_db: if cn.hypervisor_hostname not in nodenames: diff --git a/nova/compute/resource_tracker.py b/nova/compute/resource_tracker.py index 565822d20c..c7f3f024d5 100644 --- a/nova/compute/resource_tracker.py +++ b/nova/compute/resource_tracker.py @@ -932,14 +932,41 @@ class ResourceTracker(object): 'flavor', 'migration_context', 'resources']) - # Now calculate usage based on instance utilization: - instance_by_uuid = self._update_usage_from_instances( - context, instances, nodename) - # Grab all in-progress migrations and error migrations: migrations = objects.MigrationList.get_in_progress_and_error( context, self.host, nodename) + # Check for tracked instances with in-progress, incoming, but not + # finished migrations. For those instances the migration context + # is not applied yet (it will be during finish_resize when the + # migration goes to finished state). We need to manually and + # temporarily apply the migration context here when the resource usage is + # updated. See bug 1953359 for more details. + instance_by_uuid = {instance.uuid: instance for instance in instances} + for migration in migrations: + if ( + migration.instance_uuid in instance_by_uuid and + migration.dest_compute == self.host and + migration.dest_node == nodename + ): + # we do not check for the 'post-migrating' migration status + # as applying the migration context for an instance already + # in finished migration status is a no-op anyhow. + instance = instance_by_uuid[migration.instance_uuid] + LOG.debug( + 'Applying migration context for instance %s as it has an ' + 'incoming, in-progress migration %s. ' + 'Migration status is %s', + migration.instance_uuid, migration.uuid, migration.status + ) + # It is OK not to revert the migration context at the end of + # the periodic as the instance is not saved during the periodic + instance.apply_migration_context() + + # Now calculate usage based on instance utilization: + instance_by_uuid = self._update_usage_from_instances( + context, instances, nodename) + self._pair_instances_to_migrations(migrations, instance_by_uuid) self._update_usage_from_migrations(context, migrations, nodename) @@ -1946,3 +1973,20 @@ class ResourceTracker(object): if migration: migration.status = 'done' migration.save() + + @utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE, fair=True) + def clean_compute_node_cache(self, compute_nodes_in_db): + """Clean the compute node cache of any nodes that no longer exist. + + :param compute_nodes_in_db: list of ComputeNode objects from the DB. + """ + compute_nodes_in_db_nodenames = {cn.hypervisor_hostname + for cn in compute_nodes_in_db} + stale_cns = set(self.compute_nodes) - compute_nodes_in_db_nodenames + + for stale_cn in stale_cns: + # NOTE(mgoddard): we have found a node in the cache that has no + # compute node in the DB. This could be due to a node rebalance + # where another compute service took ownership of the node. Clean + # up the cache.
+ self.remove_node(stale_cn) diff --git a/nova/conf/workarounds.py b/nova/conf/workarounds.py index 53a9f3d417..26b834b47e 100644 --- a/nova/conf/workarounds.py +++ b/nova/conf/workarounds.py @@ -354,6 +354,75 @@ Related options: * :oslo.config:option:`image_cache.subdirectory_name` * :oslo.config:option:`update_resources_interval` """), + cfg.BoolOpt('libvirt_disable_apic', + default=False, + help=""" +With some kernels initializing the guest apic can result in a kernel hang that +renders the guest unusable. This happens as a result of a kernel bug. In most +cases the correct fix is to update the guest image kernel to one that is +patched; however, in some cases this is not possible. This workaround allows the +emulation of an apic to be disabled per host; however, it is not recommended for +use outside of a CI or developer cloud. +"""), + cfg.ListOpt('wait_for_vif_plugged_event_during_hard_reboot', + item_type=cfg.types.String( + choices=[ + "normal", + "direct", + "macvtap", + "baremetal", + "direct-physical", + "virtio-forwarder", + "smart-nic", + "vdpa", + "accelerator-direct", + "accelerator-direct-physical", + ]), + default=[], + help=""" +The libvirt virt driver implements power on and hard reboot by tearing down +every vif of the instance being rebooted and then plugging them again. By default nova +does not wait for the network-vif-plugged event from neutron before it lets the +instance run. This can cause the instance to request the IP via DHCP before +the neutron backend has a chance to set up the networking backend after the vif +plug. + +This flag defines which vifs nova expects network-vif-plugged events from +during hard reboot. The possible values are neutron port vnic types: + +* normal +* direct +* macvtap +* baremetal +* direct-physical +* virtio-forwarder +* smart-nic +* vdpa +* accelerator-direct +* accelerator-direct-physical + +Adding a ``vnic_type`` to this configuration makes Nova wait for a +network-vif-plugged event for each of the instance's vifs having the specific +``vnic_type`` before unpausing the instance, similarly to how new instance +creation works. + +Please note that not all neutron networking backends send plug time events for +every ``vnic_type``, therefore this config is empty by default. + +The ml2/ovs and the networking-odl backends are known to send plug time events +for ports with the ``normal`` ``vnic_type``, so it is safe to add ``normal`` to this +config if you are using only those backends on the compute host. + +The neutron in-tree SRIOV backend does not reliably send the network-vif-plugged +event at plug time for ports with the ``direct`` vnic_type and never sends +that event for ports with the ``direct-physical`` vnic_type. For +other ``vnic_type`` and backend pairs, please consult the developers of the +backend.
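For example, a deployment whose compute hosts use only the ml2/ovs backend could opt in for normal ports with a minimal nova.conf sketch (mirroring the devstack value set for the ovs job in .zuul.yaml above):

    [workarounds]
    wait_for_vif_plugged_event_during_hard_reboot = normal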
+ +Related options: + +* :oslo.config:option:`DEFAULT.vif_plugging_timeout` +"""), ] diff --git a/nova/console/websocketproxy.py b/nova/console/websocketproxy.py index fed1ee5e29..a71e5a4091 100644 --- a/nova/console/websocketproxy.py +++ b/nova/console/websocketproxy.py @@ -289,14 +289,9 @@ class NovaProxyRequestHandler(websockify.ProxyRequestHandler): if os.path.isdir(path): parts = urlparse.urlsplit(self.path) if not parts.path.endswith('/'): - # redirect browser - doing basically what apache does - new_parts = (parts[0], parts[1], parts[2] + '/', - parts[3], parts[4]) - new_url = urlparse.urlunsplit(new_parts) - # Browsers interpret "Location: //uri" as an absolute URI # like "http://URI" - if new_url.startswith('//'): + if self.path.startswith('//'): self.send_error(HTTPStatus.BAD_REQUEST, "URI must not start with //") return None diff --git a/nova/middleware.py b/nova/middleware.py index 717fecd4ef..8b0fc59561 100644 --- a/nova/middleware.py +++ b/nova/middleware.py @@ -24,11 +24,15 @@ def set_defaults(): 'X-Roles', 'X-Service-Catalog', 'X-User-Id', - 'X-Tenant-Id'], + 'X-Tenant-Id', + 'X-OpenStack-Nova-API-Version', + 'OpenStack-API-Version'], expose_headers=['X-Auth-Token', 'X-Openstack-Request-Id', 'X-Subject-Token', - 'X-Service-Token'], + 'X-Service-Token', + 'X-OpenStack-Nova-API-Version', + 'OpenStack-API-Version'], allow_methods=['GET', 'PUT', 'POST', diff --git a/nova/objects/instance_numa.py b/nova/objects/instance_numa.py index 94f71f8803..f4ed51a12e 100644 --- a/nova/objects/instance_numa.py +++ b/nova/objects/instance_numa.py @@ -79,6 +79,8 @@ class InstanceNUMACell(base.NovaEphemeralObject, 'memory': obj_fields.IntegerField(), 'pagesize': obj_fields.IntegerField(nullable=True, default=None), + # TODO(sean-k-mooney): This is no longer used and should be + # removed in v2 'cpu_topology': obj_fields.ObjectField('VirtCPUTopology', nullable=True), 'cpu_pinning_raw': obj_fields.DictOfIntegersField(nullable=True, @@ -164,8 +166,10 @@ class InstanceNUMATopology(base.NovaObject, if 'nova_object.name' in primitive: obj = cls.obj_from_primitive(primitive) - cls._migrate_legacy_dedicated_instance_cpuset( - context, instance_uuid, obj) + updated = cls._migrate_legacy_dedicated_instance_cpuset(obj) + if updated: + cls._save_migrated_cpuset_to_instance_extra( + context, obj, instance_uuid) else: obj = cls._migrate_legacy_object(context, instance_uuid, primitive) @@ -174,13 +178,14 @@ class InstanceNUMATopology(base.NovaObject, # TODO(huaqiang): Remove after Wallaby once we are sure these objects have # been loaded at least once. @classmethod - def _migrate_legacy_dedicated_instance_cpuset(cls, context, instance_uuid, - obj): + def _migrate_legacy_dedicated_instance_cpuset(cls, obj): # NOTE(huaqiang): We may meet some topology object with the old version # 'InstanceNUMACell' cells, in that case, the 'dedicated' CPU is kept # in 'InstanceNUMACell.cpuset' field, but it should be kept in # 'InstanceNUMACell.pcpuset' field since Victoria. Making an upgrade - # and persisting to database. + # here but letting the caller persist the result if needed as we + # don't know which table the InstanceNUMACell is coming from. It can + # come from instance_extra or request_spec too. 
update_db = False for cell in obj.cells: if len(cell.cpuset) == 0: @@ -192,14 +197,20 @@ class InstanceNUMATopology(base.NovaObject, cell.pcpuset = cell.cpuset cell.cpuset = set() update_db = True + return update_db - if update_db: - db_obj = jsonutils.dumps(obj.obj_to_primitive()) - values = { - 'numa_topology': db_obj, - } - db.instance_extra_update_by_uuid(context, instance_uuid, - values) + # TODO(huaqiang): Remove after Yoga once we are sure these objects have + # been loaded at least once. + @classmethod + def _save_migrated_cpuset_to_instance_extra( + cls, context, obj, instance_uuid + ): + db_obj = jsonutils.dumps(obj.obj_to_primitive()) + values = { + 'numa_topology': db_obj, + } + db.instance_extra_update_by_uuid( + context, instance_uuid, values) # TODO(stephenfin): Remove in X or later, once this has bedded in @classmethod diff --git a/nova/objects/request_spec.py b/nova/objects/request_spec.py index 11334335e0..48712860b7 100644 --- a/nova/objects/request_spec.py +++ b/nova/objects/request_spec.py @@ -585,6 +585,8 @@ class RequestSpec(base.NovaObject): @staticmethod def _from_db_object(context, spec, db_spec): spec_obj = spec.obj_from_primitive(jsonutils.loads(db_spec['spec'])) + data_migrated = False + for key in spec.fields: # Load these from the db model not the serialized object within, # though they should match. @@ -605,6 +607,13 @@ class RequestSpec(base.NovaObject): # fields. If they are not set, set None. if not spec.obj_attr_is_set(key): setattr(spec, key, None) + elif key == "numa_topology": + if key in spec_obj: + spec.numa_topology = spec_obj.numa_topology + if spec.numa_topology: + data_migrated = objects.InstanceNUMATopology.\ + _migrate_legacy_dedicated_instance_cpuset( + spec.numa_topology) elif key in spec_obj: setattr(spec, key, getattr(spec_obj, key)) spec._context = context @@ -626,6 +635,9 @@ class RequestSpec(base.NovaObject): # NOTE(danms): Instance group may have been deleted spec.instance_group = None + if data_migrated: + spec.save() + spec.obj_reset_changes() return spec diff --git a/nova/privsep/fs.py b/nova/privsep/fs.py index 4173c8e11e..11249322a4 100644 --- a/nova/privsep/fs.py +++ b/nova/privsep/fs.py @@ -57,14 +57,21 @@ def lvcreate(size, lv, vg, preallocated=None): @nova.privsep.sys_admin_pctxt.entrypoint def vginfo(vg): - return processutils.execute('vgs', '--noheadings', '--nosuffix', - '--separator', '|', '--units', 'b', - '-o', 'vg_size,vg_free', vg) + # NOTE(gibi): We see intermittent faults querying volume groups failing + # with error code -11, hence the retry. 
See bug 1931710 + return processutils.execute( + 'vgs', '--noheadings', '--nosuffix', + '--separator', '|', '--units', 'b', + '-o', 'vg_size,vg_free', vg, + attempts=3, delay_on_retry=True, + ) @nova.privsep.sys_admin_pctxt.entrypoint def lvlist(vg): - return processutils.execute('lvs', '--noheadings', '-o', 'lv_name', vg) + return processutils.execute( + 'lvs', '--noheadings', '-o', 'lv_name', vg, + attempts=3, delay_on_retry=True) @nova.privsep.sys_admin_pctxt.entrypoint diff --git a/nova/tests/functional/integrated_helpers.py b/nova/tests/functional/integrated_helpers.py index 5e98d06271..311ac6314c 100644 --- a/nova/tests/functional/integrated_helpers.py +++ b/nova/tests/functional/integrated_helpers.py @@ -391,7 +391,15 @@ class InstanceHelperMixin: """ # if forcing the server onto a host, we have to use the admin API if not api: - api = self.api if not az else getattr(self, 'admin_api', self.api) + api = self.api if not az and not host else getattr( + self, 'admin_api', self.api) + + if host and not api.microversion: + api.microversion = '2.74' + # with 2.74 networks param needs to use 'none' instead of None + # if no network is needed + if networks is None: + networks = 'none' body = self._build_server( name, image_uuid, flavor_id, networks, az, host) diff --git a/nova/tests/functional/libvirt/test_numa_servers.py b/nova/tests/functional/libvirt/test_numa_servers.py index 65f07e9c8a..6cae9fb57f 100644 --- a/nova/tests/functional/libvirt/test_numa_servers.py +++ b/nova/tests/functional/libvirt/test_numa_servers.py @@ -20,6 +20,7 @@ from oslo_config import cfg from oslo_log import log as logging import nova +from nova.compute import manager from nova.conf import neutron as neutron_conf from nova import context as nova_context from nova import objects @@ -104,8 +105,8 @@ class NUMAServersTest(NUMAServersTestBase): nodes. """ - host_info = fakelibvirt.HostInfo(cpu_nodes=2, cpu_sockets=1, - cpu_cores=2, cpu_threads=2) + host_info = fakelibvirt.HostInfo( + cpu_nodes=2, cpu_sockets=1, cpu_cores=2, cpu_threads=2) self.start_compute(host_info=host_info, hostname='compute1') extra_spec = {'hw:numa_nodes': '2'} @@ -120,6 +121,35 @@ class NUMAServersTest(NUMAServersTestBase): self.assertNotIn('cpu_topology', inst.numa_topology.cells[0]) self.assertNotIn('cpu_topology', inst.numa_topology.cells[1]) + def test_create_server_with_numa_topology_and_cpu_topology_and_pinning( + self): + """Create a server with two NUMA nodes. + + This should pass and result in a guest NUMA topology with two NUMA + nodes, pinned cpus and numa affined memory. + """ + + host_info = fakelibvirt.HostInfo( + cpu_nodes=2, cpu_sockets=1, cpu_cores=4, cpu_threads=1, + kB_mem=(1024 * 1024 * 16)) # 16 GB + self.start_compute(host_info=host_info, hostname='compute1') + + extra_spec = { + 'hw:numa_nodes': '2', + 'hw:cpu_max_sockets': '2', + 'hw:cpu_max_cores': '2', + 'hw:cpu_max_threads': '8', + 'hw:cpu_policy': 'dedicated'} + flavor_id = self._create_flavor(vcpu=8, extra_spec=extra_spec) + server = self._run_build_test(flavor_id) + + ctx = nova_context.get_admin_context() + inst = objects.Instance.get_by_uuid(ctx, server['id']) + self.assertEqual(2, len(inst.numa_topology.cells)) + self.assertLessEqual(inst.vcpu_model.topology.sockets, 2) + self.assertLessEqual(inst.vcpu_model.topology.cores, 2) + self.assertLessEqual(inst.vcpu_model.topology.threads, 8) + def test_create_server_with_numa_fails(self): """Create a two NUMA node instance on a host with only one node. 
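The extra specs exercised by the new pinning test above map directly to operator-facing flavor properties; a rough CLI sketch (illustrative only; the flavor name, RAM and disk sizes are arbitrary):

    openstack flavor create numa-pinned --vcpus 8 --ram 2048 --disk 20
    openstack flavor set numa-pinned \
        --property hw:numa_nodes=2 \
        --property hw:cpu_policy=dedicated \
        --property hw:cpu_max_sockets=2 \
        --property hw:cpu_max_cores=2 \
        --property hw:cpu_max_threads=8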
@@ -217,7 +247,7 @@ class NUMAServersTest(NUMAServersTestBase): inst = objects.Instance.get_by_uuid(self.ctxt, server['id']) self.assertEqual(1, len(inst.numa_topology.cells)) - self.assertEqual(5, inst.numa_topology.cells[0].cpu_topology.cores) + self.assertEqual(5, inst.vcpu_model.topology.sockets) def test_create_server_with_mixed_policy(self): """Create a server using the 'hw:cpu_policy=mixed' extra spec. @@ -267,7 +297,6 @@ class NUMAServersTest(NUMAServersTestBase): # sanity check the instance topology inst = objects.Instance.get_by_uuid(self.ctxt, server['id']) self.assertEqual(1, len(inst.numa_topology.cells)) - self.assertEqual(4, inst.numa_topology.cells[0].cpu_topology.cores) self.assertEqual({0}, inst.numa_topology.cells[0].cpuset) self.assertEqual({1, 2, 3}, inst.numa_topology.cells[0].pcpuset) self.assertEqual( @@ -476,8 +505,6 @@ class NUMAServersTest(NUMAServersTestBase): ctx = nova_context.get_admin_context() inst = objects.Instance.get_by_uuid(ctx, server['id']) self.assertEqual(1, len(inst.numa_topology.cells)) - self.assertEqual(1, inst.numa_topology.cells[0].cpu_topology.cores) - self.assertEqual(2, inst.numa_topology.cells[0].cpu_topology.threads) def test_create_server_with_pcpu_fails(self): """Create a pinned instance on a host with no PCPUs. @@ -797,6 +824,210 @@ class NUMAServersTest(NUMAServersTestBase): server = self._wait_for_state_change(server, 'ACTIVE') + def _assert_pinned_cpus(self, hostname, expected_number_of_pinned): + numa_topology = objects.NUMATopology.obj_from_db_obj( + objects.ComputeNode.get_by_nodename( + self.ctxt, hostname, + ).numa_topology, + ) + self.assertEqual( + expected_number_of_pinned, len(numa_topology.cells[0].pinned_cpus)) + + def _create_server_and_resize_bug_1944759(self): + self.flags( + cpu_dedicated_set='0-3', cpu_shared_set='4-7', group='compute') + self.flags(vcpu_pin_set=None) + + # start services + self.start_compute(hostname='test_compute0') + self.start_compute(hostname='test_compute1') + + flavor_a_id = self._create_flavor( + vcpu=2, extra_spec={'hw:cpu_policy': 'dedicated'}) + server = self._create_server(flavor_id=flavor_a_id) + + src_host = server['OS-EXT-SRV-ATTR:host'] + self._assert_pinned_cpus(src_host, 2) + + # we don't really care what the new flavor is, so long as the old + # flavor is using pinning. We use a similar flavor for simplicity. + flavor_b_id = self._create_flavor( + vcpu=2, extra_spec={'hw:cpu_policy': 'dedicated'}) + + orig_rpc_finish_resize = nova.compute.rpcapi.ComputeAPI.finish_resize + + # Simulate that the finish_resize call overlaps with an + # update_available_resource periodic job + def inject_periodic_to_finish_resize(*args, **kwargs): + self._run_periodics() + return orig_rpc_finish_resize(*args, **kwargs) + + self.stub_out( + 'nova.compute.rpcapi.ComputeAPI.finish_resize', + inject_periodic_to_finish_resize, + ) + + # TODO(stephenfin): The mock of 'migrate_disk_and_power_off' should + # probably be less...dumb + with mock.patch( + 'nova.virt.libvirt.driver.LibvirtDriver' + '.migrate_disk_and_power_off', return_value='{}', + ): + post = {'resize': {'flavorRef': flavor_b_id}} + self.api.post_server_action(server['id'], post) + server = self._wait_for_state_change(server, 'VERIFY_RESIZE') + + dst_host = server['OS-EXT-SRV-ATTR:host'] + + # we have 2 cpus pinned on both computes. 
The source should have it + # due to the outbound migration and the destination due to the + # instance running there + self._assert_pinned_cpus(src_host, 2) + self._assert_pinned_cpus(dst_host, 2) + + return server, src_host, dst_host + + def test_resize_confirm_bug_1944759(self): + server, src_host, dst_host = ( + self._create_server_and_resize_bug_1944759()) + + # Now confirm the resize + post = {'confirmResize': None} + + self.api.post_server_action(server['id'], post) + self._wait_for_state_change(server, 'ACTIVE') + + # the resource allocation reflects that the VM is running on the dest + # node + self._assert_pinned_cpus(src_host, 0) + self._assert_pinned_cpus(dst_host, 2) + + # and running periodics does not break it either + self._run_periodics() + + self._assert_pinned_cpus(src_host, 0) + self._assert_pinned_cpus(dst_host, 2) + + def test_resize_revert_bug_1944759(self): + server, src_host, dst_host = ( + self._create_server_and_resize_bug_1944759()) + + # Now revert the resize + post = {'revertResize': None} + + # reverts actually succeeds (not like confirm) but the resource + # allocation is still flaky + self.api.post_server_action(server['id'], post) + self._wait_for_state_change(server, 'ACTIVE') + + # After the revert the source host should have 2 cpus pinned due to + # the instance. + self._assert_pinned_cpus(src_host, 2) + self._assert_pinned_cpus(dst_host, 0) + + # running the periodic job will not break it either + self._run_periodics() + + self._assert_pinned_cpus(src_host, 2) + self._assert_pinned_cpus(dst_host, 0) + + def test_resize_dedicated_policy_race_on_dest_bug_1953359(self): + + self.flags(cpu_dedicated_set='0-2', cpu_shared_set=None, + group='compute') + self.flags(vcpu_pin_set=None) + + host_info = fakelibvirt.HostInfo(cpu_nodes=1, cpu_sockets=1, + cpu_cores=2, cpu_threads=1) + self.start_compute(host_info=host_info, hostname='compute1') + + extra_spec = { + 'hw:cpu_policy': 'dedicated', + } + flavor_id = self._create_flavor(vcpu=1, extra_spec=extra_spec) + expected_usage = {'DISK_GB': 20, 'MEMORY_MB': 2048, 'PCPU': 1} + + server = self._run_build_test(flavor_id, expected_usage=expected_usage) + + inst = objects.Instance.get_by_uuid(self.ctxt, server['id']) + self.assertEqual(1, len(inst.numa_topology.cells)) + # assert that the pcpu 0 is used on compute1 + self.assertEqual({'0': 0}, inst.numa_topology.cells[0].cpu_pinning_raw) + + # start another compute with the same config + self.start_compute(host_info=host_info, hostname='compute2') + + # boot another instance but now on compute2 so that it occupies the + # pcpu 0 on compute2 + # NOTE(gibi): _run_build_test cannot be used here as it assumes only + # compute1 exists + server2 = self._create_server( + flavor_id=flavor_id, + host='compute2', + ) + inst2 = objects.Instance.get_by_uuid(self.ctxt, server2['id']) + self.assertEqual(1, len(inst2.numa_topology.cells)) + # assert that the pcpu 0 is used + self.assertEqual( + {'0': 0}, inst2.numa_topology.cells[0].cpu_pinning_raw) + + # migrate the first instance from compute1 to compute2 but stop + # migrating at the start of finish_resize. Then start a racing periodic + # update_available_resources. 
+ orig_finish_resize = manager.ComputeManager.finish_resize + + def fake_finish_resize(*args, **kwargs): + # start a racing update_available_resource periodic + self._run_periodics() + # we expect that CPU pinning fails on the destination node + # as the resource_tracker will use the source node numa_topology + # and that does not fit on the dest node as pcpu 0 in the dest + # is already occupied. + log = self.stdlog.logger.output + # The resize_claim correctly calculates that the instance should be + # pinned to pcpu id 1 instead of 0 + self.assertIn( + 'Computed NUMA topology CPU pinning: usable pCPUs: [[1]], ' + 'vCPUs mapping: [(0, 1)]', + log, + ) + # We expect that the periodic does not fail as bug 1953359 is fixed. + log = self.stdlog.logger.output + self.assertIn('Running periodic for compute (compute2)', log) + self.assertNotIn('Error updating resources for node compute2', log) + self.assertNotIn( + 'nova.exception.CPUPinningInvalid: CPU set to pin [0] must be ' + 'a subset of free CPU set [1]', + log, + ) + + # now let the resize finish + return orig_finish_resize(*args, **kwargs) + + # TODO(stephenfin): The mock of 'migrate_disk_and_power_off' should + # probably be less...dumb + with mock.patch('nova.virt.libvirt.driver.LibvirtDriver' + '.migrate_disk_and_power_off', return_value='{}'): + with mock.patch( + 'nova.compute.manager.ComputeManager.finish_resize', + new=fake_finish_resize, + ): + post = {'migrate': None} + # this is expected to succeed + self.admin_api.post_server_action(server['id'], post) + + server = self._wait_for_state_change(server, 'VERIFY_RESIZE') + + # As bug 1953359 is fixed the revert should succeed too + post = {'revertResize': {}} + self.admin_api.post_server_action(server['id'], post) + self._wait_for_state_change(server, 'ACTIVE') + self.assertNotIn( + 'nova.exception.CPUUnpinningInvalid: CPU set to unpin [1] must be ' + 'a subset of pinned CPU set [0]', + self.stdlog.logger.output, + ) + class NUMAServerTestWithCountingQuotaFromPlacement(NUMAServersTest): diff --git a/nova/tests/functional/regressions/test_bug_1853009.py b/nova/tests/functional/regressions/test_bug_1853009.py new file mode 100644 index 0000000000..16b8209a85 --- /dev/null +++ b/nova/tests/functional/regressions/test_bug_1853009.py @@ -0,0 +1,200 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import mock + +from nova import context +from nova import objects +from nova.tests.functional import integrated_helpers + + +class NodeRebalanceDeletedComputeNodeRaceTestCase( + integrated_helpers.ProviderUsageBaseTestCase, +): + """Regression test for bug 1853009 observed in Rocky & later. + + When an ironic node re-balances from one host to another, there can be a + race where the old host deletes the orphan compute node after the new host + has taken ownership of it, which results in the new host failing to create + the compute node and resource provider because the ResourceTracker does not + detect a change.
+ """ + # Make sure we're using the fake driver that has predictable uuids + # for each node. + compute_driver = 'fake.PredictableNodeUUIDDriver' + + def _assert_hypervisor_api(self, nodename, expected_host): + # We should have one compute node shown by the API. + hypervisors = self.api.api_get( + '/os-hypervisors/detail' + ).body['hypervisors'] + self.assertEqual(1, len(hypervisors), hypervisors) + hypervisor = hypervisors[0] + self.assertEqual(nodename, hypervisor['hypervisor_hostname']) + self.assertEqual(expected_host, hypervisor['service']['host']) + + def _start_compute(self, host): + host = self.start_service('compute', host) + # Ironic compute driver has rebalances_nodes = True. + host.manager.driver.rebalances_nodes = True + return host + + def setUp(self): + super(NodeRebalanceDeletedComputeNodeRaceTestCase, self).setUp() + + self.nodename = 'fake-node' + self.ctxt = context.get_admin_context() + + def test_node_rebalance_deleted_compute_node_race(self): + # Simulate a service running and then stopping. host_a runs, creates + # fake-node, then is stopped. The fake-node compute node is destroyed. + # This leaves a soft-deleted node in the DB. + host_a = self._start_compute('host_a') + host_a.manager.driver._set_nodes([self.nodename]) + host_a.manager.update_available_resource(self.ctxt) + host_a.stop() + cn = objects.ComputeNode.get_by_host_and_nodename( + self.ctxt, 'host_a', self.nodename, + ) + cn.destroy() + + self.assertEqual(0, len(objects.ComputeNodeList.get_all(self.ctxt))) + + # Now we create a new compute service to manage our node. + host_b = self._start_compute('host_b') + host_b.manager.driver._set_nodes([self.nodename]) + + # When start_service runs, it will create a host_b ComputeNode. We want + # to delete that and inject our fake node into the driver which will + # be re-balanced to another host later. First assert this actually + # exists. + self._assert_hypervisor_api('host_b', expected_host='host_b') + + # Now run the update_available_resource periodic to register fake-node + # and have it managed by host_b. This will also detect the "host_b" + # node as orphaned and delete it along with its resource provider. + + # host_b[1]: Finds no compute record in RT. Tries to create one + # (_init_compute_node). + # FIXME(mgoddard): This shows a traceback with SQL rollback due to + # soft-deleted node. The create seems to succeed but breaks the RT + # update for this node. See + # https://bugs.launchpad.net/nova/+bug/1853159. + host_b.manager.update_available_resource(self.ctxt) + self._assert_hypervisor_api(self.nodename, expected_host='host_b') + # There should only be one resource provider (fake-node). + original_rps = self._get_all_providers() + self.assertEqual(1, len(original_rps), original_rps) + self.assertEqual(self.nodename, original_rps[0]['name']) + + # Simulate a re-balance by restarting host_a and make it manage + # fake-node. At this point both host_b and host_a think they own + # fake-node. + host_a = self._start_compute('host_a') + host_a.manager.driver._set_nodes([self.nodename]) + + # host_a[1]: Finds no compute record in RT, 'moves' existing node from + # host_b + host_a.manager.update_available_resource(self.ctxt) + # Assert that fake-node was re-balanced from host_b to host_a. + self.assertIn( + 'ComputeNode fake-node moving from host_b to host_a', + self.stdlog.logger.output) + self._assert_hypervisor_api(self.nodename, expected_host='host_a') + + # host_a[2]: Begins periodic update, queries compute nodes for this + # host, finds the fake-node. 
+ cn = objects.ComputeNode.get_by_host_and_nodename( + self.ctxt, 'host_a', self.nodename, + ) + + # host_b[2]: Finds no compute record in RT, 'moves' existing node from + # host_a + host_b.manager.update_available_resource(self.ctxt) + # Assert that fake-node was re-balanced from host_a to host_b. + self.assertIn( + 'ComputeNode fake-node moving from host_a to host_b', + self.stdlog.logger.output) + self._assert_hypervisor_api(self.nodename, expected_host='host_b') + + # Complete rebalance, as host_a realises it does not own fake-node. + host_a.manager.driver._set_nodes([]) + + # host_a[2]: Deletes orphan compute node. + # Mock out the compute node query to simulate a race condition where + # the list includes an orphan compute node that is newly owned by + # host_b by the time host_a attempts to delete it. + # FIXME(mgoddard): Ideally host_a would not delete a node that does not + # belong to it. See https://bugs.launchpad.net/nova/+bug/1853009. + with mock.patch( + 'nova.compute.manager.ComputeManager._get_compute_nodes_in_db' + ) as mock_get: + mock_get.return_value = [cn] + host_a.manager.update_available_resource(self.ctxt) + + # Verify that the node was deleted. + self.assertIn( + 'Deleting orphan compute node %s hypervisor host ' + 'is fake-node, nodes are' % cn.id, + self.stdlog.logger.output) + hypervisors = self.api.api_get( + '/os-hypervisors/detail').body['hypervisors'] + self.assertEqual(0, len(hypervisors), hypervisors) + rps = self._get_all_providers() + self.assertEqual(0, len(rps), rps) + + # host_b[3]: Should recreate compute node and resource provider. + # FIXME(mgoddard): Resource provider not recreated here, because it + # exists in the provider tree. See + # https://bugs.launchpad.net/nova/+bug/1841481. + host_b.manager.update_available_resource(self.ctxt) + + # Verify that the node was recreated. + self._assert_hypervisor_api(self.nodename, 'host_b') + + # But due to https://bugs.launchpad.net/nova/+bug/1853159 the compute + # node is not cached in the RT. + self.assertNotIn(self.nodename, host_b.manager.rt.compute_nodes) + + # There is no RP. + rps = self._get_all_providers() + self.assertEqual(0, len(rps), rps) + + # But the RP exists in the provider tree. + self.assertTrue(host_b.manager.rt.reportclient._provider_tree.exists( + self.nodename)) + + # host_b[1]: Should add compute node to RT cache and recreate resource + # provider. + # FIXME(mgoddard): Resource provider not recreated here, because it + # exists in the provider tree. See + # https://bugs.launchpad.net/nova/+bug/1841481. + host_b.manager.update_available_resource(self.ctxt) + + # Verify that the node still exists. + self._assert_hypervisor_api(self.nodename, 'host_b') + + # And it is now in the RT cache. + self.assertIn(self.nodename, host_b.manager.rt.compute_nodes) + + # There is still no RP. + rps = self._get_all_providers() + self.assertEqual(0, len(rps), rps) + + # But the RP it exists in the provider tree. + self.assertTrue(host_b.manager.rt.reportclient._provider_tree.exists( + self.nodename)) + + # This fails due to the lack of a resource provider. 
+ self.assertIn( + 'Skipping removal of allocations for deleted instances', + self.stdlog.logger.output) diff --git a/nova/tests/unit/api/openstack/compute/test_servers.py b/nova/tests/unit/api/openstack/compute/test_servers.py index b384c73f36..2f6940702f 100644 --- a/nova/tests/unit/api/openstack/compute/test_servers.py +++ b/nova/tests/unit/api/openstack/compute/test_servers.py @@ -4563,21 +4563,144 @@ class ServersControllerCreateTest(test.TestCase): self.assertRaises(exception.ValidationError, self.controller.create, req, body=body) - def test_create_az_with_leading_trailing_spaces(self): + def test_create_instance_az_with_leading_trailing_spaces(self): self.body['server']['availability_zone'] = ' zone1 ' self.req.body = jsonutils.dump_as_bytes(self.body) self.assertRaises(exception.ValidationError, self.controller.create, self.req, body=self.body) - def test_create_az_with_leading_trailing_spaces_in_compat_mode( - self): + def test_create_instance_az_with_leading_trailing_spaces_compat_mode(self): self.body['server']['name'] = ' abc def ' - self.body['server']['availability_zones'] = ' zone1 ' + self.body['server']['availability_zone'] = ' zone1 ' self.req.body = jsonutils.dump_as_bytes(self.body) self.req.set_legacy_v2() - with mock.patch.object(availability_zones, 'get_availability_zones', - return_value=[' zone1 ']): + with mock.patch.object( + availability_zones, 'get_availability_zones', + return_value=[' zone1 '], + ) as mock_get_azs: self.controller.create(self.req, body=self.body) + mock_get_azs.assert_called_once() + + def test_create_instance_invalid_az(self): + self.body['server']['availability_zone'] = 'zone1' + self.req.body = jsonutils.dump_as_bytes(self.body) + with mock.patch.object( + availability_zones, 'get_availability_zones', + return_value=['zone2'], + ) as mock_get_azs: + self.assertRaises( + webob.exc.HTTPBadRequest, + self.controller.create, + self.req, body=self.body) + mock_get_azs.assert_called_once() + + @mock.patch.object(objects.AggregateList, 'get_by_host') + @mock.patch.object(servers, 'LOG') + def test_create_instance_az_host(self, mock_log, mock_get_host_aggs): + """Ensure we handle az:host format for 'availability_zone'.""" + mock_get_host_aggs.return_value = objects.AggregateList( + objects=[ + objects.Aggregate(metadata={'availability_zone': 'zone1'}), + ], + ) + + self.body['server']['availability_zone'] = 'zone1:host1' + self.req.body = jsonutils.dump_as_bytes(self.body) + + self.controller.create(self.req, body=self.body) + + mock_get_host_aggs.assert_called_once() + mock_log.warning.assert_not_called() + + @mock.patch.object(objects.AggregateList, 'get_by_host') + @mock.patch.object(servers, 'LOG') + def test_create_instance_az_host_mismatch_without_aggs( + self, mock_log, mock_get_host_aggs, + ): + """User requests an AZ but the host doesn't have one""" + mock_get_host_aggs.return_value = objects.AggregateList(objects=[]) + + self.body['server']['availability_zone'] = 'zone1:host1' + self.req.body = jsonutils.dump_as_bytes(self.body) + + self.controller.create(self.req, body=self.body) + + mock_get_host_aggs.assert_called_once() + # we should see a log since the host doesn't belong to the requested AZ + self.assertIn('bug #1934770', mock_log.warning.call_args[0][0]) + + @mock.patch.object(objects.AggregateList, 'get_by_host') + @mock.patch.object(servers, 'LOG') + def test_create_instance_az_host_mismatch_without_aggs_in_default_az( + self, mock_log, mock_get_host_aggs, + ): + """User requests the default AZ and host isn't in any explicit 
AZ""" + self.flags(default_availability_zone='zone1') + mock_get_host_aggs.return_value = objects.AggregateList(objects=[]) + + self.body['server']['availability_zone'] = 'zone1:host1' + self.req.body = jsonutils.dump_as_bytes(self.body) + + self.controller.create(self.req, body=self.body) + + mock_get_host_aggs.assert_called_once() + # we shouldn't see a log since the host is not in any aggregate and + # therefore is in the default AZ + mock_log.warning.assert_not_called() + + @mock.patch.object(objects.AggregateList, 'get_by_host') + @mock.patch.object(servers, 'LOG') + def test_create_instance_az_host_mismatch_with_aggs( + self, mock_log, mock_get_host_aggs, + ): + """User requests an AZ but the host has a different one.""" + mock_get_host_aggs.return_value = objects.AggregateList( + objects=[ + objects.Aggregate(metadata={'availability_zone': 'zone2'}), + ], + ) + + self.body['server']['availability_zone'] = 'zone1:host1' + self.req.body = jsonutils.dump_as_bytes(self.body) + + self.controller.create(self.req, body=self.body) + + mock_get_host_aggs.assert_called_once() + # we should see a log since the host belongs to a different AZ + self.assertIn('bug #1934770', mock_log.warning.call_args[0][0]) + + @mock.patch.object(objects.AggregateList, 'get_by_host') + @mock.patch.object(servers, 'LOG') + def test_create_instance_az_host_mismatch_with_aggs_in_default_az( + self, mock_log, mock_get_host_aggs, + ): + """User requests the default AZ and host is in aggregates without AZ""" + self.flags(default_availability_zone='zone1') + mock_get_host_aggs.return_value = objects.AggregateList( + objects=[objects.Aggregate(metadata={})], + ) + + self.body['server']['availability_zone'] = 'zone1:host1' + self.req.body = jsonutils.dump_as_bytes(self.body) + + self.controller.create(self.req, body=self.body) + + mock_get_host_aggs.assert_called_once() + # we shouldn't see a log since none of the host aggregates have an + # explicit AZ set and the host is therefore in the default AZ + mock_log.warning.assert_not_called() + + def test_create_instance_invalid_az_format(self): + self.body['server']['availability_zone'] = 'invalid::::zone' + self.assertRaises(webob.exc.HTTPBadRequest, + self.controller.create, + self.req, body=self.body) + + def test_create_instance_invalid_az_as_int(self): + self.body['server']['availability_zone'] = 123 + self.assertRaises(exception.ValidationError, + self.controller.create, + self.req, body=self.body) def test_create_instance(self): self.stub_out('uuid.uuid4', lambda: FAKE_UUID) @@ -6181,18 +6304,6 @@ class ServersControllerCreateTest(test.TestCase): self.controller.create, self.req, body=self.body) - def test_create_instance_invalid_availability_zone(self): - self.body['server']['availability_zone'] = 'invalid::::zone' - self.assertRaises(webob.exc.HTTPBadRequest, - self.controller.create, - self.req, body=self.body) - - def test_create_instance_invalid_availability_zone_as_int(self): - self.body['server']['availability_zone'] = 123 - self.assertRaises(exception.ValidationError, - self.controller.create, - self.req, body=self.body) - @mock.patch.object(compute_api.API, 'create', side_effect=exception.FixedIpNotFoundForAddress( address='dummy')) diff --git a/nova/tests/unit/api/openstack/compute/test_services.py b/nova/tests/unit/api/openstack/compute/test_services.py index 8d77ad0d9f..7c330bf61b 100644 --- a/nova/tests/unit/api/openstack/compute/test_services.py +++ b/nova/tests/unit/api/openstack/compute/test_services.py @@ -700,6 +700,21 @@ class 
ServicesTestV21(test.TestCase): mock_get_compute_nodes.assert_called_once_with( self.req.environ['nova.context'], compute.host) + @mock.patch( + 'nova.objects.ComputeNodeList.get_all_by_host', + side_effect=exception.ComputeHostNotFound(host='fake-compute-host')) + def test_services_delete_compute_host_not_found( + self, mock_get_all_by_host): + compute = objects.Service(self.ctxt, + **{'host': 'fake-compute-host', + 'binary': 'nova-compute', + 'topic': 'compute', + 'report_count': 0}) + compute.create() + self.controller.delete(self.req, compute.id) + mock_get_all_by_host.assert_called_with( + self.req.environ['nova.context'], 'fake-compute-host') + def test_services_delete_not_found(self): self.assertRaises(webob.exc.HTTPNotFound, diff --git a/nova/tests/unit/compute/test_compute_mgr.py b/nova/tests/unit/compute/test_compute_mgr.py index d4ec339d91..6dc9f4f83d 100644 --- a/nova/tests/unit/compute/test_compute_mgr.py +++ b/nova/tests/unit/compute/test_compute_mgr.py @@ -374,18 +374,20 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase, ) # First node in set should have been removed from DB + # Last node in set should have been added to DB. for db_node in db_nodes: if db_node.hypervisor_hostname == 'node1': db_node.destroy.assert_called_once_with() rc_mock.delete_resource_provider.assert_called_once_with( self.context, db_node, cascade=True) - mock_rt.remove_node.assert_called_once_with( - 'node1') + mock_rt.remove_node.assert_called_once_with('node1') mock_log.error.assert_called_once_with( "Failed to delete compute node resource provider for " "compute node %s: %s", db_node.uuid, mock.ANY) else: self.assertFalse(db_node.destroy.called) + self.assertEqual(1, mock_rt.remove_node.call_count) + mock_rt.clean_compute_node_cache.assert_called_once_with(db_nodes) @mock.patch('nova.scheduler.client.report.SchedulerReportClient.' 
'delete_resource_provider') diff --git a/nova/tests/unit/compute/test_resource_tracker.py b/nova/tests/unit/compute/test_resource_tracker.py index d47e92ffb1..02354032cc 100644 --- a/nova/tests/unit/compute/test_resource_tracker.py +++ b/nova/tests/unit/compute/test_resource_tracker.py @@ -4178,3 +4178,20 @@ class ProviderConfigTestCases(BaseTestCase): mock_log.warning.assert_called_once_with(*expected_log_call) self.assertIn(uuids.unknown, self.rt.absent_providers) self.assertEqual(result, []) + + +class TestCleanComputeNodeCache(BaseTestCase): + + def setUp(self): + super(TestCleanComputeNodeCache, self).setUp() + self._setup_rt() + self.context = context.RequestContext( + mock.sentinel.user_id, mock.sentinel.project_id) + + @mock.patch.object(resource_tracker.ResourceTracker, "remove_node") + def test_clean_compute_node_cache(self, mock_remove): + invalid_nodename = "invalid-node" + self.rt.compute_nodes[_NODENAME] = self.compute + self.rt.compute_nodes[invalid_nodename] = mock.sentinel.compute + self.rt.clean_compute_node_cache([self.compute]) + mock_remove.assert_called_once_with(invalid_nodename) diff --git a/nova/tests/unit/console/test_websocketproxy.py b/nova/tests/unit/console/test_websocketproxy.py index 58fe841db2..abe24af75f 100644 --- a/nova/tests/unit/console/test_websocketproxy.py +++ b/nova/tests/unit/console/test_websocketproxy.py @@ -15,6 +15,7 @@ """Tests for nova websocketproxy.""" import copy +import io import socket import mock @@ -625,6 +626,38 @@ class NovaProxyRequestHandlerTestCase(test.NoDBTestCase): b'' ] + client_addr = ('8.8.8.8', 54321) + mock_server = mock.MagicMock() + # This specifies that the server will be able to handle requests other + # than only websockets. + mock_server.only_upgrade = False + + # Constructing a handler will process the mock_req request passed in. + handler = websocketproxy.NovaProxyRequestHandler( + mock_req, client_addr, mock_server) + + # Collect the response data to verify at the end. The + # SimpleHTTPRequestHandler writes the response data to a 'wfile' + # attribute. + output = io.BytesIO() + handler.wfile = output + # Process the mock_req again to do the capture. + handler.do_GET() + output.seek(0) + result = output.readlines() + + # Verify no redirect happens and instead a 400 Bad Request is returned. + self.assertIn('400 URI must not start with //', result[0].decode()) + + def test_reject_open_redirect_3_slashes(self): + # This will test the behavior when an attempt is made to cause an open + # redirect. It should be rejected. + mock_req = mock.MagicMock() + mock_req.makefile().readline.side_effect = [ + b'GET ///example.com/%2F.. HTTP/1.1\r\n', + b'' + ] + # Collect the response data to verify at the end. The # SimpleHTTPRequestHandler writes the response data by calling the # request socket sendall() method. diff --git a/nova/tests/unit/fake_request_spec.py b/nova/tests/unit/fake_request_spec.py index c5fac666ee..b8049f7f38 100644 --- a/nova/tests/unit/fake_request_spec.py +++ b/nova/tests/unit/fake_request_spec.py @@ -57,14 +57,15 @@ PCI_REQUESTS = objects.InstancePCIRequests( PCI_REQUESTS.obj_reset_changes(recursive=True) -def fake_db_spec(): - req_obj = fake_spec_obj() +def fake_db_spec(spec_obj=None): + if not spec_obj: + spec_obj = fake_spec_obj() # NOTE(takashin): There is not 'retry' information in the DB table. 
- del req_obj.retry + del spec_obj.retry db_request_spec = { 'id': 1, - 'instance_uuid': req_obj.instance_uuid, - 'spec': jsonutils.dumps(req_obj.obj_to_primitive()), + 'instance_uuid': spec_obj.instance_uuid, + 'spec': jsonutils.dumps(spec_obj.obj_to_primitive()), } return db_request_spec diff --git a/nova/tests/unit/objects/test_request_spec.py b/nova/tests/unit/objects/test_request_spec.py index f31fa832e7..2973db369d 100644 --- a/nova/tests/unit/objects/test_request_spec.py +++ b/nova/tests/unit/objects/test_request_spec.py @@ -611,6 +611,44 @@ class _TestRequestSpecObject(object): self.assertIsInstance(req_obj.instance_group, objects.InstanceGroup) self.assertEqual('fresh', req_obj.instance_group.name) + @mock.patch('nova.objects.request_spec.RequestSpec.save') + @mock.patch.object( + request_spec.RequestSpec, '_get_by_instance_uuid_from_db') + @mock.patch('nova.objects.InstanceGroup.get_by_uuid') + def test_get_by_instance_uuid_numa_topology_migration( + self, mock_get_ig, get_by_uuid, mock_save + ): + # Simulate a pre-Victoria RequestSpec where the pcpuset field is not + # defined for the embedded InstanceNUMACell objects but the cpu_policy + # is dedicated meaning that cores in cpuset defines pinned cpus. So + # in Victoria or later these InstanceNUMACell objects should be + # translated to hold the cores in the pcpuset field instead. + numa_topology = objects.InstanceNUMATopology( + instance_uuid=uuids.instance_uuid, + cells=[ + objects.InstanceNUMACell( + id=0, cpuset={1, 2}, memory=512, cpu_policy="dedicated"), + objects.InstanceNUMACell( + id=1, cpuset={3, 4}, memory=512, cpu_policy="dedicated"), + ] + ) + spec_obj = fake_request_spec.fake_spec_obj() + spec_obj.numa_topology = numa_topology + fake_spec = fake_request_spec.fake_db_spec(spec_obj) + fake_spec['instance_uuid'] = uuids.instance_uuid + + get_by_uuid.return_value = fake_spec + mock_get_ig.return_value = objects.InstanceGroup(name='fresh') + + req_obj = request_spec.RequestSpec.get_by_instance_uuid( + self.context, fake_spec['instance_uuid']) + + self.assertEqual(2, len(req_obj.numa_topology.cells)) + self.assertEqual({1, 2}, req_obj.numa_topology.cells[0].pcpuset) + self.assertEqual({3, 4}, req_obj.numa_topology.cells[1].pcpuset) + + mock_save.assert_called_once() + def _check_update_primitive(self, req_obj, changes): self.assertEqual(req_obj.instance_uuid, changes['instance_uuid']) serialized_obj = objects.RequestSpec.obj_from_primitive( diff --git a/nova/tests/unit/policies/test_servers.py b/nova/tests/unit/policies/test_servers.py index 400201de81..57a130ac69 100644 --- a/nova/tests/unit/policies/test_servers.py +++ b/nova/tests/unit/policies/test_servers.py @@ -420,6 +420,9 @@ class ServersPolicyTest(base.BasePolicyTest): @mock.patch('nova.compute.api.API.create') @mock.patch('nova.compute.api.API.parse_availability_zone') + @mock.patch.object( + servers.ServersController, '_validate_host_availability_zone', + new=mock.Mock(return_value=None)) def test_create_forced_host_server_policy(self, mock_az, mock_create): # 'create' policy is checked before 'create:forced_host' so # we have to allow it for everyone otherwise it will diff --git a/nova/tests/unit/privsep/test_fs.py b/nova/tests/unit/privsep/test_fs.py index 0146469b17..89062acce9 100644 --- a/nova/tests/unit/privsep/test_fs.py +++ b/nova/tests/unit/privsep/test_fs.py @@ -65,13 +65,15 @@ class PrivsepFilesystemHelpersTestCase(test.NoDBTestCase): nova.privsep.fs.vginfo('vg') mock_execute.assert_called_with('vgs', '--noheadings', '--nosuffix', '--separator', 
'|', '--units', 'b', - '-o', 'vg_size,vg_free', 'vg') + '-o', 'vg_size,vg_free', 'vg', + attempts=3, delay_on_retry=True) @mock.patch('oslo_concurrency.processutils.execute') def test_lvlist(self, mock_execute): nova.privsep.fs.lvlist('vg') mock_execute.assert_called_with('lvs', '--noheadings', '-o', - 'lv_name', 'vg') + 'lv_name', 'vg', + attempts=3, delay_on_retry=True) @mock.patch('oslo_concurrency.processutils.execute') def test_lvinfo(self, mock_execute): diff --git a/nova/tests/unit/storage/test_rbd.py b/nova/tests/unit/storage/test_rbd.py index 65796ebc1f..396f22c643 100644 --- a/nova/tests/unit/storage/test_rbd.py +++ b/nova/tests/unit/storage/test_rbd.py @@ -119,13 +119,15 @@ class RbdTestCase(test.NoDBTestCase): rados_patcher = mock.patch.object(rbd_utils, 'rados') self.mock_rados = rados_patcher.start() self.addCleanup(rados_patcher.stop) - self.mock_rados.Rados = mock.Mock - self.mock_rados.Rados.ioctx = mock.Mock() - self.mock_rados.Rados.connect = mock.Mock() - self.mock_rados.Rados.shutdown = mock.Mock() - self.mock_rados.Rados.open_ioctx = mock.Mock() - self.mock_rados.Rados.open_ioctx.return_value = \ - self.mock_rados.Rados.ioctx + self.mock_rados.Rados = mock.Mock() + self.rados_inst = mock.Mock() + self.mock_rados.Rados.return_value = self.rados_inst + self.rados_inst.ioctx = mock.Mock() + self.rados_inst.connect = mock.Mock() + self.rados_inst.shutdown = mock.Mock() + self.rados_inst.open_ioctx = mock.Mock() + self.rados_inst.open_ioctx.return_value = \ + self.rados_inst.ioctx self.mock_rados.Error = Exception rbd_patcher = mock.patch.object(rbd_utils, 'rbd') @@ -339,33 +341,31 @@ class RbdTestCase(test.NoDBTestCase): def test_connect_to_rados_default(self): ret = self.driver._connect_to_rados() - self.mock_rados.Rados.connect.assert_called_once_with( + self.rados_inst.connect.assert_called_once_with( timeout=self.rbd_connect_timeout) - self.assertTrue(self.mock_rados.Rados.open_ioctx.called) - self.assertIsInstance(ret[0], self.mock_rados.Rados) - self.assertEqual(self.mock_rados.Rados.ioctx, ret[1]) - self.mock_rados.Rados.open_ioctx.assert_called_with(self.rbd_pool) + self.assertTrue(self.rados_inst.open_ioctx.called) + self.assertEqual(self.rados_inst.ioctx, ret[1]) + self.rados_inst.open_ioctx.assert_called_with(self.rbd_pool) def test_connect_to_rados_different_pool(self): ret = self.driver._connect_to_rados('alt_pool') - self.mock_rados.Rados.connect.assert_called_once_with( + self.rados_inst.connect.assert_called_once_with( timeout=self.rbd_connect_timeout) - self.assertTrue(self.mock_rados.Rados.open_ioctx.called) - self.assertIsInstance(ret[0], self.mock_rados.Rados) - self.assertEqual(self.mock_rados.Rados.ioctx, ret[1]) - self.mock_rados.Rados.open_ioctx.assert_called_with('alt_pool') + self.assertTrue(self.rados_inst.open_ioctx.called) + self.assertEqual(self.rados_inst.ioctx, ret[1]) + self.rados_inst.open_ioctx.assert_called_with('alt_pool') def test_connect_to_rados_error(self): - self.mock_rados.Rados.open_ioctx.side_effect = self.mock_rados.Error + self.rados_inst.open_ioctx.side_effect = self.mock_rados.Error self.assertRaises(self.mock_rados.Error, self.driver._connect_to_rados) - self.mock_rados.Rados.open_ioctx.assert_called_once_with( + self.rados_inst.open_ioctx.assert_called_once_with( self.rbd_pool) - self.mock_rados.Rados.shutdown.assert_called_once_with() + self.rados_inst.shutdown.assert_called_once_with() def test_connect_to_rados_unicode_arg(self): self.driver._connect_to_rados(u'unicode_pool') - 
self.mock_rados.Rados.open_ioctx.assert_called_with( + self.rados_inst.open_ioctx.assert_called_with( test.MatchType(str)) def test_ceph_args_none(self): diff --git a/nova/tests/unit/virt/libvirt/test_config.py b/nova/tests/unit/virt/libvirt/test_config.py index bf9eb1994e..17008e7600 100644 --- a/nova/tests/unit/virt/libvirt/test_config.py +++ b/nova/tests/unit/virt/libvirt/test_config.py @@ -1604,6 +1604,24 @@ class LibvirtConfigGuestHostdevPCI(LibvirtConfigBaseTest): self.assertEqual(obj.mode, 'subsystem') self.assertEqual(obj.type, 'usb') + def test_config_alias_parse(self): + xml = """ + <hostdev mode='subsystem' type='pci' managed='yes'> + <driver name='vfio'/> + <source> + <address domain='0x0000' bus='0x81' slot='0x00' + function='0x1'/> + </source> + <alias name='hostdev1'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x05' + function='0x0'/> + </hostdev>""" + + xmldoc = etree.fromstring(xml) + obj = config.LibvirtConfigGuestHostdevPCI() + obj.parse_dom(xmldoc) + self.assertEqual('hostdev1', obj.alias) + class LibvirtConfigGuestHostdevMDEV(LibvirtConfigBaseTest): diff --git a/nova/tests/unit/virt/libvirt/test_driver.py b/nova/tests/unit/virt/libvirt/test_driver.py index 345f9ca7fb..e095ab32f0 100644 --- a/nova/tests/unit/virt/libvirt/test_driver.py +++ b/nova/tests/unit/virt/libvirt/test_driver.py @@ -4909,6 +4909,33 @@ class LibvirtConnTestCase(test.NoDBTestCase, self.assertTrue(cfg.features[2].vapic) self.assertEqual(hvid_hidden, cfg.features[2].vendorid_spoof) + @mock.patch.object(host.Host, 'has_min_version', + new=mock.Mock(return_value=True)) + def test_get_guest_config_apic_workaround(self): + self.flags(virt_type='qemu', group='libvirt') + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True) + instance_ref = objects.Instance(**self.test_instance) + image_meta = objects.ImageMeta.from_dict(self.test_image_meta) + disk_info = blockinfo.get_disk_info( + CONF.libvirt.virt_type, instance_ref, image_meta) + + cfg = drvr._get_guest_config( + instance_ref, _fake_network_info(self), image_meta, disk_info) + + self.assertEqual(2, len(cfg.features)) + self.assertIsInstance( + cfg.features[0], vconfig.LibvirtConfigGuestFeatureACPI) + self.assertIsInstance( + cfg.features[1], vconfig.LibvirtConfigGuestFeatureAPIC) + + self.flags(libvirt_disable_apic=True, group='workarounds') + cfg = drvr._get_guest_config( + instance_ref, _fake_network_info(self), image_meta, disk_info) + + self.assertEqual(1, len(cfg.features)) + self.assertIsInstance( + cfg.features[0], vconfig.LibvirtConfigGuestFeatureACPI) + def test_get_guest_config_windows_hyperv_feature2(self): self._test_get_guest_config_windows_hyperv() @@ -9084,6 +9111,19 @@ class LibvirtConnTestCase(test.NoDBTestCase, uuids.volume_id) mock_encryptor.detach_volume.assert_not_called() + # assert that no attempt to remove the secret is made when + # destroy_secrets=False + drvr._host.find_secret.reset_mock() + drvr._host.delete_secret.reset_mock() + drvr._disconnect_volume( + self.context, + connection_info, + instance, + encryption=encryption, + destroy_secrets=False + ) + drvr._host.delete_secret.assert_not_called() + # assert that the encryptor is used if no secret is found drvr._host.find_secret.reset_mock() drvr._host.delete_secret.reset_mock() @@ -10147,6 +10187,36 @@ class LibvirtConnTestCase(test.NoDBTestCase, mock_find_secret.assert_called_once_with('volume', uuids.volume_id) mock_get_encryptor.assert_not_called() + @mock.patch('nova.virt.libvirt.host.Host.delete_secret') + 
@mock.patch('nova.virt.libvirt.host.Host.find_secret', new=mock.Mock()) + def test_detach_encryptor_skip_secret_removal(self, mock_delete_secret): + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) + drvr._detach_encryptor( + self.context, + { + 'data': { + 'volume_id': uuids.volume_id + } + }, + None, + destroy_secrets=False + ) + # Assert that no attempt is made to delete the volume secert + mock_delete_secret.assert_not_called() + + drvr._detach_encryptor( + self.context, + { + 'data': { + 'volume_id': uuids.volume_id + } + }, + None, + destroy_secrets=True + ) + # Assert that volume secert is deleted + mock_delete_secret.assert_called_once_with('volume', uuids.volume_id) + def test_allow_native_luksv1(self): drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) self.assertFalse(drvr._allow_native_luksv1({})) @@ -15793,7 +15863,8 @@ class LibvirtConnTestCase(test.NoDBTestCase, mock_domain_destroy.assert_called_once_with() mock_teardown_container.assert_called_once_with(instance) mock_cleanup.assert_called_once_with(self.context, instance, - network_info, None, False) + network_info, None, False, + destroy_secrets=True) @mock.patch.object(libvirt_driver.LibvirtDriver, 'cleanup') @mock.patch.object(libvirt_driver.LibvirtDriver, '_teardown_container') @@ -15813,7 +15884,8 @@ class LibvirtConnTestCase(test.NoDBTestCase, mock.call(instance)]) mock_teardown_container.assert_called_once_with(instance) mock_cleanup.assert_called_once_with(self.context, instance, - network_info, None, False) + network_info, None, False, + destroy_secrets=True) @mock.patch.object(host.Host, 'get_guest') def test_reboot_different_ids(self, mock_get): @@ -16034,7 +16106,8 @@ class LibvirtConnTestCase(test.NoDBTestCase, mock_get_mdev.assert_called_once_with(instance) mock_destroy.assert_called_once_with(self.context, instance, network_info, destroy_disks=False, - block_device_info=block_device_info) + block_device_info=block_device_info, + destroy_secrets=False) mock_get_guest_xml.assert_called_once_with(self.context, instance, network_info, mock.ANY, mock.ANY, @@ -16042,7 +16115,48 @@ class LibvirtConnTestCase(test.NoDBTestCase, accel_info=accel_info) mock_create_guest_with_network.assert_called_once_with(self.context, dummyxml, instance, network_info, block_device_info, - vifs_already_plugged=True) + vifs_already_plugged=True, external_events=[]) + + @mock.patch('oslo_utils.fileutils.ensure_tree', new=mock.Mock()) + @mock.patch('nova.virt.libvirt.LibvirtDriver.get_info') + @mock.patch('nova.virt.libvirt.LibvirtDriver._create_guest_with_network') + @mock.patch('nova.virt.libvirt.LibvirtDriver._get_guest_xml') + @mock.patch('nova.virt.libvirt.LibvirtDriver.destroy', new=mock.Mock()) + @mock.patch( + 'nova.virt.libvirt.LibvirtDriver._get_all_assigned_mediated_devices', + new=mock.Mock(return_value={})) + def test_hard_reboot_wait_for_plug( + self, mock_get_guest_xml, mock_create_guest_with_network, mock_get_info + ): + self.flags( + group="workarounds", + wait_for_vif_plugged_event_during_hard_reboot=["normal"]) + self.context.auth_token = None + instance = objects.Instance(**self.test_instance) + network_info = _fake_network_info(self, num_networks=4) + network_info[0]["vnic_type"] = "normal" + network_info[1]["vnic_type"] = "direct" + network_info[2]["vnic_type"] = "normal" + network_info[3]["vnic_type"] = "direct-physical" + block_device_info = None + return_values = [hardware.InstanceInfo(state=power_state.SHUTDOWN), + hardware.InstanceInfo(state=power_state.RUNNING)] + 
mock_get_info.side_effect = return_values + mock_get_guest_xml.return_value = mock.sentinel.xml + + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) + drvr._hard_reboot( + self.context, instance, network_info, block_device_info) + + mock_create_guest_with_network.assert_called_once_with( + self.context, mock.sentinel.xml, instance, network_info, + block_device_info, + vifs_already_plugged=False, + external_events=[ + ('network-vif-plugged', uuids.vif1), + ('network-vif-plugged', uuids.vif3), + ] + ) @mock.patch('oslo_utils.fileutils.ensure_tree') @mock.patch('oslo_service.loopingcall.FixedIntervalLoopingCall') @@ -19321,6 +19435,59 @@ class LibvirtConnTestCase(test.NoDBTestCase, self.assertTrue(instance.cleaned) save.assert_called_once_with() + @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._disconnect_volume') + @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._undefine_domain', + new=mock.Mock()) + @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_vpmems', + new=mock.Mock(return_value=None)) + def test_cleanup_destroy_secrets(self, mock_disconnect_volume): + block_device_info = { + 'block_device_mapping': [ + { + 'connection_info': mock.sentinel.connection_info + } + ] + } + instance = objects.Instance(self.context, **self.test_instance) + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI()) + + # Pass destroy_vifs=False and destroy_disks=False as we only care about + # asserting the behaviour of destroy_secrets in this test. + drvr.cleanup( + self.context, + instance, + network_info={}, + block_device_info=block_device_info, + destroy_vifs=False, + destroy_disks=False, + destroy_secrets=False + ) + drvr.cleanup( + self.context, + instance, + network_info={}, + block_device_info=block_device_info, + destroy_vifs=False, + destroy_disks=False, + ) + + # Assert that disconnect_volume is called with destroy_secrets=False + # and destroy_secrets=True by default + mock_disconnect_volume.assert_has_calls([ + mock.call( + self.context, + mock.sentinel.connection_info, + instance, + destroy_secrets=False + ), + mock.call( + self.context, + mock.sentinel.connection_info, + instance, + destroy_secrets=True + ) + ]) + @mock.patch.object(libvirt_driver.LibvirtDriver, '_get_volume_encryption') @mock.patch.object(libvirt_driver.LibvirtDriver, '_allow_native_luksv1') def test_swap_volume_native_luks_blocked(self, mock_allow_native_luksv1, @@ -23334,15 +23501,23 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin): # check that the internal event handling is cleaned up self.assertEqual(set(), drvr._device_event_handler._waiters) - @ddt.data(power_state.RUNNING, power_state.PAUSED) - def test__detach_with_retry_live_success(self, state): + @ddt.unpack + @ddt.data( + (power_state.RUNNING, vconfig.LibvirtConfigGuestDisk()), + (power_state.RUNNING, vconfig.LibvirtConfigGuestInterface()), + (power_state.RUNNING, vconfig.LibvirtConfigGuestHostdevPCI()), + (power_state.PAUSED, vconfig.LibvirtConfigGuestDisk()), + (power_state.PAUSED, vconfig.LibvirtConfigGuestInterface()), + (power_state.PAUSED, vconfig.LibvirtConfigGuestHostdevPCI()), + ) + def test__detach_with_retry_live_success(self, state, device_class): """Test detach only from the live domain""" drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) mock_guest = mock.Mock(spec=libvirt_guest.Guest) mock_guest.get_power_state.return_value = state mock_guest.has_persistent_configuration.return_value = False - mock_dev = mock.Mock(spec=vconfig.LibvirtConfigGuestDisk) + mock_dev = 
mock.Mock(spec_set=device_class) mock_dev.alias = 'virtio-disk1' mock_get_device_conf_func = mock.Mock( diff --git a/nova/tests/unit/virt/libvirt/test_migration.py b/nova/tests/unit/virt/libvirt/test_migration.py index 901c014e11..04a28f67a0 100644 --- a/nova/tests/unit/virt/libvirt/test_migration.py +++ b/nova/tests/unit/virt/libvirt/test_migration.py @@ -948,7 +948,48 @@ class UtilityMigrationTestCase(test.NoDBTestCase): doc = etree.fromstring(original_xml) ex = self.assertRaises(KeyError, migration._update_vif_xml, doc, data, get_vif_config) - self.assertIn("CA:FE:DE:AD:BE:EF", str(ex)) + self.assertIn("ca:fe:de:ad:be:ef", str(ex)) + + def test_update_vif_xml_lower_case_mac(self): + """Tests that the vif in the migrate data is not found in the existing + guest interfaces. + """ + conf = vconfig.LibvirtConfigGuestInterface() + conf.net_type = "bridge" + conf.source_dev = "qbra188171c-ea" + conf.target_dev = "tapa188171c-ea" + conf.mac_addr = "DE:AD:BE:EF:CA:FE" + conf.model = "virtio" + original_xml = """<domain> + <uuid>3de6550a-8596-4937-8046-9d862036bca5</uuid> + <devices> + <interface type="bridge"> + <mac address="de:ad:be:ef:ca:fe"/> + <model type="virtio"/> + <source bridge="qbra188171c-ea"/> + <target dev="tapa188171c-ea"/> + <virtualport type="openvswitch"> + <parameters interfaceid="%s"/> + </virtualport> + <address type='pci' domain='0x0000' bus='0x00' slot='0x04' + function='0x0'/> + </interface> + </devices> + </domain>""" % uuids.ovs + expected_xml = """<domain> + <uuid>3de6550a-8596-4937-8046-9d862036bca5</uuid> + <devices> + <interface type="bridge"> + <mac address="DE:AD:BE:EF:CA:FE"/> + <model type="virtio"/> + <source bridge="qbra188171c-ea"/> + <target dev="tapa188171c-ea"/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x04' + function='0x0'/> + </interface> + </devices> + </domain>""" + self._test_update_vif_xml(conf, original_xml, expected_xml) class MigrationMonitorTestCase(test.NoDBTestCase): diff --git a/nova/tests/unit/virt/libvirt/test_vif.py b/nova/tests/unit/virt/libvirt/test_vif.py index 12d11c040b..5e638bdd76 100644 --- a/nova/tests/unit/virt/libvirt/test_vif.py +++ b/nova/tests/unit/virt/libvirt/test_vif.py @@ -378,6 +378,10 @@ class LibvirtVifTestCase(test.NoDBTestCase): uuid='f0000000-0000-0000-0000-000000000001', project_id=723) + flavor_1vcpu = objects.Flavor(vcpus=1, memory=512, root_gb=1) + + flavor_2vcpu = objects.Flavor(vcpus=2, memory=512, root_gb=1) + bandwidth = { 'quota:vif_inbound_peak': '200', 'quota:vif_outbound_peak': '20', @@ -1068,30 +1072,50 @@ class LibvirtVifTestCase(test.NoDBTestCase): @mock.patch('nova.privsep.linux_net.device_exists', return_value=True) @mock.patch('nova.privsep.linux_net.set_device_mtu') @mock.patch('nova.privsep.linux_net.create_tap_dev') - def test_plug_tap_kvm_virtio(self, mock_create_tap_dev, mock_set_mtu, - mock_device_exists): + def test_plug_tap_kvm_virtio( + self, mock_create_tap_dev, mock_set_mtu, mock_device_exists): d1 = vif.LibvirtGenericVIFDriver() ins = objects.Instance( id=1, uuid='f0000000-0000-0000-0000-000000000001', + flavor=self.flavor_2vcpu, project_id=723, system_metadata={} ) d1.plug(ins, self.vif_tap) - mock_create_tap_dev.assert_called_once_with('tap-xxx-yyy-zzz', None, - multiqueue=False) + mock_create_tap_dev.assert_called_once_with( + 'tap-xxx-yyy-zzz', None, multiqueue=False) mock_create_tap_dev.reset_mock() d2 = vif.LibvirtGenericVIFDriver() mq_ins = objects.Instance( id=1, uuid='f0000000-0000-0000-0000-000000000001', + flavor=self.flavor_2vcpu, project_id=723, 
system_metadata={ 'image_hw_vif_multiqueue_enabled': 'True' } ) d2.plug(mq_ins, self.vif_tap) - mock_create_tap_dev.assert_called_once_with('tap-xxx-yyy-zzz', None, - multiqueue=True) + mock_create_tap_dev.assert_called_once_with( + 'tap-xxx-yyy-zzz', None, multiqueue=True) + + @mock.patch('nova.privsep.linux_net.device_exists', return_value=True) + @mock.patch('nova.privsep.linux_net.set_device_mtu') + @mock.patch('nova.privsep.linux_net.create_tap_dev') + def test_plug_tap_mq_ignored_1vcpu( + self, mock_create_tap_dev, mock_set_mtu, mock_device_exists): + + d1 = vif.LibvirtGenericVIFDriver() + mq_ins = objects.Instance( + id=1, uuid='f0000000-0000-0000-0000-000000000001', + image_ref=uuids.image_ref, flavor=self.flavor_1vcpu, + project_id=723, system_metadata={ + 'image_hw_vif_multiqueue_enabled': 'True', + } + ) + d1.plug(mq_ins, self.vif_tap) + mock_create_tap_dev.assert_called_once_with( + 'tap-xxx-yyy-zzz', None, multiqueue=False) @mock.patch('nova.privsep.linux_net.device_exists', return_value=True) @mock.patch('nova.privsep.linux_net.set_device_mtu') @@ -1106,14 +1130,14 @@ class LibvirtVifTestCase(test.NoDBTestCase): d1 = vif.LibvirtGenericVIFDriver() ins = objects.Instance( id=1, uuid='f0000000-0000-0000-0000-000000000001', + flavor=self.flavor_2vcpu, project_id=723, system_metadata={ 'image_hw_vif_multiqueue_enabled': 'True' } ) d1.plug(ins, self.vif_tap) - mock_create_tap_dev.assert_called_once_with('tap-xxx-yyy-zzz', - None, - multiqueue=False) + mock_create_tap_dev.assert_called_once_with( + 'tap-xxx-yyy-zzz', None, multiqueue=False) @mock.patch('nova.privsep.linux_net.device_exists', return_value=True) @mock.patch('nova.privsep.linux_net.set_device_mtu') @@ -1124,15 +1148,15 @@ class LibvirtVifTestCase(test.NoDBTestCase): d1 = vif.LibvirtGenericVIFDriver() ins = objects.Instance( id=1, uuid='f0000000-0000-0000-0000-000000000001', + flavor=self.flavor_2vcpu, project_id=723, system_metadata={ 'image_hw_vif_multiqueue_enabled': 'True', 'image_hw_vif_model': 'e1000', } ) d1.plug(ins, self.vif_tap) - mock_create_tap_dev.assert_called_once_with('tap-xxx-yyy-zzz', - None, - multiqueue=False) + mock_create_tap_dev.assert_called_once_with( + 'tap-xxx-yyy-zzz', None, multiqueue=False) def test_unplug_tap(self): d = vif.LibvirtGenericVIFDriver() diff --git a/nova/tests/unit/virt/test_hardware.py b/nova/tests/unit/virt/test_hardware.py index 82aace61d6..b53b2446f6 100644 --- a/nova/tests/unit/virt/test_hardware.py +++ b/nova/tests/unit/virt/test_hardware.py @@ -766,119 +766,6 @@ class VCPUTopologyTest(test.NoDBTestCase): }, "expect": [16, 1, 1] }, - { # NUMA needs threads, only cores requested by flavor - "allow_threads": True, - "flavor": objects.Flavor( - vcpus=4, memory_mb=2048, - extra_specs={ - "hw:cpu_cores": "2", - } - ), - "image": { - "properties": { - "hw_cpu_max_cores": 2, - } - }, - "numa_topology": objects.InstanceNUMATopology(cells=[ - objects.InstanceNUMACell( - id=0, cpuset=set([0, 1]), pcpuset=set(), memory=1024, - cpu_topology=objects.VirtCPUTopology( - sockets=1, cores=1, threads=2)), - objects.InstanceNUMACell( - id=1, cpuset=set([2, 3]), pcpuset=set(), memory=1024), - ]), - "expect": [1, 2, 2] - }, - { # NUMA needs threads, but more than requested by flavor - the - # least amount of threads wins - "allow_threads": True, - "flavor": objects.Flavor( - vcpus=4, memory_mb=2048, - extra_specs={ - "hw:cpu_threads": "2", - } - ), - "image": { - "properties": {} - }, - "numa_topology": objects.InstanceNUMATopology(cells=[ - objects.InstanceNUMACell( - id=0, cpuset=set([0, 
1, 2, 3]), pcpuset=set(), - memory=2048, - cpu_topology=objects.VirtCPUTopology( - sockets=1, cores=1, threads=4)), - ]), - "expect": [2, 1, 2] - }, - { # NUMA needs threads, but more than limit in flavor - the - # least amount of threads which divides into the vcpu - # count wins. So with desired 4, max of 3, and - # vcpu count of 4, we should get 2 threads. - "allow_threads": True, - "flavor": objects.Flavor( - vcpus=4, memory_mb=2048, - extra_specs={ - "hw:cpu_max_sockets": "5", - "hw:cpu_max_cores": "2", - "hw:cpu_max_threads": "3", - } - ), - "image": { - "properties": {} - }, - "numa_topology": objects.InstanceNUMATopology(cells=[ - objects.InstanceNUMACell( - id=0, cpuset=set([0, 1, 2, 3]), pcpuset=set(), - memory=2048, - cpu_topology=objects.VirtCPUTopology( - sockets=1, cores=1, threads=4)), - ]), - "expect": [2, 1, 2] - }, - { # NUMA needs threads, but thread count does not - # divide into flavor vcpu count, so we must - # reduce thread count to closest divisor - "allow_threads": True, - "flavor": objects.Flavor( - vcpus=6, memory_mb=2048, - extra_specs={} - ), - "image": { - "properties": {} - }, - "numa_topology": objects.InstanceNUMATopology(cells=[ - objects.InstanceNUMACell( - id=0, cpuset=set([0, 1, 2, 3]), pcpuset=set(), - memory=2048, - cpu_topology=objects.VirtCPUTopology( - sockets=1, cores=1, threads=4)), - ]), - "expect": [2, 1, 3] - }, - { # NUMA needs different number of threads per cell - the least - # amount of threads wins - "allow_threads": True, - "flavor": objects.Flavor( - vcpus=8, memory_mb=2048, - extra_specs={} - ), - "image": { - "properties": {} - }, - "numa_topology": objects.InstanceNUMATopology(cells=[ - objects.InstanceNUMACell( - id=0, cpuset=set([0, 1, 2, 3]), pcpuset=set(), - memory=1024, - cpu_topology=objects.VirtCPUTopology( - sockets=1, cores=2, threads=2)), - objects.InstanceNUMACell( - id=1, cpuset=set([4, 5, 6, 7]), pcpuset=set(), - memory=1024, - cpu_topology=objects.VirtCPUTopology( - sockets=1, cores=1, threads=4)), - ]), - "expect": [4, 1, 2] - }, ] for topo_test in testdata: @@ -886,8 +773,7 @@ class VCPUTopologyTest(test.NoDBTestCase): topology = hw._get_desirable_cpu_topologies( topo_test["flavor"], image_meta, - topo_test["allow_threads"], - topo_test.get("numa_topology"))[0] + topo_test["allow_threads"],)[0] self.assertEqual(topo_test["expect"][0], topology.sockets) self.assertEqual(topo_test["expect"][1], topology.cores) @@ -3284,8 +3170,7 @@ class CPUPinningCellTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase): inst_pin = hw._numa_fit_instance_cell(host_pin, inst_pin, limits) self.assertInstanceCellPinned(inst_pin) - got_topo = objects.VirtCPUTopology(sockets=1, cores=3, threads=1) - self.assertEqualTopology(got_topo, inst_pin.cpu_topology) + self.assertNotIn('cpu_topology', inst_pin) got_pinning = {x: x for x in range(0, 3)} self.assertEqual(got_pinning, inst_pin.cpu_pinning) @@ -3340,8 +3225,7 @@ class CPUPinningCellTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase): inst_pin = hw._numa_fit_instance_cell(host_pin, inst_pin, limits) self.assertInstanceCellPinned(inst_pin) - got_topo = objects.VirtCPUTopology(sockets=1, cores=4, threads=1) - self.assertEqualTopology(got_topo, inst_pin.cpu_topology) + self.assertNotIn('cpu_topology', inst_pin) got_pinning = {x: x for x in range(0, 4)} self.assertEqual(got_pinning, inst_pin.cpu_pinning) @@ -3369,8 +3253,7 @@ class CPUPinningCellTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase): inst_pin = hw._numa_fit_instance_cell(host_pin, inst_pin, limits) 
self.assertInstanceCellPinned(inst_pin) - got_topo = objects.VirtCPUTopology(sockets=1, cores=2, threads=2) - self.assertEqualTopology(got_topo, inst_pin.cpu_topology) + self.assertNotIn('cpu_topology', inst_pin) got_pinning = {x: x for x in range(0, 4)} self.assertEqual(got_pinning, inst_pin.cpu_pinning) @@ -3398,8 +3281,7 @@ class CPUPinningCellTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase): inst_pin = hw._numa_fit_instance_cell(host_pin, inst_pin, limits) self.assertInstanceCellPinned(inst_pin) - got_topo = objects.VirtCPUTopology(sockets=1, cores=4, threads=2) - self.assertEqualTopology(got_topo, inst_pin.cpu_topology) + self.assertNotIn('cpu_topology', inst_pin) got_pinning = {x: x for x in range(0, 8)} self.assertEqual(got_pinning, inst_pin.cpu_pinning) @@ -3426,8 +3308,7 @@ class CPUPinningCellTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase): inst_pin = hw._numa_fit_instance_cell(host_pin, inst_pin, limits) self.assertInstanceCellPinned(inst_pin) - got_topo = objects.VirtCPUTopology(sockets=1, cores=2, threads=2) - self.assertEqualTopology(got_topo, inst_pin.cpu_topology) + self.assertNotIn('cpu_topology', inst_pin) got_pinning = {0: 0, 1: 3, 2: 4, 3: 7} self.assertEqual(got_pinning, inst_pin.cpu_pinning) @@ -3454,8 +3335,7 @@ class CPUPinningCellTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase): inst_pin = hw._numa_fit_instance_cell(host_pin, inst_pin, limits) self.assertInstanceCellPinned(inst_pin) - got_topo = objects.VirtCPUTopology(sockets=1, cores=1, threads=4) - self.assertEqualTopology(got_topo, inst_pin.cpu_topology) + self.assertNotIn('cpu_topology', inst_pin) got_pinning = {x: x for x in range(0, 4)} self.assertEqual(got_pinning, inst_pin.cpu_pinning) @@ -3482,8 +3362,7 @@ class CPUPinningCellTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase): inst_pin = hw._numa_fit_instance_cell(host_pin, inst_pin, limits) self.assertInstanceCellPinned(inst_pin) - got_topo = objects.VirtCPUTopology(sockets=1, cores=2, threads=2) - self.assertEqualTopology(got_topo, inst_pin.cpu_topology) + self.assertNotIn('cpu_topology', inst_pin) got_pinning = {x: x for x in range(0, 4)} self.assertEqual(got_pinning, inst_pin.cpu_pinning) @@ -3561,8 +3440,7 @@ class CPUPinningCellTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase): inst_pin = hw._numa_fit_instance_cell(host_pin, inst_pin, limits) self.assertInstanceCellPinned(inst_pin) - got_topo = objects.VirtCPUTopology(sockets=1, cores=2, threads=2) - self.assertEqualTopology(got_topo, inst_pin.cpu_topology) + self.assertNotIn('cpu_topology', inst_pin) def test_get_pinning_require_policy_fits_w_usage(self): host_pin = objects.NUMACell( @@ -3588,8 +3466,7 @@ class CPUPinningCellTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase): inst_pin = hw._numa_fit_instance_cell(host_pin, inst_pin, limits) self.assertInstanceCellPinned(inst_pin) - got_topo = objects.VirtCPUTopology(sockets=1, cores=2, threads=2) - self.assertEqualTopology(got_topo, inst_pin.cpu_topology) + self.assertNotIn('cpu_topology', inst_pin) def test_get_pinning_host_siblings_instance_odd_fit(self): host_pin = objects.NUMACell( @@ -3612,10 +3489,8 @@ class CPUPinningCellTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase): ) inst_pin = hw._numa_fit_instance_cell(host_pin, inst_pin, limits) - + self.assertNotIn('cpu_topology', inst_pin) self.assertInstanceCellPinned(inst_pin) - got_topo = objects.VirtCPUTopology(sockets=1, cores=5, threads=1) - self.assertEqualTopology(got_topo, inst_pin.cpu_topology) def test_get_pinning_host_siblings_instance_fit_optimize_threads(self): 
host_pin = objects.NUMACell( @@ -3638,10 +3513,8 @@ class CPUPinningCellTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase): ) inst_pin = hw._numa_fit_instance_cell(host_pin, inst_pin, limits) - + self.assertNotIn('cpu_topology', inst_pin) self.assertInstanceCellPinned(inst_pin) - got_topo = objects.VirtCPUTopology(sockets=1, cores=3, threads=2) - self.assertEqualTopology(got_topo, inst_pin.cpu_topology) def test_get_pinning_host_siblings_instance_odd_fit_w_usage(self): host_pin = objects.NUMACell( @@ -3664,10 +3537,8 @@ class CPUPinningCellTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase): ) inst_pin = hw._numa_fit_instance_cell(host_pin, inst_pin, limits) - + self.assertNotIn('cpu_topology', inst_pin) self.assertInstanceCellPinned(inst_pin) - got_topo = objects.VirtCPUTopology(sockets=1, cores=3, threads=1) - self.assertEqualTopology(got_topo, inst_pin.cpu_topology) def test_get_pinning_host_siblings_instance_mixed_siblings(self): host_pin = objects.NUMACell( @@ -3690,10 +3561,8 @@ class CPUPinningCellTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase): ) inst_pin = hw._numa_fit_instance_cell(host_pin, inst_pin, limits) - + self.assertNotIn('cpu_topology', inst_pin) self.assertInstanceCellPinned(inst_pin) - got_topo = objects.VirtCPUTopology(sockets=1, cores=4, threads=1) - self.assertEqualTopology(got_topo, inst_pin.cpu_topology) def test_get_pinning_host_siblings_instance_odd_fit_orphan_only(self): host_pin = objects.NUMACell( @@ -3716,10 +3585,8 @@ class CPUPinningCellTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase): ) inst_pin = hw._numa_fit_instance_cell(host_pin, inst_pin, limits) - + self.assertNotIn('cpu_topology', inst_pin) self.assertInstanceCellPinned(inst_pin) - got_topo = objects.VirtCPUTopology(sockets=1, cores=4, threads=1) - self.assertEqualTopology(got_topo, inst_pin.cpu_topology) def test_get_pinning_host_siblings_large_instance_odd_fit(self): host_pin = objects.NUMACell( @@ -3744,11 +3611,9 @@ class CPUPinningCellTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase): ) inst_pin = hw._numa_fit_instance_cell(host_pin, inst_pin, limits) - + self.assertNotIn('cpu_topology', inst_pin) self.assertInstanceCellPinned(inst_pin) self.assertPinningPreferThreads(inst_pin, host_pin) - got_topo = objects.VirtCPUTopology(sockets=1, cores=5, threads=1) - self.assertEqualTopology(got_topo, inst_pin.cpu_topology) # TODO(stephenfin): Remove when we drop support for vcpu_pin_set def test_get_pinning_isolate_policy_too_few_fully_free_cores(self): @@ -3775,7 +3640,7 @@ class CPUPinningCellTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase): ) inst_pin = hw._numa_fit_instance_cell(host_pin, inst_pin, limits) - + self.assertNotIn('cpu_topology', inst_pin) self.assertIsNone(inst_pin) # TODO(stephenfin): Remove when we drop support for vcpu_pin_set @@ -3830,10 +3695,8 @@ class CPUPinningCellTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase): ) inst_pin = hw._numa_fit_instance_cell(host_pin, inst_pin, limits) - + self.assertNotIn('cpu_topology', inst_pin) self.assertInstanceCellPinned(inst_pin) - got_topo = objects.VirtCPUTopology(sockets=1, cores=2, threads=1) - self.assertEqualTopology(got_topo, inst_pin.cpu_topology) # TODO(stephenfin): Remove when we drop support for vcpu_pin_set def test_get_pinning_isolate_policy_fits_ht_host(self): @@ -3860,10 +3723,8 @@ class CPUPinningCellTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase): ) inst_pin = hw._numa_fit_instance_cell(host_pin, inst_pin, limits) - + self.assertNotIn('cpu_topology', inst_pin) 
self.assertInstanceCellPinned(inst_pin) - got_topo = objects.VirtCPUTopology(sockets=1, cores=2, threads=1) - self.assertEqualTopology(got_topo, inst_pin.cpu_topology) # TODO(stephenfin): Remove when we drop support for vcpu_pin_set def test_get_pinning_isolate_policy_fits_w_usage(self): @@ -3890,10 +3751,8 @@ class CPUPinningCellTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase): ) inst_pin = hw._numa_fit_instance_cell(host_pin, inst_pin, limits) - + self.assertNotIn('cpu_topology', inst_pin) self.assertInstanceCellPinned(inst_pin) - got_topo = objects.VirtCPUTopology(sockets=1, cores=2, threads=1) - self.assertEqualTopology(got_topo, inst_pin.cpu_topology) # TODO(stephenfin): Remove when we drop support for vcpu_pin_set @mock.patch.object(hw, 'LOG') diff --git a/nova/virt/driver.py b/nova/virt/driver.py index 5c96d4041a..eea6ae2440 100644 --- a/nova/virt/driver.py +++ b/nova/virt/driver.py @@ -412,7 +412,7 @@ class ComputeDriver(object): raise NotImplementedError() def destroy(self, context, instance, network_info, block_device_info=None, - destroy_disks=True): + destroy_disks=True, destroy_secrets=True): """Destroy the specified instance from the Hypervisor. If the instance is not found (for example if networking failed), this @@ -425,11 +425,13 @@ class ComputeDriver(object): :param block_device_info: Information about block devices that should be detached from the instance. :param destroy_disks: Indicates if disks should be destroyed + :param destroy_secrets: Indicates if secrets should be destroyed """ raise NotImplementedError() def cleanup(self, context, instance, network_info, block_device_info=None, - destroy_disks=True, migrate_data=None, destroy_vifs=True): + destroy_disks=True, migrate_data=None, destroy_vifs=True, + destroy_secrets=True): """Cleanup the instance resources . Instance should have been destroyed from the Hypervisor before calling @@ -442,6 +444,8 @@ class ComputeDriver(object): be detached from the instance. :param destroy_disks: Indicates if disks should be destroyed :param migrate_data: implementation specific params + :param destroy_vifs: Indicates if vifs should be unplugged + :param destroy_secrets: Indicates if secrets should be destroyed """ raise NotImplementedError() diff --git a/nova/virt/fake.py b/nova/virt/fake.py index 0c2476cbfb..008aa94486 100644 --- a/nova/virt/fake.py +++ b/nova/virt/fake.py @@ -308,7 +308,7 @@ class FakeDriver(driver.ComputeDriver): pass def destroy(self, context, instance, network_info, block_device_info=None, - destroy_disks=True): + destroy_disks=True, destroy_secrets=True): key = instance.uuid if key in self.instances: flavor = instance.flavor @@ -323,7 +323,8 @@ class FakeDriver(driver.ComputeDriver): 'inst': self.instances}, instance=instance) def cleanup(self, context, instance, network_info, block_device_info=None, - destroy_disks=True, migrate_data=None, destroy_vifs=True): + destroy_disks=True, migrate_data=None, destroy_vifs=True, + destroy_secrets=True): # cleanup() should not be called when the guest has not been destroyed. 
if instance.uuid in self.instances: raise exception.InstanceExists( diff --git a/nova/virt/hardware.py b/nova/virt/hardware.py index 043ee648fa..feeef57f94 100644 --- a/nova/virt/hardware.py +++ b/nova/virt/hardware.py @@ -14,7 +14,6 @@ import collections import itertools -import math import re import typing as ty @@ -504,44 +503,6 @@ def _get_possible_cpu_topologies(vcpus, maxtopology, return possible -def _filter_for_numa_threads(possible, wantthreads): - """Filter topologies which closest match to NUMA threads. - - Determine which topologies provide the closest match to the number - of threads desired by the NUMA topology of the instance. - - The possible topologies may not have any entries which match the - desired thread count. This method will find the topologies which - have the closest matching count. For example, if 'wantthreads' is 4 - and the possible topologies has entries with 6, 3, 2 or 1 threads, - the topologies which have 3 threads will be identified as the - closest match not greater than 4 and will be returned. - - :param possible: list of objects.VirtCPUTopology instances - :param wantthreads: desired number of threads - - :returns: list of objects.VirtCPUTopology instances - """ - # First figure out the largest available thread - # count which is not greater than wantthreads - mostthreads = 0 - for topology in possible: - if topology.threads > wantthreads: - continue - if topology.threads > mostthreads: - mostthreads = topology.threads - - # Now restrict to just those topologies which - # match the largest thread count - bestthreads = [] - for topology in possible: - if topology.threads != mostthreads: - continue - bestthreads.append(topology) - - return bestthreads - - def _sort_possible_cpu_topologies(possible, wanttopology): """Sort the topologies in order of preference. @@ -579,8 +540,7 @@ def _sort_possible_cpu_topologies(possible, wanttopology): return desired -def _get_desirable_cpu_topologies(flavor, image_meta, allow_threads=True, - numa_topology=None): +def _get_desirable_cpu_topologies(flavor, image_meta, allow_threads=True): """Identify desirable CPU topologies based for given constraints. 
Look at the properties set in the flavor extra specs and the image @@ -591,10 +551,6 @@ def _get_desirable_cpu_topologies(flavor, image_meta, allow_threads=True, :param flavor: objects.Flavor instance to query extra specs from :param image_meta: nova.objects.ImageMeta object instance :param allow_threads: if the hypervisor supports CPU threads - :param numa_topology: objects.InstanceNUMATopology instance that - may contain additional topology constraints - (such as threading information) that should - be considered :returns: sorted list of objects.VirtCPUTopology instances """ @@ -612,36 +568,12 @@ def _get_desirable_cpu_topologies(flavor, image_meta, allow_threads=True, maximum, allow_threads) LOG.debug("Possible topologies %s", possible) - - if numa_topology: - min_requested_threads = None - cell_topologies = [cell.cpu_topology for cell in numa_topology.cells - if ('cpu_topology' in cell and cell.cpu_topology)] - if cell_topologies: - min_requested_threads = min( - topo.threads for topo in cell_topologies) - - if min_requested_threads: - if preferred.threads: - min_requested_threads = min(preferred.threads, - min_requested_threads) - - specified_threads = max(1, min_requested_threads) - LOG.debug("Filtering topologies best for %d threads", - specified_threads) - - possible = _filter_for_numa_threads(possible, - specified_threads) - LOG.debug("Remaining possible topologies %s", - possible) - desired = _sort_possible_cpu_topologies(possible, preferred) LOG.debug("Sorted desired topologies %s", desired) return desired -def get_best_cpu_topology(flavor, image_meta, allow_threads=True, - numa_topology=None): +def get_best_cpu_topology(flavor, image_meta, allow_threads=True): """Identify best CPU topology for given constraints. Look at the properties set in the flavor extra specs and the image @@ -651,15 +583,11 @@ def get_best_cpu_topology(flavor, image_meta, allow_threads=True, :param flavor: objects.Flavor instance to query extra specs from :param image_meta: nova.objects.ImageMeta object instance :param allow_threads: if the hypervisor supports CPU threads - :param numa_topology: objects.InstanceNUMATopology instance that - may contain additional topology constraints - (such as threading information) that should - be considered :returns: an objects.VirtCPUTopology instance for best topology """ - return _get_desirable_cpu_topologies(flavor, image_meta, - allow_threads, numa_topology)[0] + return _get_desirable_cpu_topologies( + flavor, image_meta, allow_threads)[0] def _numa_cell_supports_pagesize_request(host_cell, inst_cell): @@ -753,43 +681,6 @@ def _pack_instance_onto_cores(host_cell, instance_cell, pinning = None threads_no = 1 - def _orphans(instance_cell, threads_per_core): - """Number of instance CPUs which will not fill up a host core. - - Best explained by an example: consider set of free host cores as such: - [(0, 1), (3, 5), (6, 7, 8)] - This would be a case of 2 threads_per_core AKA an entry for 2 in the - sibling_sets structure. - - If we attempt to pack a 5 core instance on it - due to the fact that we - iterate the list in order, we will end up with a single core of the - instance pinned to a thread "alone" (with id 6), and we would have one - 'orphan' vcpu. - """ - return len(instance_cell) % threads_per_core - - def _threads(instance_cell, threads_per_core): - """Threads to expose to the instance via the VirtCPUTopology. - - This is calculated by taking the GCD of the number of threads we are - considering at the moment, and the number of orphans. 
An example for - instance_cell = 6 - threads_per_core = 4 - - So we can fit the instance as such: - [(0, 1, 2, 3), (4, 5, 6, 7), (8, 9, 10, 11)] - x x x x x x - - We can't expose 4 threads, as that will not be a valid topology (all - cores exposed to the guest have to have an equal number of threads), - and 1 would be too restrictive, but we want all threads that guest sees - to be on the same physical core, so we take GCD of 4 (max number of - threads) and 2 (number of 'orphan' CPUs) and get 2 as the number of - threads. - """ - return math.gcd(threads_per_core, _orphans(instance_cell, - threads_per_core)) - def _get_pinning(threads_no, sibling_set, instance_cores): """Determines pCPUs/vCPUs mapping @@ -988,7 +879,6 @@ def _pack_instance_onto_cores(host_cell, instance_cell, if (instance_cell.cpu_thread_policy != fields.CPUThreadAllocationPolicy.REQUIRE and not pinning): - threads_no = 1 # we create a fake sibling set by splitting all sibling sets and # treating each core as if it has no siblings. This is necessary # because '_get_pinning' will normally only take the same amount of @@ -1005,23 +895,12 @@ def _pack_instance_onto_cores(host_cell, instance_cell, cpuset_reserved = _get_reserved( sibling_set, pinning, num_cpu_reserved=num_cpu_reserved) - threads_no = _threads(instance_cell, threads_no) - if not pinning or (num_cpu_reserved and not cpuset_reserved): return LOG.debug('Selected cores for pinning: %s, in cell %s', pinning, host_cell.id) - # TODO(stephenfin): we're using this attribute essentially as a container - # for the thread count used in '_get_desirable_cpu_topologies'; we should - # drop it and either use a non-persistent attrbiute or add a new - # 'min_threads' field - topology = objects.VirtCPUTopology( - sockets=1, - cores=len(instance_cell) // threads_no, - threads=threads_no) instance_cell.pin_vcpus(*pinning) - instance_cell.cpu_topology = topology instance_cell.id = host_cell.id instance_cell.cpuset_reserved = cpuset_reserved return instance_cell diff --git a/nova/virt/hyperv/driver.py b/nova/virt/hyperv/driver.py index 66f620da69..16cb9db8e9 100644 --- a/nova/virt/hyperv/driver.py +++ b/nova/virt/hyperv/driver.py @@ -172,12 +172,13 @@ class HyperVDriver(driver.ComputeDriver): self._vmops.reboot(instance, network_info, reboot_type) def destroy(self, context, instance, network_info, block_device_info=None, - destroy_disks=True): + destroy_disks=True, destroy_secrets=True): self._vmops.destroy(instance, network_info, block_device_info, destroy_disks) def cleanup(self, context, instance, network_info, block_device_info=None, - destroy_disks=True, migrate_data=None, destroy_vifs=True): + destroy_disks=True, migrate_data=None, destroy_vifs=True, + destroy_secrets=True): """Cleanup after instance being destroyed by Hypervisor.""" self.unplug_vifs(instance, network_info) diff --git a/nova/virt/ironic/driver.py b/nova/virt/ironic/driver.py index 006d79e220..aabc05776e 100644 --- a/nova/virt/ironic/driver.py +++ b/nova/virt/ironic/driver.py @@ -1277,7 +1277,8 @@ class IronicDriver(virt_driver.ComputeDriver): _sync_remove_cache_entry() def destroy(self, context, instance, network_info, - block_device_info=None, destroy_disks=True): + block_device_info=None, destroy_disks=True, + destroy_secrets=True): """Destroy the specified instance, if it can be found. :param context: The security context. @@ -1287,6 +1288,8 @@ class IronicDriver(virt_driver.ComputeDriver): information. Ignored by this driver. :param destroy_disks: Indicates if disks should be destroyed. 
Ignored by this driver. + :param destroy_secrets: Indicates if secrets should be + destroyed. Ignored by this driver. """ LOG.debug('Destroy called for instance', instance=instance) try: diff --git a/nova/virt/libvirt/config.py b/nova/virt/libvirt/config.py index f9475776b3..106d0413f8 100644 --- a/nova/virt/libvirt/config.py +++ b/nova/virt/libvirt/config.py @@ -2204,6 +2204,8 @@ class LibvirtConfigGuestHostdevPCI(LibvirtConfigGuestHostdev): self.slot = None self.function = None + self.alias = None + def __eq__(self, other): if not isinstance(other, LibvirtConfigGuestHostdevPCI): return False @@ -2243,6 +2245,8 @@ class LibvirtConfigGuestHostdevPCI(LibvirtConfigGuestHostdev): self.bus = sub.get('bus') self.slot = sub.get('slot') self.function = sub.get('function') + elif c.tag == 'alias': + self.alias = c.get('name') class LibvirtConfigGuestHostdevMDEV(LibvirtConfigGuestHostdev): diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py index e0f18d1df9..f86ad2e161 100644 --- a/nova/virt/libvirt/driver.py +++ b/nova/virt/libvirt/driver.py @@ -1402,13 +1402,13 @@ class LibvirtDriver(driver.ComputeDriver): self._teardown_container(instance) def destroy(self, context, instance, network_info, block_device_info=None, - destroy_disks=True): + destroy_disks=True, destroy_secrets=True): self._destroy(instance) # NOTE(gibi): if there was device detach in progress then we need to # unblock the waiting threads and clean up. self._device_event_handler.cleanup_waiters(instance.uuid) self.cleanup(context, instance, network_info, block_device_info, - destroy_disks) + destroy_disks, destroy_secrets=destroy_secrets) def _undefine_domain(self, instance): try: @@ -1435,7 +1435,8 @@ class LibvirtDriver(driver.ComputeDriver): pass def cleanup(self, context, instance, network_info, block_device_info=None, - destroy_disks=True, migrate_data=None, destroy_vifs=True): + destroy_disks=True, migrate_data=None, destroy_vifs=True, + destroy_secrets=True): """Cleanup the instance from the host. Identify if the instance disks and instance path should be removed @@ -1449,6 +1450,7 @@ class LibvirtDriver(driver.ComputeDriver): :param destroy_disks: if local ephemeral disks should be destroyed :param migrate_data: optional migrate_data object :param destroy_vifs: if plugged vifs should be unplugged + :param destroy_secrets: Indicates if secrets should be destroyed """ cleanup_instance_dir = False cleanup_instance_disks = False @@ -1480,11 +1482,12 @@ class LibvirtDriver(driver.ComputeDriver): block_device_info=block_device_info, destroy_vifs=destroy_vifs, cleanup_instance_dir=cleanup_instance_dir, - cleanup_instance_disks=cleanup_instance_disks) + cleanup_instance_disks=cleanup_instance_disks, + destroy_secrets=destroy_secrets) def _cleanup(self, context, instance, network_info, block_device_info=None, destroy_vifs=True, cleanup_instance_dir=False, - cleanup_instance_disks=False): + cleanup_instance_disks=False, destroy_secrets=True): """Cleanup the domain and any attached resources from the host. 
This method cleans up any pmem devices, unplugs VIFs, disconnects @@ -1525,7 +1528,9 @@ class LibvirtDriver(driver.ComputeDriver): continue try: - self._disconnect_volume(context, connection_info, instance) + self._disconnect_volume( + context, connection_info, instance, + destroy_secrets=destroy_secrets) except Exception as exc: with excutils.save_and_reraise_exception() as ctxt: if cleanup_instance_disks: @@ -1842,8 +1847,13 @@ class LibvirtDriver(driver.ComputeDriver): return (False if connection_count > 1 else True) def _disconnect_volume(self, context, connection_info, instance, - encryption=None): - self._detach_encryptor(context, connection_info, encryption=encryption) + encryption=None, destroy_secrets=True): + self._detach_encryptor( + context, + connection_info, + encryption=encryption, + destroy_secrets=destroy_secrets + ) vol_driver = self._get_volume_driver(connection_info) volume_id = driver_block_device.get_volume_id(connection_info) multiattach = connection_info.get('multiattach', False) @@ -1960,7 +1970,8 @@ class LibvirtDriver(driver.ComputeDriver): encryption) encryptor.attach_volume(context, **encryption) - def _detach_encryptor(self, context, connection_info, encryption): + def _detach_encryptor(self, context, connection_info, encryption, + destroy_secrets=True): """Detach the frontend encryptor if one is required by the volume. The request context is only used when an encryption metadata dict is @@ -1972,7 +1983,11 @@ class LibvirtDriver(driver.ComputeDriver): """ volume_id = driver_block_device.get_volume_id(connection_info) if volume_id and self._host.find_secret('volume', volume_id): + if not destroy_secrets: + LOG.debug("Skipping volume secret destruction") + return return self._host.delete_secret('volume', volume_id) + if encryption is None: encryption = self._get_volume_encryption(context, connection_info) @@ -3733,7 +3748,8 @@ class LibvirtDriver(driver.ComputeDriver): # we can here without losing data. This allows us to re-initialise from # scratch, and hopefully fix, most aspects of a non-functioning guest. self.destroy(context, instance, network_info, destroy_disks=False, - block_device_info=block_device_info) + block_device_info=block_device_info, + destroy_secrets=False) # Convert the system metadata to image metadata # NOTE(mdbooth): This is a workaround for stateless Nova compute @@ -3774,11 +3790,32 @@ class LibvirtDriver(driver.ComputeDriver): # on which vif type we're using and we are working with a stale network # info cache here, so won't rely on waiting for neutron plug events. # vifs_already_plugged=True means "do not wait for neutron plug events" + external_events = [] + vifs_already_plugged = True + event_expected_for_vnic_types = ( + CONF.workarounds.wait_for_vif_plugged_event_during_hard_reboot) + if event_expected_for_vnic_types: + # NOTE(gibi): We unplugged every vif during destroy above and we + # will replug them with _create_guest_with_network. As the + # workaround config has some vnic_types configured we expect + # vif-plugged events for every vif with those vnic_types. + # TODO(gibi): only wait for events if we know that the networking + # backend sends plug time events. For that we need to finish + # https://bugs.launchpad.net/neutron/+bug/1821058 first in Neutron + # then create a driver -> plug-time event mapping in nova. 
+ external_events = [ + ('network-vif-plugged', vif['id']) + for vif in network_info + if vif['vnic_type'] in event_expected_for_vnic_types + ] + vifs_already_plugged = False + # NOTE(efried): The instance should already have a vtpm_secret_uuid # registered if appropriate. self._create_guest_with_network( context, xml, instance, network_info, block_device_info, - vifs_already_plugged=True) + vifs_already_plugged=vifs_already_plugged, + external_events=external_events) def _wait_for_reboot(): """Called at an interval until the VM is running again.""" @@ -5095,8 +5132,7 @@ class LibvirtDriver(driver.ComputeDriver): if cpu is None: return None - topology = hardware.get_best_cpu_topology( - flavor, image_meta, numa_topology=instance_numa_topology) + topology = hardware.get_best_cpu_topology(flavor, image_meta) cpu.sockets = topology.sockets cpu.cores = topology.cores @@ -5831,7 +5867,8 @@ class LibvirtDriver(driver.ComputeDriver): if CONF.libvirt.virt_type in ('qemu', 'kvm'): guest.features.append(vconfig.LibvirtConfigGuestFeatureACPI()) - guest.features.append(vconfig.LibvirtConfigGuestFeatureAPIC()) + if not CONF.workarounds.libvirt_disable_apic: + guest.features.append(vconfig.LibvirtConfigGuestFeatureAPIC()) if CONF.libvirt.virt_type in ('qemu', 'kvm') and os_type == 'windows': hv = vconfig.LibvirtConfigGuestFeatureHyperV() @@ -7165,7 +7202,7 @@ class LibvirtDriver(driver.ComputeDriver): power_on: bool = True, vifs_already_plugged: bool = False, post_xml_callback: ty.Callable = None, - external_events: ty.Optional[ty.List[str]] = None, + external_events: ty.Optional[ty.List[ty.Tuple[str, str]]] = None, cleanup_instance_dir: bool = False, cleanup_instance_disks: bool = False, ) -> libvirt_guest.Guest: diff --git a/nova/virt/libvirt/migration.py b/nova/virt/libvirt/migration.py index d2f505a760..8cea9f2983 100644 --- a/nova/virt/libvirt/migration.py +++ b/nova/virt/libvirt/migration.py @@ -339,14 +339,21 @@ def _update_vif_xml(xml_doc, migrate_data, get_vif_config): instance_uuid = xml_doc.findtext('uuid') parser = etree.XMLParser(remove_blank_text=True) interface_nodes = xml_doc.findall('./devices/interface') - migrate_vif_by_mac = {vif.source_vif['address']: vif + # MAC address stored for port in neutron DB and in domain XML + # might be in different cases, so to harmonize that + # we convert MAC to lower case for dict key. + migrate_vif_by_mac = {vif.source_vif['address'].lower(): vif for vif in migrate_data.vifs} for interface_dev in interface_nodes: mac = interface_dev.find('mac') mac = mac if mac is not None else {} mac_addr = mac.get('address') if mac_addr: - migrate_vif = migrate_vif_by_mac[mac_addr] + # MAC address stored in libvirt should always be normalized + # and stored in lower case. But just to be extra safe here + # we still normalize MAC retrieved from XML to be absolutely + # sure it will be the same with the Neutron provided one. + migrate_vif = migrate_vif_by_mac[mac_addr.lower()] vif = migrate_vif.get_dest_vif() # get_vif_config is a partial function of # nova.virt.libvirt.vif.LibvirtGenericVIFDriver.get_config diff --git a/nova/virt/libvirt/vif.py b/nova/virt/libvirt/vif.py index fbcfc7823a..a18a0e5c23 100644 --- a/nova/virt/libvirt/vif.py +++ b/nova/virt/libvirt/vif.py @@ -681,7 +681,8 @@ class LibvirtGenericVIFDriver(object): vif_model = self.get_vif_model(image_meta=image_meta) # TODO(ganso): explore whether multiqueue works for other vif models # that go through this code path. 
- multiqueue = (self._requests_multiqueue(image_meta) and
+ multiqueue = (instance.get_flavor().vcpus > 1 and
+ self._requests_multiqueue(image_meta) and
vif_model == network_model.VIF_MODEL_VIRTIO)
nova.privsep.linux_net.create_tap_dev(dev, mac, multiqueue=multiqueue)
network = vif.get('network')
diff --git a/nova/virt/vmwareapi/driver.py b/nova/virt/vmwareapi/driver.py index 36671d66de..7291415003 100644 --- a/nova/virt/vmwareapi/driver.py +++ b/nova/virt/vmwareapi/driver.py @@ -224,7 +224,8 @@ class VMwareVCDriver(driver.ComputeDriver):
LOG.debug('Extension %s already exists.', constants.EXTENSION_KEY)
def cleanup(self, context, instance, network_info, block_device_info=None,
- destroy_disks=True, migrate_data=None, destroy_vifs=True):
+ destroy_disks=True, migrate_data=None, destroy_vifs=True,
+ destroy_secrets=True):
"""Cleanup after instance being destroyed by Hypervisor."""
pass
@@ -593,7 +594,7 @@ class VMwareVCDriver(driver.ComputeDriver): instance=instance)
def destroy(self, context, instance, network_info, block_device_info=None,
- destroy_disks=True):
+ destroy_disks=True, destroy_secrets=True):
"""Destroy VM instance."""
# Destroy gets triggered when Resource Claim in resource_tracker
diff --git a/releasenotes/notes/bug-1910466-max-vcpu-topologies-with-numa-9a9ceb1b0fc7c33d.yaml b/releasenotes/notes/bug-1910466-max-vcpu-topologies-with-numa-9a9ceb1b0fc7c33d.yaml new file mode 100644 index 0000000000..2c1e092baa --- /dev/null +++ b/releasenotes/notes/bug-1910466-max-vcpu-topologies-with-numa-9a9ceb1b0fc7c33d.yaml @@ -0,0 +1,23 @@
+---
+fixes:
+  - |
+    The nova libvirt driver supports two independent features, virtual CPU
+    topologies and virtual NUMA topologies. Previously, when
+    ``hw:cpu_max_sockets``, ``hw:cpu_max_cores`` and ``hw:cpu_max_threads``
+    were specified for pinned instances (``hw:cpu_policy=dedicated``)
+    without explicit ``hw:cpu_sockets``, ``hw:cpu_cores``, ``hw:cpu_threads``
+    extra specs or their image equivalent, nova failed to generate a valid
+    virtual CPU topology. This has now been fixed and it is now possible to
+    use max CPU constraints with pinned instances.
+    For example, a combination of ``hw:numa_nodes=2``, ``hw:cpu_max_sockets=2``,
+    ``hw:cpu_max_cores=2``, ``hw:cpu_max_threads=8`` and
+    ``hw:cpu_policy=dedicated`` can now generate a valid topology using
+    a flavor with 8 vCPUs.
+upgrade:
+  - |
+    As part of the fix for bug 1910466, code that attempted to optimize VM CPU
+    thread assignment based on the host CPU topology was removed, as it was
+    determined to be buggy and undocumented, rejected valid virtual CPU
+    topologies, and produced different behavior when CPU pinning was enabled
+    vs disabled. The optimization may be reintroduced in the future with a
+    more generic implementation that works for both pinned and unpinned VMs.
diff --git a/releasenotes/notes/bug-1939604-547c729b7741831b.yaml b/releasenotes/notes/bug-1939604-547c729b7741831b.yaml new file mode 100644 index 0000000000..e14327c285 --- /dev/null +++ b/releasenotes/notes/bug-1939604-547c729b7741831b.yaml @@ -0,0 +1,5 @@
+---
+fixes:
+  - |
+    Addressed an issue that prevented instances with 1 vCPU using the
+    multiqueue feature from being created successfully when their vif_type is TAP.
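Editor's note: to illustrate the bug 1939604 fix above, the standalone sketch below captures the guarded multiqueue decision that the vif.py change introduces: multiqueue is only requested when the flavor has more than one vCPU, in addition to the existing image-property and virtio-model checks. The helper name and call shape are illustrative and are not the actual ``LibvirtGenericVIFDriver`` API.

# Illustrative sketch of the multiqueue decision after the bug 1939604 fix.
# Multiqueue is only requested when the guest has more than one vCPU, the
# image asked for it, and the vif model is virtio.
VIF_MODEL_VIRTIO = 'virtio'  # mirrors nova.network.model.VIF_MODEL_VIRTIO


def wants_multiqueue(flavor_vcpus, image_requests_multiqueue, vif_model):
    return (flavor_vcpus > 1 and
            image_requests_multiqueue and
            vif_model == VIF_MODEL_VIRTIO)


# A single-vCPU guest no longer gets multiqueue even if the image asks for it.
assert not wants_multiqueue(1, True, VIF_MODEL_VIRTIO)
assert wants_multiqueue(2, True, VIF_MODEL_VIRTIO)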
diff --git a/releasenotes/notes/bug-1946729-wait-for-vif-plugged-event-during-hard-reboot-fb491f6a68370bab.yaml b/releasenotes/notes/bug-1946729-wait-for-vif-plugged-event-during-hard-reboot-fb491f6a68370bab.yaml new file mode 100644 index 0000000000..c3686a9978 --- /dev/null +++ b/releasenotes/notes/bug-1946729-wait-for-vif-plugged-event-during-hard-reboot-fb491f6a68370bab.yaml @@ -0,0 +1,18 @@
+---
+issues:
+  - |
+    The libvirt virt driver in Nova implements power on and hard reboot by
+    first destroying the domain and unplugging the vifs, then recreating the
+    domain and replugging the vifs. However, nova does not wait for the
+    network-vif-plugged event before unpausing the domain. This can cause
+    the domain to start running and requesting an IP via DHCP before the
+    networking backend has finished plugging the vifs. The config option
+    [workarounds]wait_for_vif_plugged_event_during_hard_reboot has been added,
+    defaulting to an empty list, which can be used to ensure that the libvirt
+    driver waits for the network-vif-plugged event for vifs with a specific
+    ``vnic_type`` before it unpauses the domain during hard reboot. This should
+    only be used if the deployment uses a networking backend that sends such
+    events for the given ``vif_type`` at vif plug time. The ml2/ovs and
+    networking-odl Neutron backends are known to send plug time events for ports
+    with the ``normal`` ``vnic_type``. For more information see
+    https://bugs.launchpad.net/nova/+bug/1946729
diff --git a/releasenotes/notes/bug-1952941-request-spec-numa-topology-migration-c97dbd51b3c6c116.yaml b/releasenotes/notes/bug-1952941-request-spec-numa-topology-migration-c97dbd51b3c6c116.yaml new file mode 100644 index 0000000000..f582bef0c1 --- /dev/null +++ b/releasenotes/notes/bug-1952941-request-spec-numa-topology-migration-c97dbd51b3c6c116.yaml @@ -0,0 +1,9 @@
+---
+fixes:
+  - |
+    Fixed `bug 1952941`_ where a pre-Victoria server with pinned
+    CPUs cannot be migrated or evacuated after the cloud is upgraded to
+    Victoria or newer, as the scheduling fails with a
+    ``NotImplementedError: Cannot load 'pcpuset'`` error.
+
+    .. _bug 1952941: https://bugs.launchpad.net/nova/+bug/1952941
diff --git a/releasenotes/notes/libvirt-disable-apic-39599bdc2d110a1f.yaml b/releasenotes/notes/libvirt-disable-apic-39599bdc2d110a1f.yaml new file mode 100644 index 0000000000..83ab5d7a03 --- /dev/null +++ b/releasenotes/notes/libvirt-disable-apic-39599bdc2d110a1f.yaml @@ -0,0 +1,13 @@
+---
+issues:
+  - |
+    Linux guest images with known kernel bugs related to virtualized APIC
+    initialization would previously hang sporadically. For images where the
+    kernel cannot be upgraded, a ``[workarounds]`` config option has been
+    introduced:
+
+    ``[workarounds]libvirt_disable_apic``
+
+    This option is primarily intended for CI and development clouds as a bridge
+    for operators to mitigate the issue while they work with their upstream
+    image vendors.
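Editor's note: as a closing illustration of the bug 1946729 workaround described above, the sketch below shows how the hard-reboot path selects which network-vif-plugged events to wait for based on the vnic_types listed in the new workaround option. The helper name and return shape are illustrative; only the event-filtering logic mirrors the driver change.

# Illustrative sketch of the event selection driven by the
# [workarounds]wait_for_vif_plugged_event_during_hard_reboot option.
# Only vifs whose vnic_type is listed in the option produce an event to
# wait for; with the default empty list no events are awaited and the
# vifs are treated as already plugged.
def events_to_wait_for(network_info, configured_vnic_types):
    if not configured_vnic_types:
        return [], True
    events = [
        ('network-vif-plugged', vif['id'])
        for vif in network_info
        if vif['vnic_type'] in configured_vnic_types
    ]
    return events, False


network_info = [
    {'id': 'vif-1', 'vnic_type': 'normal'},
    {'id': 'vif-2', 'vnic_type': 'direct'},
]
events, vifs_already_plugged = events_to_wait_for(network_info, ['normal'])
assert events == [('network-vif-plugged', 'vif-1')]
assert vifs_already_plugged is False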