diff options
author | Matt Riedemann <mriedem.os@gmail.com> | 2019-10-15 15:49:55 -0400 |
---|---|---|
committer | Balazs Gibizer <balazs.gibizer@est.tech> | 2021-08-30 06:11:25 +0000 |
commit | c09d98dadb6cd69e294420ba7ecea0f9b9cfcd71 (patch) | |
tree | 73071cf3736f16afc29a3c54c5c9144f2ee3d117 /nova/scheduler | |
parent | 2a78626a85954997d35f5fe62c50de297e2ca92d (diff) | |
download | nova-c09d98dadb6cd69e294420ba7ecea0f9b9cfcd71.tar.gz |
Add force kwarg to delete_allocation_for_instance
This adds a force kwarg to delete_allocation_for_instance which
defaults to False; passing force=True was found to be the most common
use case by a significant margin during implementation of this patch,
so the relevant callers are updated to pass it explicitly.
In most cases, this method is called when we want to delete the
allocations because they should be gone, e.g. server delete, failed
build, or shelve offload. The alternative in these cases is the caller
could trap the conflict error and retry but we might as well just force
the delete in that case (it's cleaner).
When force=True, it will DELETE the consumer allocations rather than
GET and PUT with an empty allocations dict and the consumer generation
which can result in a 409 conflict from Placement. For example, bug
1836754 shows that in one tempest test that creates a server and then
immediately deletes it, we can hit a very tight window where the method
GETs the allocations and, before it PUTs the empty allocations to
remove them, something changes, which results in a conflict and the
server delete failing with a 409 error.
It's worth noting that delete_allocation_for_instance used to just
DELETE the allocations before Stein [1], when we started taking consumer
generations into account. There was also a related mailing list thread
[2].
Closes-Bug: #1836754
[1] I77f34788dd7ab8fdf60d668a4f76452e03cf9888
[2] http://lists.openstack.org/pipermail/openstack-dev/2018-August/133374.html
Change-Id: Ife3c7a5a95c5d707983ab33fd2fbfc1cfb72f676
Diffstat (limited to 'nova/scheduler')
-rw-r--r-- | nova/scheduler/client/report.py | 41 |
-rw-r--r-- | nova/scheduler/manager.py | 3 |
2 files changed, 38 insertions, 6 deletions
diff --git a/nova/scheduler/client/report.py b/nova/scheduler/client/report.py index 257aaf07ca..aa13f5e59f 100644 --- a/nova/scheduler/client/report.py +++ b/nova/scheduler/client/report.py @@ -2050,8 +2050,9 @@ class SchedulerReportClient(object): return r.status_code == 204 @safe_connect - def delete_allocation_for_instance(self, context, uuid, - consumer_type='instance'): + def delete_allocation_for_instance( + self, context, uuid, consumer_type='instance', force=False + ): """Delete the instance allocation from placement :param context: The security context @@ -2059,13 +2060,42 @@ class SchedulerReportClient(object): as the consumer UUID towards placement :param consumer_type: The type of the consumer specified by uuid. 'instance' or 'migration' (Default: instance) + :param force: True if the allocations should be deleted without regard + to consumer generation conflicts, False will attempt to + GET and PUT empty allocations and use the consumer + generation which could result in a conflict and need to + retry the operation. :return: Returns True if the allocation is successfully deleted by this call. Returns False if the allocation does not exist. :raises AllocationDeleteFailed: If the allocation cannot be read from - placement or it is changed by another process while we tried to - delete it. + placement (if force=False), is changed by another process while + we tried to delete it (if force=False), or if some other server + side error occurred (if force=True) """ url = '/allocations/%s' % uuid + if force: + # Do not bother with consumer generations, just delete the + # allocations. + r = self.delete(url, global_request_id=context.global_id) + if r: + LOG.info('Deleted allocations for %(consumer_type)s %(uuid)s', + {'consumer_type': consumer_type, 'uuid': uuid}) + return True + + # Check for 404 since we don't need to log a warning if we + # tried to delete something which doesn't actually exist. 
+ if r.status_code != 404: + LOG.warning( + 'Unable to delete allocation for %(consumer_type)s ' + '%(uuid)s: (%(code)i %(text)s)', + {'consumer_type': consumer_type, + 'uuid': uuid, + 'code': r.status_code, + 'text': r.text}) + raise exception.AllocationDeleteFailed(consumer_uuid=uuid, + error=r.text) + return False + # We read the consumer generation then try to put an empty allocation # for that consumer. If between the GET and the PUT the consumer # generation changes then we raise AllocationDeleteFailed. @@ -2245,7 +2275,8 @@ class SchedulerReportClient(object): instance_uuids = objects.InstanceList.get_uuids_by_host_and_node( context, host, nodename) for instance_uuid in instance_uuids: - self.delete_allocation_for_instance(context, instance_uuid) + self.delete_allocation_for_instance( + context, instance_uuid, force=True) # Ensure to delete resource provider in tree by top-down # traversable order. rps_to_refresh = self.get_providers_in_tree(context, rp_uuid) diff --git a/nova/scheduler/manager.py b/nova/scheduler/manager.py index 9cee6b3bfc..03df615f6a 100644 --- a/nova/scheduler/manager.py +++ b/nova/scheduler/manager.py @@ -461,7 +461,8 @@ class SchedulerManager(manager.Manager): LOG.debug("Cleaning up allocations for %s", instance_uuids) for uuid in instance_uuids: - self.placement_client.delete_allocation_for_instance(context, uuid) + self.placement_client.delete_allocation_for_instance( + context, uuid, force=True) def _legacy_find_hosts( self, context, num_instances, spec_obj, hosts, num_alts, |