author     Matt Riedemann <mriedem.os@gmail.com>      2019-10-15 15:49:55 -0400
committer  Balazs Gibizer <balazs.gibizer@est.tech>   2021-08-30 06:11:25 +0000
commit     c09d98dadb6cd69e294420ba7ecea0f9b9cfcd71 (patch)
tree       73071cf3736f16afc29a3c54c5c9144f2ee3d117 /nova/scheduler
parent     2a78626a85954997d35f5fe62c50de297e2ca92d (diff)
download   nova-c09d98dadb6cd69e294420ba7ecea0f9b9cfcd71.tar.gz
Add force kwarg to delete_allocation_for_instance
This adds a force kwarg to delete_allocation_for_instance, defaulting to
False. Forcing the delete was found to be the most common use case by a
significant margin during implementation of this patch: in most cases, this
method is called when we want to delete the allocations because they should
be gone, e.g. server delete, failed build, or shelve offload, so those call
sites now pass force=True. The alternative in these cases is the caller
could trap the conflict error and retry, but we might as well just force the
delete in that case (it's cleaner).

When force=True, it will DELETE the consumer allocations rather than GET and
PUT with an empty allocations dict and the consumer generation, which can
result in a 409 conflict from Placement. For example, bug 1836754 shows that
in one tempest test that creates a server and then immediately deletes it, we
can hit a very tight window where the method GETs the allocations and, before
it PUTs the empty allocations to remove them, something changes, which
results in a conflict and the server delete fails with a 409 error.

It's worth noting that delete_allocation_for_instance used to just DELETE the
allocations before Stein [1], when we started taking consumer generations
into account. There was also a related mailing list thread [2].

Closes-Bug: #1836754

[1] I77f34788dd7ab8fdf60d668a4f76452e03cf9888
[2] http://lists.openstack.org/pipermail/openstack-dev/2018-August/133374.html

Change-Id: Ife3c7a5a95c5d707983ab33fd2fbfc1cfb72f676
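To make the two call patterns concrete, here is a minimal illustrative
sketch. It is not part of the patch: the helper names and the retry count
are hypothetical, while delete_allocation_for_instance and
AllocationDeleteFailed come from the diff below.

from nova import exception


def cleanup_after_server_delete(reportclient, context, instance_uuid):
    # Server delete, failed build, or shelve offload: the allocations
    # should simply be gone, so skip the consumer generation handshake
    # and DELETE them outright.
    return reportclient.delete_allocation_for_instance(
        context, instance_uuid, force=True)


def cleanup_with_retry(reportclient, context, instance_uuid, attempts=3):
    # The alternative mentioned above: use the generation-aware GET+PUT
    # path and retry when another writer bumps the consumer generation
    # in between. (A real caller would distinguish read failures from
    # generation conflicts; this sketch retries both.)
    for _ in range(attempts):
        try:
            return reportclient.delete_allocation_for_instance(
                context, instance_uuid, force=False)
        except exception.AllocationDeleteFailed:
            continue
    raise exception.AllocationDeleteFailed(
        consumer_uuid=instance_uuid, error='retries exhausted')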
Diffstat (limited to 'nova/scheduler')
-rw-r--r--  nova/scheduler/client/report.py  41
-rw-r--r--  nova/scheduler/manager.py         3
2 files changed, 38 insertions(+), 6 deletions(-)
diff --git a/nova/scheduler/client/report.py b/nova/scheduler/client/report.py
index 257aaf07ca..aa13f5e59f 100644
--- a/nova/scheduler/client/report.py
+++ b/nova/scheduler/client/report.py
@@ -2050,8 +2050,9 @@ class SchedulerReportClient(object):
         return r.status_code == 204
 
     @safe_connect
-    def delete_allocation_for_instance(self, context, uuid,
-                                       consumer_type='instance'):
+    def delete_allocation_for_instance(
+        self, context, uuid, consumer_type='instance', force=False
+    ):
         """Delete the instance allocation from placement
 
         :param context: The security context
@@ -2059,13 +2060,42 @@ class SchedulerReportClient(object):
                      as the consumer UUID towards placement
         :param consumer_type: The type of the consumer specified by uuid.
                               'instance' or 'migration' (Default: instance)
+        :param force: True if the allocations should be deleted without regard
+                      to consumer generation conflicts, False will attempt to
+                      GET and PUT empty allocations and use the consumer
+                      generation which could result in a conflict and need to
+                      retry the operation.
         :return: Returns True if the allocation is successfully deleted by this
                  call. Returns False if the allocation does not exist.
         :raises AllocationDeleteFailed: If the allocation cannot be read from
-            placement or it is changed by another process while we tried to
-            delete it.
+            placement (if force=False), is changed by another process while
+            we tried to delete it (if force=False), or if some other server
+            side error occurred (if force=True)
         """
         url = '/allocations/%s' % uuid
+        if force:
+            # Do not bother with consumer generations, just delete the
+            # allocations.
+            r = self.delete(url, global_request_id=context.global_id)
+            if r:
+                LOG.info('Deleted allocations for %(consumer_type)s %(uuid)s',
+                         {'consumer_type': consumer_type, 'uuid': uuid})
+                return True
+
+            # Check for 404 since we don't need to log a warning if we
+            # tried to delete something which doesn't actually exist.
+            if r.status_code != 404:
+                LOG.warning(
+                    'Unable to delete allocation for %(consumer_type)s '
+                    '%(uuid)s: (%(code)i %(text)s)',
+                    {'consumer_type': consumer_type,
+                     'uuid': uuid,
+                     'code': r.status_code,
+                     'text': r.text})
+                raise exception.AllocationDeleteFailed(consumer_uuid=uuid,
+                                                       error=r.text)
+            return False
+
         # We read the consumer generation then try to put an empty allocation
         # for that consumer. If between the GET and the PUT the consumer
         # generation changes then we raise AllocationDeleteFailed.
@@ -2245,7 +2275,8 @@ class SchedulerReportClient(object):
         instance_uuids = objects.InstanceList.get_uuids_by_host_and_node(
             context, host, nodename)
         for instance_uuid in instance_uuids:
-            self.delete_allocation_for_instance(context, instance_uuid)
+            self.delete_allocation_for_instance(
+                context, instance_uuid, force=True)
         # Ensure to delete resource provider in tree by top-down
         # traversable order.
         rps_to_refresh = self.get_providers_in_tree(context, rp_uuid)
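For contrast with the forced DELETE above, here is a minimal sketch of what
the force=False path amounts to against the placement API, assuming the 1.28
microversion that introduced consumer generations. The client.get/client.put
helpers stand in for the report client's HTTP wrappers, and the function is
illustrative rather than the nova implementation.

from nova import exception

CONSUMER_GENERATION_VERSION = '1.28'  # assumed placement microversion


def delete_allocations_with_generation(client, context, consumer_uuid):
    # Sketch of the force=False flow: read the current allocations and
    # the consumer generation...
    url = '/allocations/%s' % consumer_uuid
    r = client.get(url, version=CONSUMER_GENERATION_VERSION,
                   global_request_id=context.global_id)
    if r.status_code != 200:
        raise exception.AllocationDeleteFailed(
            consumer_uuid=consumer_uuid, error=r.text)
    body = r.json()
    if not body.get('allocations'):
        # Placement reports an empty allocations dict for an unknown
        # consumer, so there is nothing to delete.
        return False
    # ...then PUT an empty allocations dict carrying the generation we
    # just read. If another process changed the allocations between our
    # GET and PUT, placement answers 409 Conflict (the race in bug
    # 1836754).
    payload = {
        'allocations': {},
        'consumer_generation': body['consumer_generation'],
        'project_id': body['project_id'],
        'user_id': body['user_id'],
    }
    r = client.put(url, payload, version=CONSUMER_GENERATION_VERSION,
                   global_request_id=context.global_id)
    if r.status_code == 204:
        return True
    raise exception.AllocationDeleteFailed(
        consumer_uuid=consumer_uuid, error=r.text)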
diff --git a/nova/scheduler/manager.py b/nova/scheduler/manager.py
index 9cee6b3bfc..03df615f6a 100644
--- a/nova/scheduler/manager.py
+++ b/nova/scheduler/manager.py
@@ -461,7 +461,8 @@ class SchedulerManager(manager.Manager):
 
         LOG.debug("Cleaning up allocations for %s", instance_uuids)
         for uuid in instance_uuids:
-            self.placement_client.delete_allocation_for_instance(context, uuid)
+            self.placement_client.delete_allocation_for_instance(
+                context, uuid, force=True)
 
     def _legacy_find_hosts(
         self, context, num_instances, spec_obj, hosts, num_alts,
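As a closing illustration of why these call sites force the delete: the 409
in bug 1836754 comes from the window between the GET and the PUT. A toy,
self-contained model of placement's generation check (all names hypothetical,
no real placement code involved) makes the race easy to see.

class ToyPlacement:
    """Toy stand-in for placement's consumer generation check."""

    def __init__(self):
        self.allocations = {'rp-1': {'VCPU': 1}}
        self.generation = 5

    def put_allocations(self, allocations, consumer_generation):
        # Placement rejects writes that carry a stale generation.
        if consumer_generation != self.generation:
            return 409  # Conflict
        self.allocations = allocations
        self.generation += 1
        return 204


placement = ToyPlacement()
seen_generation = placement.generation  # the deleter GETs generation 5
# Another process writes in the window, bumping the generation to 6.
placement.put_allocations({'rp-1': {'VCPU': 2}}, 5)
# The deleter's PUT of empty allocations with generation 5 now conflicts,
# which is exactly the failure force=True sidesteps.
status = placement.put_allocations({}, seen_generation)
assert status == 409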