diff options
author | melanie witt <melwittt@gmail.com> | 2021-02-12 01:50:19 +0000 |
---|---|---|
committer | melanie witt <melwittt@gmail.com> | 2021-02-19 04:40:35 +0000 |
commit | 123f6262f63477d3f50dfad09688978e044bd9e0 (patch) | |
tree | 5119fe646c6fbf2de0b297d8f3f8ff3e4e417b6f /nova/compute | |
parent | f7975d640ce1e9fa06d045d35177f07451716f0c (diff) | |
download | nova-123f6262f63477d3f50dfad09688978e044bd9e0.tar.gz |
Handle instance = None in _local_delete_cleanup
Change I4d3193d8401614311010ed0e055fcb3aaeeebaed added some
additional local delete cleanup to prevent leaking of placement
allocations. That change introduced a regression in our "delete while
booting" handling because _local_delete_cleanup required a valid
instance object to do its work. In two cases, we could have
instance = None from _lookup_instance if we are racing with a create
request and the conductor has deleted the instance record while we
are in the middle of processing the delete request.
This handles those scenarios by doing two things:
(1) Changing the _local_delete_cleanup and
_update_queued_for_deletion methods to take an instance UUID
instead of a full instance object, as they really only need the
UUID to do their work; and
(2) Saving a copy of the instance UUID before doing another instance
lookup which might return None, and passing that UUID to the
_local_delete_cleanup and _update_queued_for_deletion methods.
Closes-Bug: #1914777
Change-Id: I03cf285ad83e09d88cdb702a88dfed53c01610f8
Diffstat (limited to 'nova/compute')
-rw-r--r-- | nova/compute/api.py | 35 |
1 files changed, 20 insertions, 15 deletions
diff --git a/nova/compute/api.py b/nova/compute/api.py index a7a7c35da4..c498d46ddb 100644 --- a/nova/compute/api.py +++ b/nova/compute/api.py @@ -2165,22 +2165,22 @@ class API(base.Base): return True return False - def _local_delete_cleanup(self, context, instance): + def _local_delete_cleanup(self, context, instance_uuid): # NOTE(aarents) Ensure instance allocation is cleared and instance # mapping queued as deleted before _delete() return try: self.placementclient.delete_allocation_for_instance( - context, instance.uuid) + context, instance_uuid) except exception.AllocationDeleteFailed: LOG.info("Allocation delete failed during local delete cleanup.", - instance=instance) + instance_uuid=instance_uuid) try: - self._update_queued_for_deletion(context, instance, True) + self._update_queued_for_deletion(context, instance_uuid, True) except exception.InstanceMappingNotFound: LOG.info("Instance Mapping does not exist while attempting " "local delete cleanup.", - instance=instance) + instance_uuid=instance_uuid) def _attempt_delete_of_buildrequest(self, context, instance): # If there is a BuildRequest then the instance may not have been @@ -2217,7 +2217,7 @@ class API(base.Base): if not instance.host and not may_have_ports_or_volumes: try: if self._delete_while_booting(context, instance): - self._local_delete_cleanup(context, instance) + self._local_delete_cleanup(context, instance.uuid) return # If instance.host was not set it's possible that the Instance # object here was pulled from a BuildRequest object and is not @@ -2226,6 +2226,11 @@ class API(base.Base): # properly. A lookup is attempted which will either return a # full Instance or None if not found. If not found then it's # acceptable to skip the rest of the delete processing. + + # Save a copy of the instance UUID early, in case + # _lookup_instance returns instance = None, to pass to + # _local_delete_cleanup if needed. 
+ instance_uuid = instance.uuid cell, instance = self._lookup_instance(context, instance.uuid) if cell and instance: try: @@ -2236,11 +2241,11 @@ class API(base.Base): except exception.InstanceNotFound: pass # The instance was deleted or is already gone. - self._local_delete_cleanup(context, instance) + self._local_delete_cleanup(context, instance.uuid) return if not instance: # Instance is already deleted. - self._local_delete_cleanup(context, instance) + self._local_delete_cleanup(context, instance_uuid) return except exception.ObjectActionError: # NOTE(melwitt): This means the instance.host changed @@ -2253,7 +2258,7 @@ class API(base.Base): cell, instance = self._lookup_instance(context, instance.uuid) if not instance: # Instance is already deleted - self._local_delete_cleanup(context, instance) + self._local_delete_cleanup(context, instance_uuid) return bdms = objects.BlockDeviceMappingList.get_by_instance_uuid( @@ -2297,7 +2302,7 @@ class API(base.Base): 'field, its vm_state is %(state)s.', {'state': instance.vm_state}, instance=instance) - self._local_delete_cleanup(context, instance) + self._local_delete_cleanup(context, instance.uuid) return except exception.ObjectActionError as ex: # The instance's host likely changed under us as @@ -2482,7 +2487,7 @@ class API(base.Base): instance.destroy() @staticmethod - def _update_queued_for_deletion(context, instance, qfd): + def _update_queued_for_deletion(context, instance_uuid, qfd): # NOTE(tssurya): We query the instance_mapping record of this instance # and update the queued_for_delete flag to True (or False according to # the state of the instance). This just means that the instance is @@ -2491,7 +2496,7 @@ class API(base.Base): # value could be stale which is fine, considering its use is only # during down cell (desperate) situation. 
im = objects.InstanceMapping.get_by_instance_uuid(context, - instance.uuid) + instance_uuid) im.queued_for_delete = qfd im.save() @@ -2503,7 +2508,7 @@ class API(base.Base): instance.save() else: self.compute_rpcapi.terminate_instance(context, instance, bdms) - self._update_queued_for_deletion(context, instance, True) + self._update_queued_for_deletion(context, instance.uuid, True) def _do_soft_delete(self, context, instance, bdms, local=False): if local: @@ -2513,7 +2518,7 @@ class API(base.Base): instance.save() else: self.compute_rpcapi.soft_delete_instance(context, instance) - self._update_queued_for_deletion(context, instance, True) + self._update_queued_for_deletion(context, instance.uuid, True) # NOTE(maoy): we allow delete to be called no matter what vm_state says. @check_instance_lock @@ -2566,7 +2571,7 @@ class API(base.Base): instance.task_state = None instance.deleted_at = None instance.save(expected_task_state=[None]) - self._update_queued_for_deletion(context, instance, False) + self._update_queued_for_deletion(context, instance.uuid, False) @check_instance_lock @check_instance_state(task_state=None, |