diff options
author | Lee Yarwood <lyarwood@redhat.com> | 2020-10-13 16:01:19 +0100 |
---|---|---|
committer | Lee Yarwood <lyarwood@redhat.com> | 2020-12-07 09:42:53 +0000 |
commit | eda458828bf44a6b17503f407448dce9133bfd36 (patch) | |
tree | 9be505a5dd7cfabfdb3e007dd9c74c08337d0076 | |
parent | 95fc16133406da6bb07f9f8ffc21e99f1e4695f4 (diff) | |
download | nova-eda458828bf44a6b17503f407448dce9133bfd36.tar.gz |
compute: Don't detach volumes when RescheduledException raised without retry
I8b1c05317734e14ea73dc868941351bb31210bf0 introduced a crude call to
_cleanup_volumes within _do_build_and_run_instance when handling a
RescheduledException exception raised from _build_and_run_instance
without any retry information provided from the scheduler.
This situation can arise when using the 'availability_zone' parameter to
skip the scheduler by providing both a target availability_zone and host
in the format of `$availability_zone:$host`. If the instance is unable
to build on the compute the failure will eventually lead to
_cleanup_volumes calling DriverVolumeBlockDevice.detach that will either
detach (cinderv2) or delete the associated volume attachments (cinderv3)
moving the volume to an `available` state, assuming it isn't
multi-attached etc.
The issue is that this behaviour is in stark contrast to that
of volumes associated with instances that have failed to schedule. In
that case the volumes remain marked as reserved and associated with the
ERROR'd out instance until the instance itself is deleted.
This change aims to align both cases by removing the call to
_cleanup_volumes, thereby keeping any volumes in a `reserved` state
until the underlying instance is deleted.
Note that leaving these volumes associated with ERROR'd out instances is
now safe after I4dc6c8bd3bb6c135f8a698af41f5d0e026c39117 landed and now
ensures that ports and volumes associated with such an instance are
correctly cleaned up.
Closes-Bug: #1899649
Change-Id: I5dda9e8bca5fbaae77ece12b67176945ca4d9a4c
(cherry picked from commit 26c46a409fa3a75f11fe0ecfc3cf1a8e77da8f51)
-rw-r--r-- | nova/compute/manager.py | 2 | ||||
-rw-r--r-- | nova/tests/functional/regressions/test_bug_1899649.py | 14 | ||||
-rw-r--r-- | nova/tests/unit/compute/test_compute_mgr.py | 8 |
3 files changed, 4 insertions, 20 deletions
diff --git a/nova/compute/manager.py b/nova/compute/manager.py index 8b6041945f..d1ad933b76 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -2230,8 +2230,6 @@ class ComputeManager(manager.Manager): instance=instance) self._cleanup_allocated_networks(context, instance, requested_networks) - self._cleanup_volumes(context, instance, - block_device_mapping, raise_exc=False) compute_utils.add_instance_fault_from_exc(context, instance, e, sys.exc_info(), fault_message=e.kwargs['reason']) diff --git a/nova/tests/functional/regressions/test_bug_1899649.py b/nova/tests/functional/regressions/test_bug_1899649.py index 3029046587..be75ea947f 100644 --- a/nova/tests/functional/regressions/test_bug_1899649.py +++ b/nova/tests/functional/regressions/test_bug_1899649.py @@ -92,19 +92,9 @@ class TestVolAttachmentsAfterFailureToScheduleOrBuild(base.ServersTestBase): self._assert_failure_and_volume_attachments(server) def test_failure_to_build_with_az_and_host(self): - # Assert that a volume attachments does not remain after a failure to + # Assert that a volume attachments remain after a failure to # build and reschedule by providing an availability_zone *and* host, # skipping the scheduler. This is bug #1899649. self.server['availability_zone'] = 'nova:compute1' server = self.admin_api.post_server({'server': self.server}) - - # Assert the server ends up in an ERROR state - self._wait_for_state_change(server, 'ERROR') - - # FIXME(lyarwood): A single volume attachment should be present for the - # instance at this stage as the volume *can* otherwise be marked as - # available within Cinder if it isn't multi-attached. 
- attachments = self.cinder.volume_to_attachment.get(self.volume_id) - self.assertEqual(0, len(attachments)) - self.assertNotIn( - self.volume_id, self.cinder.volume_ids_for_instance(server['id'])) + self._assert_failure_and_volume_attachments(server) diff --git a/nova/tests/unit/compute/test_compute_mgr.py b/nova/tests/unit/compute/test_compute_mgr.py index 3265f8056b..af21ffc899 100644 --- a/nova/tests/unit/compute/test_compute_mgr.py +++ b/nova/tests/unit/compute/test_compute_mgr.py @@ -6875,14 +6875,13 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase): @mock.patch.object(objects.Instance, 'save') @mock.patch.object(manager.ComputeManager, '_nil_out_instance_obj_host_and_node') - @mock.patch.object(manager.ComputeManager, '_cleanup_volumes') @mock.patch.object(manager.ComputeManager, '_cleanup_allocated_networks') @mock.patch.object(manager.ComputeManager, '_set_instance_obj_error_state') @mock.patch.object(compute_utils, 'add_instance_fault_from_exc') @mock.patch.object(manager.ComputeManager, '_build_and_run_instance') def test_rescheduled_exception_without_retry(self, - mock_build_run, mock_add, mock_set, mock_clean_net, mock_clean_vol, - mock_nil, mock_save, mock_start, mock_finish): + mock_build_run, mock_add, mock_set, mock_clean_net, mock_nil, + mock_save, mock_start, mock_finish): self._do_build_instance_update(mock_save) mock_build_run.side_effect = exception.RescheduledException(reason='', instance_uuid=self.instance.uuid) @@ -6918,9 +6917,6 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase): self.accel_uuids) mock_clean_net.assert_called_once_with(self.context, self.instance, self.requested_networks) - mock_clean_vol.assert_called_once_with(self.context, - self.instance, self.block_device_mapping, - raise_exc=False) mock_add.assert_called_once_with(self.context, self.instance, mock.ANY, mock.ANY, fault_message=mock.ANY) mock_nil.assert_called_once_with(self.instance) |