diff options
author | Lee Yarwood <lyarwood@redhat.com> | 2020-10-13 16:01:19 +0100 |
---|---|---|
committer | Lee Yarwood <lyarwood@redhat.com> | 2020-12-07 09:42:53 +0000 |
commit | eda458828bf44a6b17503f407448dce9133bfd36 (patch) | |
tree | 9be505a5dd7cfabfdb3e007dd9c74c08337d0076 | |
parent | 95fc16133406da6bb07f9f8ffc21e99f1e4695f4 (diff) | |
download | nova-eda458828bf44a6b17503f407448dce9133bfd36.tar.gz |
compute: Don't detach volumes when RescheduledException raised without retry
I8b1c05317734e14ea73dc868941351bb31210bf0 introduced a crude call to
_cleanup_volumes within _do_build_and_run_instance when handling a
RescheduledException exception raised from _build_and_run_instance
without any retry information provided from the scheduler.
This situation can arise when using the 'availability_zone' parameter to
skip the scheduler by providing both a target availability_zone and host
in the format of `$availability_zone:$host`. If the instance is unable
to build on the compute the failure will eventually lead to
_cleanup_volumes calling DriverVolumeBlockDevice.detach that will either
detach (cinderv2) or delete the associated volume attachments (cinderv3)
moving the volume to an `available` state, assuming it isn't
multi-attached etc.
The issue is that this behaviour is in stark contrast to that
of volumes associated with instances that have failed to schedule. In
that case the volumes remain marked as reserved and associated with the
ERROR'd out instance until the instance itself is deleted.
This change aims to align both cases by removing the call to
_cleanup_volumes, thereby keeping any volumes in a `reserved` state
until the underlying instance is deleted.
Note that leaving these volumes associated with ERROR'd out instances is
now safe after I4dc6c8bd3bb6c135f8a698af41f5d0e026c39117 landed and now
ensures that ports and volumes associated with such an instance are
correctly cleaned up.
Closes-Bug: #1899649
Change-Id: I5dda9e8bca5fbaae77ece12b67176945ca4d9a4c
(cherry picked from commit 26c46a409fa3a75f11fe0ecfc3cf1a8e77da8f51)
-rw-r--r-- | nova/compute/manager.py | 2 | ||||
-rw-r--r-- | nova/tests/functional/regressions/test_bug_1899649.py | 14 | ||||
-rw-r--r-- | nova/tests/unit/compute/test_compute_mgr.py | 8 |
3 files changed, 4 insertions, 20 deletions
diff --git a/nova/compute/manager.py b/nova/compute/manager.py index 8b6041945f..d1ad933b76 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -2230,8 +2230,6 @@ class ComputeManager(manager.Manager): instance=instance) self._cleanup_allocated_networks(context, instance, requested_networks) - self._cleanup_volumes(context, instance, - block_device_mapping, raise_exc=False) compute_utils.add_instance_fault_from_exc(context, instance, e, sys.exc_info(), fault_message=e.kwargs['reason']) diff --git a/nova/tests/functional/regressions/test_bug_1899649.py b/nova/tests/functional/regressions/test_bug_1899649.py index 3029046587..be75ea947f 100644 --- a/nova/tests/functional/regressions/test_bug_1899649.py +++ b/nova/tests/functional/regressions/test_bug_1899649.py @@ -92,19 +92,9 @@ class TestVolAttachmentsAfterFailureToScheduleOrBuild(base.ServersTestBase): self._assert_failure_and_volume_attachments(server) def test_failure_to_build_with_az_and_host(self): - # Assert that a volume attachments does not remain after a failure to + # Assert that a volume attachments remain after a failure to # build and reschedule by providing an availability_zone *and* host, # skipping the scheduler. This is bug #1899649. self.server['availability_zone'] = 'nova:compute1' server = self.admin_api.post_server({'server': self.server}) - - # Assert the server ends up in an ERROR state - self._wait_for_state_change(server, 'ERROR') - - # FIXME(lyarwood): A single volume attachment should be present for the - # instance at this stage as the volume *can* otherwise be marked as - # available within Cinder if it isn't multi-attached. 
- attachments = self.cinder.volume_to_attachment.get(self.volume_id) - self.assertEqual(0, len(attachments)) - self.assertNotIn( - self.volume_id, self.cinder.volume_ids_for_instance(server['id'])) + self._assert_failure_and_volume_attachments(server) diff --git a/nova/tests/unit/compute/test_compute_mgr.py b/nova/tests/unit/compute/test_compute_mgr.py index 3265f8056b..af21ffc899 100644 --- a/nova/tests/unit/compute/test_compute_mgr.py +++ b/nova/tests/unit/compute/test_compute_mgr.py @@ -6875,14 +6875,13 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase): @mock.patch.object(objects.Instance, 'save') @mock.patch.object(manager.ComputeManager, '_nil_out_instance_obj_host_and_node') - @mock.patch.object(manager.ComputeManager, '_cleanup_volumes') @mock.patch.object(manager.ComputeManager, '_cleanup_allocated_networks') @mock.patch.object(manager.ComputeManager, '_set_instance_obj_error_state') @mock.patch.object(compute_utils, 'add_instance_fault_from_exc') @mock.patch.object(manager.ComputeManager, '_build_and_run_instance') def test_rescheduled_exception_without_retry(self, - mock_build_run, mock_add, mock_set, mock_clean_net, mock_clean_vol, - mock_nil, mock_save, mock_start, mock_finish): + mock_build_run, mock_add, mock_set, mock_clean_net, mock_nil, + mock_save, mock_start, mock_finish): self._do_build_instance_update(mock_save) mock_build_run.side_effect = exception.RescheduledException(reason='', instance_uuid=self.instance.uuid) @@ -6918,9 +6917,6 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase): self.accel_uuids) mock_clean_net.assert_called_once_with(self.context, self.instance, self.requested_networks) - mock_clean_vol.assert_called_once_with(self.context, - self.instance, self.block_device_mapping, - raise_exc=False) mock_add.assert_called_once_with(self.context, self.instance, mock.ANY, mock.ANY, fault_message=mock.ANY) mock_nil.assert_called_once_with(self.instance) |