summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorZuul <zuul@review.opendev.org>2023-03-24 14:07:59 +0000
committerGerrit Code Review <review@openstack.org>2023-03-24 14:07:59 +0000
commita32c00da948ca4f49188e946f0c77801b563a88b (patch)
tree91b68fbe2a8c1ea57c0c900811c942fae80d3e54
parentad227de245536a3c86b17132136c51670e87e3c5 (diff)
parent92c187a32a826cff45eb2d45d8a5251ede55e7df (diff)
downloadironic-stable/yoga.tar.gz
Merge "Do not move nodes to CLEAN FAILED with empty last_error" into stable/yogastable/yoga
-rw-r--r--ironic/common/states.py3
-rw-r--r--ironic/conductor/cleaning.py26
-rw-r--r--ironic/conductor/manager.py3
-rw-r--r--ironic/conductor/task_manager.py11
-rw-r--r--ironic/conductor/utils.py8
-rw-r--r--ironic/tests/unit/conductor/test_cleaning.py21
-rw-r--r--ironic/tests/unit/conductor/test_manager.py3
-rw-r--r--ironic/tests/unit/conductor/test_utils.py24
-rw-r--r--releasenotes/notes/cleaning-error-5c13c33c58404b97.yaml8
9 files changed, 80 insertions, 27 deletions
diff --git a/ironic/common/states.py b/ironic/common/states.py
index 89b710189..f2238b41b 100644
--- a/ironic/common/states.py
+++ b/ironic/common/states.py
@@ -269,6 +269,9 @@ _FASTTRACK_LOOKUP_ALLOWED_STATES = (ENROLL, MANAGEABLE, AVAILABLE,
FASTTRACK_LOOKUP_ALLOWED_STATES = frozenset(_FASTTRACK_LOOKUP_ALLOWED_STATES)
"""States where API lookups are permitted with fast track enabled."""
+FAILURE_STATES = frozenset((DEPLOYFAIL, CLEANFAIL, INSPECTFAIL,
+ RESCUEFAIL, UNRESCUEFAIL, ADOPTFAIL))
+
##############
# Power states
diff --git a/ironic/conductor/cleaning.py b/ironic/conductor/cleaning.py
index cb801fc8c..8a5b8be36 100644
--- a/ironic/conductor/cleaning.py
+++ b/ironic/conductor/cleaning.py
@@ -245,12 +245,21 @@ def do_next_clean_step(task, step_index, disable_ramdisk=None):
task.process_event(event)
+def get_last_error(node):
+ last_error = _('By request, the clean operation was aborted')
+ if node.clean_step:
+ last_error += (
+ _(' during or after the completion of step "%s"')
+ % conductor_steps.step_id(node.clean_step)
+ )
+ return last_error
+
+
@task_manager.require_exclusive_lock
-def do_node_clean_abort(task, step_name=None):
+def do_node_clean_abort(task):
"""Internal method to abort an ongoing operation.
:param task: a TaskManager instance with an exclusive lock
- :param step_name: The name of the clean step.
"""
node = task.node
try:
@@ -268,12 +277,13 @@ def do_node_clean_abort(task, step_name=None):
set_fail_state=False)
return
+ last_error = get_last_error(node)
info_message = _('Clean operation aborted for node %s') % node.uuid
- last_error = _('By request, the clean operation was aborted')
- if step_name:
- msg = _(' after the completion of step "%s"') % step_name
- last_error += msg
- info_message += msg
+ if node.clean_step:
+ info_message += (
+ _(' during or after the completion of step "%s"')
+ % node.clean_step
+ )
node.last_error = last_error
node.clean_step = None
@@ -315,7 +325,7 @@ def continue_node_clean(task):
target_state = None
task.process_event('fail', target_state=target_state)
- do_node_clean_abort(task, step_name)
+ do_node_clean_abort(task)
return
LOG.debug('The cleaning operation for node %(node)s was '
diff --git a/ironic/conductor/manager.py b/ironic/conductor/manager.py
index 7f6470e38..a4fed6aff 100644
--- a/ironic/conductor/manager.py
+++ b/ironic/conductor/manager.py
@@ -1331,7 +1331,8 @@ class ConductorManager(base_manager.BaseConductorManager):
callback=self._spawn_worker,
call_args=(cleaning.do_node_clean_abort, task),
err_handler=utils.provisioning_error_handler,
- target_state=target_state)
+ target_state=target_state,
+ last_error=cleaning.get_last_error(node))
return
if node.provision_state == states.RESCUEWAIT:
diff --git a/ironic/conductor/task_manager.py b/ironic/conductor/task_manager.py
index 509c9ce92..922e74cf6 100644
--- a/ironic/conductor/task_manager.py
+++ b/ironic/conductor/task_manager.py
@@ -527,7 +527,8 @@ class TaskManager(object):
self.release_resources()
def process_event(self, event, callback=None, call_args=None,
- call_kwargs=None, err_handler=None, target_state=None):
+ call_kwargs=None, err_handler=None, target_state=None,
+ last_error=None):
"""Process the given event for the task's current state.
:param event: the name of the event to process
@@ -540,6 +541,8 @@ class TaskManager(object):
prev_target_state)
:param target_state: if specified, the target provision state for the
node. Otherwise, use the target state from the fsm
+ :param last_error: last error to set on the node together with
+ the state transition.
:raises: InvalidState if the event is not allowed by the associated
state machine
"""
@@ -572,13 +575,15 @@ class TaskManager(object):
# set up the async worker
if callback:
- # clear the error if we're going to start work in a callback
- self.node.last_error = None
+ # update the error if we're going to start work in a callback
+ self.node.last_error = last_error
if call_args is None:
call_args = ()
if call_kwargs is None:
call_kwargs = {}
self.spawn_after(callback, *call_args, **call_kwargs)
+ elif last_error is not None:
+ self.node.last_error = last_error
# publish the state transition by saving the Node
self.node.save()
diff --git a/ironic/conductor/utils.py b/ironic/conductor/utils.py
index 643bc4d10..b43de8e55 100644
--- a/ironic/conductor/utils.py
+++ b/ironic/conductor/utils.py
@@ -302,9 +302,11 @@ def node_power_action(task, new_state, timeout=None):
# Set the target_power_state and clear any last_error, if we're
# starting a new operation. This will expose to other processes
- # and clients that work is in progress.
- node['target_power_state'] = target_state
- node['last_error'] = None
+ # and clients that work is in progress. Keep the last_error intact
+ # if the power action happens as a result of a failure.
+ node.target_power_state = target_state
+ if node.provision_state not in states.FAILURE_STATES:
+ node.last_error = None
node.timestamp_driver_internal_info('last_power_state_change')
# NOTE(dtantsur): wipe token on shutting down, otherwise a reboot in
# fast-track (or an accidentally booted agent) will cause subsequent
diff --git a/ironic/tests/unit/conductor/test_cleaning.py b/ironic/tests/unit/conductor/test_cleaning.py
index 65261450a..8e961eb92 100644
--- a/ironic/tests/unit/conductor/test_cleaning.py
+++ b/ironic/tests/unit/conductor/test_cleaning.py
@@ -1124,12 +1124,12 @@ class DoNodeCleanTestCase(db_base.DbTestCase):
class DoNodeCleanAbortTestCase(db_base.DbTestCase):
@mock.patch.object(fake.FakeDeploy, 'tear_down_cleaning', autospec=True)
- def _test__do_node_clean_abort(self, step_name, tear_mock):
+ def _test_do_node_clean_abort(self, clean_step, tear_mock):
node = obj_utils.create_test_node(
self.context, driver='fake-hardware',
- provision_state=states.CLEANFAIL,
+ provision_state=states.CLEANWAIT,
target_provision_state=states.AVAILABLE,
- clean_step={'step': 'foo', 'abortable': True},
+ clean_step=clean_step,
driver_internal_info={
'agent_url': 'some url',
'agent_secret_token': 'token',
@@ -1139,11 +1139,11 @@ class DoNodeCleanAbortTestCase(db_base.DbTestCase):
'skip_current_clean_step': True})
with task_manager.acquire(self.context, node.uuid) as task:
- cleaning.do_node_clean_abort(task, step_name=step_name)
+ cleaning.do_node_clean_abort(task)
self.assertIsNotNone(task.node.last_error)
tear_mock.assert_called_once_with(task.driver.deploy, task)
- if step_name:
- self.assertIn(step_name, task.node.last_error)
+ if clean_step:
+ self.assertIn(clean_step['step'], task.node.last_error)
# assert node's clean_step and metadata was cleaned up
self.assertEqual({}, task.node.clean_step)
self.assertNotIn('clean_step_index',
@@ -1159,11 +1159,12 @@ class DoNodeCleanAbortTestCase(db_base.DbTestCase):
self.assertNotIn('agent_secret_token',
task.node.driver_internal_info)
- def test__do_node_clean_abort(self):
- self._test__do_node_clean_abort(None)
+ def test_do_node_clean_abort_early(self):
+ self._test_do_node_clean_abort(None)
- def test__do_node_clean_abort_with_step_name(self):
- self._test__do_node_clean_abort('foo')
+ def test_do_node_clean_abort_with_step(self):
+ self._test_do_node_clean_abort({'step': 'foo', 'interface': 'deploy',
+ 'abortable': True})
@mock.patch.object(fake.FakeDeploy, 'tear_down_cleaning', autospec=True)
def test__do_node_clean_abort_tear_down_fail(self, tear_mock):
diff --git a/ironic/tests/unit/conductor/test_manager.py b/ironic/tests/unit/conductor/test_manager.py
index f49633364..60c5a6201 100644
--- a/ironic/tests/unit/conductor/test_manager.py
+++ b/ironic/tests/unit/conductor/test_manager.py
@@ -2716,7 +2716,8 @@ class DoProvisioningActionTestCase(mgr_utils.ServiceSetUpMixin,
# Node will be moved to tgt_prov_state after cleaning, not tested here
self.assertEqual(states.CLEANFAIL, node.provision_state)
self.assertEqual(tgt_prov_state, node.target_provision_state)
- self.assertIsNone(node.last_error)
+ self.assertEqual('By request, the clean operation was aborted',
+ node.last_error)
mock_spawn.assert_called_with(
self.service, cleaning.do_node_clean_abort, mock.ANY)
diff --git a/ironic/tests/unit/conductor/test_utils.py b/ironic/tests/unit/conductor/test_utils.py
index 3efc3052b..3b19f456a 100644
--- a/ironic/tests/unit/conductor/test_utils.py
+++ b/ironic/tests/unit/conductor/test_utils.py
@@ -196,7 +196,8 @@ class NodePowerActionTestCase(db_base.DbTestCase):
node = obj_utils.create_test_node(self.context,
uuid=uuidutils.generate_uuid(),
driver='fake-hardware',
- power_state=states.POWER_OFF)
+ power_state=states.POWER_OFF,
+ last_error='failed before')
task = task_manager.TaskManager(self.context, node.uuid)
get_power_mock.return_value = states.POWER_OFF
@@ -209,6 +210,27 @@ class NodePowerActionTestCase(db_base.DbTestCase):
self.assertIsNone(node['target_power_state'])
self.assertIsNone(node['last_error'])
+ @mock.patch.object(fake.FakePower, 'get_power_state', autospec=True)
+ def test_node_power_action_keep_last_error(self, get_power_mock):
+ """Test node_power_action to keep last_error for failed states."""
+ node = obj_utils.create_test_node(self.context,
+ uuid=uuidutils.generate_uuid(),
+ driver='fake-hardware',
+ power_state=states.POWER_OFF,
+ provision_state=states.CLEANFAIL,
+ last_error='failed before')
+ task = task_manager.TaskManager(self.context, node.uuid)
+
+ get_power_mock.return_value = states.POWER_OFF
+
+ conductor_utils.node_power_action(task, states.POWER_ON)
+
+ node.refresh()
+ get_power_mock.assert_called_once_with(mock.ANY, mock.ANY)
+ self.assertEqual(states.POWER_ON, node['power_state'])
+ self.assertIsNone(node['target_power_state'])
+ self.assertEqual('failed before', node['last_error'])
+
@mock.patch('ironic.objects.node.NodeSetPowerStateNotification',
autospec=True)
@mock.patch.object(fake.FakePower, 'get_power_state', autospec=True)
diff --git a/releasenotes/notes/cleaning-error-5c13c33c58404b97.yaml b/releasenotes/notes/cleaning-error-5c13c33c58404b97.yaml
new file mode 100644
index 000000000..270278f1b
--- /dev/null
+++ b/releasenotes/notes/cleaning-error-5c13c33c58404b97.yaml
@@ -0,0 +1,8 @@
+---
+fixes:
+ - |
+ When aborting cleaning, the ``last_error`` field is no longer initially
+ empty. It is now populated on the state transition to ``clean failed``.
+ - |
+ When cleaning or deployment fails, the ``last_error`` field is no longer
+ temporary set to ``None`` while the power off action is running.