summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ironic/drivers/modules/agent_base.py17
-rw-r--r--ironic/tests/unit/drivers/modules/test_agent_base.py45
-rw-r--r--releasenotes/notes/agent-power-off-2115fcfaac030bd0.yaml5
3 files changed, 62 insertions, 5 deletions
diff --git a/ironic/drivers/modules/agent_base.py b/ironic/drivers/modules/agent_base.py
index 0cdd85470..61ef16451 100644
--- a/ironic/drivers/modules/agent_base.py
+++ b/ironic/drivers/modules/agent_base.py
@@ -1015,16 +1015,25 @@ class AgentDeployMixin(HeartbeatMixin):
if not oob_power_off:
try:
self._client.power_off(node)
- _wait_until_powered_off(task)
except Exception as e:
- LOG.warning('Failed to soft power off node %(node_uuid)s '
- 'in at least %(timeout)d seconds. '
+ LOG.warning('Failed to soft power off node %(node_uuid)s. '
'%(cls)s: %(error)s',
{'node_uuid': node.uuid,
- 'timeout': (wait * (attempts - 1)) / 1000,
'cls': e.__class__.__name__, 'error': e},
exc_info=not isinstance(
e, exception.IronicException))
+
+ # NOTE(dtantsur): in rare cases it may happen that the power
+ # off request comes through but we never receive the response.
+ # Check the power state before trying to force off.
+ try:
+ _wait_until_powered_off(task)
+ except Exception:
+ LOG.warning('Failed to soft power off node %(node_uuid)s '
+ 'in at least %(timeout)d seconds. Forcing '
+ 'hard power off and proceeding.',
+ {'node_uuid': node.uuid,
+ 'timeout': (wait * (attempts - 1)) / 1000})
manager_utils.node_power_action(task, states.POWER_OFF)
else:
# Flush the file system prior to hard rebooting the node
diff --git a/ironic/tests/unit/drivers/modules/test_agent_base.py b/ironic/tests/unit/drivers/modules/test_agent_base.py
index 31a4ade11..d684fda9b 100644
--- a/ironic/tests/unit/drivers/modules/test_agent_base.py
+++ b/ironic/tests/unit/drivers/modules/test_agent_base.py
@@ -926,7 +926,10 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest):
@mock.patch.object(manager_utils, 'notify_conductor_resume_deploy',
autospec=True)
@mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True)
+ @mock.patch.object(time, 'sleep', lambda seconds: None)
@mock.patch.object(manager_utils, 'node_power_action', autospec=True)
+ @mock.patch.object(fake.FakePower, 'get_power_state',
+ spec=types.FunctionType)
@mock.patch.object(agent_client.AgentClient, 'power_off',
spec=types.FunctionType)
@mock.patch('ironic.drivers.modules.network.noop.NoopNetwork.'
@@ -935,13 +938,15 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest):
'configure_tenant_networks', spec_set=True, autospec=True)
def test_reboot_and_finish_deploy_soft_poweroff_fails(
self, configure_tenant_net_mock, remove_provisioning_net_mock,
- power_off_mock, node_power_action_mock, mock_collect, resume_mock):
+ power_off_mock, get_power_state_mock, node_power_action_mock,
+ mock_collect, resume_mock):
power_off_mock.side_effect = RuntimeError("boom")
self.node.provision_state = states.DEPLOYING
self.node.target_provision_state = states.ACTIVE
self.node.save()
with task_manager.acquire(self.context, self.node.uuid,
shared=True) as task:
+ get_power_state_mock.return_value = states.POWER_ON
self.deploy.reboot_and_finish_deploy(task)
power_off_mock.assert_called_once_with(task.node)
node_power_action_mock.assert_has_calls([
@@ -954,6 +959,44 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest):
self.assertEqual(states.ACTIVE, task.node.target_provision_state)
self.assertFalse(mock_collect.called)
+ @mock.patch.object(manager_utils, 'notify_conductor_resume_deploy',
+ autospec=True)
+ @mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True)
+ @mock.patch.object(time, 'sleep', lambda seconds: None)
+ @mock.patch.object(manager_utils, 'node_power_action', autospec=True)
+ @mock.patch.object(fake.FakePower, 'get_power_state',
+ spec=types.FunctionType)
+ @mock.patch.object(agent_client.AgentClient, 'power_off',
+ spec=types.FunctionType)
+ @mock.patch('ironic.drivers.modules.network.noop.NoopNetwork.'
+ 'remove_provisioning_network', spec_set=True, autospec=True)
+ @mock.patch('ironic.drivers.modules.network.noop.NoopNetwork.'
+ 'configure_tenant_networks', spec_set=True, autospec=True)
+ def test_reboot_and_finish_deploy_soft_poweroff_race(
+ self, configure_tenant_net_mock, remove_provisioning_net_mock,
+ power_off_mock, get_power_state_mock, node_power_action_mock,
+ mock_collect, resume_mock):
+ # Test the situation when soft power off works, but ironic doesn't
+ # learn about it.
+ power_off_mock.side_effect = RuntimeError("boom")
+ self.node.provision_state = states.DEPLOYING
+ self.node.target_provision_state = states.ACTIVE
+ self.node.save()
+ with task_manager.acquire(self.context, self.node.uuid,
+ shared=True) as task:
+ get_power_state_mock.side_effect = [states.POWER_ON,
+ states.POWER_OFF]
+ self.deploy.reboot_and_finish_deploy(task)
+ power_off_mock.assert_called_once_with(task.node)
+ node_power_action_mock.assert_called_once_with(
+ task, states.POWER_ON)
+ remove_provisioning_net_mock.assert_called_once_with(mock.ANY,
+ task)
+ configure_tenant_net_mock.assert_called_once_with(mock.ANY, task)
+ self.assertEqual(states.DEPLOYWAIT, task.node.provision_state)
+ self.assertEqual(states.ACTIVE, task.node.target_provision_state)
+ self.assertFalse(mock_collect.called)
+
@mock.patch.object(manager_utils, 'power_on_node_if_needed')
@mock.patch.object(manager_utils, 'notify_conductor_resume_deploy',
autospec=True)
diff --git a/releasenotes/notes/agent-power-off-2115fcfaac030bd0.yaml b/releasenotes/notes/agent-power-off-2115fcfaac030bd0.yaml
new file mode 100644
index 000000000..74f84fbab
--- /dev/null
+++ b/releasenotes/notes/agent-power-off-2115fcfaac030bd0.yaml
@@ -0,0 +1,5 @@
+---
+fixes:
+ - |
+ Fixes a rare issue when agent successfully powers off a node after
+ deployment, but ironic never learns about it and does another reboot.