From 5155ade356eb88c9d8b74b33f88835914e2346f5 Mon Sep 17 00:00:00 2001 From: Zhou Hao Date: Wed, 15 Sep 2021 09:01:25 +0800 Subject: [iRMC] Avoid repeatedly resuming clean after creating raid configuration Fixed the bug of repeated resume cleaning due to the value of `fgi_status` not being updated correctly when obtaining the RAID configuration status of the node managed by the `irmc` hardware type. ``` Unexpected error when processing next clean step. TypeError: 'NoneType' object is not subscriptable ``` This `NoneType` error occurs because ironic resumes clean without waiting for IPA to get the clean steps, and then it tries to read the clean steps, which should be a list but is actually `None`. During auto clean, resume clean should be triggered by IPA, but in this case it is triggered by an iRMC periodic task which checks the progress of the running RAID configuration. This error does not occur every time; sometimes RAID can be configured, auto clean can complete without error, and the node can be deployed successfully. 
Story: #2009206 Task: #43265 Signed-off-by: Zhou Hao Change-Id: I5a1c5708bdc1709e928f0faf7e18396e260dc551 (cherry picked from commit 8a5c672fa96521bd7d340c64b0c6ae2785643951) --- ironic/drivers/modules/irmc/raid.py | 2 +- .../drivers/modules/irmc/test_periodic_task.py | 38 ++++++++++++++++++++++ ...repeatedly-resuming-clean-020f0dfc2e30d7bc.yaml | 7 ++++ 3 files changed, 46 insertions(+), 1 deletion(-) create mode 100644 releasenotes/notes/irmc-fix-repeatedly-resuming-clean-020f0dfc2e30d7bc.yaml diff --git a/ironic/drivers/modules/irmc/raid.py b/ironic/drivers/modules/irmc/raid.py index 34d1c3f38..25a856e8c 100644 --- a/ironic/drivers/modules/irmc/raid.py +++ b/ironic/drivers/modules/irmc/raid.py @@ -483,6 +483,7 @@ class IRMCRAID(base.RAIDInterface): if all(fgi_status == 'Idle' for fgi_status in fgi_status_dict.values()): raid_config.update({'fgi_status': RAID_COMPLETED}) + raid_common.update_raid_info(node, raid_config) LOG.info('RAID configuration has completed on ' 'node %(node)s with fgi_status is %(fgi)s', {'node': node_uuid, 'fgi': RAID_COMPLETED}) @@ -506,5 +507,4 @@ class IRMCRAID(base.RAIDInterface): task.process_event('fail') def _resume_cleaning(self, task): - raid_common.update_raid_info(task.node, task.node.raid_config) manager_utils.notify_conductor_resume_clean(task) diff --git a/ironic/tests/unit/drivers/modules/irmc/test_periodic_task.py b/ironic/tests/unit/drivers/modules/irmc/test_periodic_task.py index 6bda0fee6..865f58962 100644 --- a/ironic/tests/unit/drivers/modules/irmc/test_periodic_task.py +++ b/ironic/tests/unit/drivers/modules/irmc/test_periodic_task.py @@ -308,3 +308,41 @@ class iRMCPeriodicTaskTestCase(test_common.BaseIRMCTest): clean_fail_mock.assert_called_once_with(mock.ANY, task, fgi_status_dict) clean_mock.assert_called_once_with(mock.ANY, task) + + @mock.patch('ironic.drivers.modules.irmc.raid.IRMCRAID._resume_cleaning', + autospec=True) + @mock.patch('ironic.drivers.modules.irmc.raid.IRMCRAID._set_clean_failed', + 
autospec=True) + @mock.patch('ironic.drivers.modules.irmc.raid._get_fgi_status', + autospec=True) + @mock.patch.object(irmc_common, 'get_irmc_report', autospec=True) + @mock.patch.object(task_manager, 'acquire', autospec=True) + def test__query_raid_config_fgi_status_avoid_repeatedly_resume_cleaning( + self, mock_acquire, report_mock, fgi_mock, clean_fail_mock, + clean_mock): + mock_manager = mock.Mock() + raid_config = self.raid_config + fgi_mock.return_value = {'0': 'Idle', '1': 'Idle'} + task = mock.Mock(node=self.node, driver=self.driver) + mock_acquire.return_value = mock.MagicMock( + __enter__=mock.MagicMock(return_value=task)) + task.node.raid_config = raid_config + node_list = [(self.node.uuid, 'irmc', '', raid_config)] + mock_manager.iter_nodes.return_value = node_list + # Set provision state value + task.node.provision_state = 'clean wait' + task.node.save() + task.driver.raid._query_raid_config_fgi_status(mock_manager, + self.context) + raid_config = task.node.raid_config + node_list = [(self.node.uuid, 'irmc', '', raid_config)] + mock_manager.iter_nodes.return_value = node_list + task.node.provision_state = 'clean wait' + task.node.save() + task.driver.raid._query_raid_config_fgi_status(mock_manager, + self.context) + self.assertEqual(0, clean_fail_mock.call_count) + report_mock.assert_called_once_with(task.node) + fgi_mock.assert_called_once_with(report_mock.return_value, + self.node.uuid) + clean_mock.assert_called_once_with(mock.ANY, task) diff --git a/releasenotes/notes/irmc-fix-repeatedly-resuming-clean-020f0dfc2e30d7bc.yaml b/releasenotes/notes/irmc-fix-repeatedly-resuming-clean-020f0dfc2e30d7bc.yaml new file mode 100644 index 000000000..02104cc18 --- /dev/null +++ b/releasenotes/notes/irmc-fix-repeatedly-resuming-clean-020f0dfc2e30d7bc.yaml @@ -0,0 +1,7 @@ +--- +fixes: + - | + Fixed the bug of repeated resume cleaning due to the value of + `fgi_status` not being updated correctly when obtaining the + RAID configuration status of the node managed 
by the `irmc` + hardware type. -- cgit v1.2.1