From 397e49a5e673085ea45097925b716c10df6a4aa4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aija=20Jaunt=C4=93va?= Date: Wed, 14 Sep 2022 05:15:51 -0400 Subject: Fix idrac-redfish RAID controller mode conversion PERC 9 and PERC 10 might not be in RAID mode with no or limited RAID support. This fixes to convert any eligible controllers to RAID mode during delete_configuration clean step or deploy step. Story: 2010272 Task: 46199 Change-Id: I5e85df95a66aed9772ae0660b2c85ca3a39b96c7 --- driver-requirements.txt | 2 +- ironic/drivers/modules/drac/raid.py | 82 +++++++++ .../tests/unit/drivers/modules/drac/test_raid.py | 196 ++++++++++++++++++++- ...c-redfish-controller-mode-7b55c58d09240d3c.yaml | 5 + 4 files changed, 281 insertions(+), 4 deletions(-) create mode 100644 releasenotes/notes/fix-idrac-redfish-controller-mode-7b55c58d09240d3c.yaml diff --git a/driver-requirements.txt b/driver-requirements.txt index 3725c3ddf..876e817cb 100644 --- a/driver-requirements.txt +++ b/driver-requirements.txt @@ -17,4 +17,4 @@ ansible>=2.7 python-ibmcclient>=0.2.2,<0.3.0 # Dell EMC iDRAC sushy OEM extension -sushy-oem-idrac>=4.0.0,<6.0.0 +sushy-oem-idrac>=5.0.0,<6.0.0 diff --git a/ironic/drivers/modules/drac/raid.py b/ironic/drivers/modules/drac/raid.py index ae06f0dfa..8bad02bba 100644 --- a/ironic/drivers/modules/drac/raid.py +++ b/ironic/drivers/modules/drac/raid.py @@ -1327,6 +1327,8 @@ class DracRedfishRAID(redfish_raid.RedfishRAID): """Perform post delete_configuration action to commit the config. Clears foreign configuration for all RAID controllers. + If no foreign configuration to clear, then checks if any controllers + can be converted to RAID mode. :param task: a TaskManager instance containing the node to act on. :param raid_configs: a list of dictionaries containing the RAID @@ -1338,7 +1340,15 @@ class DracRedfishRAID(redfish_raid.RedfishRAID): async_proc = DracRedfishRAID._clear_foreign_config(system, task) if async_proc: # Async processing with system rebooting in progress + task.node.set_driver_internal_info( + 'raid_config_substep', 'clear_foreign_config') + task.node.save() return deploy_utils.get_async_step_return_state(task.node) + else: + conv_state = DracRedfishRAID._convert_controller_to_raid_mode( + task) + if conv_state: + return conv_state return return_state @@ -1486,6 +1496,69 @@ class DracRedfishRAID(redfish_raid.RedfishRAID): task_mon.wait(CONF.drac.raid_job_timeout) return False + @staticmethod + def _convert_controller_to_raid_mode(task): + """Convert eligible controllers to RAID mode if not already. + + :param task: a TaskManager instance containing the node to act on + :returns: Return state if there are controllers to convert and + and rebooting, otherwise None. + """ + + system = redfish_utils.get_system(task.node) + task_mons = [] + warning_msg_templ = ( + 'Possibly because `%(pkg)s` is too old. Without newer `%(pkg)s` ' + 'PERC 9 and PERC 10 controllers that are not in RAID mode will ' + 'not be used or have limited RAID support. To avoid that update ' + '`%(pkg)s`') + for storage in system.storage.get_members(): + storage_controllers = None + try: + storage_controllers = storage.controllers + except sushy.exceptions.MissingAttributeError: + # Check if there storage_controllers to separate old iDRAC and + # storage without controller + if storage.storage_controllers: + LOG.warning('%(storage)s does not have controllers for ' + 'node %(node)s' + warning_msg_templ, + {'storage': storage.identity, + 'node': task.node.uuid, + 'pkg': 'iDRAC'}) + continue + except AttributeError: + LOG.warning('%(storage)s does not have controllers attribute. ' + + warning_msg_templ, {'storage': storage.identity, + 'pkg': 'sushy'}) + return None + if storage_controllers: + controller = storage.controllers.get_members()[0] + try: + oem_controller = controller.get_oem_extension('Dell') + except sushy.exceptions.ExtensionError as ee: + LOG.warning('Failed to find extension to convert ' + 'controller to RAID mode. ' + + warning_msg_templ + '. Error: %(err)s', + {'err': ee, 'pkg': 'sushy-oem-idrac'}) + return None + task_mon = oem_controller.convert_to_raid() + if task_mon: + task_mons.append(task_mon) + + if task_mons: + deploy_utils.set_async_step_flags( + task.node, + reboot=True, + skip_current_step=True, + polling=True) + + task.upgrade_lock() + task.node.set_driver_internal_info( + 'raid_task_monitor_uris', + [tm.task_monitor_uri for tm in task_mons]) + task.node.save() + return deploy_utils.reboot_to_finish_step(task) + @METRICS.timer('DracRedfishRAID._query_raid_tasks_status') @periodics.node_periodic( purpose='checking async RAID tasks', @@ -1545,6 +1618,15 @@ class DracRedfishRAID(redfish_raid.RedfishRAID): else: # all tasks completed and none of them failed node.del_driver_internal_info('raid_task_monitor_uris') + substep = node.driver_internal_info.get( + 'raid_config_substep') + if substep == 'clear_foreign_config': + node.del_driver_internal_info('raid_config_substep') + node.save() + res = DracRedfishRAID._convert_controller_to_raid_mode( + task) + if res: # New tasks submitted + return self._set_success(task) node.save() diff --git a/ironic/tests/unit/drivers/modules/drac/test_raid.py b/ironic/tests/unit/drivers/modules/drac/test_raid.py index 780d2893c..acbd009d3 100644 --- a/ironic/tests/unit/drivers/modules/drac/test_raid.py +++ b/ironic/tests/unit/drivers/modules/drac/test_raid.py @@ -2457,13 +2457,145 @@ class DracRedfishRAIDTestCase(test_utils.BaseDracTest): self.assertEqual(False, result) mock_log.assert_called_once() + @mock.patch.object(deploy_utils, 'reboot_to_finish_step', + autospec=True) + @mock.patch.object(redfish_utils, 'get_system', autospec=True) + def test__convert_controller_to_raid_mode( + self, mock_get_system, mock_reboot): + mock_task_mon = mock.Mock(task_monitor_uri='/TaskService/1') + mock_oem_controller = mock.Mock() + mock_oem_controller.convert_to_raid.return_value = mock_task_mon + mock_controller = mock.Mock() + mock_controller.get_oem_extension.return_value = mock_oem_controller + mock_controllers_col = mock.Mock() + mock_controllers_col.get_members.return_value = [mock_controller] + mock_storage = mock.Mock(controllers=mock_controllers_col) + mock_storage_col = mock.Mock() + mock_storage_col.get_members.return_value = [mock_storage] + mock_system = mock.Mock(storage=mock_storage_col) + mock_get_system.return_value = mock_system + + with task_manager.acquire(self.context, self.node.uuid, + shared=False) as task: + result = self.raid._convert_controller_to_raid_mode(task) + self.assertEqual( + ['/TaskService/1'], + task.node.driver_internal_info.get('raid_task_monitor_uris')) + self.assertEqual(mock_reboot.return_value, result) + + @mock.patch.object(redfish_utils, 'get_system', autospec=True) + def test__convert_controller_to_raid_mode_no_conversion( + self, mock_get_system): + mock_oem_controller = mock.Mock() + mock_oem_controller.convert_to_raid.return_value = None + mock_controller = mock.Mock() + mock_controller.get_oem_extension.return_value = mock_oem_controller + mock_controllers_col = mock.Mock() + mock_controllers_col.get_members.return_value = [mock_controller] + mock_storage = mock.Mock(controllers=mock_controllers_col) + mock_storage_col = mock.Mock() + mock_storage_col.get_members.return_value = [mock_storage] + mock_system = mock.Mock(storage=mock_storage_col) + mock_get_system.return_value = mock_system + + with task_manager.acquire(self.context, self.node.uuid, + shared=False) as task: + result = self.raid._convert_controller_to_raid_mode(task) + self.assertIsNone( + task.node.driver_internal_info.get('raid_task_monitor_uris')) + self.assertIsNone(result) + + @mock.patch.object(drac_raid.LOG, 'warning', autospec=True) + @mock.patch.object(redfish_utils, 'get_system', autospec=True) + def test__convert_controller_to_raid_mode_not_raid( + self, mock_get_system, mock_log): + mock_storage = mock.Mock(storage_controllers=None) + mock_controllers = mock.PropertyMock( + side_effect=sushy.exceptions.MissingAttributeError) + type(mock_storage).controllers = mock_controllers + mock_storage_col = mock.Mock() + mock_storage_col.get_members.return_value = [mock_storage] + mock_system = mock.Mock(storage=mock_storage_col) + mock_get_system.return_value = mock_system + + with task_manager.acquire(self.context, self.node.uuid, + shared=False) as task: + result = self.raid._convert_controller_to_raid_mode(task) + self.assertIsNone( + task.node.driver_internal_info.get('raid_task_monitor_uris')) + self.assertIsNone(result) + mock_log.assert_not_called() + + @mock.patch.object(drac_raid.LOG, 'warning', autospec=True) + @mock.patch.object(redfish_utils, 'get_system', autospec=True) + def test__convert_controller_to_raid_mode_old_idrac( + self, mock_get_system, mock_log): + mock_storage = mock.Mock(storage_controllers=mock.Mock()) + mock_controllers = mock.PropertyMock( + side_effect=sushy.exceptions.MissingAttributeError) + type(mock_storage).controllers = mock_controllers + mock_storage_col = mock.Mock() + mock_storage_col.get_members.return_value = [mock_storage] + mock_system = mock.Mock(storage=mock_storage_col) + mock_get_system.return_value = mock_system + + with task_manager.acquire(self.context, self.node.uuid, + shared=False) as task: + result = self.raid._convert_controller_to_raid_mode(task) + self.assertIsNone( + task.node.driver_internal_info.get('raid_task_monitor_uris')) + self.assertIsNone(result) + mock_log.assert_called_once() + + @mock.patch.object(drac_raid.LOG, 'warning', autospec=True) + @mock.patch.object(redfish_utils, 'get_system', autospec=True) + def test__convert_controller_to_raid_mode_old_sushy( + self, mock_get_system, mock_log): + mock_storage = mock.Mock(spec=[]) + mock_storage.identity = "Storage 1" + mock_storage_col = mock.Mock() + mock_storage_col.get_members.return_value = [mock_storage] + mock_system = mock.Mock(storage=mock_storage_col) + mock_get_system.return_value = mock_system + + with task_manager.acquire(self.context, self.node.uuid, + shared=False) as task: + result = self.raid._convert_controller_to_raid_mode(task) + self.assertIsNone( + task.node.driver_internal_info.get('raid_task_monitor_uris')) + self.assertIsNone(result) + mock_log.assert_called_once() + + @mock.patch.object(redfish_utils, 'get_system', autospec=True) + def test__convert_controller_to_raid_mode_old_sushy_oem( + self, mock_get_system): + mock_controller = mock.Mock() + mock_controller.get_oem_extension.side_effect =\ + sushy.exceptions.ExtensionError + mock_controllers_col = mock.Mock() + mock_controllers_col.get_members.return_value = [mock_controller] + mock_storage = mock.Mock(controllers=mock_controllers_col) + mock_storage_col = mock.Mock() + mock_storage_col.get_members.return_value = [mock_storage] + mock_system = mock.Mock(storage=mock_storage_col) + mock_get_system.return_value = mock_system + + with task_manager.acquire(self.context, self.node.uuid, + shared=False) as task: + result = self.raid._convert_controller_to_raid_mode(task) + self.assertIsNone( + task.node.driver_internal_info.get('raid_task_monitor_uris')) + self.assertIsNone(result) + + @mock.patch.object(drac_raid.DracRedfishRAID, + '_convert_controller_to_raid_mode', autospec=True) @mock.patch.object(deploy_utils, 'get_async_step_return_state', autospec=True) @mock.patch.object(deploy_utils, 'build_agent_options', autospec=True) @mock.patch.object(redfish_utils, 'get_system', autospec=True) def test_post_delete_configuration_foreign_async( self, mock_get_system, mock_build_agent_options, - mock_get_async_step_return_state): + mock_get_async_step_return_state, mock_convert): fake_oem_system = mock.Mock() fake_system = mock.Mock() fake_system.get_oem_extension.return_value = fake_oem_system @@ -2497,9 +2629,13 @@ class DracRedfishRAIDTestCase(test_utils.BaseDracTest): mock_get_async_step_return_state.assert_called_once_with(task.node) mock_task_mon1.wait.assert_not_called() mock_task_mon2.wait.assert_not_called() + mock_convert.assert_not_called() + @mock.patch.object(drac_raid.DracRedfishRAID, + '_convert_controller_to_raid_mode', autospec=True) @mock.patch.object(redfish_utils, 'get_system', autospec=True) - def test_post_delete_configuration_foreign_sync(self, mock_get_system): + def test_post_delete_configuration_foreign_sync( + self, mock_get_system, mock_convert): fake_oem_system = mock.Mock() fake_system = mock.Mock() fake_system.get_oem_extension.return_value = fake_oem_system @@ -2520,15 +2656,34 @@ class DracRedfishRAIDTestCase(test_utils.BaseDracTest): mock_task_mon2.get_task.return_value = mock_task2 fake_oem_system.clear_foreign_config.return_value = [ mock_task_mon1, mock_task_mon2] + mock_convert_state = mock.Mock() + mock_convert.return_value = mock_convert_state result = self.raid.post_delete_configuration( task, None, return_state=mock_return_state1) - self.assertEqual(result, mock_return_state1) + self.assertEqual(result, mock_convert_state) fake_oem_system.clear_foreign_config.assert_called_once() mock_task_mon1.wait.assert_called_once_with(CONF.drac.raid_job_timeout) mock_task_mon2.wait.assert_not_called() + @mock.patch.object(drac_raid.DracRedfishRAID, + '_convert_controller_to_raid_mode', autospec=True) + @mock.patch.object(drac_raid.DracRedfishRAID, + '_clear_foreign_config', autospec=True) + @mock.patch.object(redfish_utils, 'get_system', autospec=True) + def test_post_delete_configuration_no_subtasks( + self, mock_get_system, mock_foreign, mock_convert): + mock_foreign.return_value = False + mock_convert.return_value = None + task = mock.Mock(node=self.node, context=self.context) + mock_return_state1 = mock.Mock() + + result = self.raid.post_delete_configuration( + task, None, return_state=mock_return_state1) + + self.assertEqual(mock_return_state1, result) + @mock.patch.object(drac_raid.LOG, 'warning', autospec=True) def test__clear_foreign_config_attribute_error(self, mock_log): fake_oem_system = mock.Mock(spec=[]) @@ -2682,6 +2837,41 @@ class DracRedfishRAIDTestCase(test_utils.BaseDracTest): task.node.driver_internal_info.get('raid_task_monitor_uris')) self.raid._set_failed.assert_called_once() + @mock.patch.object(drac_raid.DracRedfishRAID, + '_convert_controller_to_raid_mode', autospec=True) + @mock.patch.object(redfish_utils, 'get_task_monitor', autospec=True) + def test__check_raid_tasks_status_convert_controller( + self, mock_get_task_monitor, mock_convert): + driver_internal_info = { + 'raid_task_monitor_uris': '/TaskService/1', + 'raid_config_substep': 'clear_foreign_config'} + self.node.driver_internal_info = driver_internal_info + self.node.save() + + mock_config_task = mock.Mock() + mock_config_task.task_state = sushy.TASK_STATE_COMPLETED + mock_config_task.task_status = sushy.HEALTH_OK + mock_task_monitor = mock.Mock() + mock_task_monitor.is_processing = False + mock_task_monitor.get_task.return_value = mock_config_task + mock_get_task_monitor.return_value = mock_task_monitor + + self.raid._set_success = mock.Mock() + self.raid._set_failed = mock.Mock() + + with task_manager.acquire(self.context, self.node.uuid, + shared=False) as task: + self.raid._check_raid_tasks_status( + task, ['/TaskService/1']) + + mock_convert.assert_called_once_with(task) + self.raid._set_success.assert_not_called() + self.raid._set_failed.assert_not_called() + self.assertIsNone( + task.node.driver_internal_info.get('raid_task_monitor_uris')) + self.assertIsNone( + task.node.driver_internal_info.get('raid_config_substep')) + @mock.patch.object(manager_utils, 'notify_conductor_resume_deploy', autospec=True) @mock.patch.object(manager_utils, 'notify_conductor_resume_clean', diff --git a/releasenotes/notes/fix-idrac-redfish-controller-mode-7b55c58d09240d3c.yaml b/releasenotes/notes/fix-idrac-redfish-controller-mode-7b55c58d09240d3c.yaml new file mode 100644 index 000000000..bf476dd63 --- /dev/null +++ b/releasenotes/notes/fix-idrac-redfish-controller-mode-7b55c58d09240d3c.yaml @@ -0,0 +1,5 @@ +--- +fixes: + - | + Fixes ``idrac-redfish`` RAID ``delete_configuration`` step to convert PERC + 9 and PERC 10 controllers to RAID mode if it is not already set. -- cgit v1.2.1