diff options
author | Julia Kreger <juliaashleykreger@gmail.com> | 2015-03-26 15:17:41 -0400 |
---|---|---|
committer | Julia Kreger <juliaashleykreger@gmail.com> | 2015-04-07 07:14:32 -0400 |
commit | 59ccf51c179a92a1e9541fa54715ace8b110de91 (patch) | |
tree | f23147fd5b38bc634ffaa3431de404b47783da6e | |
parent | fb0cc8f86f1328ba2c22832c40d6a4664bca3742 (diff) | |
download | ironic-59ccf51c179a92a1e9541fa54715ace8b110de91.tar.gz |
Add retry logic to _exec_ipmitool
When using bridged IPMI buses, the possibility exists for the BMC
to return messages indicating that it is too busy to service the
request at that moment. This results in ipmitool returning an
error to the user instead of retrying the operation.
As a result of the error being returned, the current operation
being attempted by ironic is failed and is never retried as the
present retry configuration is purely a pass-through to ipmitool.
Added logic to allow for the identification of errors returned by
ipmitool that can be retried coupled with logic to retry the failed
operation until the configured retry window has passed.
Included three tests which are geared to exercise the retry logic
and ensure that the retry logic works as expected, including that
the retry operations are only performed a specific number of times
based on the conditions.
Updated one of the pre-existing tests that tested _exec_ipmitool
act of returning an exception to ensure that the retry logic is
executed only once.
Change-Id: I698a8ff1cb70d7758ed05e59b858d9e40708dd05
Closes-Bug: 1431929
-rw-r--r-- | ironic/drivers/modules/ipmitool.py | 72 | ||||
-rw-r--r-- | ironic/tests/drivers/test_ipmitool.py | 80 |
2 files changed, 138 insertions, 14 deletions
diff --git a/ironic/drivers/modules/ipmitool.py b/ironic/drivers/modules/ipmitool.py index 2826dd1e7..55fc1fa2d 100644 --- a/ironic/drivers/modules/ipmitool.py +++ b/ironic/drivers/modules/ipmitool.py @@ -111,6 +111,9 @@ ipmitool_command_options = { 'dual_bridge': ['ipmitool', '-m', '0', '-b', '0', '-t', '0', '-B', '0', '-T', '0', '-h']} +# Note(TheJulia): This string is hardcoded in ipmitool's lanplus driver. +IPMITOOL_RETRYABLE_FAILURES = ['insufficient resources for session'] + def _check_option_support(options): """Checks if the specific ipmitool options are supported on host. @@ -335,32 +338,68 @@ def _exec_ipmitool(driver_info, command): args.append(driver_info[name]) # specify retry timing more precisely, if supported + num_tries = max( + (CONF.ipmi.retry_timeout // CONF.ipmi.min_command_interval), 1) + if _is_option_supported('timing'): - num_tries = max( - (CONF.ipmi.retry_timeout // CONF.ipmi.min_command_interval), 1) args.append('-R') args.append(str(num_tries)) args.append('-N') args.append(str(CONF.ipmi.min_command_interval)) - # 'ipmitool' command will prompt password if there is no '-f' option, - # we set it to '\0' to write a password file to support empty password - with _make_password_file(driver_info['password'] or '\0') as pw_file: - args.append('-f') - args.append(pw_file) - args.extend(command.split(" ")) + end_time = (_time() + CONF.ipmi.retry_timeout) + + while True: + num_tries = num_tries - 1 # NOTE(deva): ensure that no communications are sent to a BMC more # often than once every min_command_interval seconds. time_till_next_poll = CONF.ipmi.min_command_interval - ( - time.time() - LAST_CMD_TIME.get(driver_info['address'], 0)) + _time() - LAST_CMD_TIME.get(driver_info['address'], 0)) if time_till_next_poll > 0: time.sleep(time_till_next_poll) - try: - out, err = utils.execute(*args) - finally: - LAST_CMD_TIME[driver_info['address']] = time.time() - return out, err + # Resetting the list that will be utilized so the password arguments + # from any previous execution are preserved. + cmd_args = args[:] + # 'ipmitool' command will prompt password if there is no '-f' + # option, we set it to '\0' to write a password file to support + # empty password + with _make_password_file( + driver_info['password'] or '\0' + ) as pw_file: + cmd_args.append('-f') + cmd_args.append(pw_file) + cmd_args.extend(command.split(" ")) + try: + out, err = utils.execute(*cmd_args) + return out, err + except processutils.ProcessExecutionError as e: + with excutils.save_and_reraise_exception() as ctxt: + err_list = [x for x in IPMITOOL_RETRYABLE_FAILURES + if x in e.message] + if ((_time() > end_time) or + (num_tries == 0) or + not err_list): + LOG.error(_LE('IPMI Error attempting to execute ' + '"%(cmd)s" for node %(node)s. ' + 'Error: %(error)s'), + { + 'node': driver_info['uuid'], + 'cmd': e.cmd, + 'error': e + }) + else: + ctxt.reraise = False + LOG.warning(_LW('IPMI Error encountered, retrying ' + '"%(cmd)s" for node %(node)s ' + 'Error: %(error)s'), + { + 'node': driver_info['uuid'], + 'cmd': e.cmd, + 'error': e + }) + finally: + LAST_CMD_TIME[driver_info['address']] = _time() def _sleep_time(iter): @@ -587,6 +626,11 @@ def send_raw(task, raw_bytes): raise exception.IPMIFailure(cmd=cmd) +def _time(): + """Wrapper for time.time() enabling simplified unit testing.""" + return time.time() + + class IPMIPower(base.PowerInterface): def __init__(self): diff --git a/ironic/tests/drivers/test_ipmitool.py b/ironic/tests/drivers/test_ipmitool.py index a27060d0f..0828f2c60 100644 --- a/ironic/tests/drivers/test_ipmitool.py +++ b/ironic/tests/drivers/test_ipmitool.py @@ -840,6 +840,86 @@ class IPMIToolPrivateMethodTestCase(db_base.DbTestCase): mock_support.assert_called_once_with('timing') mock_pwf.assert_called_once_with(self.info['password']) mock_exec.assert_called_once_with(*args) + self.assertEqual(1, mock_exec.call_count) + + @mock.patch.object(ipmi, '_is_option_supported', autospec=True) + @mock.patch.object(utils, 'execute', autospec=True) + def test__exec_ipmitool_exception_retry(self, + mock_exec, mock_support, mock_sleep): + + ipmi.LAST_CMD_TIME = {} + mock_support.return_value = False + mock_exec.side_effect = iter([ + processutils.ProcessExecutionError( + stderr="insufficient resources for session" + ), + (None, None) + ]) + + # Directly set the configuration values such that + # the logic will cause _exec_ipmitool to retry twice. + self.config(min_command_interval=1, group='ipmi') + self.config(retry_timeout=2, group='ipmi') + + ipmi._exec_ipmitool(self.info, 'A B C') + + mock_support.assert_called_once_with('timing') + self.assertEqual(2, mock_exec.call_count) + + @mock.patch.object(ipmi, '_is_option_supported', autospec=True) + @mock.patch.object(utils, 'execute', autospec=True) + def test__exec_ipmitool_exception_retries_exceeded(self, + mock_exec, mock_support, mock_sleep): + + ipmi.LAST_CMD_TIME = {} + mock_support.return_value = False + + mock_exec.side_effect = processutils.ProcessExecutionError( + stderr="insufficient resources for session" + ) + + # Directly set the configuration values such that + # the logic will cause _exec_ipmitool to timeout. + self.config(min_command_interval=1, group='ipmi') + self.config(retry_timeout=1, group='ipmi') + + self.assertRaises(processutils.ProcessExecutionError, + ipmi._exec_ipmitool, + self.info, 'A B C') + mock_support.assert_called_once_with('timing') + self.assertEqual(1, mock_exec.call_count) + + @mock.patch.object(ipmi, '_is_option_supported', autospec=True) + @mock.patch.object(utils, 'execute', autospec=True) + def test__exec_ipmitool_exception_retry_failure_unhandable(self, + mock_exec, mock_support, mock_sleep): + + ipmi.LAST_CMD_TIME = {} + mock_support.return_value = False + + # Return a retryable error, then a error that cannot + # be retryable thus resulting in a single retry + # attempt by _exec_ipmitool that is successful. + mock_exec.side_effect = iter([ + processutils.ProcessExecutionError( + stderr="insufficient resources for session" + ), + processutils.ProcessExecutionError( + "Unknown" + ), + ]) + + # Directly set the configuration values such that + # the logic will cause _exec_ipmitool to retry up + # to 3 times. + self.config(min_command_interval=1, group='ipmi') + self.config(retry_timeout=3, group='ipmi') + + self.assertRaises(processutils.ProcessExecutionError, + ipmi._exec_ipmitool, + self.info, 'A B C') + mock_support.assert_called_once_with('timing') + self.assertEqual(2, mock_exec.call_count) @mock.patch.object(ipmi, '_exec_ipmitool', autospec=True) def test__power_status_on(self, mock_exec, mock_sleep): |