Add retry logic to _exec_ipmitool

When using bridged IPMI buses, the possibility exists for the BMC to return messages indicating that it is too busy to service the request at that moment. This results in ipmitool returning an error to the user instead of retrying the operation. As a result of the error being returned, the current operation being attempted by ironic is failed and is never retried as the present retry configuration is purely a pass-through to ipmitool. Added logic to allow for the identification of errors returned by ipmitool that can be retried coupled with logic to retry the failed operation until the configured retry window has passed. Included three tests which are geared to exercise the retry logic and ensure that the retry logic works as expected, including that the retry operations are only performed a specific number of times based on the conditions. Updated one of the pre-existing tests that tested _exec_ipmitool act of returning an exception to ensure that the retry logic is executed only once. Change-Id: I698a8ff1cb70d7758ed05e59b858d9e40708dd05 Closes-Bug: 1431929
author: Julia Kreger <juliaashleykreger@gmail.com> 2015-03-26 15:17:41 -0400
committer: Julia Kreger <juliaashleykreger@gmail.com> 2015-04-07 07:14:32 -0400
commit: 59ccf51c179a92a1e9541fa54715ace8b110de91 (patch)
tree: f23147fd5b38bc634ffaa3431de404b47783da6e
parent: fb0cc8f86f1328ba2c22832c40d6a4664bca3742 (diff)
download: ironic-59ccf51c179a92a1e9541fa54715ace8b110de91.tar.gz
2 files changed, 138 insertions, 14 deletions
diff --git a/ironic/drivers/modules/ipmitool.py b/ironic/drivers/modules/ipmitool.py
index 2826dd1e7..55fc1fa2d 100644
--- a/ironic/drivers/modules/ipmitool.py
+++ b/ironic/drivers/modules/ipmitool.py
@@ -111,6 +111,9 @@ ipmitool_command_options = {
     'dual_bridge': ['ipmitool', '-m', '0', '-b', '0', '-t', '0',
                     '-B', '0', '-T', '0', '-h']}
 
+# Note(TheJulia): This string is hardcoded in ipmitool's lanplus driver.
+IPMITOOL_RETRYABLE_FAILURES = ['insufficient resources for session']
+
 
 def _check_option_support(options):
     """Checks if the specific ipmitool options are supported on host.
@@ -335,32 +338,68 @@ def _exec_ipmitool(driver_info, command):
             args.append(driver_info[name])
 
     # specify retry timing more precisely, if supported
+    num_tries = max(
+        (CONF.ipmi.retry_timeout // CONF.ipmi.min_command_interval), 1)
+
     if _is_option_supported('timing'):
-        num_tries = max(
-            (CONF.ipmi.retry_timeout // CONF.ipmi.min_command_interval), 1)
         args.append('-R')
         args.append(str(num_tries))
 
         args.append('-N')
         args.append(str(CONF.ipmi.min_command_interval))
 
-    # 'ipmitool' command will prompt password if there is no '-f' option,
-    # we set it to '\0' to write a password file to support empty password
-    with _make_password_file(driver_info['password'] or '\0') as pw_file:
-        args.append('-f')
-        args.append(pw_file)
-        args.extend(command.split(" "))
+    end_time = (_time() + CONF.ipmi.retry_timeout)
+
+    while True:
+        num_tries = num_tries - 1
         # NOTE(deva): ensure that no communications are sent to a BMC more
         #             often than once every min_command_interval seconds.
         time_till_next_poll = CONF.ipmi.min_command_interval - (
-                time.time() - LAST_CMD_TIME.get(driver_info['address'], 0))
+                _time() - LAST_CMD_TIME.get(driver_info['address'], 0))
         if time_till_next_poll > 0:
             time.sleep(time_till_next_poll)
-        try:
-            out, err = utils.execute(*args)
-        finally:
-            LAST_CMD_TIME[driver_info['address']] = time.time()
-        return out, err
+        # Resetting the list that will be utilized so the password arguments
+        # from any previous execution are preserved.
+        cmd_args = args[:]
+        # 'ipmitool' command will prompt password if there is no '-f'
+        # option, we set it to '\0' to write a password file to support
+        # empty password
+        with _make_password_file(
+                    driver_info['password'] or '\0'
+                ) as pw_file:
+            cmd_args.append('-f')
+            cmd_args.append(pw_file)
+            cmd_args.extend(command.split(" "))
+            try:
+                out, err = utils.execute(*cmd_args)
+                return out, err
+            except processutils.ProcessExecutionError as e:
+                with excutils.save_and_reraise_exception() as ctxt:
+                    err_list = [x for x in IPMITOOL_RETRYABLE_FAILURES
+                                if x in e.message]
+                    if ((_time() > end_time) or
+                        (num_tries == 0) or
+                        not err_list):
+                        LOG.error(_LE('IPMI Error attempting to execute '
+                                  '"%(cmd)s" for node %(node)s. '
+                                  'Error: %(error)s'),
+                                  {
+                                        'node': driver_info['uuid'],
+                                        'cmd': e.cmd,
+                                        'error': e
+                                  })
+                    else:
+                        ctxt.reraise = False
+                        LOG.warning(_LW('IPMI Error encountered, retrying '
+                                    '"%(cmd)s" for node %(node)s '
+                                    'Error: %(error)s'),
+                                    {
+                                        'node': driver_info['uuid'],
+                                        'cmd': e.cmd,
+                                        'error': e
+                                    })
+            finally:
+                LAST_CMD_TIME[driver_info['address']] = _time()
 
 
 def _sleep_time(iter):
@@ -587,6 +626,11 @@ def send_raw(task, raw_bytes):
         raise exception.IPMIFailure(cmd=cmd)
 
 
+def _time():
+    """Wrapper for time.time() enabling simplified unit testing."""
+    return time.time()
+
+
 class IPMIPower(base.PowerInterface):
 
     def __init__(self):
diff --git a/ironic/tests/drivers/test_ipmitool.py b/ironic/tests/drivers/test_ipmitool.py
index a27060d0f..0828f2c60 100644
--- a/ironic/tests/drivers/test_ipmitool.py
+++ b/ironic/tests/drivers/test_ipmitool.py
@@ -840,6 +840,86 @@ class IPMIToolPrivateMethodTestCase(db_base.DbTestCase):
         mock_support.assert_called_once_with('timing')
         mock_pwf.assert_called_once_with(self.info['password'])
         mock_exec.assert_called_once_with(*args)
+        self.assertEqual(1, mock_exec.call_count)
+
+    @mock.patch.object(ipmi, '_is_option_supported', autospec=True)
+    @mock.patch.object(utils, 'execute', autospec=True)
+    def test__exec_ipmitool_exception_retry(self,
+            mock_exec, mock_support, mock_sleep):
+
+        ipmi.LAST_CMD_TIME = {}
+        mock_support.return_value = False
+        mock_exec.side_effect = iter([
+            processutils.ProcessExecutionError(
+                stderr="insufficient resources for session"
+            ),
+            (None, None)
+            ])
+
+        # Directly set the configuration values such that
+        # the logic will cause _exec_ipmitool to retry twice.
+        self.config(min_command_interval=1, group='ipmi')
+        self.config(retry_timeout=2, group='ipmi')
+
+        ipmi._exec_ipmitool(self.info, 'A B C')
+
+        mock_support.assert_called_once_with('timing')
+        self.assertEqual(2, mock_exec.call_count)
+
+    @mock.patch.object(ipmi, '_is_option_supported', autospec=True)
+    @mock.patch.object(utils, 'execute', autospec=True)
+    def test__exec_ipmitool_exception_retries_exceeded(self,
+            mock_exec, mock_support, mock_sleep):
+
+        ipmi.LAST_CMD_TIME = {}
+        mock_support.return_value = False
+
+        mock_exec.side_effect = processutils.ProcessExecutionError(
+            stderr="insufficient resources for session"
+        )
+
+        # Directly set the configuration values such that
+        # the logic will cause _exec_ipmitool to timeout.
+        self.config(min_command_interval=1, group='ipmi')
+        self.config(retry_timeout=1, group='ipmi')
+
+        self.assertRaises(processutils.ProcessExecutionError,
+                          ipmi._exec_ipmitool,
+                          self.info, 'A B C')
+        mock_support.assert_called_once_with('timing')
+        self.assertEqual(1, mock_exec.call_count)
+
+    @mock.patch.object(ipmi, '_is_option_supported', autospec=True)
+    @mock.patch.object(utils, 'execute', autospec=True)
+    def test__exec_ipmitool_exception_retry_failure_unhandable(self,
+            mock_exec, mock_support, mock_sleep):
+
+        ipmi.LAST_CMD_TIME = {}
+        mock_support.return_value = False
+
+        # Return a retryable error, then a error that cannot
+        # be retryable thus resulting in a single retry
+        # attempt by _exec_ipmitool that is successful.
+        mock_exec.side_effect = iter([
+            processutils.ProcessExecutionError(
+                stderr="insufficient resources for session"
+            ),
+            processutils.ProcessExecutionError(
+                "Unknown"
+            ),
+            ])
+
+        # Directly set the configuration values such that
+        # the logic will cause _exec_ipmitool to retry up
+        # to 3 times.
+        self.config(min_command_interval=1, group='ipmi')
+        self.config(retry_timeout=3, group='ipmi')
+
+        self.assertRaises(processutils.ProcessExecutionError,
+                          ipmi._exec_ipmitool,
+                          self.info, 'A B C')
+        mock_support.assert_called_once_with('timing')
+        self.assertEqual(2, mock_exec.call_count)
 
     @mock.patch.object(ipmi, '_exec_ipmitool', autospec=True)
     def test__power_status_on(self, mock_exec, mock_sleep):
author	Julia Kreger <juliaashleykreger@gmail.com>	2015-03-26 15:17:41 -0400
committer	Julia Kreger <juliaashleykreger@gmail.com>	2015-04-07 07:14:32 -0400
commit	59ccf51c179a92a1e9541fa54715ace8b110de91 (patch)
tree	f23147fd5b38bc634ffaa3431de404b47783da6e
parent	fb0cc8f86f1328ba2c22832c40d6a4664bca3742 (diff)
download	ironic-59ccf51c179a92a1e9541fa54715ace8b110de91.tar.gz