summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKaifeng Wang <kaifeng.w@gmail.com>2022-09-14 23:42:40 +0800
committerJulia Kreger <juliaashleykreger@gmail.com>2022-09-20 09:31:51 -0700
commit31c80874087b87dca3353710bdd36c775eae89b5 (patch)
treee23c6d4f8b5ba316b70d5c6be7cb70b832b31fe3
parentaae524a46ce66a91a5461bb6b5ae525dea31946f (diff)
downloadironic-31c80874087b87dca3353710bdd36c775eae89b5.tar.gz
Fix nodes stuck at cleaning on Network Service issues
Ironic validates network interface before the cleaning process, currently invalid parameter is captured but for not others. There is chance that a node could be stucked at the cleaning state on networking issues or temporary service down of neutron service. This patch adds NetworkError to the exception hanlding to cover such cases. Change-Id: If20de2ad4ae4177dea10b7ebfc9a91ca6fbabdb9
-rw-r--r--ironic/conductor/cleaning.py2
-rw-r--r--ironic/tests/unit/conductor/test_cleaning.py26
-rw-r--r--releasenotes/notes/fix-cleaning-stuck-on-networkerror-4aedbf3673413af6.yaml8
3 files changed, 29 insertions, 7 deletions
diff --git a/ironic/conductor/cleaning.py b/ironic/conductor/cleaning.py
index e3151d4b8..53d66ddd8 100644
--- a/ironic/conductor/cleaning.py
+++ b/ironic/conductor/cleaning.py
@@ -69,7 +69,7 @@ def do_node_clean(task, clean_steps=None, disable_ramdisk=False):
task.driver.power.validate(task)
if not disable_ramdisk:
task.driver.network.validate(task)
- except exception.InvalidParameterValue as e:
+ except (exception.InvalidParameterValue, exception.NetworkError) as e:
msg = (_('Validation of node %(node)s for cleaning failed: %(msg)s') %
{'node': node.uuid, 'msg': e})
return utils.cleaning_error_handler(task, msg)
diff --git a/ironic/tests/unit/conductor/test_cleaning.py b/ironic/tests/unit/conductor/test_cleaning.py
index 65261450a..a4c3d57b6 100644
--- a/ironic/tests/unit/conductor/test_cleaning.py
+++ b/ironic/tests/unit/conductor/test_cleaning.py
@@ -51,8 +51,6 @@ class DoNodeCleanTestCase(db_base.DbTestCase):
'step': 'build_raid', 'priority': 0, 'interface': 'deploy'}
def __do_node_clean_validate_fail(self, mock_validate, clean_steps=None):
- # InvalidParameterValue should cause node to go to CLEANFAIL
- mock_validate.side_effect = exception.InvalidParameterValue('error')
tgt_prov_state = states.MANAGEABLE if clean_steps else states.AVAILABLE
node = obj_utils.create_test_node(
self.context, driver='fake-hardware',
@@ -68,26 +66,42 @@ class DoNodeCleanTestCase(db_base.DbTestCase):
self.assertIsNone(node.fault)
mock_validate.assert_called_once_with(mock.ANY, mock.ANY)
+ def __do_node_clean_validate_fail_invalid(self, mock_validate,
+ clean_steps=None):
+ # InvalidParameterValue should cause node to go to CLEANFAIL
+ mock_validate.side_effect = exception.InvalidParameterValue('error')
+ self.__do_node_clean_validate_fail(mock_validate,
+ clean_steps=clean_steps)
+
@mock.patch('ironic.drivers.modules.fake.FakePower.validate',
autospec=True)
def test__do_node_clean_automated_power_validate_fail(self, mock_validate):
- self.__do_node_clean_validate_fail(mock_validate)
+ self.__do_node_clean_validate_fail_invalid(mock_validate)
@mock.patch('ironic.drivers.modules.fake.FakePower.validate',
autospec=True)
def test__do_node_clean_manual_power_validate_fail(self, mock_validate):
- self.__do_node_clean_validate_fail(mock_validate, clean_steps=[])
+ self.__do_node_clean_validate_fail_invalid(mock_validate,
+ clean_steps=[])
@mock.patch('ironic.drivers.modules.network.flat.FlatNetwork.validate',
autospec=True)
def test__do_node_clean_automated_network_validate_fail(self,
mock_validate):
- self.__do_node_clean_validate_fail(mock_validate)
+ self.__do_node_clean_validate_fail_invalid(mock_validate)
@mock.patch('ironic.drivers.modules.network.flat.FlatNetwork.validate',
autospec=True)
def test__do_node_clean_manual_network_validate_fail(self, mock_validate):
- self.__do_node_clean_validate_fail(mock_validate, clean_steps=[])
+ self.__do_node_clean_validate_fail_invalid(mock_validate,
+ clean_steps=[])
+
+ @mock.patch('ironic.drivers.modules.network.flat.FlatNetwork.validate',
+ autospec=True)
+ def test__do_node_clean_network_error_fail(self, mock_validate):
+ # NetworkError should cause node to go to CLEANFAIL
+ mock_validate.side_effect = exception.NetworkError()
+ self.__do_node_clean_validate_fail(mock_validate)
@mock.patch.object(conductor_utils, 'LOG', autospec=True)
@mock.patch.object(conductor_steps, 'set_node_cleaning_steps',
diff --git a/releasenotes/notes/fix-cleaning-stuck-on-networkerror-4aedbf3673413af6.yaml b/releasenotes/notes/fix-cleaning-stuck-on-networkerror-4aedbf3673413af6.yaml
new file mode 100644
index 000000000..f7769afc1
--- /dev/null
+++ b/releasenotes/notes/fix-cleaning-stuck-on-networkerror-4aedbf3673413af6.yaml
@@ -0,0 +1,8 @@
+---
+fixes:
+ - |
+ Fixes an issue where cleaning operations could fail in such a way that was
+ not easily recoverable when pre-cleaning network interface configuration
+ was validated, yet contained invalid configuration.
+ Now Ironic properly captures the error and exits from cleaning in a
+ state which allows for cleaning to be retried.