diff options
author | Ruby Loo <rloo@verizonmedia.com> | 2020-01-16 15:55:29 +0000 |
---|---|---|
committer | Ruby Loo <rloo@verizonmedia.com> | 2020-01-16 17:10:05 +0000 |
commit | dd18c70543db6f42e531db24606ed8008015828f (patch) | |
tree | c929522dc906e26829f38fbcf7a0b35b888ec8f0 /ironic | |
parent | c4352b66fe0987249a71d9992000bb6b8fd48c9d (diff) | |
download | ironic-dd18c70543db6f42e531db24606ed8008015828f.tar.gz |
Nodes in maintenance didn't fail, when they should have
In this code in base_manager.py, _fail_if_in_state() [1]: if the
node is in maintenance, nothing is done. This means that when a
node in maintenance is in mid deployment or cleaning and their
conductor dies, it won't get put into a failed state [2].
This fixes it.
[1] https://opendev.org/openstack/ironic/src/commit/8294afa6231629f9734f19ea5b3b0253ee9b8957/ironic/conductor/base_manager.py#L485
[2] https://opendev.org/openstack/ironic/src/commit/8294afa6231629f9734f19ea5b3b0253ee9b8957/ironic/conductor/base_manager.py#L235
Story #2007098
Task #38134
Change-Id: Ide70619271455685d09671ae16d744fc9ae58a02
Diffstat (limited to 'ironic')
-rw-r--r-- | ironic/conductor/base_manager.py | 12 | ||||
-rw-r--r-- | ironic/conductor/manager.py | 1 | ||||
-rw-r--r-- | ironic/tests/unit/conductor/test_base_manager.py | 23 | ||||
-rw-r--r-- | ironic/tests/unit/conductor/test_manager.py | 1 |
4 files changed, 33 insertions, 4 deletions
diff --git a/ironic/conductor/base_manager.py b/ironic/conductor/base_manager.py index d610d2163..8ef568d83 100644 --- a/ironic/conductor/base_manager.py +++ b/ironic/conductor/base_manager.py @@ -476,15 +476,19 @@ class BaseConductorManager(object): node_iter = self.iter_nodes(filters=filters, sort_key=sort_key, sort_dir='asc') - + desired_maintenance = filters.get('maintenance') workers_count = 0 for node_uuid, driver, conductor_group in node_iter: try: with task_manager.acquire(context, node_uuid, purpose='node state check') as task: - if (task.node.maintenance - or task.node.provision_state - not in provision_state): + # Check maintenance value since it could have changed + # after the filtering was done. + if (desired_maintenance is not None + and desired_maintenance != task.node.maintenance): + continue + + if task.node.provision_state not in provision_state: continue target_state = (None if not keep_target_state else diff --git a/ironic/conductor/manager.py b/ironic/conductor/manager.py index 8cc5a1c0b..05abed1e1 100644 --- a/ironic/conductor/manager.py +++ b/ironic/conductor/manager.py @@ -3188,6 +3188,7 @@ class ConductorManager(base_manager.BaseConductorManager): callback_timeout = CONF.conductor.inspect_wait_timeout filters = {'reserved': False, + 'maintenance': False, 'provision_state': states.INSPECTWAIT, 'inspection_started_before': callback_timeout} sort_key = 'inspection_started_at' diff --git a/ironic/tests/unit/conductor/test_base_manager.py b/ironic/tests/unit/conductor/test_base_manager.py index 02bd171f9..f8ba77c3f 100644 --- a/ironic/tests/unit/conductor/test_base_manager.py +++ b/ironic/tests/unit/conductor/test_base_manager.py @@ -521,3 +521,26 @@ class StartConsolesTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase): self.assertIsNone(test_node.last_error) self.assertTrue(log_mock.warning.called) self.assertFalse(mock_notify.called) + + +class MiscTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase): + def setUp(self): + super(MiscTestCase, self).setUp() + self._start_service() + + def test__fail_transient_state(self): + node = obj_utils.create_test_node(self.context, + driver='fake-hardware', + provision_state=states.DEPLOYING) + self.service._fail_transient_state(states.DEPLOYING, 'unknown err') + node.refresh() + self.assertEqual(states.DEPLOYFAIL, node.provision_state) + + def test__fail_transient_state_maintenance(self): + node = obj_utils.create_test_node(self.context, + driver='fake-hardware', + maintenance=True, + provision_state=states.DEPLOYING) + self.service._fail_transient_state(states.DEPLOYING, 'unknown err') + node.refresh() + self.assertEqual(states.DEPLOYFAIL, node.provision_state) diff --git a/ironic/tests/unit/conductor/test_manager.py b/ironic/tests/unit/conductor/test_manager.py index f1ca25400..b1a27af1a 100644 --- a/ironic/tests/unit/conductor/test_manager.py +++ b/ironic/tests/unit/conductor/test_manager.py @@ -8103,6 +8103,7 @@ class ManagerCheckInspectWaitTimeoutsTestCase(mgr_utils.CommonMixIn, self.task2 = self._create_task(node=self.node2) self.filters = {'reserved': False, + 'maintenance': False, 'inspection_started_before': 300, 'provision_state': states.INSPECTWAIT} self.columns = ['uuid', 'driver', 'conductor_group'] |