diff options
author | Jenkins <jenkins@review.openstack.org> | 2014-07-24 22:02:47 +0000 |
---|---|---|
committer | Gerrit Code Review <review@openstack.org> | 2014-07-24 22:02:47 +0000 |
commit | 22f2f1d4c25f2f0c1ebeacd501a302cec8879c77 (patch) | |
tree | 5702a67251add4ea5a598ad26b9117ef4c37a88e | |
parent | b89b5f2ef421ee3311af7ec62dadbd34608bdd53 (diff) | |
parent | 8b554deed62d86d19d7459ab1ac36cf2dc27745a (diff) | |
download | ironic-22f2f1d4c25f2f0c1ebeacd501a302cec8879c77.tar.gz |
Merge "Fix nodes left in an incosistent state if no workers"
-rw-r--r-- | ironic/conductor/manager.py | 54 | ||||
-rw-r--r-- | ironic/tests/conductor/test_manager.py | 27 |
2 files changed, 73 insertions, 8 deletions
diff --git a/ironic/conductor/manager.py b/ironic/conductor/manager.py index 30b107e1b..39c231d62 100644 --- a/ironic/conductor/manager.py +++ b/ironic/conductor/manager.py @@ -53,6 +53,7 @@ from oslo import messaging from ironic.common import driver_factory from ironic.common import exception from ironic.common import hash_ring as hash +from ironic.common import i18n from ironic.common import neutron from ironic.common import states from ironic.conductor import task_manager @@ -60,7 +61,6 @@ from ironic.conductor import utils from ironic.db import api as dbapi from ironic import objects from ironic.openstack.common import excutils -from ironic.openstack.common.gettextutils import _LI from ironic.openstack.common import lockutils from ironic.openstack.common import log from ironic.openstack.common import periodic_task @@ -68,6 +68,9 @@ from ironic.openstack.common import periodic_task MANAGER_TOPIC = 'ironic.conductor_manager' WORKER_SPAWN_lOCK = "conductor_worker_spawn" +_LW = i18n._LW +_LI = i18n._LI + LOG = log.getLogger(__name__) conductor_opts = [ @@ -351,6 +354,34 @@ class ConductorManager(periodic_task.PeriodicTasks): method=driver_method, **info) + def _provisioning_error_handler(self, e, node, context, provision_state, + target_provision_state): + """Set the node's provisioning states if error occurs. + + This hook gets called upon an exception being raised when spawning + the worker to do the deployment or tear down of a node. + + :param e: the exception object that was raised. + :param node: an Ironic node object. + :param context: security context. + :param provision_state: the provision state to be set on + the node. + :param target_provision_state: the target provision state to be + set on the node. + + """ + if isinstance(e, exception.NoFreeConductorWorker): + node.provision_state = provision_state + node.target_provision_state = target_provision_state + node.last_error = (_("No free conductor workers available")) + node.save(context) + LOG.warning(_LW("No free conductor workers available to perform " + "an action on node %(node)s, setting node's " + "provision_state back to %(prov_state)s and " + "target_provision_state to %(tgt_prov_state)s."), + {'node': node.uuid, 'prov_state': provision_state, + 'tgt_prov_state': target_provision_state}) + @messaging.expected_exceptions(exception.NoFreeConductorWorker, exception.NodeLocked, exception.NodeInMaintenance, @@ -406,11 +437,21 @@ class ConductorManager(periodic_task.PeriodicTasks): "RPC do_node_deploy failed to validate deploy info. " "Error: %(msg)s") % {'msg': e}) + # Save the previous states so we can rollback the node to a + # consistent state in case there's no free workers to do the + # deploy work + previous_prov_state = node.provision_state + previous_tgt_provision_state = node.target_provision_state + # Set target state to expose that work is in progress node.provision_state = states.DEPLOYING node.target_provision_state = states.DEPLOYDONE node.last_error = None node.save(context) + + task.set_spawn_error_hook(self._provisioning_error_handler, node, + context, previous_prov_state, + previous_tgt_provision_state) task.spawn_after(self._spawn_worker, self._do_node_deploy, context, task) @@ -477,10 +518,21 @@ class ConductorManager(periodic_task.PeriodicTasks): "RPC do_node_tear_down failed to validate deploy info. " "Error: %(msg)s") % {'msg': e}) + # save the previous states so we can rollback the node to a + # consistent state in case there's no free workers to do the + # tear down work + previous_prov_state = node.provision_state + previous_tgt_provision_state = node.target_provision_state + + # set target state to expose that work is in progress node.provision_state = states.DELETING node.target_provision_state = states.DELETED node.last_error = None node.save(context) + + task.set_spawn_error_hook(self._provisioning_error_handler, node, + context, previous_prov_state, + previous_tgt_provision_state) task.spawn_after(self._spawn_worker, self._do_node_tear_down, context, task) diff --git a/ironic/tests/conductor/test_manager.py b/ironic/tests/conductor/test_manager.py index 8af07b747..c92c47d36 100644 --- a/ironic/tests/conductor/test_manager.py +++ b/ironic/tests/conductor/test_manager.py @@ -730,7 +730,12 @@ class ManagerTestCase(tests_db_base.DbTestCase): mock_spawn.assert_called_once_with(mock.ANY, mock.ANY, mock.ANY) def test_do_node_deploy_worker_pool_full(self): - node = obj_utils.create_test_node(self.context, driver='fake') + prv_state = states.NOSTATE + tgt_prv_state = states.NOSTATE + node = obj_utils.create_test_node(self.context, + provision_state=prv_state, + target_provision_state=tgt_prv_state, + last_error=None, driver='fake') self._start_service() with mock.patch.object(self.service, '_spawn_worker') as mock_spawn: @@ -743,8 +748,10 @@ class ManagerTestCase(tests_db_base.DbTestCase): self.assertEqual(exception.NoFreeConductorWorker, exc.exc_info[0]) self.service._worker_pool.waitall() node.refresh() - # This is a sync operation last_error should be None. - self.assertIsNone(node.last_error) + # Make sure things were rolled back + self.assertEqual(prv_state, node.provision_state) + self.assertEqual(tgt_prv_state, node.target_provision_state) + self.assertIsNotNone(node.last_error) # Verify reservation has been cleared. self.assertIsNone(node.reservation) @@ -828,10 +835,14 @@ class ManagerTestCase(tests_db_base.DbTestCase): @mock.patch('ironic.conductor.manager.ConductorManager._spawn_worker') def test_do_node_tear_down_worker_pool_full(self, mock_spawn): + prv_state = states.ACTIVE + tgt_prv_state = states.NOSTATE fake_instance_info = {'foo': 'bar'} node = obj_utils.create_test_node(self.context, driver='fake', - provision_state=states.ACTIVE, - instance_info=fake_instance_info) + provision_state=prv_state, + target_provision_state=tgt_prv_state, + instance_info=fake_instance_info, + last_error=None) self._start_service() mock_spawn.side_effect = exception.NoFreeConductorWorker() @@ -843,10 +854,12 @@ class ManagerTestCase(tests_db_base.DbTestCase): self.assertEqual(exception.NoFreeConductorWorker, exc.exc_info[0]) self.service._worker_pool.waitall() node.refresh() - # This is a sync operation last_error should be None. - self.assertIsNone(node.last_error) # Assert instance_info was not touched self.assertEqual(fake_instance_info, node.instance_info) + # Make sure things were rolled back + self.assertEqual(prv_state, node.provision_state) + self.assertEqual(tgt_prv_state, node.target_provision_state) + self.assertIsNotNone(node.last_error) # Verify reservation has been cleared. self.assertIsNone(node.reservation) |