summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJenkins <jenkins@review.openstack.org>2014-07-24 22:02:47 +0000
committerGerrit Code Review <review@openstack.org>2014-07-24 22:02:47 +0000
commit22f2f1d4c25f2f0c1ebeacd501a302cec8879c77 (patch)
tree5702a67251add4ea5a598ad26b9117ef4c37a88e
parentb89b5f2ef421ee3311af7ec62dadbd34608bdd53 (diff)
parent8b554deed62d86d19d7459ab1ac36cf2dc27745a (diff)
downloadironic-22f2f1d4c25f2f0c1ebeacd501a302cec8879c77.tar.gz
Merge "Fix nodes left in an incosistent state if no workers"
-rw-r--r--ironic/conductor/manager.py54
-rw-r--r--ironic/tests/conductor/test_manager.py27
2 files changed, 73 insertions, 8 deletions
diff --git a/ironic/conductor/manager.py b/ironic/conductor/manager.py
index 30b107e1b..39c231d62 100644
--- a/ironic/conductor/manager.py
+++ b/ironic/conductor/manager.py
@@ -53,6 +53,7 @@ from oslo import messaging
from ironic.common import driver_factory
from ironic.common import exception
from ironic.common import hash_ring as hash
+from ironic.common import i18n
from ironic.common import neutron
from ironic.common import states
from ironic.conductor import task_manager
@@ -60,7 +61,6 @@ from ironic.conductor import utils
from ironic.db import api as dbapi
from ironic import objects
from ironic.openstack.common import excutils
-from ironic.openstack.common.gettextutils import _LI
from ironic.openstack.common import lockutils
from ironic.openstack.common import log
from ironic.openstack.common import periodic_task
@@ -68,6 +68,9 @@ from ironic.openstack.common import periodic_task
MANAGER_TOPIC = 'ironic.conductor_manager'
WORKER_SPAWN_lOCK = "conductor_worker_spawn"
+_LW = i18n._LW
+_LI = i18n._LI
+
LOG = log.getLogger(__name__)
conductor_opts = [
@@ -351,6 +354,34 @@ class ConductorManager(periodic_task.PeriodicTasks):
method=driver_method,
**info)
+ def _provisioning_error_handler(self, e, node, context, provision_state,
+ target_provision_state):
+ """Set the node's provisioning states if error occurs.
+
+ This hook gets called upon an exception being raised when spawning
+ the worker to do the deployment or tear down of a node.
+
+ :param e: the exception object that was raised.
+ :param node: an Ironic node object.
+ :param context: security context.
+ :param provision_state: the provision state to be set on
+ the node.
+ :param target_provision_state: the target provision state to be
+ set on the node.
+
+ """
+ if isinstance(e, exception.NoFreeConductorWorker):
+ node.provision_state = provision_state
+ node.target_provision_state = target_provision_state
+ node.last_error = (_("No free conductor workers available"))
+ node.save(context)
+ LOG.warning(_LW("No free conductor workers available to perform "
+ "an action on node %(node)s, setting node's "
+ "provision_state back to %(prov_state)s and "
+ "target_provision_state to %(tgt_prov_state)s."),
+ {'node': node.uuid, 'prov_state': provision_state,
+ 'tgt_prov_state': target_provision_state})
+
@messaging.expected_exceptions(exception.NoFreeConductorWorker,
exception.NodeLocked,
exception.NodeInMaintenance,
@@ -406,11 +437,21 @@ class ConductorManager(periodic_task.PeriodicTasks):
"RPC do_node_deploy failed to validate deploy info. "
"Error: %(msg)s") % {'msg': e})
+ # Save the previous states so we can rollback the node to a
+ # consistent state in case there's no free workers to do the
+ # deploy work
+ previous_prov_state = node.provision_state
+ previous_tgt_provision_state = node.target_provision_state
+
# Set target state to expose that work is in progress
node.provision_state = states.DEPLOYING
node.target_provision_state = states.DEPLOYDONE
node.last_error = None
node.save(context)
+
+ task.set_spawn_error_hook(self._provisioning_error_handler, node,
+ context, previous_prov_state,
+ previous_tgt_provision_state)
task.spawn_after(self._spawn_worker, self._do_node_deploy,
context, task)
@@ -477,10 +518,21 @@ class ConductorManager(periodic_task.PeriodicTasks):
"RPC do_node_tear_down failed to validate deploy info. "
"Error: %(msg)s") % {'msg': e})
+ # save the previous states so we can rollback the node to a
+ # consistent state in case there's no free workers to do the
+ # tear down work
+ previous_prov_state = node.provision_state
+ previous_tgt_provision_state = node.target_provision_state
+
+ # set target state to expose that work is in progress
node.provision_state = states.DELETING
node.target_provision_state = states.DELETED
node.last_error = None
node.save(context)
+
+ task.set_spawn_error_hook(self._provisioning_error_handler, node,
+ context, previous_prov_state,
+ previous_tgt_provision_state)
task.spawn_after(self._spawn_worker, self._do_node_tear_down,
context, task)
diff --git a/ironic/tests/conductor/test_manager.py b/ironic/tests/conductor/test_manager.py
index 8af07b747..c92c47d36 100644
--- a/ironic/tests/conductor/test_manager.py
+++ b/ironic/tests/conductor/test_manager.py
@@ -730,7 +730,12 @@ class ManagerTestCase(tests_db_base.DbTestCase):
mock_spawn.assert_called_once_with(mock.ANY, mock.ANY, mock.ANY)
def test_do_node_deploy_worker_pool_full(self):
- node = obj_utils.create_test_node(self.context, driver='fake')
+ prv_state = states.NOSTATE
+ tgt_prv_state = states.NOSTATE
+ node = obj_utils.create_test_node(self.context,
+ provision_state=prv_state,
+ target_provision_state=tgt_prv_state,
+ last_error=None, driver='fake')
self._start_service()
with mock.patch.object(self.service, '_spawn_worker') as mock_spawn:
@@ -743,8 +748,10 @@ class ManagerTestCase(tests_db_base.DbTestCase):
self.assertEqual(exception.NoFreeConductorWorker, exc.exc_info[0])
self.service._worker_pool.waitall()
node.refresh()
- # This is a sync operation last_error should be None.
- self.assertIsNone(node.last_error)
+ # Make sure things were rolled back
+ self.assertEqual(prv_state, node.provision_state)
+ self.assertEqual(tgt_prv_state, node.target_provision_state)
+ self.assertIsNotNone(node.last_error)
# Verify reservation has been cleared.
self.assertIsNone(node.reservation)
@@ -828,10 +835,14 @@ class ManagerTestCase(tests_db_base.DbTestCase):
@mock.patch('ironic.conductor.manager.ConductorManager._spawn_worker')
def test_do_node_tear_down_worker_pool_full(self, mock_spawn):
+ prv_state = states.ACTIVE
+ tgt_prv_state = states.NOSTATE
fake_instance_info = {'foo': 'bar'}
node = obj_utils.create_test_node(self.context, driver='fake',
- provision_state=states.ACTIVE,
- instance_info=fake_instance_info)
+ provision_state=prv_state,
+ target_provision_state=tgt_prv_state,
+ instance_info=fake_instance_info,
+ last_error=None)
self._start_service()
mock_spawn.side_effect = exception.NoFreeConductorWorker()
@@ -843,10 +854,12 @@ class ManagerTestCase(tests_db_base.DbTestCase):
self.assertEqual(exception.NoFreeConductorWorker, exc.exc_info[0])
self.service._worker_pool.waitall()
node.refresh()
- # This is a sync operation last_error should be None.
- self.assertIsNone(node.last_error)
# Assert instance_info was not touched
self.assertEqual(fake_instance_info, node.instance_info)
+ # Make sure things were rolled back
+ self.assertEqual(prv_state, node.provision_state)
+ self.assertEqual(tgt_prv_state, node.target_provision_state)
+ self.assertIsNotNone(node.last_error)
# Verify reservation has been cleared.
self.assertIsNone(node.reservation)