summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJosh Gachnang <josh@pcsforeducation.com>2015-04-10 15:15:33 -0700
committerJosh Gachnang <josh@pcsforeducation.com>2015-04-13 10:12:55 -0700
commitaf3918fb69701fa794cbbe9de9cafc69ac3e936d (patch)
tree653f9f14d55c0e709cbee723b3c0ce022a33f234
parentaa2cf65e4db36184e538885656f98e753fa6a768 (diff)
downloadironic-af3918fb69701fa794cbbe9de9cafc69ac3e936d.tar.gz
Convert internal RPC continue_node_cleaning to a "cast"
The agent driver is using RPCs to call back from the driver to the conductor asynchronously. When using the RPC.call() method, some nodes would end up with stuck locks when using the agent driver during cleaning. The agent driver would issue a call() to continue_node_cleaning() after either the first heartbeat (from prepare_cleaning) or a heartbeat after a clean step had completed. The conductor would attempt to get a lock, but would not be able to. The node would retain its locked state (so far as I could tell), even after the error. Other nodes would continue and complete cleaning just fine. The exception raised by continue_node_cleaning() was likely not caught by the agent driver, but caught by vendor_passthru() in the conductor as an expected exception. Switching to cast() avoids the issue because the errors are not sent back to the caller. I didn't experience any more stuck locks with this change. Change-Id: I4dbb04ccb93199bba4e1a1614bc19b70a068a9ea Closes-Bug: 1442810
-rw-r--r--ironic/conductor/manager.py6
-rw-r--r--ironic/conductor/rpcapi.py16
-rw-r--r--ironic/tests/conductor/test_manager.py16
-rw-r--r--ironic/tests/conductor/test_rpcapi.py4
4 files changed, 16 insertions, 26 deletions
diff --git a/ironic/conductor/manager.py b/ironic/conductor/manager.py
index 3322741ab..c2b75bca0 100644
--- a/ironic/conductor/manager.py
+++ b/ironic/conductor/manager.py
@@ -204,7 +204,7 @@ class ConductorManager(periodic_task.PeriodicTasks):
"""Ironic Conductor manager main class."""
# NOTE(rloo): This must be in sync with rpcapi.ConductorAPI's.
- RPC_API_VERSION = '1.26'
+ RPC_API_VERSION = '1.27'
target = messaging.Target(version=RPC_API_VERSION)
@@ -824,10 +824,6 @@ class ConductorManager(periodic_task.PeriodicTasks):
state=node.provision_state)
self._do_node_clean(task)
- @messaging.expected_exceptions(exception.NoFreeConductorWorker,
- exception.NodeLocked,
- exception.InvalidStateRequested,
- exception.NodeNotFound)
def continue_node_clean(self, context, node_id):
"""RPC method to continue cleaning a node.
diff --git a/ironic/conductor/rpcapi.py b/ironic/conductor/rpcapi.py
index 06c7d434c..1ca0db5ee 100644
--- a/ironic/conductor/rpcapi.py
+++ b/ironic/conductor/rpcapi.py
@@ -69,11 +69,12 @@ class ConductorAPI(object):
| 1.24 - Added inspect_hardware method
| 1.25 - Added destroy_port
| 1.26 - Added continue_node_clean
+ | 1.27 - Convert continue_node_clean to cast
"""
# NOTE(rloo): This must be in sync with manager.ConductorManager's.
- RPC_API_VERSION = '1.26'
+ RPC_API_VERSION = '1.27'
def __init__(self, topic=None):
super(ConductorAPI, self).__init__()
@@ -329,18 +330,15 @@ class ConductorAPI(object):
def continue_node_clean(self, context, node_id, topic=None):
"""Signal to conductor service to start the next cleaning action.
+ NOTE(JoshNang) this is an RPC cast, there will be no response or
+ exception raised by the conductor for this RPC.
+
:param context: request context.
:param node_id: node id or uuid.
:param topic: RPC topic. Defaults to self.topic.
- :raises: NoFreeConductorWorker when there is no free worker to start
- async task.
- :raises: InvalidStateRequested if the requested action can not
- be performed.
- :raises: NodeLocked if node is locked by another conductor.
- :raises: NodeNotFound if the node no longer appears in the database
"""
- cctxt = self.client.prepare(topic=topic or self.topic, version='1.26')
- return cctxt.call(context, 'continue_node_clean',
+ cctxt = self.client.prepare(topic=topic or self.topic, version='1.27')
+ return cctxt.cast(context, 'continue_node_clean',
node_id=node_id)
def validate_driver_interfaces(self, context, node_id, topic=None):
diff --git a/ironic/tests/conductor/test_manager.py b/ironic/tests/conductor/test_manager.py
index 47a6e77f7..df0078712 100644
--- a/ironic/tests/conductor/test_manager.py
+++ b/ironic/tests/conductor/test_manager.py
@@ -1606,11 +1606,9 @@ class DoNodeCleanTestCase(_ServiceSetUpMixin, tests_db_base.DbTestCase):
mock_spawn.side_effect = exception.NoFreeConductorWorker()
- exc = self.assertRaises(messaging.rpc.ExpectedException,
- self.service.continue_node_clean,
- self.context, node.uuid)
- # Compare true exception hidden by @messaging.expected_exceptions
- self.assertEqual(exception.NoFreeConductorWorker, exc.exc_info[0])
+ self.assertRaises(exception.NoFreeConductorWorker,
+ self.service.continue_node_clean,
+ self.context, node.uuid)
self.service._worker_pool.waitall()
node.refresh()
@@ -1630,11 +1628,9 @@ class DoNodeCleanTestCase(_ServiceSetUpMixin, tests_db_base.DbTestCase):
last_error=None)
self._start_service()
- exc = self.assertRaises(messaging.rpc.ExpectedException,
- self.service.continue_node_clean,
- self.context, node.uuid)
- # Compare true exception hidden by @messaging.expected_exceptions
- self.assertEqual(exception.InvalidStateRequested, exc.exc_info[0])
+ self.assertRaises(exception.InvalidStateRequested,
+ self.service.continue_node_clean,
+ self.context, node.uuid)
self.service._worker_pool.waitall()
node.refresh()
diff --git a/ironic/tests/conductor/test_rpcapi.py b/ironic/tests/conductor/test_rpcapi.py
index 1ec18db3f..e7d27627c 100644
--- a/ironic/tests/conductor/test_rpcapi.py
+++ b/ironic/tests/conductor/test_rpcapi.py
@@ -291,6 +291,6 @@ class RPCAPITestCase(base.DbTestCase):
def test_continue_node_clean(self):
self._test_rpcapi('continue_node_clean',
- 'call',
- version='1.26',
+ 'cast',
+ version='1.27',
node_id=self.fake_node['uuid'])