summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorClif Houck <me@clifhouck.com>2015-01-14 17:00:57 -0600
committerThom "tteggel" Leggett <thom@tteggel.org>2015-01-29 19:29:31 +0000
commit8833f69693e026d43f579cfa9839d45ce4e3fb8d (patch)
treeafef311a1feaeb2ef28623c08058d72dd57a3553
parent5d43d3cca269be4fa0f3a615c9c7400b5c882dc5 (diff)
downloadironic-8833f69693e026d43f579cfa9839d45ce4e3fb8d.tar.gz
Fix RPCService and Ironic Conductor so they shut down gracefully
Ironic Conductor Manager now waits on its greenpool workers to finish before del_host completes. This will ensure more-correct behavior, as well as giving workers the opportunity to release locks before the RPCService stops. Added a manager test to confirm del_host waits on the manager's greenpool. This modifies RPCService in two important ways: 1) The order of shutdown operations in RPCService.stop is changed to be more correct. First, the server is stopped to prevent new requests from arriving. Second, the manager is instructed to inform the cluster that this conductor is shutting down. Finally, the conductor service stop() method is called with graceful set to True to allow any tasks the service currently has running to finish normally. This is in contrast to the previous order of operations where the service was told to immediately stop before stopping the server. This could allow requests to arrive while the service has already stopped, which is not ideal. Additionally, Service.stop was called in a way which caused existing/running service tasks to immediately exit, which could result in bad node or database state, necessitating manual operator intervention. 2) The underlying Service object is told to shutdown gracefully, which will allow tasks to complete before RPCService completely shuts down. Change-Id: I639f2fa7b9349822fca9180966e0932224fa70a2 Closes-Bug: #1382698 (cherry picked from commit e9d1274c77cd919822d3f1af7c83e0ad0210f2a1)
-rw-r--r--ironic/common/service.py3
-rw-r--r--ironic/conductor/manager.py4
-rw-r--r--ironic/tests/conductor/test_manager.py6
3 files changed, 12 insertions, 1 deletions
diff --git a/ironic/common/service.py b/ironic/common/service.py
index 1d7722ee9..32c869ceb 100644
--- a/ironic/common/service.py
+++ b/ironic/common/service.py
@@ -80,7 +80,6 @@ class RPCService(service.Service):
{'service': self.topic, 'host': self.host})
def stop(self):
- super(RPCService, self).stop()
try:
self.rpcserver.stop()
self.rpcserver.wait()
@@ -92,6 +91,8 @@ class RPCService(service.Service):
except Exception as e:
LOG.exception(_LE('Service error occurred when cleaning up '
'the RPC manager. Error: %s'), e)
+
+ super(RPCService, self).stop(graceful=True)
LOG.info(_LI('Stopped RPC server for service %(service)s on host '
'%(host)s.'),
{'service': self.topic, 'host': self.host})
diff --git a/ironic/conductor/manager.py b/ironic/conductor/manager.py
index 0a2edfccd..debce57b7 100644
--- a/ironic/conductor/manager.py
+++ b/ironic/conductor/manager.py
@@ -249,6 +249,10 @@ class ConductorManager(periodic_task.PeriodicTasks):
{'hostname': self.host})
except exception.ConductorNotFound:
pass
+ # Waiting here to give workers the chance to finish. This has the
+ # benefit of releasing locks workers placed on nodes, as well as
+ # having work complete normally.
+ self._worker_pool.waitall()
def periodic_tasks(self, context, raise_on_error=False):
"""Periodic tasks are run at pre-specified interval."""
diff --git a/ironic/tests/conductor/test_manager.py b/ironic/tests/conductor/test_manager.py
index e13465291..1c754f519 100644
--- a/ironic/tests/conductor/test_manager.py
+++ b/ironic/tests/conductor/test_manager.py
@@ -227,6 +227,12 @@ class StartStopTestCase(_ServiceSetUpMixin, tests_db_base.DbTestCase):
self.service.init_host)
self.assertTrue(log_mock.error.called)
+ @mock.patch.object(eventlet.greenpool.GreenPool, 'waitall')
+ def test_del_host_waits_on_workerpool(self, wait_mock):
+ self._start_service()
+ self.service.del_host()
+ self.assertTrue(wait_mock.called)
+
class KeepAliveTestCase(_ServiceSetUpMixin, tests_db_base.DbTestCase):
def test__conductor_service_record_keepalive(self):