diff options
author | Clif Houck <me@clifhouck.com> | 2015-01-14 17:00:57 -0600 |
---|---|---|
committer | Thom "tteggel" Leggett <thom@tteggel.org> | 2015-01-29 19:29:31 +0000 |
commit | 8833f69693e026d43f579cfa9839d45ce4e3fb8d (patch) | |
tree | afef311a1feaeb2ef28623c08058d72dd57a3553 | |
parent | 5d43d3cca269be4fa0f3a615c9c7400b5c882dc5 (diff) | |
download | ironic-8833f69693e026d43f579cfa9839d45ce4e3fb8d.tar.gz |
Fix RPCService and Ironic Conductor so they shut down gracefully
Ironic Conductor Manager now waits on its greenpool workers to finish
before del_host completes. This will ensure more-correct behavior, as
well as giving workers the opportunity to release locks before the
RPCService stops. Added a manager test to confirm del_host waits on the
manager's greenpool.
This modifies RPCService in two important ways:
1) The order of shutdown operations in RPCService.stop is changed to be
more correct. First, the server is stopped to prevent new requests from
arriving. Second, the manager is instructed to inform the cluster that
this conductor is shutting down. Finally, the conductor service stop()
method is called with graceful set to True to allow any tasks the
service currently has running to finish normally.
This is in contrast to the previous order of operations where the
service was told to immediately stop before stopping the server. This
could allow requests to arrive while the service has already stopped,
which is not ideal. Additionally, Service.stop was called in a way
which caused existing/running service tasks to immediately exit, which
could result in bad node or database state, necessitating manual
operator intervention.
2) The underlying Service object is told to shutdown gracefully, which
will allow tasks to complete before RPCService completely shuts down.
Change-Id: I639f2fa7b9349822fca9180966e0932224fa70a2
Closes-Bug: #1382698
(cherry picked from commit e9d1274c77cd919822d3f1af7c83e0ad0210f2a1)
-rw-r--r-- | ironic/common/service.py | 3 | ||||
-rw-r--r-- | ironic/conductor/manager.py | 4 | ||||
-rw-r--r-- | ironic/tests/conductor/test_manager.py | 6 |
3 files changed, 12 insertions, 1 deletions
diff --git a/ironic/common/service.py b/ironic/common/service.py index 1d7722ee9..32c869ceb 100644 --- a/ironic/common/service.py +++ b/ironic/common/service.py @@ -80,7 +80,6 @@ class RPCService(service.Service): {'service': self.topic, 'host': self.host}) def stop(self): - super(RPCService, self).stop() try: self.rpcserver.stop() self.rpcserver.wait() @@ -92,6 +91,8 @@ class RPCService(service.Service): except Exception as e: LOG.exception(_LE('Service error occurred when cleaning up ' 'the RPC manager. Error: %s'), e) + + super(RPCService, self).stop(graceful=True) LOG.info(_LI('Stopped RPC server for service %(service)s on host ' '%(host)s.'), {'service': self.topic, 'host': self.host}) diff --git a/ironic/conductor/manager.py b/ironic/conductor/manager.py index 0a2edfccd..debce57b7 100644 --- a/ironic/conductor/manager.py +++ b/ironic/conductor/manager.py @@ -249,6 +249,10 @@ class ConductorManager(periodic_task.PeriodicTasks): {'hostname': self.host}) except exception.ConductorNotFound: pass + # Waiting here to give workers the chance to finish. This has the + # benefit of releasing locks workers placed on nodes, as well as + # having work complete normally. + self._worker_pool.waitall() def periodic_tasks(self, context, raise_on_error=False): """Periodic tasks are run at pre-specified interval.""" diff --git a/ironic/tests/conductor/test_manager.py b/ironic/tests/conductor/test_manager.py index e13465291..1c754f519 100644 --- a/ironic/tests/conductor/test_manager.py +++ b/ironic/tests/conductor/test_manager.py @@ -227,6 +227,12 @@ class StartStopTestCase(_ServiceSetUpMixin, tests_db_base.DbTestCase): self.service.init_host) self.assertTrue(log_mock.error.called) + @mock.patch.object(eventlet.greenpool.GreenPool, 'waitall') + def test_del_host_waits_on_workerpool(self, wait_mock): + self._start_service() + self.service.del_host() + self.assertTrue(wait_mock.called) + class KeepAliveTestCase(_ServiceSetUpMixin, tests_db_base.DbTestCase): def test__conductor_service_record_keepalive(self): |