summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorZuul <zuul@review.opendev.org>2022-05-10 18:47:33 +0000
committerGerrit Code Review <review@openstack.org>2022-05-10 18:47:33 +0000
commitee0b32933f724932cb21e5ca860aaa7544e85d6b (patch)
tree19d6d18ad314f81416a98ee9f984c3cda351e3cf
parent0a68e224992264b347ed51cf5dde77550f653cb6 (diff)
parent86e326a59126a24024649e8454b9c9253206c58b (diff)
downloadironic-ee0b32933f724932cb21e5ca860aaa7544e85d6b.tar.gz
Merge "Exclude current conductor from offline_conductors" into stable/yoga
-rw-r--r--ironic/conductor/manager.py6
-rw-r--r--ironic/conductor/utils.py21
-rw-r--r--ironic/tests/unit/conductor/test_utils.py10
-rw-r--r--releasenotes/notes/exclude-current-conductor-from-offline-conductors-2e2ef401a8b7d7e8.yaml12
4 files changed, 47 insertions, 2 deletions
diff --git a/ironic/conductor/manager.py b/ironic/conductor/manager.py
index d11224852..7f6470e38 100644
--- a/ironic/conductor/manager.py
+++ b/ironic/conductor/manager.py
@@ -1604,7 +1604,8 @@ class ConductorManager(base_manager.BaseConductorManager):
:param context: request context.
"""
- offline_conductors = self.dbapi.get_offline_conductors()
+ offline_conductors = utils.exclude_current_conductor(
+ self.host, self.dbapi.get_offline_conductors())
if not offline_conductors:
return
@@ -3436,7 +3437,8 @@ class ConductorManager(base_manager.BaseConductorManager):
:param context: request context.
"""
- offline_conductors = self.dbapi.get_offline_conductors(field='id')
+ offline_conductors = utils.exclude_current_conductor(
+ self.conductor.id, self.dbapi.get_offline_conductors(field='id'))
for conductor_id in offline_conductors:
filters = {'state': states.ALLOCATING,
'conductor_affinity': conductor_id}
diff --git a/ironic/conductor/utils.py b/ironic/conductor/utils.py
index 4a0d68a5d..b418d9d0a 100644
--- a/ironic/conductor/utils.py
+++ b/ironic/conductor/utils.py
@@ -1671,3 +1671,24 @@ def update_image_type(context, node):
'image_type',
images.IMAGE_TYPE_WHOLE_DISK if iwdi else images.IMAGE_TYPE_PARTITION)
return True
+
+
+def exclude_current_conductor(current_conductor, offline_conductors):
+ """Wrapper to exclude current conductor from offline_conductors
+
+ In some cases the current conductor may have failed to update
+ the heartbeat timestamp due to failure or resource starvation.
+ When this occurs the dbapi get_offline_conductors method will
+ include the current conductor in its return value.
+
+ :param current_conductor: id or hostname of the current conductor
+ :param offline_conductors: List of offline conductors.
+ :return: List of offline conductors, excluding current conductor
+ """
+ if current_conductor in offline_conductors:
+ LOG.warning('Current conductor %s will be excluded from offline '
+ 'conductors. Conductor heartbeat has failed to update the '
+ 'database timestamp. This is sign of resource starvation.',
+ current_conductor)
+
+ return [x for x in offline_conductors if x != current_conductor]
diff --git a/ironic/tests/unit/conductor/test_utils.py b/ironic/tests/unit/conductor/test_utils.py
index 5363fe801..7d8e70a1f 100644
--- a/ironic/tests/unit/conductor/test_utils.py
+++ b/ironic/tests/unit/conductor/test_utils.py
@@ -1921,6 +1921,16 @@ class MiscTestCase(db_base.DbTestCase):
conductor_utils.restore_power_state_if_needed(task, power_state)
self.assertEqual(0, power_action_mock.call_count)
+ @mock.patch.object(conductor_utils.LOG, 'warning', autospec=True)
+ def test_exclude_current_conductor(self, mock_log):
+ current_conductor = 'foo'
+ offline_conductos = ['foo', 'bar']
+ result = conductor_utils.exclude_current_conductor(current_conductor,
+ offline_conductos)
+ self.assertTrue(mock_log.called)
+ self.assertIn('bar', result)
+ self.assertNotIn('foo', result)
+
class ValidateInstanceInfoTraitsTestCase(tests_base.TestCase):
diff --git a/releasenotes/notes/exclude-current-conductor-from-offline-conductors-2e2ef401a8b7d7e8.yaml b/releasenotes/notes/exclude-current-conductor-from-offline-conductors-2e2ef401a8b7d7e8.yaml
new file mode 100644
index 000000000..c7f3e4acc
--- /dev/null
+++ b/releasenotes/notes/exclude-current-conductor-from-offline-conductors-2e2ef401a8b7d7e8.yaml
@@ -0,0 +1,12 @@
+---
+fixes:
+ - |
+ Fixes an issue where a conductor would attempt local takeover. In case of
+ heartbeat failure due to resource starvation, the current conductor was
+ detected as offline when querying the database. In this scenario the
+ conductor would forcibly remove reservations of it's own and initiate
+ takeover. Current conductor is now excluded from the list of offline
+ conductors, so that local takeover does not occur for this case. A warning
+ is logged to highlight the potential resource starvation issue.
+ See bug: `2010016 <https://storyboard.openstack.org/#!/story/2010016>`_.
+