diff options
author | Zuul <zuul@review.opendev.org> | 2022-05-10 18:47:33 +0000 |
---|---|---|
committer | Gerrit Code Review <review@openstack.org> | 2022-05-10 18:47:33 +0000 |
commit | ee0b32933f724932cb21e5ca860aaa7544e85d6b (patch) | |
tree | 19d6d18ad314f81416a98ee9f984c3cda351e3cf | |
parent | 0a68e224992264b347ed51cf5dde77550f653cb6 (diff) | |
parent | 86e326a59126a24024649e8454b9c9253206c58b (diff) | |
download | ironic-ee0b32933f724932cb21e5ca860aaa7544e85d6b.tar.gz |
Merge "Exclude current conductor from offline_conductors" into stable/yoga
-rw-r--r-- | ironic/conductor/manager.py | 6 | ||||
-rw-r--r-- | ironic/conductor/utils.py | 21 | ||||
-rw-r--r-- | ironic/tests/unit/conductor/test_utils.py | 10 | ||||
-rw-r--r-- | releasenotes/notes/exclude-current-conductor-from-offline-conductors-2e2ef401a8b7d7e8.yaml | 12 |
4 files changed, 47 insertions, 2 deletions
diff --git a/ironic/conductor/manager.py b/ironic/conductor/manager.py index d11224852..7f6470e38 100644 --- a/ironic/conductor/manager.py +++ b/ironic/conductor/manager.py @@ -1604,7 +1604,8 @@ class ConductorManager(base_manager.BaseConductorManager): :param context: request context. """ - offline_conductors = self.dbapi.get_offline_conductors() + offline_conductors = utils.exclude_current_conductor( + self.host, self.dbapi.get_offline_conductors()) if not offline_conductors: return @@ -3436,7 +3437,8 @@ class ConductorManager(base_manager.BaseConductorManager): :param context: request context. """ - offline_conductors = self.dbapi.get_offline_conductors(field='id') + offline_conductors = utils.exclude_current_conductor( + self.conductor.id, self.dbapi.get_offline_conductors(field='id')) for conductor_id in offline_conductors: filters = {'state': states.ALLOCATING, 'conductor_affinity': conductor_id} diff --git a/ironic/conductor/utils.py b/ironic/conductor/utils.py index 4a0d68a5d..b418d9d0a 100644 --- a/ironic/conductor/utils.py +++ b/ironic/conductor/utils.py @@ -1671,3 +1671,24 @@ def update_image_type(context, node): 'image_type', images.IMAGE_TYPE_WHOLE_DISK if iwdi else images.IMAGE_TYPE_PARTITION) return True + + +def exclude_current_conductor(current_conductor, offline_conductors): + """Wrapper to exclude current conductor from offline_conductors + + In some cases the current conductor may have failed to update + the heartbeat timestamp due to failure or resource starvation. + When this occurs the dbapi get_offline_conductors method will + include the current conductor in its return value. + + :param current_conductor: id or hostname of the current conductor + :param offline_conductors: List of offline conductors. + :return: List of offline conductors, excluding current conductor + """ + if current_conductor in offline_conductors: + LOG.warning('Current conductor %s will be excluded from offline ' + 'conductors. 
Conductor heartbeat has failed to update the ' + 'database timestamp. This is sign of resource starvation.', + current_conductor) + + return [x for x in offline_conductors if x != current_conductor] diff --git a/ironic/tests/unit/conductor/test_utils.py b/ironic/tests/unit/conductor/test_utils.py index 5363fe801..7d8e70a1f 100644 --- a/ironic/tests/unit/conductor/test_utils.py +++ b/ironic/tests/unit/conductor/test_utils.py @@ -1921,6 +1921,16 @@ class MiscTestCase(db_base.DbTestCase): conductor_utils.restore_power_state_if_needed(task, power_state) self.assertEqual(0, power_action_mock.call_count) + @mock.patch.object(conductor_utils.LOG, 'warning', autospec=True) + def test_exclude_current_conductor(self, mock_log): + current_conductor = 'foo' + offline_conductos = ['foo', 'bar'] + result = conductor_utils.exclude_current_conductor(current_conductor, + offline_conductos) + self.assertTrue(mock_log.called) + self.assertIn('bar', result) + self.assertNotIn('foo', result) + class ValidateInstanceInfoTraitsTestCase(tests_base.TestCase): diff --git a/releasenotes/notes/exclude-current-conductor-from-offline-conductors-2e2ef401a8b7d7e8.yaml b/releasenotes/notes/exclude-current-conductor-from-offline-conductors-2e2ef401a8b7d7e8.yaml new file mode 100644 index 000000000..c7f3e4acc --- /dev/null +++ b/releasenotes/notes/exclude-current-conductor-from-offline-conductors-2e2ef401a8b7d7e8.yaml @@ -0,0 +1,12 @@ +--- +fixes: + - | + Fixes an issue where a conductor would attempt local takeover. In case of + heartbeat failure due to resource starvation, the current conductor was + detected as offline when querying the database. In this scenario the + conductor would forcibly remove reservations of its own and initiate + takeover. Current conductor is now excluded from the list of offline + conductors, so that local takeover does not occur for this case. A warning + is logged to highlight the potential resource starvation issue. 
+ See bug: `2010016 <https://storyboard.openstack.org/#!/story/2010016>`_. + |