summaryrefslogtreecommitdiff
path: root/nova/tests/unit/compute
diff options
context:
space:
mode:
authorStephen Finucane <stephenfin@redhat.com>2021-04-28 13:53:39 +0100
committerLee Yarwood <lyarwood@redhat.com>2021-08-12 14:26:45 +0100
commit32676a9f45807ea8770dc7bdff1e859673af1b61 (patch)
treec757d6e88c3ee13042603cf7c1b9437f9df3ba3f /nova/tests/unit/compute
parent59d9871e8a0672538f8ffc43ae99b3d1c4b08909 (diff)
downloadnova-32676a9f45807ea8770dc7bdff1e859673af1b61.tar.gz
Clear rebalanced compute nodes from resource tracker
There is a race condition in nova-compute with the ironic virt driver as nodes get rebalanced. It can lead to compute nodes being removed from the DB and not repopulated. Ultimately, this prevents instances from being scheduled to these nodes. The issue being addressed here is that if a compute node is deleted by a host which thinks it is an orphan, then the compute host that actually owns the node might not recreate it if the node is already in its resource tracker cache. This change fixes the issue by clearing from the resource tracker cache any nodes for which a compute node entry does not exist. Then, when the available resource for the node is updated, the compute node object is not found in the cache and gets recreated. Change-Id: I39241223b447fcc671161c370dbf16e1773b684a Partial-Bug: #1853009
Diffstat (limited to 'nova/tests/unit/compute')
-rw-r--r--nova/tests/unit/compute/test_compute_mgr.py6
-rw-r--r--nova/tests/unit/compute/test_resource_tracker.py17
2 files changed, 21 insertions, 2 deletions
diff --git a/nova/tests/unit/compute/test_compute_mgr.py b/nova/tests/unit/compute/test_compute_mgr.py
index 3a8fa207db..282f2ee506 100644
--- a/nova/tests/unit/compute/test_compute_mgr.py
+++ b/nova/tests/unit/compute/test_compute_mgr.py
@@ -373,18 +373,20 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
)
# First node in set should have been removed from DB
+ # Last node in set should have been added to DB.
for db_node in db_nodes:
if db_node.hypervisor_hostname == 'node1':
db_node.destroy.assert_called_once_with()
rc_mock.delete_resource_provider.assert_called_once_with(
self.context, db_node, cascade=True)
- mock_rt.remove_node.assert_called_once_with(
- 'node1')
+ mock_rt.remove_node.assert_called_once_with('node1')
mock_log.error.assert_called_once_with(
"Failed to delete compute node resource provider for "
"compute node %s: %s", db_node.uuid, mock.ANY)
else:
self.assertFalse(db_node.destroy.called)
+ self.assertEqual(1, mock_rt.remove_node.call_count)
+ mock_rt.clean_compute_node_cache.assert_called_once_with(db_nodes)
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'delete_resource_provider')
diff --git a/nova/tests/unit/compute/test_resource_tracker.py b/nova/tests/unit/compute/test_resource_tracker.py
index 947e281b98..147a02bc90 100644
--- a/nova/tests/unit/compute/test_resource_tracker.py
+++ b/nova/tests/unit/compute/test_resource_tracker.py
@@ -4177,3 +4177,20 @@ class ProviderConfigTestCases(BaseTestCase):
mock_log.warning.assert_called_once_with(*expected_log_call)
self.assertIn(uuids.unknown, self.rt.absent_providers)
self.assertEqual(result, [])
+
+
+class TestCleanComputeNodeCache(BaseTestCase):
+
+ def setUp(self):
+ super(TestCleanComputeNodeCache, self).setUp()
+ self._setup_rt()
+ self.context = context.RequestContext(
+ mock.sentinel.user_id, mock.sentinel.project_id)
+
+ @mock.patch.object(resource_tracker.ResourceTracker, "remove_node")
+ def test_clean_compute_node_cache(self, mock_remove):
+ invalid_nodename = "invalid-node"
+ self.rt.compute_nodes[_NODENAME] = self.compute
+ self.rt.compute_nodes[invalid_nodename] = mock.sentinel.compute
+ self.rt.clean_compute_node_cache([self.compute])
+ mock_remove.assert_called_once_with(invalid_nodename)