Unify placement client singleton implementations

We have many places where we implement singleton behavior for the placement client. This unifies them into a single place and implementation. Not only does this DRY things up, but may cause us to initialize it fewer times and also allows for emitting a common set of error messages about expected failures for better troubleshooting. Change-Id: Iab8a791f64323f996e1d6e6d5a7e7a7c34eb4fb3 Related-Bug: #1846820 (cherry picked from commit c178d9360665c219cbcc71c9f37b9e6e3055a5e5)
author: Dan Smith <dansmith@redhat.com> 2022-08-11 09:50:30 -0700
committer: melanie witt <melwittt@gmail.com> 2022-09-22 22:54:46 +0000
commit: 77273f067d96a4ec401c3b36f2922d63c4ad7103 (patch)
tree: 3271dbe56ab45fd4f2151317aee453077d689033
parent: 6d61fccb8455367aaa37ae7bddf3b8befd3c3d88 (diff)
download: nova-77273f067d96a4ec401c3b36f2922d63c4ad7103.tar.gz
16 files changed, 115 insertions, 44 deletions
diff --git a/nova/api/openstack/compute/services.py b/nova/api/openstack/compute/services.py
index 6deb84a7f1..e9d51d4d0c 100644
--- a/nova/api/openstack/compute/services.py
+++ b/nova/api/openstack/compute/services.py
@@ -48,13 +48,10 @@ class ServiceController(wsgi.Controller):
         self.actions = {"enable": self._enable,
                         "disable": self._disable,
                         "disable-log-reason": self._disable_log_reason}
-        self._placementclient = None  # Lazy-load on first access.
 
     @property
     def placementclient(self):
-        if self._placementclient is None:
-            self._placementclient = report.SchedulerReportClient()
-        return self._placementclient
+        return report.report_client_singleton()
 
     def _get_services(self, req):
         # The API services are filtered out since they are not RPC services
@@ -328,7 +325,7 @@ class ServiceController(wsgi.Controller):
                             "Failed to delete compute node resource provider "
                             "for compute node %s: %s",
                             compute_node.uuid, str(e))
-                # remove the host_mapping of this host.
+                # Remove the host_mapping of this host.
                 try:
                     hm = objects.HostMapping.get_by_host(context, service.host)
                     hm.destroy()
diff --git a/nova/cmd/manage.py b/nova/cmd/manage.py
index f704a42698..7067facde7 100644
--- a/nova/cmd/manage.py
+++ b/nova/cmd/manage.py
@@ -2217,7 +2217,7 @@ class PlacementCommands(object):
                 output(_('No cells to process.'))
                 return 4
 
-        placement = report.SchedulerReportClient()
+        placement = report.report_client_singleton()
 
         neutron = None
         if heal_port_allocations:
@@ -2718,7 +2718,7 @@ class PlacementCommands(object):
         if verbose:
             output = lambda msg: print(msg)
 
-        placement = report.SchedulerReportClient()
+        placement = report.report_client_singleton()
         # Resets two in-memory dicts for knowing instances per compute node
         self.cn_uuid_mapping = collections.defaultdict(tuple)
         self.instances_mapping = collections.defaultdict(list)
diff --git a/nova/compute/api.py b/nova/compute/api.py
index 9a2cbd3325..ddea854fe2 100644
--- a/nova/compute/api.py
+++ b/nova/compute/api.py
@@ -384,7 +384,6 @@ class API:
         self.image_api = image_api or glance.API()
         self.network_api = network_api or neutron.API()
         self.volume_api = volume_api or cinder.API()
-        self._placementclient = None  # Lazy-load on first access.
         self.compute_rpcapi = compute_rpcapi.ComputeAPI()
         self.compute_task_api = conductor.ComputeTaskAPI()
         self.servicegroup_api = servicegroup.API()
@@ -2573,9 +2572,7 @@ class API:
 
     @property
     def placementclient(self):
-        if self._placementclient is None:
-            self._placementclient = report.SchedulerReportClient()
-        return self._placementclient
+        return report.report_client_singleton()
 
     def _local_delete(self, context, instance, bdms, delete_type, cb):
         if instance.vm_state == vm_states.SHELVED_OFFLOADED:
@@ -6309,13 +6306,10 @@ class AggregateAPI:
     def __init__(self):
         self.compute_rpcapi = compute_rpcapi.ComputeAPI()
         self.query_client = query.SchedulerQueryClient()
-        self._placement_client = None  # Lazy-load on first access.
 
     @property
     def placement_client(self):
-        if self._placement_client is None:
-            self._placement_client = report.SchedulerReportClient()
-        return self._placement_client
+        return report.report_client_singleton()
 
     @wrap_exception()
     def create_aggregate(self, context, aggregate_name, availability_zone):
diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index 4df1c4112c..fa9425f50c 100644
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -623,6 +623,11 @@ class ComputeManager(manager.Manager):
         # We want the ComputeManager, ResourceTracker and ComputeVirtAPI all
         # using the same instance of SchedulerReportClient which has the
         # ProviderTree cache for this compute service.
+        # NOTE(danms): We do not use the global placement client
+        # singleton here, because the above-mentioned stack of objects
+        # maintain local state in the client. Thus, keeping our own
+        # private object for that stack avoids any potential conflict
+        # with other users in our process outside of the above.
         self.reportclient = report.SchedulerReportClient()
         self.virtapi = ComputeVirtAPI(self)
         self.network_api = neutron.API()
diff --git a/nova/compute/resource_tracker.py b/nova/compute/resource_tracker.py
index 0b801f7ddf..058777d1ed 100644
--- a/nova/compute/resource_tracker.py
+++ b/nova/compute/resource_tracker.py
@@ -103,7 +103,7 @@ class ResourceTracker(object):
         monitor_handler = monitors.MonitorHandler(self)
         self.monitors = monitor_handler.monitors
         self.old_resources = collections.defaultdict(objects.ComputeNode)
-        self.reportclient = reportclient or report.SchedulerReportClient()
+        self.reportclient = reportclient or report.report_client_singleton()
         self.ram_allocation_ratio = CONF.ram_allocation_ratio
         self.cpu_allocation_ratio = CONF.cpu_allocation_ratio
         self.disk_allocation_ratio = CONF.disk_allocation_ratio
diff --git a/nova/conductor/manager.py b/nova/conductor/manager.py
index 99e5514136..aaec1c99b7 100644
--- a/nova/conductor/manager.py
+++ b/nova/conductor/manager.py
@@ -243,7 +243,7 @@ class ComputeTaskManager:
         self.network_api = neutron.API()
         self.servicegroup_api = servicegroup.API()
         self.query_client = query.SchedulerQueryClient()
-        self.report_client = report.SchedulerReportClient()
+        self.report_client = report.report_client_singleton()
         self.notifier = rpc.get_notifier('compute')
         # Help us to record host in EventReporter
         self.host = CONF.host
diff --git a/nova/conductor/tasks/migrate.py b/nova/conductor/tasks/migrate.py
index 6ff6206f65..8838d0240a 100644
--- a/nova/conductor/tasks/migrate.py
+++ b/nova/conductor/tasks/migrate.py
@@ -54,7 +54,7 @@ def replace_allocation_with_migration(context, instance, migration):
         # and do any rollback required
         raise
 
-    reportclient = report.SchedulerReportClient()
+    reportclient = report.report_client_singleton()
 
     orig_alloc = reportclient.get_allocs_for_consumer(
         context, instance.uuid)['allocations']
@@ -94,7 +94,7 @@ def replace_allocation_with_migration(context, instance, migration):
 def revert_allocation_for_migration(context, source_cn, instance, migration):
     """Revert an allocation made for a migration back to the instance."""
 
-    reportclient = report.SchedulerReportClient()
+    reportclient = report.report_client_singleton()
 
     # FIXME(gibi): This method is flawed in that it does not handle allocations
     # against sharing providers in any special way. This leads to duplicate
diff --git a/nova/limit/placement.py b/nova/limit/placement.py
index 497986c4ab..eedf7d69e1 100644
--- a/nova/limit/placement.py
+++ b/nova/limit/placement.py
@@ -43,10 +43,8 @@ LEGACY_LIMITS = {
 def _get_placement_usages(
     context: 'nova.context.RequestContext', project_id: str
 ) -> ty.Dict[str, int]:
-    global PLACEMENT_CLIENT
-    if not PLACEMENT_CLIENT:
-        PLACEMENT_CLIENT = report.SchedulerReportClient()
-    return PLACEMENT_CLIENT.get_usages_counts_for_limits(context, project_id)
+    return report.report_client_singleton().get_usages_counts_for_limits(
+        context, project_id)
 
 
 def _get_usage(
diff --git a/nova/quota.py b/nova/quota.py
index b9dd763012..eafad4cd23 100644
--- a/nova/quota.py
+++ b/nova/quota.py
@@ -1348,11 +1348,8 @@ def _instances_cores_ram_count_legacy(context, project_id, user_id=None):
 
 
 def _cores_ram_count_placement(context, project_id, user_id=None):
-    global PLACEMENT_CLIENT
-    if not PLACEMENT_CLIENT:
-        PLACEMENT_CLIENT = report.SchedulerReportClient()
-    return PLACEMENT_CLIENT.get_usages_counts_for_quota(context, project_id,
-                                                        user_id=user_id)
+    return report.report_client_singleton().get_usages_counts_for_quota(
+        context, project_id, user_id=user_id)
 
 
 def _instances_cores_ram_count_api_db_placement(context, project_id,
diff --git a/nova/scheduler/client/report.py b/nova/scheduler/client/report.py
index e4d0c8e3db..ff86527cf5 100644
--- a/nova/scheduler/client/report.py
+++ b/nova/scheduler/client/report.py
@@ -52,6 +52,7 @@ AGGREGATE_GENERATION_VERSION = '1.19'
 NESTED_PROVIDER_API_VERSION = '1.14'
 POST_ALLOCATIONS_API_VERSION = '1.13'
 GET_USAGES_VERSION = '1.9'
+PLACEMENTCLIENT = None
 
 AggInfo = collections.namedtuple('AggInfo', ['aggregates', 'generation'])
 TraitInfo = collections.namedtuple('TraitInfo', ['traits', 'generation'])
@@ -67,6 +68,51 @@ def warn_limit(self, msg):
         LOG.warning(msg)
 
 
+def report_client_singleton():
+    """Return a reference to the global placement client singleton.
+
+    This initializes the placement client once and returns a reference
+    to that singleton on subsequent calls. Errors are raised
+    (particularly ks_exc.*) but context-specific error messages are
+    logged for consistency.
+    """
+    # NOTE(danms): The report client maintains internal state in the
+    # form of the provider tree, which will be shared across all users
+    # of this global client. That is not a problem now, but in the
+    # future it may be beneficial to fix that. One idea would be to
+    # change the behavior of the client such that the static-config
+    # pieces of the actual keystone client are separate from the
+    # internal state, so that we can return a new object here with a
+    # context-specific local state object, but with the client bits
+    # shared.
+    global PLACEMENTCLIENT
+    if PLACEMENTCLIENT is None:
+        try:
+            PLACEMENTCLIENT = SchedulerReportClient()
+        except ks_exc.EndpointNotFound:
+            LOG.error('The placement API endpoint was not found.')
+            raise
+        except ks_exc.MissingAuthPlugin:
+            LOG.error('No authentication information found for placement API.')
+            raise
+        except ks_exc.Unauthorized:
+            LOG.error('Placement service credentials do not work.')
+            raise
+        except ks_exc.DiscoveryFailure:
+            LOG.error('Discovering suitable URL for placement API failed.')
+            raise
+        except (ks_exc.ConnectFailure,
+                ks_exc.RequestTimeout,
+                ks_exc.GatewayTimeout):
+            LOG.error('Placement API service is not responding.')
+            raise
+        except Exception:
+            LOG.error('Failed to initialize placement client '
+                      '(is keystone available?)')
+            raise
+    return PLACEMENTCLIENT
+
+
 def safe_connect(f):
     @functools.wraps(f)
     def wrapper(self, *a, **k):
diff --git a/nova/scheduler/manager.py b/nova/scheduler/manager.py
index 03df615f6a..10b330653d 100644
--- a/nova/scheduler/manager.py
+++ b/nova/scheduler/manager.py
@@ -66,7 +66,7 @@ class SchedulerManager(manager.Manager):
         self.host_manager = host_manager.HostManager()
         self.servicegroup_api = servicegroup.API()
         self.notifier = rpc.get_notifier('scheduler')
-        self.placement_client = report.SchedulerReportClient()
+        self.placement_client = report.report_client_singleton()
 
         super().__init__(service_name='scheduler', *args, **kwargs)
 
diff --git a/nova/scheduler/request_filter.py b/nova/scheduler/request_filter.py
index bd237b06ca..3f96b7a880 100644
--- a/nova/scheduler/request_filter.py
+++ b/nova/scheduler/request_filter.py
@@ -311,7 +311,7 @@ def routed_networks_filter(
 
     # Get the clients we need
     network_api = neutron.API()
-    report_api = report.SchedulerReportClient()
+    report_api = report.report_client_singleton()
 
     for requested_network in requested_networks:
         network_id = None
diff --git a/nova/test.py b/nova/test.py
index a6449c01f0..35fef9cdd7 100644
--- a/nova/test.py
+++ b/nova/test.py
@@ -61,6 +61,7 @@ from nova import exception
 from nova import objects
 from nova.objects import base as objects_base
 from nova import quota
+from nova.scheduler.client import report
 from nova.tests import fixtures as nova_fixtures
 from nova.tests.unit import matchers
 from nova import utils
@@ -290,6 +291,9 @@ class TestCase(base.BaseTestCase):
         # instead of only once initialized for test worker
         wsgi_app.init_global_data.reset()
 
+        # Reset the placement client singleton
+        report.PLACEMENTCLIENT = None
+
     def _setup_cells(self):
         """Setup a normal cellsv2 environment.
 
diff --git a/nova/tests/unit/compute/test_api.py b/nova/tests/unit/compute/test_api.py
index 9e85ef633d..87a6a152cd 100644
--- a/nova/tests/unit/compute/test_api.py
+++ b/nova/tests/unit/compute/test_api.py
@@ -7740,16 +7740,13 @@ class ComputeAPIUnitTestCase(_ComputeAPIUnitTestMixIn, test.NoDBTestCase):
         self.assertTrue(hasattr(self.compute_api, 'host'))
         self.assertEqual(CONF.host, self.compute_api.host)
 
-    @mock.patch('nova.scheduler.client.report.SchedulerReportClient')
+    @mock.patch('nova.scheduler.client.report.report_client_singleton')
     def test_placement_client_init(self, mock_report_client):
         """Tests to make sure that the construction of the placement client
-        only happens once per API class instance.
+        uses the singleton helper, and happens only when needed.
         """
-        self.assertIsNone(self.compute_api._placementclient)
-        # Access the property twice to make sure SchedulerReportClient is
-        # only loaded once.
-        for x in range(2):
-            self.compute_api.placementclient
+        self.assertFalse(mock_report_client.called)
+        self.compute_api.placementclient
         mock_report_client.assert_called_once_with()
 
     def test_validate_host_for_cold_migrate_same_host_fails(self):
diff --git a/nova/tests/unit/compute/test_compute.py b/nova/tests/unit/compute/test_compute.py
index d8f443843f..df2bd328a3 100644
--- a/nova/tests/unit/compute/test_compute.py
+++ b/nova/tests/unit/compute/test_compute.py
@@ -13046,16 +13046,13 @@ class ComputeAPIAggrTestCase(BaseTestCase):
         hosts = aggregate.hosts if 'hosts' in aggregate else None
         self.assertIn(values[0][1][0], hosts)
 
-    @mock.patch('nova.scheduler.client.report.SchedulerReportClient')
+    @mock.patch('nova.scheduler.client.report.report_client_singleton')
     def test_placement_client_init(self, mock_report_client):
         """Tests to make sure that the construction of the placement client
-        only happens once per AggregateAPI class instance.
+        uses the singleton helper, and happens only when needed.
         """
-        self.assertIsNone(self.api._placement_client)
-        # Access the property twice to make sure SchedulerReportClient is
-        # only loaded once.
-        for x in range(2):
-            self.api.placement_client
+        self.assertFalse(mock_report_client.called)
+        self.api.placement_client
         mock_report_client.assert_called_once_with()
 
 
diff --git a/nova/tests/unit/scheduler/client/test_report.py b/nova/tests/unit/scheduler/client/test_report.py
index 0650c62096..485f187d9e 100644
--- a/nova/tests/unit/scheduler/client/test_report.py
+++ b/nova/tests/unit/scheduler/client/test_report.py
@@ -10,6 +10,7 @@
 #    License for the specific language governing permissions and limitations
 #    under the License.
 import copy
+import ddt
 import time
 from urllib import parse
 
@@ -150,6 +151,41 @@ class SafeConnectedTestCase(test.NoDBTestCase):
         self.assertTrue(req.called)
 
 
+@ddt.ddt
+class TestSingleton(test.NoDBTestCase):
+    def test_singleton(self):
+        # Make sure we start with a clean slate
+        self.assertIsNone(report.PLACEMENTCLIENT)
+
+        # Make sure the first call creates the singleton, sets it
+        # globally, and returns it
+        client = report.report_client_singleton()
+        self.assertEqual(client, report.PLACEMENTCLIENT)
+
+        # Make sure that a subsequent call returns the same thing
+        # again and that the global is unchanged
+        self.assertEqual(client, report.report_client_singleton())
+        self.assertEqual(client, report.PLACEMENTCLIENT)
+
+    @ddt.data(ks_exc.EndpointNotFound,
+              ks_exc.MissingAuthPlugin,
+              ks_exc.Unauthorized,
+              ks_exc.DiscoveryFailure,
+              ks_exc.ConnectFailure,
+              ks_exc.RequestTimeout,
+              ks_exc.GatewayTimeout,
+              test.TestingException)
+    def test_errors(self, exc):
+        self._test_error(exc)
+
+    @mock.patch.object(report, 'LOG')
+    def _test_error(self, exc, mock_log):
+        with mock.patch.object(report.SchedulerReportClient, '_create_client',
+                               side_effect=exc):
+            self.assertRaises(exc, report.report_client_singleton)
+        mock_log.error.assert_called_once()
+
+
 class TestConstructor(test.NoDBTestCase):
     def setUp(self):
         super(TestConstructor, self).setUp()
author	Dan Smith <dansmith@redhat.com>	2022-08-11 09:50:30 -0700
committer	melanie witt <melwittt@gmail.com>	2022-09-22 22:54:46 +0000
commit	77273f067d96a4ec401c3b36f2922d63c4ad7103 (patch)
tree	3271dbe56ab45fd4f2151317aee453077d689033
parent	6d61fccb8455367aaa37ae7bddf3b8befd3c3d88 (diff)
download	nova-77273f067d96a4ec401c3b36f2922d63c4ad7103.tar.gz