diff options
author | Nikola Dipanov <ndipanov@redhat.com> | 2015-04-07 20:53:32 +0100 |
---|---|---|
committer | Nikola Dipanov <ndipanov@redhat.com> | 2015-04-21 10:37:33 +0100 |
commit | 880a356e40d327c0af4ce94b5a08fe0cd6fcab5d (patch) | |
tree | 91c377ea4949433ab53e6d4cda1687cbab330278 | |
parent | 22d7547c6b62fb9dabd861e4941edd34eedabfc6 (diff) | |
download | nova-880a356e40d327c0af4ce94b5a08fe0cd6fcab5d.tar.gz |
scheduler: re-calculate NUMA on consume_from_instance
This patch narrows down the race window between the filter running and
the consumption of resources from the instance after the host has been
chosen.
It does so by re-calculating the fitted NUMA topology just before consuming it
from the chosen host. Thus we avoid any locking, but also make sure that
the host_state is kept as up to date as possible for concurrent
requests, as there is no opportunity for switching threads inside a
consume_from_instance.
Several things worth noting:
* Scheduler being lock free (and thus racy) does not really affect
resources other than PCI and NUMA topology as badly - this is due
to the complexity of said resources. In order for scheduler decisions to not
be based on basically guessing, in case of those two we will likely need
to introduce either locking or special heuristics.
* There is a lot of repeated code between the 'consume_from_instance'
method and the actual filters. This situation should really be fixed but
is out of scope for this bug fix (which is about preventing valid
requests failing because of races in the scheduler).
Change-Id: If0c7ad20506c9dddf4dec1eb64c9d6dd4fb75633
Closes-bug: #1438238
(cherry picked from commit d6b3156a6c89ddff9b149452df34c4b32c50b6c3)
-rw-r--r-- | nova/scheduler/filters/numa_topology_filter.py | 1 | ||||
-rw-r--r-- | nova/scheduler/host_manager.py | 17 | ||||
-rw-r--r-- | nova/tests/unit/scheduler/filters/test_numa_topology_filters.py | 9 | ||||
-rw-r--r-- | nova/tests/unit/scheduler/test_host_manager.py | 52 |
4 files changed, 35 insertions, 44 deletions
diff --git a/nova/scheduler/filters/numa_topology_filter.py b/nova/scheduler/filters/numa_topology_filter.py index 769f191cbe..938bf112cc 100644 --- a/nova/scheduler/filters/numa_topology_filter.py +++ b/nova/scheduler/filters/numa_topology_filter.py @@ -47,7 +47,6 @@ class NUMATopologyFilter(filters.BaseHostFilter): if not instance_topology: return False host_state.limits['numa_topology'] = limits - host_state.instance_numa_topology = instance_topology return True elif requested_topology: return False diff --git a/nova/scheduler/host_manager.py b/nova/scheduler/host_manager.py index ab945b6ba2..9b2acf7a6f 100644 --- a/nova/scheduler/host_manager.py +++ b/nova/scheduler/host_manager.py @@ -125,8 +125,8 @@ class HostState(object): self.free_disk_mb = 0 self.vcpus_total = 0 self.vcpus_used = 0 + self.pci_stats = None self.numa_topology = None - self.instance_numa_topology = None # Additional host information from the compute node stats: self.num_instances = 0 @@ -212,7 +212,6 @@ class HostState(object): self.vcpus_used = compute.vcpus_used self.updated = compute.updated_at self.numa_topology = compute.numa_topology - self.instance_numa_topology = None self.pci_stats = pci_stats.PciDeviceStats( compute.pci_device_pools) @@ -269,14 +268,18 @@ class HostState(object): # to a primitive early on, and is thus a dict here. Convert this when # we get an object all the way to this path. 
if pci_requests and pci_requests['requests'] and self.pci_stats: - self.pci_stats.apply_requests(pci_requests.requests, - instance_cells) + pci_requests = pci_requests.requests + self.pci_stats.apply_requests(pci_requests, instance_cells) # Calculate the numa usage - instance['numa_topology'] = self.instance_numa_topology - updated_numa_topology = hardware.get_host_numa_usage_from_instance( + host_numa_topology, _fmt = hardware.host_topology_and_format_from_host( + self) + instance['numa_topology'] = hardware.numa_fit_instance_to_host( + host_numa_topology, instance_numa_topology, + limits=self.limits.get('numa_topology'), + pci_requests=pci_requests, pci_stats=self.pci_stats) + self.numa_topology = hardware.get_host_numa_usage_from_instance( self, instance) - self.numa_topology = updated_numa_topology vm_state = instance.get('vm_state', vm_states.BUILDING) task_state = instance.get('task_state') diff --git a/nova/tests/unit/scheduler/filters/test_numa_topology_filters.py b/nova/tests/unit/scheduler/filters/test_numa_topology_filters.py index 8a842a925f..2c884a53e9 100644 --- a/nova/tests/unit/scheduler/filters/test_numa_topology_filters.py +++ b/nova/tests/unit/scheduler/filters/test_numa_topology_filters.py @@ -42,8 +42,6 @@ class TestNUMATopologyFilter(test.NoDBTestCase): {'numa_topology': fakes.NUMA_TOPOLOGY, 'pci_stats': None}) self.assertTrue(self.filt_cls.host_passes(host, filter_properties)) - self.assertIsInstance(host.instance_numa_topology, - objects.InstanceNUMATopology) def test_numa_topology_filter_numa_instance_no_numa_host_fail(self): instance_topology = objects.InstanceNUMATopology( @@ -59,7 +57,6 @@ class TestNUMATopologyFilter(test.NoDBTestCase): obj_base.obj_to_primitive(instance))}} host = fakes.FakeHostState('host1', 'node1', {'pci_stats': None}) self.assertFalse(self.filt_cls.host_passes(host, filter_properties)) - self.assertIsNone(host.instance_numa_topology) def test_numa_topology_filter_numa_host_no_numa_instance_pass(self): instance = 
fake_instance.fake_instance_obj(mock.sentinel.ctx) @@ -71,7 +68,6 @@ class TestNUMATopologyFilter(test.NoDBTestCase): host = fakes.FakeHostState('host1', 'node1', {'numa_topology': fakes.NUMA_TOPOLOGY}) self.assertTrue(self.filt_cls.host_passes(host, filter_properties)) - self.assertIsNone(host.instance_numa_topology) def test_numa_topology_filter_fail_fit(self): instance_topology = objects.InstanceNUMATopology( @@ -89,7 +85,6 @@ class TestNUMATopologyFilter(test.NoDBTestCase): {'numa_topology': fakes.NUMA_TOPOLOGY, 'pci_stats': None}) self.assertFalse(self.filt_cls.host_passes(host, filter_properties)) - self.assertIsNone(host.instance_numa_topology) def test_numa_topology_filter_fail_memory(self): self.flags(ram_allocation_ratio=1) @@ -109,7 +104,6 @@ class TestNUMATopologyFilter(test.NoDBTestCase): {'numa_topology': fakes.NUMA_TOPOLOGY, 'pci_stats': None}) self.assertFalse(self.filt_cls.host_passes(host, filter_properties)) - self.assertIsNone(host.instance_numa_topology) def test_numa_topology_filter_fail_cpu(self): self.flags(cpu_allocation_ratio=1) @@ -128,7 +122,6 @@ class TestNUMATopologyFilter(test.NoDBTestCase): {'numa_topology': fakes.NUMA_TOPOLOGY, 'pci_stats': None}) self.assertFalse(self.filt_cls.host_passes(host, filter_properties)) - self.assertIsNone(host.instance_numa_topology) def test_numa_topology_filter_pass_set_limit(self): self.flags(cpu_allocation_ratio=21) @@ -148,8 +141,6 @@ class TestNUMATopologyFilter(test.NoDBTestCase): {'numa_topology': fakes.NUMA_TOPOLOGY, 'pci_stats': None}) self.assertTrue(self.filt_cls.host_passes(host, filter_properties)) - self.assertIsInstance(host.instance_numa_topology, - objects.InstanceNUMATopology) limits = host.limits['numa_topology'] self.assertEqual(limits.cpu_allocation_ratio, 21) self.assertEqual(limits.ram_allocation_ratio, 1.3) diff --git a/nova/tests/unit/scheduler/test_host_manager.py b/nova/tests/unit/scheduler/test_host_manager.py index 7115582f9d..9217c26cbd 100644 --- 
a/nova/tests/unit/scheduler/test_host_manager.py +++ b/nova/tests/unit/scheduler/test_host_manager.py @@ -864,27 +864,43 @@ class HostStateTestCase(test.NoDBTestCase): self.assertEqual(hyper_ver_int, host.hypervisor_version) @mock.patch('nova.virt.hardware.get_host_numa_usage_from_instance') - def test_stat_consumption_from_instance(self, numa_usage_mock): + @mock.patch('nova.virt.hardware.numa_fit_instance_to_host') + @mock.patch('nova.virt.hardware.instance_topology_from_instance') + @mock.patch('nova.virt.hardware.host_topology_and_format_from_host') + def test_stat_consumption_from_instance(self, host_topo_mock, + instance_topo_mock, + numa_fit_mock, + numa_usage_mock): + fake_numa_topology = mock.Mock() + host_topo_mock.return_value = ('fake-host-topology', None) numa_usage_mock.return_value = 'fake-consumed-once' - host = host_manager.HostState("fakehost", "fakenode") - host.instance_numa_topology = 'fake-instance-topology' - + numa_fit_mock.return_value = 'fake-fitted-once' + instance_topo_mock.return_value = fake_numa_topology instance = dict(root_gb=0, ephemeral_gb=0, memory_mb=0, vcpus=0, project_id='12345', vm_state=vm_states.BUILDING, task_state=task_states.SCHEDULING, os_type='Linux', - uuid='fake-uuid', numa_topology=None) + uuid='fake-uuid', + numa_topology=fake_numa_topology) + host = host_manager.HostState("fakehost", "fakenode") + host.consume_from_instance(instance) + numa_fit_mock.assert_called_once_with('fake-host-topology', + fake_numa_topology, + limits=None, pci_requests=None, + pci_stats=None) numa_usage_mock.assert_called_once_with(host, instance) self.assertEqual('fake-consumed-once', host.numa_topology) - self.assertEqual('fake-instance-topology', instance['numa_topology']) + self.assertEqual('fake-fitted-once', instance['numa_topology']) - numa_usage_mock.return_value = 'fake-consumed-twice' instance = dict(root_gb=0, ephemeral_gb=0, memory_mb=0, vcpus=0, project_id='12345', vm_state=vm_states.PAUSED, task_state=None, os_type='Linux', - 
uuid='fake-uuid', numa_topology=None) + uuid='fake-uuid', + numa_topology=fake_numa_topology) + numa_usage_mock.return_value = 'fake-consumed-twice' + numa_fit_mock.return_value = 'fake-fitted-twice' host.consume_from_instance(instance) - self.assertEqual('fake-instance-topology', instance['numa_topology']) + self.assertEqual('fake-fitted-twice', instance['numa_topology']) self.assertEqual(2, host.num_instances) self.assertEqual(1, host.num_io_ops) @@ -926,21 +942,3 @@ class HostStateTestCase(test.NoDBTestCase): self.assertEqual('string2', host.metrics['res2'].value) self.assertEqual('source2', host.metrics['res2'].source) self.assertIsInstance(host.numa_topology, six.string_types) - - def test_update_from_compute_node_resets_stashed_numa(self): - hyper_ver_int = utils.convert_version_to_int('6.0.0') - compute = objects.ComputeNode( - memory_mb=0, free_disk_gb=0, local_gb=0, metrics=None, - local_gb_used=0, free_ram_mb=0, vcpus=0, vcpus_used=0, - disk_available_least=None, - updated_at=None, host_ip='127.0.0.1', - hypervisor_type='htype', - hypervisor_hostname='hostname', cpu_info='cpu_info', - supported_hv_specs=[], - hypervisor_version=hyper_ver_int, - numa_topology=fakes.NUMA_TOPOLOGY._to_json(), - stats=None, pci_device_pools=None) - host = host_manager.HostState("fakehost", "fakenode") - host.instance_numa_topology = 'fake-instance-topology' - host.update_from_compute_node(compute) - self.assertIsNone(host.instance_numa_topology) |