summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPrzemyslaw Czesnowicz <przemyslaw.czesnowicz@intel.com>2014-11-28 17:37:26 +0000
committerPrzemyslaw Czesnowicz <przemyslaw.czesnowicz@intel.com>2015-02-04 14:31:28 +0000
commit08713a8e3e03b497ead9913659d31daadcae8ed9 (patch)
tree890a9704781922c3c4fdf68fdafdf5d7838129e8
parenta219393c8c6cb935721e45271f8108c0a51571db (diff)
downloadnova-08713a8e3e03b497ead9913659d31daadcae8ed9.tar.gz
PCI NUMA filtering
Add pci device numa awareness to scheduling logic. The NUMA topology filter is modified to consider PCI device NUMA node locality during guest placement. When a VM has a defined NUMA topology, it will be placed on host NUMA nodes that have the requested pci devices. If a VM doesn't have a NUMA topology, it will, if possible, be placed in a single host NUMA node that has the requested pci devices. Implements: blueprint input-output-based-numa-scheduling Change-Id: Id076a76d05f3d64facbeb60a7be3d4b60f817b94 Co-Authored-By: James Chapman <james.p.chapman@intel.com>
-rw-r--r--nova/compute/claims.py19
-rw-r--r--nova/pci/manager.py15
-rw-r--r--nova/pci/stats.py37
-rw-r--r--nova/scheduler/filters/numa_topology_filter.py7
-rw-r--r--nova/scheduler/host_manager.py9
-rw-r--r--nova/tests/unit/compute/test_claims.py80
-rw-r--r--nova/tests/unit/compute/test_resource_tracker.py21
-rw-r--r--nova/tests/unit/pci/test_manager.py42
-rw-r--r--nova/tests/unit/pci/test_stats.py77
-rw-r--r--nova/tests/unit/scheduler/filters/test_numa_topology_filters.py17
-rw-r--r--nova/tests/unit/scheduler/test_host_manager.py4
-rw-r--r--nova/tests/unit/virt/libvirt/test_driver.py138
-rw-r--r--nova/tests/unit/virt/test_hardware.py29
-rw-r--r--nova/virt/hardware.py12
-rw-r--r--nova/virt/libvirt/driver.py44
15 files changed, 485 insertions, 66 deletions
diff --git a/nova/compute/claims.py b/nova/compute/claims.py
index 1469d91ecf..1cb03779d7 100644
--- a/nova/compute/claims.py
+++ b/nova/compute/claims.py
@@ -206,12 +206,27 @@ class Claim(NopClaim):
if host_topology:
host_topology = objects.NUMATopology.obj_from_db_obj(
host_topology)
+ pci_requests = objects.InstancePCIRequests.get_by_instance_uuid(
+ self.context, self.instance['uuid'])
+
+ pci_stats = None
+ if pci_requests.requests:
+ pci_stats = self.tracker.pci_tracker.stats
+
instance_topology = (
hardware.numa_fit_instance_to_host(
host_topology, requested_topology,
- limits_topology=limit))
+ limits_topology=limit,
+ pci_requests=pci_requests.requests,
+ pci_stats=pci_stats))
+
if requested_topology and not instance_topology:
- return (_("Requested instance NUMA topology cannot fit "
+ if pci_requests.requests:
+ return (_("Requested instance NUMA topology together with"
+ " requested PCI devices cannot fit the given"
+ " host NUMA topology"))
+ else:
+ return (_("Requested instance NUMA topology cannot fit "
"the given host NUMA topology"))
elif instance_topology:
self.claimed_numa_topology = instance_topology
diff --git a/nova/pci/manager.py b/nova/pci/manager.py
index e6c17db3e7..22d3960d53 100644
--- a/nova/pci/manager.py
+++ b/nova/pci/manager.py
@@ -25,6 +25,7 @@ from nova import objects
from nova.openstack.common import log as logging
from nova.pci import device
from nova.pci import stats
+from nova.virt import hardware
LOG = logging.getLogger(__name__)
@@ -155,11 +156,23 @@ class PciDevTracker(object):
context, instance)
if not pci_requests.requests:
return None
- devs = self.stats.consume_requests(pci_requests.requests)
+ instance_numa_topology = hardware.instance_topology_from_instance(
+ instance)
+ instance_cells = None
+ if instance_numa_topology:
+ instance_cells = instance_numa_topology.cells
+
+ devs = self.stats.consume_requests(pci_requests.requests,
+ instance_cells)
if not devs:
raise exception.PciDeviceRequestFailed(pci_requests)
for dev in devs:
device.claim(dev, instance)
+ if instance_numa_topology and any(
+ dev.numa_node is None for dev in devs):
+ LOG.warning(_LW("Assigning a pci device without numa affinity to"
+ "instance %(instance)s which has numa topology"),
+ {'instance': instance['uuid']})
return devs
def _allocate_instance(self, instance, devs):
diff --git a/nova/pci/stats.py b/nova/pci/stats.py
index 03fe60566a..41737812f1 100644
--- a/nova/pci/stats.py
+++ b/nova/pci/stats.py
@@ -52,7 +52,7 @@ class PciDeviceStats(object):
This summary information will be helpful for cloud management also.
"""
- pool_keys = ['product_id', 'vendor_id']
+ pool_keys = ['product_id', 'vendor_id', 'numa_node']
def __init__(self, stats=None):
super(PciDeviceStats, self).__init__()
@@ -135,7 +135,7 @@ class PciDeviceStats(object):
free_devs.extend(pool['devices'])
return free_devs
- def consume_requests(self, pci_requests):
+ def consume_requests(self, pci_requests, numa_cells=None):
alloc_devices = []
for request in pci_requests:
count = request.count
@@ -143,6 +143,8 @@ class PciDeviceStats(object):
# For now, keep the same algorithm as during scheduling:
# a spec may be able to match multiple pools.
pools = self._filter_pools_for_spec(self.pools, spec)
+ if numa_cells:
+ pools = self._filter_pools_for_numa_cells(pools, numa_cells)
# Failed to allocate the required number of devices
# Return the devices already allocated back to their pools
if sum([pool['count'] for pool in pools]) < count:
@@ -176,9 +178,24 @@ class PciDeviceStats(object):
return [pool for pool in pools
if utils.pci_device_prop_match(pool, request_specs)]
- def _apply_request(self, pools, request):
+ @staticmethod
+ def _filter_pools_for_numa_cells(pools, numa_cells):
+ # Some systems don't report numa node info for pci devices, in
+ # that case None is reported in pci_device.numa_node, by adding None
+ # to numa_cells we allow assigning those devices to instances with
+ # numa topology
+ numa_cells = [None] + [cell.id for cell in numa_cells]
+ # filter out pools which numa_node is not included in numa_cells
+ return [pool for pool in pools if any(utils.pci_device_prop_match(
+ pool, [{'numa_node': cell}])
+ for cell in numa_cells)]
+
+ def _apply_request(self, pools, request, numa_cells=None):
count = request.count
matching_pools = self._filter_pools_for_spec(pools, request.spec)
+ if numa_cells:
+ matching_pools = self._filter_pools_for_numa_cells(matching_pools,
+ numa_cells)
if sum([pool['count'] for pool in matching_pools]) < count:
return False
else:
@@ -188,25 +205,31 @@ class PciDeviceStats(object):
break
return True
- def support_requests(self, requests):
+ def support_requests(self, requests, numa_cells=None):
"""Check if the pci requests can be met.
Scheduler checks compute node's PCI stats to decide if an
instance can be scheduled into the node. Support does not
mean real allocation.
+ If numa_cells is provided then only devices contained in
+ those nodes are considered.
"""
# note (yjiang5): this function has high possibility to fail,
# so no exception should be triggered for performance reason.
pools = copy.deepcopy(self.pools)
- return all([self._apply_request(pools, r) for r in requests])
+ return all([self._apply_request(pools, r, numa_cells)
+ for r in requests])
- def apply_requests(self, requests):
+ def apply_requests(self, requests, numa_cells=None):
"""Apply PCI requests to the PCI stats.
This is used in multiple instance creation, when the scheduler has to
maintain how the resources are consumed by the instances.
+ If numa_cells is provided then only devices contained in
+ those nodes are considered.
"""
- if not all([self._apply_request(self.pools, r) for r in requests]):
+ if not all([self._apply_request(self.pools, r, numa_cells)
+ for r in requests]):
raise exception.PciDeviceRequestFailed(requests=requests)
@staticmethod
diff --git a/nova/scheduler/filters/numa_topology_filter.py b/nova/scheduler/filters/numa_topology_filter.py
index 1721f01a87..e0d2fe8d59 100644
--- a/nova/scheduler/filters/numa_topology_filter.py
+++ b/nova/scheduler/filters/numa_topology_filter.py
@@ -31,6 +31,9 @@ class NUMATopologyFilter(filters.BaseHostFilter):
requested_topology = hardware.instance_topology_from_instance(instance)
host_topology, _fmt = hardware.host_topology_and_format_from_host(
host_state)
+ pci_requests = filter_properties.get('pci_requests')
+ if pci_requests:
+ pci_requests = pci_requests.requests
if requested_topology and host_topology:
limit_cells = []
for cell in host_topology.cells:
@@ -42,7 +45,9 @@ class NUMATopologyFilter(filters.BaseHostFilter):
limits = hardware.VirtNUMALimitTopology(cells=limit_cells)
instance_topology = (hardware.numa_fit_instance_to_host(
host_topology, requested_topology,
- limits_topology=limits))
+ limits_topology=limits,
+ pci_requests=pci_requests,
+ pci_stats=host_state.pci_stats))
if not instance_topology:
return False
host_state.limits['numa_topology'] = limits.to_json()
diff --git a/nova/scheduler/host_manager.py b/nova/scheduler/host_manager.py
index 5f81fca642..65e2ff907f 100644
--- a/nova/scheduler/host_manager.py
+++ b/nova/scheduler/host_manager.py
@@ -243,13 +243,20 @@ class HostState(object):
# Track number of instances on host
self.num_instances += 1
+ instance_numa_topology = hardware.instance_topology_from_instance(
+ instance)
+ instance_cells = None
+ if instance_numa_topology:
+ instance_cells = instance_numa_topology.cells
+
pci_requests = instance.get('pci_requests')
# NOTE(danms): Instance here is still a dict, which is converted from
# an object. Thus, it has a .pci_requests field, which gets converted
# to a primitive early on, and is thus a dict here. Convert this when
# we get an object all the way to this path.
if pci_requests and pci_requests['requests'] and self.pci_stats:
- self.pci_stats.apply_requests(pci_requests.requests)
+ self.pci_stats.apply_requests(pci_requests.requests,
+ instance_cells)
# Calculate the numa usage
updated_numa_topology = hardware.get_host_numa_usage_from_instance(
diff --git a/nova/tests/unit/compute/test_claims.py b/nova/tests/unit/compute/test_claims.py
index cdcde67f8e..7eac61bec4 100644
--- a/nova/tests/unit/compute/test_claims.py
+++ b/nova/tests/unit/compute/test_claims.py
@@ -192,6 +192,7 @@ class ClaimTestCase(test.NoDBTestCase):
'address': 'a',
'product_id': 'p',
'vendor_id': 'v',
+ 'numa_node': 0,
'status': 'available'}
self.tracker.new_pci_tracker()
self.tracker.pci_tracker.set_hvdevs([dev_dict])
@@ -209,6 +210,7 @@ class ClaimTestCase(test.NoDBTestCase):
'address': 'a',
'product_id': 'p',
'vendor_id': 'v1',
+ 'numa_node': 1,
'status': 'available'}
self.tracker.new_pci_tracker()
self.tracker.pci_tracker.set_hvdevs([dev_dict])
@@ -226,6 +228,7 @@ class ClaimTestCase(test.NoDBTestCase):
'address': 'a',
'product_id': 'p',
'vendor_id': 'v',
+ 'numa_node': 0,
'status': 'available'}
self.tracker.new_pci_tracker()
self.tracker.pci_tracker.set_hvdevs([dev_dict])
@@ -269,6 +272,83 @@ class ClaimTestCase(test.NoDBTestCase):
self._claim(limits={'numa_topology': limit_topo.to_json()},
numa_topology=huge_instance)
+ @pci_fakes.patch_pci_whitelist
+ def test_numa_topology_with_pci(self, mock_get):
+ dev_dict = {
+ 'compute_node_id': 1,
+ 'address': 'a',
+ 'product_id': 'p',
+ 'vendor_id': 'v',
+ 'numa_node': 1,
+ 'status': 'available'}
+ self.tracker.new_pci_tracker()
+ self.tracker.pci_tracker.set_hvdevs([dev_dict])
+ request = objects.InstancePCIRequest(count=1,
+ spec=[{'vendor_id': 'v', 'product_id': 'p'}])
+ mock_get.return_value = objects.InstancePCIRequests(
+ requests=[request])
+
+ huge_instance = objects.InstanceNUMATopology(
+ cells=[objects.InstanceNUMACell(
+ id=1, cpuset=set([1, 2]), memory=512)])
+
+ self._claim(numa_topology= huge_instance)
+
+ @pci_fakes.patch_pci_whitelist
+ def test_numa_topology_with_pci_fail(self, mock_get):
+ dev_dict = {
+ 'compute_node_id': 1,
+ 'address': 'a',
+ 'product_id': 'p',
+ 'vendor_id': 'v',
+ 'numa_node': 1,
+ 'status': 'available'}
+ dev_dict2 = {
+ 'compute_node_id': 1,
+ 'address': 'a',
+ 'product_id': 'p',
+ 'vendor_id': 'v',
+ 'numa_node': 2,
+ 'status': 'available'}
+ self.tracker.new_pci_tracker()
+ self.tracker.pci_tracker.set_hvdevs([dev_dict, dev_dict2])
+
+ request = objects.InstancePCIRequest(count=2,
+ spec=[{'vendor_id': 'v', 'product_id': 'p'}])
+ mock_get.return_value = objects.InstancePCIRequests(
+ requests=[request])
+
+ huge_instance = objects.InstanceNUMATopology(
+ cells=[objects.InstanceNUMACell(
+ id=1, cpuset=set([1, 2]), memory=512)])
+
+ self.assertRaises(exception.ComputeResourcesUnavailable,
+ self._claim,
+ numa_topology=huge_instance)
+
+ @pci_fakes.patch_pci_whitelist
+ def test_numa_topology_with_pci_no_numa_info(self, mock_get):
+ dev_dict = {
+ 'compute_node_id': 1,
+ 'address': 'a',
+ 'product_id': 'p',
+ 'vendor_id': 'v',
+ 'numa_node': None,
+ 'status': 'available'}
+ self.tracker.new_pci_tracker()
+ self.tracker.pci_tracker.set_hvdevs([dev_dict])
+
+ request = objects.InstancePCIRequest(count=1,
+ spec=[{'vendor_id': 'v', 'product_id': 'p'}])
+ mock_get.return_value = objects.InstancePCIRequests(
+ requests=[request])
+
+ huge_instance = objects.InstanceNUMATopology(
+ cells=[objects.InstanceNUMACell(
+ id=1, cpuset=set([1, 2]), memory=512)])
+
+ self._claim(numa_topology= huge_instance)
+
def test_abort(self, mock_get):
claim = self._abort()
self.assertTrue(claim.tracker.icalled)
diff --git a/nova/tests/unit/compute/test_resource_tracker.py b/nova/tests/unit/compute/test_resource_tracker.py
index 3a4eeb820d..855054354e 100644
--- a/nova/tests/unit/compute/test_resource_tracker.py
+++ b/nova/tests/unit/compute/test_resource_tracker.py
@@ -101,7 +101,8 @@ class FakeVirtDriver(driver.ComputeDriver):
'product_id': '0443',
'vendor_id': '8086',
'status': 'available',
- 'extra_k1': 'v1'
+ 'extra_k1': 'v1',
+ 'numa_node': 1
},
{
'label': 'label_8086_0443',
@@ -111,7 +112,8 @@ class FakeVirtDriver(driver.ComputeDriver):
'product_id': '0443',
'vendor_id': '8086',
'status': 'available',
- 'extra_k1': 'v1'
+ 'extra_k1': 'v1',
+ 'numa_node': 1
},
{
'label': 'label_8086_0443',
@@ -121,7 +123,8 @@ class FakeVirtDriver(driver.ComputeDriver):
'product_id': '0443',
'vendor_id': '8086',
'status': 'available',
- 'extra_k1': 'v1'
+ 'extra_k1': 'v1',
+ 'numa_node': 1
},
{
'label': 'label_8086_0123',
@@ -131,7 +134,8 @@ class FakeVirtDriver(driver.ComputeDriver):
'product_id': '0123',
'vendor_id': '8086',
'status': 'available',
- 'extra_k1': 'v1'
+ 'extra_k1': 'v1',
+ 'numa_node': 1
},
{
'label': 'label_8086_7891',
@@ -141,19 +145,22 @@ class FakeVirtDriver(driver.ComputeDriver):
'product_id': '7891',
'vendor_id': '8086',
'status': 'available',
- 'extra_k1': 'v1'
+ 'extra_k1': 'v1',
+ 'numa_node': None
},
] if self.pci_support else []
self.pci_stats = [
{
'count': 2,
'vendor_id': '8086',
- 'product_id': '0443'
+ 'product_id': '0443',
+ 'numa_node': 1
},
{
'count': 1,
'vendor_id': '8086',
- 'product_id': '7891'
+ 'product_id': '7891',
+ 'numa_node': None
},
] if self.pci_support else []
if stats is not None:
diff --git a/nova/tests/unit/pci/test_manager.py b/nova/tests/unit/pci/test_manager.py
index 08cf42e82c..c612a1105f 100644
--- a/nova/tests/unit/pci/test_manager.py
+++ b/nova/tests/unit/pci/test_manager.py
@@ -36,7 +36,8 @@ fake_pci = {
'product_id': 'p',
'vendor_id': 'v',
'request_id': None,
- 'status': 'available'}
+ 'status': 'available',
+ 'numa_node': 0}
fake_pci_1 = dict(fake_pci, address='0000:00:00.2',
product_id='p1', vendor_id='v1')
fake_pci_2 = dict(fake_pci, address='0000:00:00.3')
@@ -63,8 +64,10 @@ fake_db_dev = {
}
fake_db_dev_1 = dict(fake_db_dev, vendor_id='v1',
product_id='p1', id=2,
- address='0000:00:00.2')
-fake_db_dev_2 = dict(fake_db_dev, id=3, address='0000:00:00.3')
+ address='0000:00:00.2',
+ numa_node=0)
+fake_db_dev_2 = dict(fake_db_dev, id=3, address='0000:00:00.3',
+ numa_node=None)
fake_db_devs = [fake_db_dev, fake_db_dev_1, fake_db_dev_2]
@@ -82,6 +85,7 @@ class PciDevTrackerTestCase(test.TestCase):
self.inst.pci_devices = objects.PciDeviceList()
self.inst.vm_state = vm_states.ACTIVE
self.inst.task_state = None
+ self.inst.numa_topology = None
def _fake_get_pci_devices(self, ctxt, node_id):
return fake_db_devs[:]
@@ -119,7 +123,7 @@ class PciDevTrackerTestCase(test.TestCase):
free_devs = self.tracker.pci_stats.get_free_devs()
self.assertEqual(len(free_devs), 3)
self.assertEqual(self.tracker.stale.keys(), [])
- self.assertEqual(len(self.tracker.stats.pools), 2)
+ self.assertEqual(len(self.tracker.stats.pools), 3)
self.assertEqual(self.tracker.node_id, 1)
def test_pcidev_tracker_create_no_nodeid(self):
@@ -186,6 +190,36 @@ class PciDevTrackerTestCase(test.TestCase):
self.inst)
@mock.patch('nova.objects.InstancePCIRequests.get_by_instance')
+ def test_update_pci_for_instance_with_numa(self, mock_get):
+ fake_db_dev_3 = dict(fake_db_dev_1, id=4, address='0000:00:00.4')
+ fake_devs_numa = copy.deepcopy(fake_db_devs)
+ fake_devs_numa.append(fake_db_dev_3)
+ self.tracker = manager.PciDevTracker(1)
+ self.tracker.set_hvdevs(fake_devs_numa)
+ pci_requests = copy.deepcopy(fake_pci_requests)[:1]
+ pci_requests[0]['count'] = 2
+ self._create_pci_requests_object(mock_get, pci_requests)
+ self.inst.numa_topology = objects.InstanceNUMATopology(
+ cells=[objects.InstanceNUMACell(
+ id=1, cpuset=set([1, 2]), memory=512)])
+ self.tracker.update_pci_for_instance(None, self.inst)
+ free_devs = self.tracker.pci_stats.get_free_devs()
+ self.assertEqual(2, len(free_devs))
+ self.assertEqual('v1', free_devs[0]['vendor_id'])
+ self.assertEqual('v1', free_devs[1]['vendor_id'])
+
+ @mock.patch('nova.objects.InstancePCIRequests.get_by_instance')
+ def test_update_pci_for_instance_with_numa_fail(self, mock_get):
+ self._create_pci_requests_object(mock_get, fake_pci_requests)
+ self.inst.numa_topology = objects.InstanceNUMATopology(
+ cells=[objects.InstanceNUMACell(
+ id=1, cpuset=set([1, 2]), memory=512)])
+ self.assertRaises(exception.PciDeviceRequestFailed,
+ self.tracker.update_pci_for_instance,
+ None,
+ self.inst)
+
+ @mock.patch('nova.objects.InstancePCIRequests.get_by_instance')
def test_update_pci_for_instance_deleted(self, mock_get):
self._create_pci_requests_object(mock_get, fake_pci_requests)
self.tracker.update_pci_for_instance(None, self.inst)
diff --git a/nova/tests/unit/pci/test_stats.py b/nova/tests/unit/pci/test_stats.py
index 6960cf93cf..840a4e8c8e 100644
--- a/nova/tests/unit/pci/test_stats.py
+++ b/nova/tests/unit/pci/test_stats.py
@@ -22,7 +22,7 @@ from nova.pci import stats
from nova.pci import whitelist
from nova import test
from nova.tests.unit.pci import fakes
-
+from nova.virt import hardware
fake_pci_1 = {
'compute_node_id': 1,
'address': '0000:00:00.1',
@@ -31,16 +31,22 @@ fake_pci_1 = {
'status': 'available',
'extra_k1': 'v1',
'request_id': None,
+ 'numa_node': 0,
}
fake_pci_2 = dict(fake_pci_1, vendor_id='v2',
product_id='p2',
- address='0000:00:00.2')
+ address='0000:00:00.2',
+ numa_node=1)
fake_pci_3 = dict(fake_pci_1, address='0000:00:00.3')
+fake_pci_4 = dict(fake_pci_1, vendor_id='v3',
+ product_id='p3',
+ address='0000:00:00.3',
+ numa_node= None)
pci_requests = [objects.InstancePCIRequest(count=1,
spec=[{'vendor_id': 'v1'}]),
@@ -59,9 +65,11 @@ class PciDeviceStatsTestCase(test.NoDBTestCase):
self.fake_dev_1 = objects.PciDevice.create(fake_pci_1)
self.fake_dev_2 = objects.PciDevice.create(fake_pci_2)
self.fake_dev_3 = objects.PciDevice.create(fake_pci_3)
+ self.fake_dev_4 = objects.PciDevice.create(fake_pci_4)
map(self.pci_stats.add_device,
- [self.fake_dev_1, self.fake_dev_2, self.fake_dev_3])
+ [self.fake_dev_1, self.fake_dev_2,
+ self.fake_dev_3, self.fake_dev_4])
def setUp(self):
super(PciDeviceStatsTestCase, self).setUp()
@@ -72,15 +80,15 @@ class PciDeviceStatsTestCase(test.NoDBTestCase):
self._create_fake_devs()
def test_add_device(self):
- self.assertEqual(len(self.pci_stats.pools), 2)
+ self.assertEqual(len(self.pci_stats.pools), 3)
self.assertEqual(set([d['vendor_id'] for d in self.pci_stats]),
- set(['v1', 'v2']))
+ set(['v1', 'v2', 'v3']))
self.assertEqual(set([d['count'] for d in self.pci_stats]),
set([1, 2]))
def test_remove_device(self):
self.pci_stats.remove_device(self.fake_dev_2)
- self.assertEqual(len(self.pci_stats.pools), 1)
+ self.assertEqual(len(self.pci_stats.pools), 2)
self.assertEqual(self.pci_stats.pools[0]['count'], 2)
self.assertEqual(self.pci_stats.pools[0]['vendor_id'], 'v1')
@@ -94,29 +102,29 @@ class PciDeviceStatsTestCase(test.NoDBTestCase):
m = jsonutils.dumps(self.pci_stats)
new_stats = stats.PciDeviceStats(m)
- self.assertEqual(len(new_stats.pools), 2)
+ self.assertEqual(len(new_stats.pools), 3)
self.assertEqual(set([d['count'] for d in new_stats]),
set([1, 2]))
self.assertEqual(set([d['vendor_id'] for d in new_stats]),
- set(['v1', 'v2']))
+ set(['v1', 'v2', 'v3']))
def test_support_requests(self):
self.assertEqual(self.pci_stats.support_requests(pci_requests),
True)
- self.assertEqual(len(self.pci_stats.pools), 2)
+ self.assertEqual(len(self.pci_stats.pools), 3)
self.assertEqual(set([d['count'] for d in self.pci_stats]),
set((1, 2)))
def test_support_requests_failed(self):
self.assertEqual(
self.pci_stats.support_requests(pci_requests_multiple), False)
- self.assertEqual(len(self.pci_stats.pools), 2)
+ self.assertEqual(len(self.pci_stats.pools), 3)
self.assertEqual(set([d['count'] for d in self.pci_stats]),
set([1, 2]))
def test_apply_requests(self):
self.pci_stats.apply_requests(pci_requests)
- self.assertEqual(len(self.pci_stats.pools), 1)
+ self.assertEqual(len(self.pci_stats.pools), 2)
self.assertEqual(self.pci_stats.pools[0]['vendor_id'], 'v1')
self.assertEqual(self.pci_stats.pools[0]['count'], 1)
@@ -140,6 +148,47 @@ class PciDeviceStatsTestCase(test.NoDBTestCase):
self.pci_stats.consume_requests,
pci_requests_multiple)
+ def test_support_requests_numa(self):
+ cells = [hardware.VirtNUMATopologyCell(0, None, None),
+ hardware.VirtNUMATopologyCell(1, None, None)]
+ self.assertEqual(True, self.pci_stats.support_requests(
+ pci_requests, cells))
+
+ def test_support_requests_numa_failed(self):
+ cells = [hardware.VirtNUMATopologyCell(0, None, None)]
+ self.assertEqual(False, self.pci_stats.support_requests(
+ pci_requests, cells))
+
+ def test_support_requests_no_numa_info(self):
+ cells = [hardware.VirtNUMATopologyCell(0, None, None)]
+ pci_request = [objects.InstancePCIRequest(count=1,
+ spec=[{'vendor_id': 'v3'}])]
+ self.assertEqual(True, self.pci_stats.support_requests(
+ pci_request, cells))
+
+ def test_consume_requests_numa(self):
+ cells = [hardware.VirtNUMATopologyCell(0, None, None),
+ hardware.VirtNUMATopologyCell(1, None, None)]
+ devs = self.pci_stats.consume_requests(pci_requests, cells)
+ self.assertEqual(2, len(devs))
+ self.assertEqual(set(['v1', 'v2']),
+ set([dev['vendor_id'] for dev in devs]))
+
+ def test_consume_requests_numa_failed(self):
+ cells = [hardware.VirtNUMATopologyCell(0, None, None)]
+ self.assertRaises(exception.PciDeviceRequestFailed,
+ self.pci_stats.consume_requests,
+ pci_requests, cells)
+
+ def test_consume_requests_no_numa_info(self):
+ cells = [hardware.VirtNUMATopologyCell(0, None, None)]
+ pci_request = [objects.InstancePCIRequest(count=1,
+ spec=[{'vendor_id': 'v3'}])]
+ devs = self.pci_stats.consume_requests(pci_request, cells)
+ self.assertEqual(1, len(devs))
+ self.assertEqual(set(['v3']),
+ set([dev['vendor_id'] for dev in devs]))
+
@mock.patch.object(whitelist, 'get_pci_devices_filter')
class PciDeviceStatsWithTagsTestCase(test.NoDBTestCase):
@@ -163,7 +212,8 @@ class PciDeviceStatsWithTagsTestCase(test.NoDBTestCase):
'vendor_id': '1137',
'product_id': '0071',
'status': 'available',
- 'request_id': None}
+ 'request_id': None,
+ 'numa_node': 0}
self.pci_tagged_devices.append(objects.PciDevice.create(pci_dev))
self.pci_untagged_devices = []
@@ -173,7 +223,8 @@ class PciDeviceStatsWithTagsTestCase(test.NoDBTestCase):
'vendor_id': '1137',
'product_id': '0072',
'status': 'available',
- 'request_id': None}
+ 'request_id': None,
+ 'numa_node': 0}
self.pci_untagged_devices.append(objects.PciDevice.create(pci_dev))
map(self.pci_stats.add_device, self.pci_tagged_devices)
diff --git a/nova/tests/unit/scheduler/filters/test_numa_topology_filters.py b/nova/tests/unit/scheduler/filters/test_numa_topology_filters.py
index 58b4148374..17a2948d78 100644
--- a/nova/tests/unit/scheduler/filters/test_numa_topology_filters.py
+++ b/nova/tests/unit/scheduler/filters/test_numa_topology_filters.py
@@ -40,7 +40,8 @@ class TestNUMATopologyFilter(test.NoDBTestCase):
'instance_properties': jsonutils.to_primitive(
obj_base.obj_to_primitive(instance))}}
host = fakes.FakeHostState('host1', 'node1',
- {'numa_topology': fakes.NUMA_TOPOLOGY})
+ {'numa_topology': fakes.NUMA_TOPOLOGY,
+ 'pci_stats': None})
self.assertTrue(self.filt_cls.host_passes(host, filter_properties))
def test_numa_topology_filter_numa_instance_no_numa_host_fail(self):
@@ -55,7 +56,7 @@ class TestNUMATopologyFilter(test.NoDBTestCase):
'request_spec': {
'instance_properties': jsonutils.to_primitive(
obj_base.obj_to_primitive(instance))}}
- host = fakes.FakeHostState('host1', 'node1', {})
+ host = fakes.FakeHostState('host1', 'node1', {'pci_stats': None})
self.assertFalse(self.filt_cls.host_passes(host, filter_properties))
def test_numa_topology_filter_numa_host_no_numa_instance_pass(self):
@@ -82,7 +83,8 @@ class TestNUMATopologyFilter(test.NoDBTestCase):
'instance_properties': jsonutils.to_primitive(
obj_base.obj_to_primitive(instance))}}
host = fakes.FakeHostState('host1', 'node1',
- {'numa_topology': fakes.NUMA_TOPOLOGY})
+ {'numa_topology': fakes.NUMA_TOPOLOGY,
+ 'pci_stats': None})
self.assertFalse(self.filt_cls.host_passes(host, filter_properties))
def test_numa_topology_filter_fail_memory(self):
@@ -100,7 +102,8 @@ class TestNUMATopologyFilter(test.NoDBTestCase):
'instance_properties': jsonutils.to_primitive(
obj_base.obj_to_primitive(instance))}}
host = fakes.FakeHostState('host1', 'node1',
- {'numa_topology': fakes.NUMA_TOPOLOGY})
+ {'numa_topology': fakes.NUMA_TOPOLOGY,
+ 'pci_stats': None})
self.assertFalse(self.filt_cls.host_passes(host, filter_properties))
def test_numa_topology_filter_fail_cpu(self):
@@ -117,7 +120,8 @@ class TestNUMATopologyFilter(test.NoDBTestCase):
'instance_properties': jsonutils.to_primitive(
obj_base.obj_to_primitive(instance))}}
host = fakes.FakeHostState('host1', 'node1',
- {'numa_topology': fakes.NUMA_TOPOLOGY})
+ {'numa_topology': fakes.NUMA_TOPOLOGY,
+ 'pci_stats': None})
self.assertFalse(self.filt_cls.host_passes(host, filter_properties))
def test_numa_topology_filter_pass_set_limit(self):
@@ -135,7 +139,8 @@ class TestNUMATopologyFilter(test.NoDBTestCase):
'instance_properties': jsonutils.to_primitive(
obj_base.obj_to_primitive(instance))}}
host = fakes.FakeHostState('host1', 'node1',
- {'numa_topology': fakes.NUMA_TOPOLOGY})
+ {'numa_topology': fakes.NUMA_TOPOLOGY,
+ 'pci_stats': None})
self.assertTrue(self.filt_cls.host_passes(host, filter_properties))
limits_topology = hardware.VirtNUMALimitTopology.from_json(
host.limits['numa_topology'])
diff --git a/nova/tests/unit/scheduler/test_host_manager.py b/nova/tests/unit/scheduler/test_host_manager.py
index d5b0315228..c45f86a7f8 100644
--- a/nova/tests/unit/scheduler/test_host_manager.py
+++ b/nova/tests/unit/scheduler/test_host_manager.py
@@ -492,7 +492,7 @@ class HostStateTestCase(test.NoDBTestCase):
instance = dict(root_gb=0, ephemeral_gb=0, memory_mb=0, vcpus=0,
project_id='12345', vm_state=vm_states.BUILDING,
task_state=task_states.SCHEDULING, os_type='Linux',
- uuid='fake-uuid')
+ uuid='fake-uuid', numa_topology=None)
host.consume_from_instance(instance)
numa_usage_mock.assert_called_once_with(host, instance)
self.assertEqual('fake-consumed-once', host.numa_topology)
@@ -501,7 +501,7 @@ class HostStateTestCase(test.NoDBTestCase):
instance = dict(root_gb=0, ephemeral_gb=0, memory_mb=0, vcpus=0,
project_id='12345', vm_state=vm_states.PAUSED,
task_state=None, os_type='Linux',
- uuid='fake-uuid')
+ uuid='fake-uuid', numa_topology=None)
host.consume_from_instance(instance)
self.assertEqual(2, host.num_instances)
diff --git a/nova/tests/unit/virt/libvirt/test_driver.py b/nova/tests/unit/virt/libvirt/test_driver.py
index 214bb2938a..71ab76a06d 100644
--- a/nova/tests/unit/virt/libvirt/test_driver.py
+++ b/nova/tests/unit/virt/libvirt/test_driver.py
@@ -1183,6 +1183,144 @@ class LibvirtConnTestCase(test.NoDBTestCase):
'something', 'something'))
@mock.patch.object(objects.Flavor, 'get_by_id')
+ def test_get_guest_config_numa_host_instance_1pci_fits(self, mock_flavor):
+ instance_ref = objects.Instance(**self.test_instance)
+ image_meta = {}
+ flavor = objects.Flavor(memory_mb=1, vcpus=2, root_gb=496,
+ ephemeral_gb=8128, swap=33550336, name='fake',
+ extra_specs={})
+ mock_flavor.return_value = flavor
+
+ caps = vconfig.LibvirtConfigCaps()
+ caps.host = vconfig.LibvirtConfigCapsHost()
+ caps.host.cpu = vconfig.LibvirtConfigCPU()
+ caps.host.cpu.arch = "x86_64"
+ caps.host.topology = self._fake_caps_numa_topology()
+
+ conn = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True)
+ disk_info = blockinfo.get_disk_info(CONF.libvirt.virt_type,
+ instance_ref,
+ image_meta)
+ pci_device_info = dict(test_pci_device.fake_db_dev)
+ pci_device_info.update(compute_node_id=1,
+ label='fake',
+ status='available',
+ address='0000:00:00.1',
+ instance_uuid=None,
+ request_id=None,
+ extra_info={},
+ numa_node=1)
+ pci_device = objects.PciDevice(**pci_device_info)
+
+ with contextlib.nested(
+ mock.patch.object(host.Host, 'has_min_version',
+ return_value=True),
+ mock.patch.object(
+ host.Host, "get_capabilities", return_value=caps),
+ mock.patch.object(
+ random, 'choice', side_effect=lambda cells: cells[0]),
+ mock.patch.object(pci_manager, "get_instance_pci_devs",
+ return_value=[pci_device])):
+ cfg = conn._get_guest_config(instance_ref, [], {}, disk_info)
+ self.assertIsNone(instance_ref.numa_topology)
+ self.assertEqual(set([2, 3]), cfg.cpuset)
+ self.assertEqual(0, len(cfg.cputune.vcpupin))
+ self.assertIsNone(cfg.cpu.numa)
+
+ @mock.patch.object(objects.Flavor, 'get_by_id')
+ def test_get_guest_config_numa_host_instance_pci_no_numa_info(self,
+ mock_flavor):
+ instance_ref = objects.Instance(**self.test_instance)
+ image_meta = {}
+ flavor = objects.Flavor(memory_mb=1, vcpus=2, root_gb=496,
+ ephemeral_gb=8128, swap=33550336, name='fake',
+ extra_specs={})
+ mock_flavor.return_value = flavor
+
+ caps = vconfig.LibvirtConfigCaps()
+ caps.host = vconfig.LibvirtConfigCapsHost()
+ caps.host.cpu = vconfig.LibvirtConfigCPU()
+ caps.host.cpu.arch = "x86_64"
+ caps.host.topology = self._fake_caps_numa_topology()
+
+ conn = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True)
+ disk_info = blockinfo.get_disk_info(CONF.libvirt.virt_type,
+ instance_ref,
+ image_meta)
+ pci_device_info = dict(test_pci_device.fake_db_dev)
+ pci_device_info.update(compute_node_id=1,
+ label='fake',
+ status='available',
+ address='0000:00:00.1',
+ instance_uuid=None,
+ request_id=None,
+ extra_info={},
+ numa_node=None)
+ pci_device = objects.PciDevice(**pci_device_info)
+
+ with contextlib.nested(
+ mock.patch.object(host.Host, 'has_min_version',
+ return_value=True),
+ mock.patch.object(
+ host.Host, "get_capabilities", return_value=caps),
+ mock.patch.object(
+ hardware, 'get_vcpu_pin_set', return_value=set([3])),
+ mock.patch.object(pci_manager, "get_instance_pci_devs",
+ return_value=[pci_device])):
+ cfg = conn._get_guest_config(instance_ref, [], {}, disk_info)
+ self.assertEqual(set([3]), cfg.cpuset)
+ self.assertEqual(0, len(cfg.cputune.vcpupin))
+ self.assertIsNone(cfg.cpu.numa)
+
+ @mock.patch.object(objects.Flavor, 'get_by_id')
+ def test_get_guest_config_numa_host_instance_2pci_no_fit(self,
+ mock_flavor):
+ instance_ref = objects.Instance(**self.test_instance)
+ image_meta = {}
+ flavor = objects.Flavor(memory_mb=4096, vcpus=4, root_gb=496,
+ ephemeral_gb=8128, swap=33550336, name='fake',
+ extra_specs={})
+ mock_flavor.return_value = flavor
+
+ caps = vconfig.LibvirtConfigCaps()
+ caps.host = vconfig.LibvirtConfigCapsHost()
+ caps.host.cpu = vconfig.LibvirtConfigCPU()
+ caps.host.cpu.arch = "x86_64"
+ caps.host.topology = self._fake_caps_numa_topology()
+
+ conn = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True)
+ disk_info = blockinfo.get_disk_info(CONF.libvirt.virt_type,
+ instance_ref,
+ image_meta)
+ pci_device_info = dict(test_pci_device.fake_db_dev)
+ pci_device_info.update(compute_node_id=1,
+ label='fake',
+ status='available',
+ address='0000:00:00.1',
+ instance_uuid=None,
+ request_id=None,
+ extra_info={},
+ numa_node=1)
+ pci_device = objects.PciDevice(**pci_device_info)
+ pci_device_info.update(numa_node=0, address='0000:00:00.2')
+ pci_device2 = objects.PciDevice(**pci_device_info)
+ with contextlib.nested(
+ mock.patch.object(
+ host.Host, "get_capabilities", return_value=caps),
+ mock.patch.object(
+ hardware, 'get_vcpu_pin_set', return_value=set([3])),
+ mock.patch.object(random, 'choice'),
+ mock.patch.object(pci_manager, "get_instance_pci_devs",
+ return_value=[pci_device, pci_device2])
+ ) as (get_host_cap_mock,
+ get_vcpu_pin_set_mock, choice_mock, pci_mock):
+ cfg = conn._get_guest_config(instance_ref, [], {}, disk_info)
+ self.assertFalse(choice_mock.called)
+ self.assertEqual(set([3]), cfg.cpuset)
+ self.assertEqual(0, len(cfg.cputune.vcpupin))
+ self.assertIsNone(cfg.cpu.numa)
+
+ @mock.patch.object(objects.Flavor, 'get_by_id')
def test_get_guest_config_numa_host_instance_fit_w_cpu_pinset(self,
mock_flavor):
instance_ref = objects.Instance(**self.test_instance)
diff --git a/nova/tests/unit/virt/test_hardware.py b/nova/tests/unit/virt/test_hardware.py
index 492a02ac25..86b9e94c13 100644
--- a/nova/tests/unit/virt/test_hardware.py
+++ b/nova/tests/unit/virt/test_hardware.py
@@ -22,6 +22,7 @@ from nova import context
from nova import exception
from nova import objects
from nova.objects import base as base_obj
+from nova.pci import stats
from nova import test
from nova.virt import hardware as hw
@@ -1421,6 +1422,34 @@ class VirtNUMAHostTopologyTestCase(test.NoDBTestCase):
self.assertIsInstance(fitted_instance2, objects.InstanceNUMATopology)
self.assertEqual(2, fitted_instance2.cells[0].id)
+ def test_get_fitting_pci_success(self):
+ pci_request = objects.InstancePCIRequest(count=1,
+ spec=[{'vendor_id': '8086'}])
+ pci_reqs = [pci_request]
+ pci_stats = stats.PciDeviceStats()
+ with mock.patch.object(stats.PciDeviceStats,
+ 'support_requests', return_value= True):
+ fitted_instance1 = hw.numa_fit_instance_to_host(self.host,
+ self.instance1,
+ pci_requests=pci_reqs,
+ pci_stats=pci_stats)
+ self.assertIsInstance(fitted_instance1,
+ objects.InstanceNUMATopology)
+
+ def test_get_fitting_pci_fail(self):
+ pci_request = objects.InstancePCIRequest(count=1,
+ spec=[{'vendor_id': '8086'}])
+ pci_reqs = [pci_request]
+ pci_stats = stats.PciDeviceStats()
+ with mock.patch.object(stats.PciDeviceStats,
+ 'support_requests', return_value= False):
+ fitted_instance1 = hw.numa_fit_instance_to_host(
+ self.host,
+ self.instance1,
+ pci_requests=pci_reqs,
+ pci_stats=pci_stats)
+ self.assertIsNone(fitted_instance1)
+
class NumberOfSerialPortsTest(test.NoDBTestCase):
def test_flavor(self):
diff --git a/nova/virt/hardware.py b/nova/virt/hardware.py
index 6eb3e137c5..ddc84fbdd0 100644
--- a/nova/virt/hardware.py
+++ b/nova/virt/hardware.py
@@ -1126,12 +1126,15 @@ class VirtNUMALimitTopology(VirtNUMATopology):
def numa_fit_instance_to_host(
- host_topology, instance_topology, limits_topology=None):
+ host_topology, instance_topology, limits_topology=None,
+ pci_requests=None, pci_stats=None):
"""Fit the instance topology onto the host topology given the limits
:param host_topology: objects.NUMATopology object to fit an instance on
:param instance_topology: objects.InstanceNUMATopology to be fitted
:param limits_topology: VirtNUMALimitTopology that defines limits
+ :param pci_requests: instance pci_requests
+ :param pci_stats: pci_stats for the host
Given a host and instance topology and optionally limits - this method
will attempt to fit instance cells onto all permutations of host cells
@@ -1163,7 +1166,12 @@ def numa_fit_instance_to_host(
break
cells.append(got_cell)
if len(cells) == len(host_cell_perm):
- return objects.InstanceNUMATopology(cells=cells)
+ if not pci_requests:
+ return objects.InstanceNUMATopology(cells=cells)
+ elif ((pci_stats is not None) and
+ pci_stats.support_requests(pci_requests,
+ cells)):
+ return objects.InstanceNUMATopology(cells=cells)
def _numa_pagesize_usage_from_cell(hostcell, instancecell, sign):
diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py
index 00420391ec..58961ec37b 100644
--- a/nova/virt/libvirt/driver.py
+++ b/nova/virt/libvirt/driver.py
@@ -3376,7 +3376,7 @@ class LibvirtDriver(driver.ComputeDriver):
guest_cpu_numa.cells.append(guest_cell)
return guest_cpu_numa
- def _get_guest_numa_config(self, instance_numa_topology, flavor,
+ def _get_guest_numa_config(self, instance_numa_topology, flavor, pci_devs,
allowed_cpus=None):
"""Returns the config objects for the guest NUMA specs.
@@ -3417,24 +3417,27 @@ class LibvirtDriver(driver.ComputeDriver):
memory = flavor.memory_mb
if topology:
# Host is NUMA capable so try to keep the instance in a cell
- viable_cells_cpus = []
- for cell in topology.cells:
+ pci_cells = {pci.numa_node for pci in pci_devs}
+ if len(pci_cells) == 0:
+ viable_cells_cpus = []
+ for cell in topology.cells:
+ if vcpus <= len(cell.cpuset) and memory <= cell.memory:
+ viable_cells_cpus.append(cell.cpuset)
+
+ if viable_cells_cpus:
+ pin_cpuset = random.choice(viable_cells_cpus)
+ return GuestNumaConfig(pin_cpuset, None, None, None)
+ elif len(pci_cells) == 1 and None not in pci_cells:
+ cell = topology.cells[pci_cells.pop()]
if vcpus <= len(cell.cpuset) and memory <= cell.memory:
- viable_cells_cpus.append(cell.cpuset)
-
- if not viable_cells_cpus:
- # We can't contain the instance in a cell - do nothing for
- # now.
- # TODO(ndipanov): Attempt to spread the instance across
- # NUMA nodes and expose the topology to the instance as an
- # optimisation
- return GuestNumaConfig(allowed_cpus, None, None, None)
- else:
- pin_cpuset = random.choice(viable_cells_cpus)
- return GuestNumaConfig(pin_cpuset, None, None, None)
- else:
- # We have no NUMA topology in the host either
- return GuestNumaConfig(allowed_cpus, None, None, None)
+ return GuestNumaConfig(cell.cpuset, None, None, None)
+
+ # We have no NUMA topology in the host either,
+            # or we can't find a single cell to accommodate the instance
+ # TODO(ndipanov): Attempt to spread the instance
+            # across NUMA nodes and expose the topology to the
+ # instance as an optimisation
+ return GuestNumaConfig(allowed_cpus, None, None, None)
else:
if topology:
# Now get the CpuTune configuration from the numa_topology
@@ -3856,9 +3859,10 @@ class LibvirtDriver(driver.ComputeDriver):
guest.memory = flavor.memory_mb * units.Ki
guest.vcpus = flavor.vcpus
allowed_cpus = hardware.get_vcpu_pin_set()
+ pci_devs = pci_manager.get_instance_pci_devs(instance, 'all')
guest_numa_config = self._get_guest_numa_config(
- instance.numa_topology, flavor, allowed_cpus)
+ instance.numa_topology, flavor, pci_devs, allowed_cpus)
guest.cpuset = guest_numa_config.cpuset
guest.cputune = guest_numa_config.cputune
@@ -3980,7 +3984,7 @@ class LibvirtDriver(driver.ComputeDriver):
for pci_dev in pci_manager.get_instance_pci_devs(instance):
guest.add_device(self._get_guest_pci_device(pci_dev))
else:
- if len(pci_manager.get_instance_pci_devs(instance)) > 0:
+ if len(pci_devs) > 0:
raise exception.PciDeviceUnsupportedHypervisor(
type=virt_type)