From ee00c8015ca2c71095ffd87c190a47f22c4f73fb Mon Sep 17 00:00:00 2001
From: Nikola Dipanov
Date: Wed, 12 Nov 2014 17:14:01 +0100
Subject: Instances with NUMA will be packed onto hosts

This patch makes the NUMATopologyFilter and instance claims on the
compute host use instance fitting logic to allow for actually packing
instances onto NUMA-capable hosts.

This also means that the NUMA placement that is calculated during a
successful claim will need to be updated in the database to reflect the
host NUMA cell ids the instance cells will be pinned to.

Using fit_instance_to_host() to decide whether an instance can land on
a host makes the NUMATopologyFilter code cleaner, as it now fully
re-uses all the logic in the VirtNUMAHostTopology and
VirtNUMATopologyCellUsage classes.

Closes-bug: #1386236
(cherry picked from commit 53099f3bf23d0d160fc690a90cf4f32506adf076)

Conflicts:
	nova/compute/manager.py
	nova/tests/unit/compute/test_claims.py
	nova/tests/unit/compute/test_resource_tracker.py
	nova/virt/hardware.py

Change-Id: Ieabafea73b4d566f4194ca60be38b6415d8a8f3d
---
 doc/source/devref/filter_scheduler.rst         |  3 +-
 nova/compute/claims.py                         | 25 ++++++++++----
 nova/compute/manager.py                        |  6 ++--
 nova/compute/resource_tracker.py               |  8 +++++
 nova/scheduler/filters/numa_topology_filter.py | 46 +++++++++++---------------
 nova/tests/compute/test_claims.py              |  4 +--
 nova/tests/compute/test_compute.py             |  1 +
 nova/tests/compute/test_resource_tracker.py    |  4 +--
 nova/virt/hardware.py                          |  9 +++--
 9 files changed, 61 insertions(+), 45 deletions(-)

diff --git a/doc/source/devref/filter_scheduler.rst b/doc/source/devref/filter_scheduler.rst
index 931a27ca2d..63a610de13 100644
--- a/doc/source/devref/filter_scheduler.rst
+++ b/doc/source/devref/filter_scheduler.rst
@@ -282,8 +282,7 @@ and try to match it with the topology exposed by the host, accounting for
 the ``ram_allocation_ratio`` and ``cpu_allocation_ratio`` for over-subscription.
 The filtering is done in the following manner:
 
-* Filter will try to match the exact NUMA cells of the instance to those of
-  the host. It *will not* attempt to pack the instance onto the host.
+* Filter will attempt to pack instance cells onto host cells.
 * It will consider the standard over-subscription limits for each host NUMA
   cell, and provide limits to the compute host accordingly (as mentioned above).
 * If instance has no topology defined, it will be considered for any host.
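To make the new behaviour concrete before the code: below is a minimal,
self-contained sketch of the first-fit packing idea the commit message
describes. It is not the nova.virt.hardware implementation; the Cell tuple,
the greedy strategy, and all values are illustrative stand-ins only.

import collections

Cell = collections.namedtuple('Cell', ['id', 'cpuset', 'memory'])


def fit_instance_to_host(host_cells, instance_cells):
    """Pack each instance cell onto a distinct host cell, first fit.

    Returns {instance cell id: host cell id} on success, or None when some
    instance cell cannot be placed (the host would then be filtered out,
    or the claim rejected).
    """
    used = set()
    placement = {}
    for inst_cell in instance_cells:
        for host_cell in host_cells:
            if host_cell.id in used:
                continue
            # A host cell fits if it offers enough CPUs and memory.
            if (len(inst_cell.cpuset) <= len(host_cell.cpuset)
                    and inst_cell.memory <= host_cell.memory):
                placement[inst_cell.id] = host_cell.id
                used.add(host_cell.id)
                break
        else:
            return None
    return placement


host = [Cell(0, {0, 1, 2, 3}, 2048), Cell(1, {4, 5, 6, 7}, 2048)]
inst = [Cell(0, {0, 1}, 512), Cell(1, {0, 1}, 512)]
print(fit_instance_to_host(host, inst))  # -> {0: 0, 1: 1}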
diff --git a/nova/compute/claims.py b/nova/compute/claims.py
index 1df481d069..ccd016ba13 100644
--- a/nova/compute/claims.py
+++ b/nova/compute/claims.py
@@ -35,6 +35,7 @@ class NopClaim(object):
 
     def __init__(self, migration=None):
         self.migration = migration
+        self.claimed_numa_topology = None
 
     @property
     def disk_gb(self):
@@ -200,13 +201,22 @@ class Claim(NopClaim):
 
     def _test_numa_topology(self, resources, limit):
         host_topology = resources.get('numa_topology')
-        if host_topology and limit:
+        requested_topology = (self.numa_topology and
+                              self.numa_topology.topology_from_obj())
+        if host_topology:
             host_topology = hardware.VirtNUMAHostTopology.from_json(
                 host_topology)
-            instances_topology = (
-                [self.numa_topology] if self.numa_topology else [])
-            return hardware.VirtNUMAHostTopology.claim_test(
-                host_topology, instances_topology, limit)
+            instance_topology = (
+                hardware.VirtNUMAHostTopology.fit_instance_to_host(
+                    host_topology, requested_topology,
+                    limits_topology=limit))
+            if requested_topology and not instance_topology:
+                return (_("Requested instance NUMA topology cannot fit "
+                          "the given host NUMA topology"))
+            elif instance_topology:
+                self.claimed_numa_topology = (
+                    objects.InstanceNUMATopology.obj_from_topology(
+                        instance_topology))
 
     def _test(self, type_, unit, total, used, requested, limit):
         """Test if the given type of resource needed for a claim can be safely
@@ -263,8 +273,11 @@
 
     @property
     def numa_topology(self):
-        return hardware.VirtNUMAInstanceTopology.get_constraints(
+        instance_topology = hardware.VirtNUMAInstanceTopology.get_constraints(
             self.instance_type, self.image_meta)
+        if instance_topology:
+            return objects.InstanceNUMATopology.obj_from_topology(
+                instance_topology)
 
     def _test_pci(self):
         pci_requests = objects.InstancePCIRequests.\
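A hedged sketch of the claim-side pattern the claims.py hunks above
introduce: run the fit test, signal failure by returning a reason string
(how Nova claim tests report problems), and on success stash the computed
placement on the claim so the compute manager can copy it onto the
instance. The class and values below are simplified stand-ins, not the
real Nova types.

class FakeClaim(object):
    def __init__(self, requested_cells):
        # requested_cells: one (ncpus, memory_mb) tuple per instance cell.
        self.requested_cells = requested_cells
        self.claimed_numa_topology = None

    def _test_numa_topology(self, host_cells):
        # host_cells: one (ncpus, memory_mb) tuple per host cell.
        if not host_cells:
            return None  # host reports no topology: nothing to test
        placement = []
        free = list(host_cells)
        for ncpus, mem in self.requested_cells:
            match = next((i for i, (hc, hm) in enumerate(free)
                          if hc >= ncpus and hm >= mem), None)
            if match is None:
                return "Requested instance NUMA topology cannot fit host"
            placement.append(match)
            free[match] = (0, 0)  # crude: treat the host cell as consumed
        # Success: record the placement for later persistence.
        self.claimed_numa_topology = placement


claim = FakeClaim([(2, 512), (2, 512)])
error = claim._test_numa_topology([(4, 2048), (4, 2048)])
print(error, claim.claimed_numa_topology)  # -> None [0, 1]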
diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index 031ae194af..145e20abaf 100644
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -1404,7 +1404,7 @@ class ComputeManager(manager.Manager):
         rt = self._get_resource_tracker(node)
         try:
             limits = filter_properties.get('limits', {})
-            with rt.instance_claim(context, instance, limits):
+            with rt.instance_claim(context, instance, limits) as inst_claim:
                 # NOTE(russellb) It's important that this validation be done
                 # *after* the resource tracker instance claim, as that is where
                 # the host is set on the instance.
@@ -1419,6 +1419,7 @@
 
                 instance.vm_state = vm_states.BUILDING
                 instance.task_state = task_states.BLOCK_DEVICE_MAPPING
+                instance.numa_topology = inst_claim.claimed_numa_topology
                 instance.save()
 
                 # Verify that all the BDMs have a device_name set and assign a
@@ -2090,7 +2091,7 @@ class ComputeManager(manager.Manager):
                     extra_usage_info={'image_name': image_name})
             try:
                 rt = self._get_resource_tracker(node)
-                with rt.instance_claim(context, instance, limits):
+                with rt.instance_claim(context, instance, limits) as inst_claim:
                     # NOTE(russellb) It's important that this validation be done
                     # *after* the resource tracker instance claim, as that is where
                     # the host is set on the instance.
@@ -2101,6 +2102,7 @@ class ComputeManager(manager.Manager):
                             block_device_mapping) as resources:
                         instance.vm_state = vm_states.BUILDING
                         instance.task_state = task_states.SPAWNING
+                        instance.numa_topology = inst_claim.claimed_numa_topology
                         instance.save(expected_task_state=
                                 task_states.BLOCK_DEVICE_MAPPING)
                         block_device_info = resources['block_device_info']
diff --git a/nova/compute/resource_tracker.py b/nova/compute/resource_tracker.py
index 7a056ae257..6aaadc9d49 100644
--- a/nova/compute/resource_tracker.py
+++ b/nova/compute/resource_tracker.py
@@ -130,6 +130,7 @@ class ResourceTracker(object):
                                   overhead=overhead, limits=limits)
 
         self._set_instance_host_and_node(context, instance_ref)
+        instance_ref['numa_topology'] = claim.claimed_numa_topology
 
         # Mark resources in-use and update stats
         self._update_usage_from_instance(context, self.compute_node,
@@ -593,9 +594,16 @@ class ResourceTracker(object):
                 instance['system_metadata'])
 
             if itype:
+                host_topology = resources.get('numa_topology')
+                if host_topology:
+                    host_topology = hardware.VirtNUMAHostTopology.from_json(
+                        host_topology)
                 numa_topology = (
                     hardware.VirtNUMAInstanceTopology.get_constraints(
                         itype, image_meta))
+                numa_topology = (
+                    hardware.VirtNUMAHostTopology.fit_instance_to_host(
+                        host_topology, numa_topology))
                 usage = self._get_usage_dict(
                     itype, numa_topology=numa_topology)
                 if self.pci_tracker:
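The scheduler filter hunk below builds per-NUMA-cell over-subscription
limits from ram_allocation_ratio and cpu_allocation_ratio before running
the fit. A hedged sketch of just that arithmetic, with made-up cell values
(the real code wraps the results in VirtNUMATopologyCellLimit and
VirtNUMALimitTopology objects and serializes them to JSON):

ram_allocation_ratio = 1.5   # stand-in for CONF.ram_allocation_ratio
cpu_allocation_ratio = 16.0  # stand-in for CONF.cpu_allocation_ratio

host_cells = [
    {'id': 0, 'cpuset': {0, 1, 2, 3}, 'memory': 4096},
    {'id': 1, 'cpuset': {4, 5, 6, 7}, 'memory': 4096},
]

limit_cells = []
for cell in host_cells:
    limit_cells.append({
        'id': cell['id'],
        # Memory cap per cell, scaled by the RAM over-subscription ratio.
        'max_memory': int(cell['memory'] * ram_allocation_ratio),
        # CPU cap per cell: number of physical CPUs times the CPU ratio.
        'max_cpu': len(cell['cpuset']) * cpu_allocation_ratio,
    })

print(limit_cells[0])  # -> {'id': 0, 'max_memory': 6144, 'max_cpu': 64.0}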
diff --git a/nova/scheduler/filters/numa_topology_filter.py b/nova/scheduler/filters/numa_topology_filter.py
index f68c8e8f26..fe26c393ad 100644
--- a/nova/scheduler/filters/numa_topology_filter.py
+++ b/nova/scheduler/filters/numa_topology_filter.py
@@ -28,34 +28,28 @@ class NUMATopologyFilter(filters.BaseHostFilter):
         cpu_ratio = CONF.cpu_allocation_ratio
         request_spec = filter_properties.get('request_spec', {})
         instance = request_spec.get('instance_properties', {})
-        instance_topology = hardware.instance_topology_from_instance(instance)
+        requested_topology = hardware.instance_topology_from_instance(instance)
         host_topology, _fmt = hardware.host_topology_and_format_from_host(
             host_state)
-        if instance_topology:
-            if host_topology:
-                if not hardware.VirtNUMAHostTopology.can_fit_instances(
-                        host_topology, [instance_topology]):
-                    return False
-
-                limit_cells = []
-                usage_after_instance = (
-                    hardware.VirtNUMAHostTopology.usage_from_instances(
-                        host_topology, [instance_topology]))
-                for cell in usage_after_instance.cells:
-                    max_cell_memory = int(cell.memory * ram_ratio)
-                    max_cell_cpu = len(cell.cpuset) * cpu_ratio
-                    if (cell.memory_usage > max_cell_memory or
-                            cell.cpu_usage > max_cell_cpu):
-                        return False
-                    limit_cells.append(
-                        hardware.VirtNUMATopologyCellLimit(
-                            cell.id, cell.cpuset, cell.memory,
-                            max_cell_cpu, max_cell_memory))
-                host_state.limits['numa_topology'] = (
-                    hardware.VirtNUMALimitTopology(
-                        cells=limit_cells).to_json())
-                return True
-            else:
+        if requested_topology and host_topology:
+            limit_cells = []
+            for cell in host_topology.cells:
+                max_cell_memory = int(cell.memory * ram_ratio)
+                max_cell_cpu = len(cell.cpuset) * cpu_ratio
+                limit_cells.append(hardware.VirtNUMATopologyCellLimit(
+                    cell.id, cell.cpuset, cell.memory,
+                    max_cell_cpu, max_cell_memory))
+            limits = hardware.VirtNUMALimitTopology(cells=limit_cells)
+            instance_topology = (
+                hardware.VirtNUMAHostTopology.fit_instance_to_host(
+                    host_topology, requested_topology,
+                    limits_topology=limits))
+            if not instance_topology:
                 return False
+            host_state.limits['numa_topology'] = limits.to_json()
+            instance['numa_topology'] = instance_topology.to_json()
+            return True
+        elif requested_topology:
+            return False
         else:
             return True
diff --git a/nova/tests/compute/test_claims.py b/nova/tests/compute/test_claims.py
index a5b7a0e46c..8098b80449 100644
--- a/nova/tests/compute/test_claims.py
+++ b/nova/tests/compute/test_claims.py
@@ -244,7 +244,7 @@ class ClaimTestCase(test.NoDBTestCase):
     def test_numa_topology_no_limit(self, mock_get):
         huge_instance = hardware.VirtNUMAInstanceTopology(
             cells=[hardware.VirtNUMATopologyCell(
-                1, set([1, 2, 3, 4, 5]), 2048)])
+                1, set([1, 2]), 512)])
         self._claim(numa_topology=huge_instance)
 
     def test_numa_topology_fails(self, mock_get):
@@ -264,7 +264,7 @@ class ClaimTestCase(test.NoDBTestCase):
     def test_numa_topology_passes(self, mock_get):
         huge_instance = hardware.VirtNUMAInstanceTopology(
             cells=[hardware.VirtNUMATopologyCell(
-                1, set([1, 2, 3, 4, 5]), 2048)])
+                1, set([1, 2]), 512)])
         limit_topo = hardware.VirtNUMALimitTopology(
             cells=[hardware.VirtNUMATopologyCellLimit(
                 1, [1, 2], 512, cpu_limit=5, memory_limit=4096),
diff --git a/nova/tests/compute/test_compute.py b/nova/tests/compute/test_compute.py
index 2cd434826f..b074721ca4 100644
--- a/nova/tests/compute/test_compute.py
+++ b/nova/tests/compute/test_compute.py
@@ -317,6 +317,7 @@ class BaseTestCase(test.TestCase):
         inst['updated_at'] = timeutils.utcnow()
         inst['launched_at'] = timeutils.utcnow()
         inst['security_groups'] = []
+        inst['numa_topology'] = None
         inst.update(params)
         if services:
             _create_service_entries(self.context.elevated(),
diff --git a/nova/tests/compute/test_resource_tracker.py b/nova/tests/compute/test_resource_tracker.py
index 8ae5a86abb..6bb36fe96d 100644
--- a/nova/tests/compute/test_resource_tracker.py
+++ b/nova/tests/compute/test_resource_tracker.py
@@ -862,8 +862,8 @@ class InstanceClaimTestCase(BaseTrackerTestCase):
         memory_mb = FAKE_VIRT_MEMORY_MB * 2
         root_gb = ephemeral_gb = FAKE_VIRT_LOCAL_GB
         vcpus = FAKE_VIRT_VCPUS * 2
-        claim_topology = self._claim_topology(memory_mb)
-        instance_topology = self._instance_topology(memory_mb)
+        claim_topology = self._claim_topology(3)
+        instance_topology = self._instance_topology(3)
 
         limits = {'memory_mb': memory_mb + FAKE_VIRT_MEMORY_OVERHEAD,
                   'disk_gb': root_gb * 2,
diff --git a/nova/virt/hardware.py b/nova/virt/hardware.py
index a5a167439b..46d2d3cf14 100644
--- a/nova/virt/hardware.py
+++ b/nova/virt/hardware.py
@@ -1040,12 +1040,11 @@ def instance_topology_from_instance(instance):
             # Remove when request_spec is a proper object itself!
             dict_cells = instance_numa_topology.get('cells')
             if dict_cells:
-                cells = [objects.InstanceNUMACell(id=cell['id'],
-                                                  cpuset=set(cell['cpuset']),
-                                                  memory=cell['memory'])
+                cells = [VirtNUMATopologyCell(cell['id'],
+                                              set(cell['cpuset']),
+                                              cell['memory'])
                          for cell in dict_cells]
-                instance_numa_topology = (
-                    objects.InstanceNUMATopology(cells=cells))
+                instance_numa_topology = VirtNUMAInstanceTopology(cells=cells)
 
     return instance_numa_topology
 
-- 
cgit v1.2.1
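Finally, the hardware.py hunk above copes with request_spec still being a
plain dict: instance NUMA cells can arrive serialized as dicts and must be
rebuilt as topology objects before the fitting code can use them. A hedged
stand-in for that normalization step (the two classes here are simplified
sketches of VirtNUMATopologyCell and VirtNUMAInstanceTopology, not the
real ones):

class VirtNUMATopologyCell(object):
    def __init__(self, id, cpuset, memory):
        self.id, self.cpuset, self.memory = id, cpuset, memory


class VirtNUMAInstanceTopology(object):
    def __init__(self, cells):
        self.cells = cells


def instance_topology_from_instance(instance):
    topo = instance.get('numa_topology')
    if isinstance(topo, dict):
        # Serialized over RPC: rebuild cell objects from their dict form.
        cells = [VirtNUMATopologyCell(c['id'], set(c['cpuset']), c['memory'])
                 for c in topo.get('cells', [])]
        topo = VirtNUMAInstanceTopology(cells=cells)
    return topo


inst = {'numa_topology': {'cells': [{'id': 0, 'cpuset': [0, 1],
                                     'memory': 512}]}}
print(instance_topology_from_instance(inst).cells[0].cpuset)  # -> {0, 1}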