Diffstat (limited to 'nova/tests/functional/regressions')
11 files changed, 389 insertions, 45 deletions
diff --git a/nova/tests/functional/regressions/test_bug_1595962.py b/nova/tests/functional/regressions/test_bug_1595962.py
index bf1c7e53be..8eef0d3b7a 100644
--- a/nova/tests/functional/regressions/test_bug_1595962.py
+++ b/nova/tests/functional/regressions/test_bug_1595962.py
@@ -57,7 +57,6 @@ class TestSerialConsoleLiveMigrate(test.TestCase):
         self.flags(enabled=True, group="serial_console")
         self.flags(enabled=False, group="vnc")
         self.flags(enabled=False, group="spice")
-        self.flags(use_usb_tablet=False, group="libvirt")
 
         self.start_service('conductor')
         self.start_service('scheduler')
diff --git a/nova/tests/functional/regressions/test_bug_1669054.py b/nova/tests/functional/regressions/test_bug_1669054.py
index 751466fd41..6180dbfbaa 100644
--- a/nova/tests/functional/regressions/test_bug_1669054.py
+++ b/nova/tests/functional/regressions/test_bug_1669054.py
@@ -57,14 +57,9 @@ class ResizeEvacuateTestCase(integrated_helpers._IntegratedTestBase):
         host2.stop()
         self.api.force_down_service('host2', 'nova-compute', forced_down=True)
         # Now try to evacuate the server back to the original source compute.
-        req = {'evacuate': {'onSharedStorage': False}}
-        self.api.post_server_action(server['id'], req)
-        server = self._wait_for_state_change(server, 'ACTIVE')
-        # The evacuate flow in the compute manager is annoying in that it
-        # sets the instance status to ACTIVE before updating the host, so we
-        # have to wait for the migration record to be 'done' to avoid a race.
-        self._wait_for_migration_status(server, ['done'])
-        self.assertEqual(self.compute.host, server['OS-EXT-SRV-ATTR:host'])
+        server = self._evacuate_server(
+            server, {'onSharedStorage': 'False'},
+            expected_host=self.compute.host, expected_migration_status='done')
 
         # Assert the RequestSpec.ignore_hosts field is not populated.
         reqspec = objects.RequestSpec.get_by_instance_uuid(
diff --git a/nova/tests/functional/regressions/test_bug_1702454.py b/nova/tests/functional/regressions/test_bug_1702454.py
index 097f6cedae..808665b24c 100644
--- a/nova/tests/functional/regressions/test_bug_1702454.py
+++ b/nova/tests/functional/regressions/test_bug_1702454.py
@@ -110,15 +110,14 @@ class SchedulerOnlyChecksTargetTest(test.TestCase,
         # only possibility the instance can end up on it is because the
         # scheduler should only verify the requested destination as host2
         # is weighed lower than host3.
-        evacuate = {
-            'evacuate': {
-                'host': 'host2'
-            }
+        target_host = 'host2'
+
+        post_args = {
+            'host': target_host
         }
-        self.admin_api.post_server_action(server['id'], evacuate)
-        self._wait_for_state_change(server, 'ACTIVE')
-        server = self.admin_api.get_server(server_id)
+        server = self._evacuate_server(
+            server, extra_post_args=post_args, expected_host=target_host)
 
         # Yeepee, that works!
-        self.assertEqual('host2', server['OS-EXT-SRV-ATTR:host'])
+        self.assertEqual(target_host, server['OS-EXT-SRV-ATTR:host'])
diff --git a/nova/tests/functional/regressions/test_bug_1713783.py b/nova/tests/functional/regressions/test_bug_1713783.py
index 521d447079..86e9ae919c 100644
--- a/nova/tests/functional/regressions/test_bug_1713783.py
+++ b/nova/tests/functional/regressions/test_bug_1713783.py
@@ -96,14 +96,11 @@ class FailedEvacuateStateTests(test.TestCase,
         fake_notifier.reset()
 
         # Initiate evacuation
-        post = {'evacuate': {}}
-        self.api.post_server_action(server['id'], post)
-
+        self._evacuate_server(
+            server, expected_state='ERROR', expected_host=self.hostname,
+            expected_migration_status='error')
 
         self._wait_for_notification_event_type('compute_task.rebuild_server')
 
-        server = self._wait_for_state_change(server, 'ERROR')
-        self.assertEqual(self.hostname, server['OS-EXT-SRV-ATTR:host'])
-
         # Check migrations
         migrations = self.api.get_migrations()
         self.assertEqual(1, len(migrations))
diff --git a/nova/tests/functional/regressions/test_bug_1764883.py b/nova/tests/functional/regressions/test_bug_1764883.py
index d8d97276e6..431af81d86 100644
--- a/nova/tests/functional/regressions/test_bug_1764883.py
+++ b/nova/tests/functional/regressions/test_bug_1764883.py
@@ -97,12 +97,9 @@ class TestEvacuationWithSourceReturningDuringRebuild(
         self.computes.get(self.source_compute).stop()
         self.api.force_down_service(self.source_compute, 'nova-compute', True)
 
-        # Start evacuating the instance from the source_host
-        self.api.post_server_action(server['id'], {'evacuate': {}})
-
-        # Wait for the instance to go into an ACTIVE state
-        self._wait_for_state_change(server, 'ACTIVE')
-        server = self.api.get_server(server['id'])
+        # Evacuate the instance from the source_host
+        server = self._evacuate_server(
+            server, expected_migration_status='done')
 
         host = server['OS-EXT-SRV-ATTR:host']
         migrations = self.api.get_migrations()
diff --git a/nova/tests/functional/regressions/test_bug_1794996.py b/nova/tests/functional/regressions/test_bug_1794996.py
index ee0756e603..15ed5e0647 100644
--- a/nova/tests/functional/regressions/test_bug_1794996.py
+++ b/nova/tests/functional/regressions/test_bug_1794996.py
@@ -52,12 +52,7 @@ class TestEvacuateDeleteServerRestartOriginalCompute(
             source_compute_id, {'forced_down': 'true'})
 
         # evacuate the server
-        post = {'evacuate': {}}
-        self.api.post_server_action(
-            server['id'], post)
-        expected_params = {'OS-EXT-SRV-ATTR:host': dest_hostname,
-                           'status': 'ACTIVE'}
-        server = self._wait_for_server_parameter(server, expected_params)
+        server = self._evacuate_server(server, expected_host=dest_hostname)
 
         # Expect to have allocation and usages on both computes as the
         # source compute is still down
diff --git a/nova/tests/functional/regressions/test_bug_1815153.py b/nova/tests/functional/regressions/test_bug_1815153.py
index cadd20c8d8..5860187e71 100644
--- a/nova/tests/functional/regressions/test_bug_1815153.py
+++ b/nova/tests/functional/regressions/test_bug_1815153.py
@@ -142,11 +142,9 @@ class NonPersistentFieldNotResetTest(
         # Its status becomes 'ACTIVE'.
         # If requested_destination is not reset, a status of the server
         # becomes 'ERROR' because the target host is down.
-        self.api.post_server_action(
-            server['id'], {'evacuate': {'host': target_host}})
-        expected_params = {'OS-EXT-SRV-ATTR:host': original_host,
-                           'status': 'ERROR'}
-        server = self._wait_for_server_parameter(server, expected_params)
+        server = self._evacuate_server(
+            server, {'host': target_host}, expected_host=original_host,
+            expected_state='ERROR', expected_migration_status='error')
 
         # Make sure 'is_bfv' is set.
         reqspec = objects.RequestSpec.get_by_instance_uuid(self.ctxt,
diff --git a/nova/tests/functional/regressions/test_bug_1823370.py b/nova/tests/functional/regressions/test_bug_1823370.py
index 30aa88a183..5e69905f5f 100644
--- a/nova/tests/functional/regressions/test_bug_1823370.py
+++ b/nova/tests/functional/regressions/test_bug_1823370.py
@@ -64,8 +64,6 @@ class MultiCellEvacuateTestCase(integrated_helpers._IntegratedTestBase):
         # Now evacuate the server which should send it to host3 since it is
         # in the same cell as host1, even though host2 in cell2 is weighed
         # higher than host3.
-        req = {'evacuate': {'onSharedStorage': False}}
-        self.api.post_server_action(server['id'], req)
-        self._wait_for_migration_status(server, ['done'])
-        server = self._wait_for_state_change(server, 'ACTIVE')
-        self.assertEqual('host3', server['OS-EXT-SRV-ATTR:host'])
+        self._evacuate_server(
+            server, {'onSharedStorage': 'False'}, expected_host='host3',
+            expected_migration_status='done')
diff --git a/nova/tests/functional/regressions/test_bug_1896463.py b/nova/tests/functional/regressions/test_bug_1896463.py
new file mode 100644
index 0000000000..dc74791e0e
--- /dev/null
+++ b/nova/tests/functional/regressions/test_bug_1896463.py
@@ -0,0 +1,222 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import copy
+import fixtures
+import time
+
+from oslo_config import cfg
+
+from nova import context
+from nova import objects
+from nova import test
+from nova.tests import fixtures as nova_fixtures
+from nova.tests.functional import fixtures as func_fixtures
+from nova.tests.functional import integrated_helpers
+from nova import utils
+from nova.virt import fake
+
+
+CONF = cfg.CONF
+
+
+class TestEvacuateResourceTrackerRace(
+    test.TestCase, integrated_helpers.InstanceHelperMixin,
+):
+    """Demonstrate bug #1896463.
+
+    Trigger a race condition between an almost-finished evacuation that is
+    dropping the migration context, and the _update_available_resource()
+    periodic task that has already loaded the instance list but has not yet
+    loaded the migration list. The result is that the PCI allocation made
+    by the evacuation is deleted by the overlapping periodic task run and
+    the instance is left without a PCI allocation after the evacuation.
+ """ + + def setUp(self): + super().setUp() + self.neutron = self.useFixture(nova_fixtures.NeutronFixture(self)) + self.glance = self.useFixture(nova_fixtures.GlanceFixture(self)) + self.placement = self.useFixture(func_fixtures.PlacementFixture()).api + + self.api_fixture = self.useFixture(nova_fixtures.OSAPIFixture( + api_version='v2.1')) + + self.admin_api = self.api_fixture.admin_api + self.admin_api.microversion = 'latest' + self.api = self.admin_api + + self.start_service('conductor') + self.start_service('scheduler') + + self.flags(compute_driver='fake.FakeDriverWithPciResources') + self.useFixture( + fake.FakeDriverWithPciResources. + FakeDriverWithPciResourcesConfigFixture()) + + self.compute1 = self._start_compute('host1') + self.compute1_id = self._get_compute_node_id_by_host('host1') + self.compute1_service_id = self.admin_api.get_services( + host='host1', binary='nova-compute')[0]['id'] + + self.compute2 = self._start_compute('host2') + self.compute2_id = self._get_compute_node_id_by_host('host2') + self.compute2_service_id = self.admin_api.get_services( + host='host2', binary='nova-compute')[0]['id'] + + # add extra ports and the related network to the neutron fixture + # specifically for these tests. It cannot be added globally in the + # fixture init as it adds a second network that makes auto allocation + # based test to fail due to ambiguous networks. + self.neutron._ports[self.neutron.sriov_port['id']] = \ + copy.deepcopy(self.neutron.sriov_port) + self.neutron._networks[ + self.neutron.network_2['id']] = self.neutron.network_2 + self.neutron._subnets[ + self.neutron.subnet_2['id']] = self.neutron.subnet_2 + + self.ctxt = context.get_admin_context() + + def _get_compute_node_id_by_host(self, host): + # we specifically need the integer id of the node not the UUID so we + # need to use the old microversion + with utils.temporary_mutation(self.admin_api, microversion='2.52'): + hypers = self.admin_api.api_get( + 'os-hypervisors').body['hypervisors'] + for hyper in hypers: + if hyper['hypervisor_hostname'] == host: + return hyper['id'] + + self.fail('Hypervisor with hostname=%s not found' % host) + + def _assert_pci_device_allocated( + self, instance_uuid, compute_node_id, num=1): + """Assert that a given number of PCI devices are allocated to the + instance on the given host. + """ + + devices = objects.PciDeviceList.get_by_instance_uuid( + self.ctxt, instance_uuid) + devices_on_host = [dev for dev in devices + if dev.compute_node_id == compute_node_id] + self.assertEqual(num, len(devices_on_host)) + + def test_evacuate_races_with_update_available_resource(self): + # Create a server with a direct port to have PCI allocation + server = self._create_server( + name='test-server-for-bug-1896463', + networks=[{'port': self.neutron.sriov_port['id']}], + host='host1' + ) + + self._assert_pci_device_allocated(server['id'], self.compute1_id) + self._assert_pci_device_allocated( + server['id'], self.compute2_id, num=0) + + # stop and force down the compute the instance is on to allow + # evacuation + self.compute1.stop() + self.admin_api.put_service( + self.compute1_service_id, {'forced_down': 'true'}) + + # Inject some sleeps both in the Instance.drop_migration_context and + # the MigrationList.get_in_progress_and_error code to make them + # overlap. + # We want to create the following execution scenario: + # 1) The evacuation makes a move claim on the dest including the PCI + # claim. This means there is a migration context. 
+        #    But the evacuation is not complete yet, so instance.host does
+        #    not yet point to the dest host.
+        # 2) The dest resource tracker starts an
+        #    _update_available_resource() periodic task and this task loads
+        #    the list of instances on its host from the DB. Our instance is
+        #    not in this list due to 1).
+        # 3) The evacuation finishes: instance.host is set to the dest host
+        #    and the migration context is deleted.
+        # 4) The periodic task now loads the list of in-progress migrations
+        #    from the DB to check for incoming or outgoing migrations.
+        #    However, due to 3), our instance is not in this list either.
+        # 5) The periodic task cleans up every lingering PCI claim that is
+        #    not connected to any instance collected above from the instance
+        #    list and from the migration list. As our instance is in neither
+        #    of the lists, the resource tracker cleans up the PCI allocation
+        #    for the already finished evacuation of our instance.
+        #
+        # Unfortunately we cannot reproduce the above situation without
+        # sleeps. We need the evac to start first, then the periodic to
+        # start but not finish, then the evac to finish, then the periodic
+        # to finish. If I trigger and run the whole periodic in a wrapper of
+        # drop_migration_context then I cannot reproduce the situation
+        # described at 4). In general it is not
+        #
+        # evac
+        #  |
+        #  |
+        #  |     periodic
+        #  |        |
+        #  |        |
+        #  |        x
+        #  |
+        #  |
+        #  x
+        #
+        # but
+        #
+        # evac
+        #  |
+        #  |
+        #  |     periodic
+        #  |        |
+        #  |        |
+        #  |        |
+        #  x        |
+        #           |
+        #           x
+        #
+        # that is needed.
+        #
+        # Starting the periodic from the test in a separate thread at
+        # drop_migration_context() might work, but that is extra complexity
+        # in the test code. Also it might still need a sleep to make the
+        # reproduction stable, but only one sleep instead of two.
+        orig_drop = objects.Instance.drop_migration_context
+
+        def slow_drop(*args, **kwargs):
+            time.sleep(1)
+            return orig_drop(*args, **kwargs)
+
+        self.useFixture(
+            fixtures.MockPatch(
+                'nova.objects.instance.Instance.drop_migration_context',
+                new=slow_drop))
+
+        orig_get_mig = objects.MigrationList.get_in_progress_and_error
+
+        def slow_get_mig(*args, **kwargs):
+            time.sleep(2)
+            return orig_get_mig(*args, **kwargs)
+
+        self.useFixture(
+            fixtures.MockPatch(
+                'nova.objects.migration.MigrationList.'
+                'get_in_progress_and_error',
+                new=slow_get_mig))
+
+        self.admin_api.post_server_action(server['id'], {'evacuate': {}})
+        # Trigger the _update_available_resource periodic to overlap with
+        # the already started evacuation
+        self._run_periodics()
+
+        self._wait_for_server_parameter(
+            server, {'OS-EXT-SRV-ATTR:host': 'host2', 'status': 'ACTIVE'})
+
+        self._assert_pci_device_allocated(server['id'], self.compute1_id)
+        self._assert_pci_device_allocated(server['id'], self.compute2_id)
diff --git a/nova/tests/functional/regressions/test_bug_1899649.py b/nova/tests/functional/regressions/test_bug_1899649.py
new file mode 100644
index 0000000000..be75ea947f
--- /dev/null
+++ b/nova/tests/functional/regressions/test_bug_1899649.py
@@ -0,0 +1,100 @@
+# Copyright 2020, Red Hat, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+from nova.tests import fixtures as nova_fixtures
+from nova.tests.functional.libvirt import base
+from nova.tests.unit.virt.libvirt import fakelibvirt
+
+
+class TestVolAttachmentsAfterFailureToScheduleOrBuild(base.ServersTestBase):
+    """Regression test for bug #1899649.
+
+    This regression test aims to ensure a volume attachment remains in
+    place after a failure either to schedule a server or to build it
+    directly on a compute after skipping the scheduler.
+
+    A volume attachment must remain after such failures to ensure the
+    volume itself remains marked as reserved.
+
+    To keep this as realistic as possible the tests use the libvirt
+    functional base class to mimic a real-world example with NUMA nodes
+    being requested via flavor extra specs, the underlying compute being
+    unable to meet this request and thus ensuring a failure.
+    """
+
+    microversion = 'latest'
+
+    def setUp(self):
+        super().setUp()
+
+        # Launch a single libvirt-based compute service with a single NUMA
+        # node
+        host_info = fakelibvirt.HostInfo(
+            cpu_nodes=1, cpu_sockets=1, cpu_cores=2, kB_mem=15740000)
+        self.start_compute(host_info=host_info, hostname='compute1')
+
+        # Use a flavor requesting 2 NUMA nodes that we know will always fail
+        self.flavor_id = self._create_flavor(
+            extra_spec={'hw:numa_nodes': '2'})
+
+        # Craft a common bfv server request for use within each test
+        self.volume_id = nova_fixtures.CinderFixture.IMAGE_BACKED_VOL
+        self.server = {
+            'name': 'test',
+            'flavorRef': self.flavor_id,
+            'imageRef': '',
+            'networks': 'none',
+            'block_device_mapping_v2': [{
+                'source_type': 'volume',
+                'destination_type': 'volume',
+                'boot_index': 0,
+                'uuid': self.volume_id}]
+        }
+
+    def _assert_failure_and_volume_attachments(self, server):
+        # Assert that the server is in an ERROR state
+        self._wait_for_state_change(server, 'ERROR')
+
+        # Assert that the volume is in a reserved state. As this isn't
+        # modelled by the CinderFixture we just assert that a single volume
+        # attachment remains after the failure and that it is referenced by
+        # the server.
+        attachments = self.cinder.volume_to_attachment.get(self.volume_id)
+        self.assertEqual(1, len(attachments))
+        self.assertIn(
+            self.volume_id,
+            self.cinder.volume_ids_for_instance(server['id']))
+
+    def test_failure_to_schedule(self):
+        # Assert that a volume attachment remains after a failure to
+        # schedule
+        server = self.api.post_server({'server': self.server})
+        self._assert_failure_and_volume_attachments(server)
+
+    def test_failure_to_schedule_with_az(self):
+        # Assert that a volume attachment remains after a failure to
+        # schedule with the addition of an availability_zone in the request
+        self.server['availability_zone'] = 'nova'
+        server = self.api.post_server({'server': self.server})
+        self._assert_failure_and_volume_attachments(server)
+
+    def test_failure_to_schedule_with_host(self):
+        # Assert that a volume attachment remains after a failure to
+        # schedule using the optional host parameter introduced in
+        # microversion 2.74
+        self.server['host'] = 'compute1'
+        server = self.admin_api.post_server({'server': self.server})
+        self._assert_failure_and_volume_attachments(server)
+
+    def test_failure_to_build_with_az_and_host(self):
+        # Assert that a volume attachment remains after a failure to build
+        # and reschedule when providing an availability_zone *and* host,
+        # skipping the scheduler. This is bug #1899649.
+        self.server['availability_zone'] = 'nova:compute1'
+        server = self.admin_api.post_server({'server': self.server})
+        self._assert_failure_and_volume_attachments(server)
diff --git a/nova/tests/functional/regressions/test_bug_1902925.py b/nova/tests/functional/regressions/test_bug_1902925.py
new file mode 100644
index 0000000000..fb5f5251e5
--- /dev/null
+++ b/nova/tests/functional/regressions/test_bug_1902925.py
@@ -0,0 +1,44 @@
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+from nova.tests.functional import integrated_helpers
+from nova.tests.unit import cast_as_call
+
+
+class ComputeVersion5xPinnedRpcTests(integrated_helpers._IntegratedTestBase):
+
+    compute_driver = 'fake.MediumFakeDriver'
+    ADMIN_API = True
+    api_major_version = 'v2.1'
+    microversion = 'latest'
+
+    def setUp(self):
+        super(ComputeVersion5xPinnedRpcTests, self).setUp()
+        self.useFixture(cast_as_call.CastAsCall(self))
+
+        self.compute1 = self._start_compute(host='host1')
+
+    def _test_rebuild_instance_with_compute_rpc_pin(self, version_cap):
+        self.flags(compute=version_cap, group='upgrade_levels')
+
+        server_req = self._build_server(networks='none')
+        server = self.api.post_server({'server': server_req})
+        server = self._wait_for_state_change(server, 'ACTIVE')
+
+        self.api.post_server_action(server['id'], {'rebuild': {
+            'imageRef': '155d900f-4e14-4e4c-a73d-069cbf4541e6'
+        }})
+
+    def test_rebuild_instance_5_0(self):
+        self._test_rebuild_instance_with_compute_rpc_pin('5.0')
+
+    def test_rebuild_instance_5_12(self):
+        self._test_rebuild_instance_with_compute_rpc_pin('5.12')
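
Note on the refactor: every converted hunk above delegates to integrated_helpers.InstanceHelperMixin._evacuate_server(), whose definition lives outside this diffstat (it is limited to nova/tests/functional/regressions). As a reading aid only, here is a minimal sketch of the shape its call sites imply; the signature, defaults and body below are assumptions reconstructed from this diff, not the actual nova helper:

    def _evacuate_server(
            self, server, extra_post_args=None, expected_host=None,
            expected_state='ACTIVE', expected_migration_status=None):
        """Evacuate a server and wait for the expected end state (sketch)."""
        # Build the evacuate action body, merging extra parameters such as
        # 'host' or 'onSharedStorage' passed in by the individual tests.
        post = {'evacuate': {}}
        if extra_post_args:
            post['evacuate'].update(extra_post_args)
        self.api.post_server_action(server['id'], post)
        # Wait on the migration record first: the compute manager sets the
        # instance status before updating the host, so polling only the
        # server status races (see the comment removed from
        # test_bug_1669054 above).
        if expected_migration_status:
            self._wait_for_migration_status(
                server, [expected_migration_status])
        # Then wait for the server to reach the expected status and, when
        # given, the expected host.
        expected_params = {'status': expected_state}
        if expected_host:
            expected_params['OS-EXT-SRV-ATTR:host'] = expected_host
        return self._wait_for_server_parameter(server, expected_params)

Under that assumed shape, a call such as self._evacuate_server(server, {'host': target_host}, expected_host=original_host, expected_state='ERROR', expected_migration_status='error') reproduces exactly the post-action/wait sequence the removed blocks used to spell out inline.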