diff options
author | Matt Riedemann <mriedem.os@gmail.com> | 2017-09-20 14:24:44 -0400 |
---|---|---|
committer | Matt Riedemann <mriedem.os@gmail.com> | 2018-04-10 17:30:30 -0400 |
commit | 0d111f17c4e411a42beb6c80bc27e6455a71c48c (patch) | |
tree | b34149154ce0e857469bc39b993668c2e0a12c78 | |
parent | 781e7b3fcda3c972b95e4d32db08cd8e9d0694d9 (diff) | |
download | nova-0d111f17c4e411a42beb6c80bc27e6455a71c48c.tar.gz |
Add regression test for persisted RequestSpec.retry from failed resize
Commit 74ab427d4796d8a386f84a15cc49188c2a60f8f1 in Newton added
code to persist changes to the RequestSpec during a resize since
the flavor changes.
That change inadvertently also persisted any failed hosts during
the resize that are stored in the RequestSpec.retry field during
a reschedule.
The problem is that later those persisted failed hosts are rejected
by the RetryFilter, which can be confusing if an admin is trying
to live migrate or evacuate the instance to one of those specific
hosts.
This adds a functional regression test to show the failure, which
will be fixed in a separate change that then modifies the assertions.
NOTE(mriedem): There are two changes in this backport:
1. The functional test needed to change slightly to disable the
DiskFilter since 2fe96819c24eff5a9493a6559f3e8d5b4624a8c9 is
not in Ocata.
2. The test needs to use 'api_post' directly on the API client for
the confirmResize call since the check_response_status kwarg
wasn't in post_server_action until 8dd11ca1b34e1ed58b4 in Pike.
Change-Id: Ib8a23db838b0bbf2cfb8123cf6aaa39d00ff0640
Related-Bug: #1718512
(cherry picked from commit 89448bea577b30c40ce39185d14fe14f9c61a0c2)
(cherry picked from commit c2dc902e39eb345ebf674ad47422f1e72ec170e6)
(cherry picked from commit 004e9acf99964ac78f85d3efbd0a04404bd9a3ef)
-rw-r--r-- | nova/tests/functional/regressions/test_bug_1718512.py | 168 |
1 files changed, 168 insertions, 0 deletions
# New file (mode 100644): nova/tests/functional/regressions/test_bug_1718512.py
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

from nova.compute import manager as compute_manager
import nova.conf
from nova import context as nova_context
from nova import objects
from nova.scheduler import weights
from nova import test
from nova.tests import fixtures as nova_fixtures
from nova.tests.functional import integrated_helpers
from nova.tests.unit.image import fake as image_fake
from nova.tests.unit import policy_fixture
from nova.virt import fake

CONF = nova.conf.CONF


class HostNameWeigher(weights.BaseHostWeigher):
    """Weigher that ranks hosts by name so scheduling order is predictable."""

    # Fixed weight per host name; any host not listed here weighs 0.
    # Kept at class level so the local name does not shadow the imported
    # ``weights`` module and the mapping is not rebuilt on every call.
    _HOST_WEIGHTS = {'host1': 100, 'host2': 50, 'host3': 1}

    def _weigh_object(self, host_state, weight_properties):
        """Arbitrary preferring host1 over host2 over host3.

        :param host_state: object whose ``host`` attribute is the host name
        :param weight_properties: unused by this weigher
        :returns: the configured weight for the host, or 0 if unknown
        """
        return self._HOST_WEIGHTS.get(host_state.host, 0)


class TestRequestSpecRetryReschedule(test.TestCase,
                                     integrated_helpers.InstanceHelperMixin):
    """Regression test for bug 1718512 introduced in Newton.

    Contains a test for a regression where an instance builds on one host,
    then is resized. During the resize, the first attempted host fails and
    the resize is rescheduled to another host which passes. The failed host
    is persisted in the RequestSpec.retry field by mistake. Then later when
    trying to live migrate the instance to the same host that failed during
    resize, it is rejected by the RetryFilter because it's already in the
    RequestSpec.retry field.
    """
    def setUp(self):
        """Start the API, conductor, scheduler and three compute hosts."""
        super(TestRequestSpecRetryReschedule, self).setUp()
        self.useFixture(policy_fixture.RealPolicyFixture())

        # The NeutronFixture is needed to stub out validate_networks in API.
        self.useFixture(nova_fixtures.NeutronFixture(self))

        # We need the computes reporting into placement for the filter
        # scheduler to pick a host.
        self.useFixture(nova_fixtures.PlacementFixture())

        api_fixture = self.useFixture(nova_fixtures.OSAPIFixture(
            api_version='v2.1'))
        # The admin API is used to get the server details to verify the
        # host on which the server was built.
        self.admin_api = api_fixture.admin_api
        self.api = api_fixture.api

        # the image fake backend needed for image discovery
        image_fake.stub_out_image_service(self)
        self.addCleanup(image_fake.FakeImageService_reset)

        self.start_service('conductor')

        # We have to get the image before we use 2.latest otherwise we'll get
        # a 404 on the /images proxy API because of 2.36.
        self.image_id = self.api.get_images()[0]['id']

        # Use the latest microversion available to make sure something does
        # not regress in new microversions; cap as necessary.
        self.admin_api.microversion = 'latest'
        self.api.microversion = 'latest'

        # The consoleauth service is needed for deleting console tokens when
        # the server is deleted.
        self.start_service('consoleauth')

        # Remove the DiskFilter since we're using Placement for filtering on
        # DISK_GB. Build a new list rather than calling remove() on the
        # value handed back by CONF, so the config object's own list is never
        # mutated in place.
        enabled_filters = [
            name for name in CONF.filter_scheduler.enabled_filters
            if name != 'DiskFilter']
        # Use our custom weigher defined above to make sure that we have
        # a predictable scheduling sort order.
        self.flags(weight_classes=[__name__ + '.HostNameWeigher'],
                   enabled_filters=enabled_filters,
                   group='filter_scheduler')
        self.start_service('scheduler')

        # Start three compute services, one per fake node/host.
        for host in ['host1', 'host2', 'host3']:
            fake.set_nodes([host])
            self.addCleanup(fake.restore_nodes)
            self.start_service('compute', host=host)

    def _stub_resize_failure(self, failed_host):
        """Make ComputeManager._prep_resize raise on ``failed_host`` only.

        :param failed_host: name of the compute host whose _prep_resize call
            should raise; every other host calls the real implementation.
        """
        actual_prep_resize = compute_manager.ComputeManager._prep_resize

        def fake_prep_resize(_self, *args, **kwargs):
            if _self.host == failed_host:
                raise Exception('%s:fake_prep_resize' % failed_host)
            # Pass the real result through so the stub stays a transparent
            # wrapper on hosts that are not forced to fail.
            return actual_prep_resize(_self, *args, **kwargs)

        self.stub_out('nova.compute.manager.ComputeManager._prep_resize',
                      fake_prep_resize)

    def test_resize_with_reschedule_then_live_migrate(self):
        """Tests the following scenario:

        - Server is created on host1 successfully.
        - Server is resized; host2 is tried and fails, and rescheduled to
          host3.
        - Then try to live migrate the instance to host2 which should work.
        """
        flavors = self.api.get_flavors()
        flavor1 = flavors[0]
        flavor2 = flavors[1]
        if flavor1["disk"] > flavor2["disk"]:
            # Make sure that flavor1 is smaller
            flavor1, flavor2 = flavor2, flavor1

        # create the instance which should go to host1
        server = self.admin_api.post_server(
            dict(server=self._build_minimal_create_server_request(
                self.api, 'test_resize_with_reschedule_then_live_migrate',
                self.image_id, flavor_id=flavor1['id'], networks='none')))
        server = self._wait_for_state_change(self.admin_api, server, 'ACTIVE')
        self.assertEqual('host1', server['OS-EXT-SRV-ATTR:host'])

        # Stub out the resize to fail on host2, which will trigger a reschedule
        # to host3.
        self._stub_resize_failure('host2')

        # Resize the server to flavor2, which should make it ultimately end up
        # on host3.
        data = {'resize': {'flavorRef': flavor2['id']}}
        self.api.post_server_action(server['id'], data)
        server = self._wait_for_state_change(self.admin_api, server,
                                             'VERIFY_RESIZE')
        self.assertEqual('host3', server['OS-EXT-SRV-ATTR:host'])
        # api_post is called directly so the expected 204 status can be
        # passed via check_response_status.
        self.api.api_post('/servers/%s/action' % server['id'],
                          {'confirmResize': None}, check_response_status=[204])
        server = self._wait_for_state_change(self.admin_api, server, 'ACTIVE')

        # Now live migrate the server to host2 specifically, which previously
        # failed the resize attempt but here it should pass.
        data = {'os-migrateLive': {'host': 'host2', 'block_migration': 'auto'}}
        self.admin_api.post_server_action(server['id'], data)
        server = self._wait_for_state_change(self.admin_api, server, 'ACTIVE')
        # FIXME(mriedem): This is bug 1718512 where the failed resize left
        # host2 in the RequestSpec.retry field and it affects the live migrate
        # to host2 because the scheduler RetryFilter kicks it out.
        self.assertEqual('host3', server['OS-EXT-SRV-ATTR:host'])
        migrations = self.admin_api.api_get(
            'os-migrations?instance_uuid=%s&migration_type=live-migration' %
            server['id']).body['migrations']
        self.assertEqual(1, len(migrations))
        self.assertEqual('error', migrations[0]['status'])
        reqspec = objects.RequestSpec.get_by_instance_uuid(
            nova_context.get_admin_context(), server['id'])
        self.assertIsNotNone(reqspec.retry)
        self.assertEqual(1, reqspec.retry.num_attempts)
        self.assertEqual('host2', reqspec.retry.hosts[0].host)