summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Zuul <zuul@review.opendev.org> 2019-08-08 16:19:54 +0000
committer Gerrit Code Review <review@openstack.org> 2019-08-08 16:19:54 +0000
commit 979ec661de90f0e7eb2b16313d80a8e306f764b8 (patch)
tree 60d5e973b79fad7af552b81d979f34b6765240b9
parent 7efda0632d33255e3a5aaa665f5ba0c976119ff3 (diff)
parent f292a92a89b452c66b5799ac309a5f623ee7b16c (diff)
download nova-979ec661de90f0e7eb2b16313d80a8e306f764b8.tar.gz
Merge "Add functional regression test for bug 1837955" into stable/rocky
-rw-r--r-- nova/tests/functional/regressions/test_bug_1837955.py 115
1 files changed, 115 insertions, 0 deletions
diff --git a/nova/tests/functional/regressions/test_bug_1837955.py b/nova/tests/functional/regressions/test_bug_1837955.py
new file mode 100644
index 0000000000..e47702b9df
--- /dev/null
+++ b/nova/tests/functional/regressions/test_bug_1837955.py
@@ -0,0 +1,115 @@
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import time
+
+from nova import exception
+from nova.tests import fixtures as nova_fixtures
+from nova.tests.functional import integrated_helpers
+from nova.tests.unit import fake_notifier
+from nova.tests.unit.image import fake as fake_image
+
+
+class BuildRescheduleClaimFailsTestCase(
+ integrated_helpers.ProviderUsageBaseTestCase):
+ """Regression test case for bug 1837955 where a server build fails on the
+ primary host and, when the reschedule attempts to allocate resources on
+ the alternate host, that host is full and the allocations claim in
+ placement fails, resulting in the build failing due to MaxRetriesExceeded.
+ The server should then go to ERROR status, but because of the bug it is
+ left stuck in BUILD status.
+ """
+ compute_driver = 'fake.SmallFakeDriver'
+
+ def _wait_for_unversioned_notification(self, event_type):
+ """Poll fake_notifier.NOTIFICATIONS for a notification of event_type.
+
+ Returns the first recorded notification whose event_type equals the
+ given one. If none shows up after 20 polls spaced 0.5 seconds apart,
+ the test is failed with a message listing the notifications seen.
+ """
+ for x in range(20): # wait up to 10 seconds
+ for notification in fake_notifier.NOTIFICATIONS:
+ if notification.event_type == event_type:
+ return notification
+ time.sleep(.5)
+ self.fail('Timed out waiting for unversioned notification %s. Got: %s'
+ % (event_type, fake_notifier.NOTIFICATIONS))
+
+ def test_build_reschedule_alt_host_alloc_fails(self):
+ """Check that a failed reschedule ends in MaxRetriesExceeded.
+
+ Starts two computes, stubs ResourceTracker.instance_claim so that a
+ claim first allocates all VCPU of the other host in placement and then
+ raises ComputeResourcesUnavailable, creates a server, and polls
+ self.stdlog.logger.output until 'MaxRetriesExceeded' appears.
+ """
+ # Start two compute services so we have one alternate host.
+ # Set cpu_allocation_ratio=1.0 to make placement inventory
+ # and allocations for VCPU easier to manage.
+ self.flags(cpu_allocation_ratio=1.0)
+ for x in range(2):
+ self._start_compute('host%i' % x)
+
+ def fake_instance_claim(_self, _context, _inst, nodename, *a, **kw):
+ # Before triggering the reschedule to the other host, max out the
+ # capacity on the alternate host.
+ # NOTE(review): assumes nodename equals the host name ('host0' or
+ # 'host1') given to _start_compute above - confirm for this driver.
+ alt_nodename = 'host0' if nodename == 'host1' else 'host1'
+ rp_uuid = self._get_provider_uuid_by_host(alt_nodename)
+ inventories = self._get_provider_inventory(rp_uuid)
+ # Fake some other consumer taking all of the VCPU on the alt host.
+ # Since we set cpu_allocation_ratio=1.0 the total is the total
+ # capacity for VCPU on the host.
+ total_vcpu = inventories['VCPU']['total']
+ alt_consumer = '7d32d0bc-af16-44b2-8019-a24925d76152'
+ allocs = {
+ 'allocations': {
+ rp_uuid: {
+ 'resources': {
+ 'VCPU': total_vcpu
+ }
+ }
+ },
+ 'project_id': self.api.project_id,
+ # NOTE(review): user_id reuses the project id; presumably any
+ # value is acceptable for this fake consumer - confirm.
+ 'user_id': self.api.project_id
+ }
+ resp = self.placement_api.put(
+ '/allocations/%s' % alt_consumer, allocs, version='1.12')
+ self.assertEqual(204, resp.status, resp.content)
+ # Fail the claim on the current host so the build is rescheduled.
+ raise exception.ComputeResourcesUnavailable(reason='overhead!')
+
+ # Stub out the instance claim (regardless of which host the scheduler
+ # picks as the primary) to trigger a reschedule.
+ self.stub_out('nova.compute.manager.resource_tracker.ResourceTracker.'
+ 'instance_claim', fake_instance_claim)
+
+ # Now that our stub is in place, try to create a server and wait for it
+ # to go to ERROR status.
+ server = self._build_minimal_create_server_request(
+ self.api, 'test_build_reschedule_alt_host_alloc_fails',
+ image_uuid=fake_image.get_valid_image_id(),
+ networks=[{'port': nova_fixtures.NeutronFixture.port_1['id']}])
+ server = self.api.post_server({'server': server})
+ # FIXME(mriedem): This is bug 1837955 where the status is stuck in
+ # BUILD rather than the vm_state being set to error and the task_state
+ # being set to None. Uncomment this when the bug is fixed.
+ # server = self._wait_for_state_change(self.api, server, 'ERROR')
+
+ # Wait for the MaxRetriesExceeded fault to be recorded.
+ # set_vm_state_and_notify sets the vm_state to ERROR before the fault
+ # is recorded but after the notification is sent. So wait for the
+ # unversioned notification to show up and then get the fault.
+ # FIXME(mriedem): Uncomment this when bug 1837955 is fixed.
+ # self._wait_for_unversioned_notification(
+ # 'compute_task.build_instances')
+ # server = self.api.get_server(server['id'])
+ # self.assertIn('fault', server)
+ # self.assertIn('Exceeded maximum number of retries',
+ # server['fault']['message'])
+
+ # TODO(mriedem): Remove this when the bug is fixed. We need to assert
+ # something before the bug is fixed to show the failure so check the
+ # logs.
+ # Poll up to 20 times, 0.5 seconds apart. The else clause of the loop
+ # runs only when no iteration hit the break, i.e. on timeout.
+ for x in range(20):
+ logs = self.stdlog.logger.output
+ if 'MaxRetriesExceeded' in logs:
+ break
+ time.sleep(.5)
+ else:
+ self.fail('Timed out waiting for MaxRetriesExceeded to show up '
+ 'in the logs.')