author    Rodrigo Barbieri <rodrigo.barbieri2010@gmail.com>  2021-03-31 11:06:49 -0300
committer Rodrigo Barbieri <rodrigo.barbieri2010@gmail.com>  2021-07-09 15:20:15 -0300
commit    5fa8718fe57e59b178d081e2068109151fdc3926 (patch)
tree      6142c21cfd7dd397cb6aa1a2652bee1f2b7318b5
parent    77b433b61e239e0b79736c7e9994a9b445b90d00 (diff)
download  nova-5fa8718fe57e59b178d081e2068109151fdc3926.tar.gz
Error anti-affinity violation on migrations
Error-out the migrations (cold and live) whenever the anti-affinity
policy is violated. This addresses violations when multiple concurrent
migrations are requested.

Added detection on:
- prep_resize
- check_can_live_migrate_destination
- pre_live_migration

The improved method of detection now locks based on group_id and
considers other in-progress migrations as well.

Conflicts:
    nova/tests/unit/compute/test_compute_mgr.py

NOTE: Conflicts are because the following changes are not in Stein:

* Ia00277ac8a68a635db85f9e0ce2c6d8df396e0d8 (Set migrate_data.vifs only
  when using multiple port bindings)
* I3c917796cb30d11e7db1e235ac1625d2a743aaa2 (NUMA live migration support)
* I2f3434f06489d8b6cb80933bcb1ea1e841049ba5 (Support migrating SRIOV port
  with bandwidth)
* I292a0e2d840bbf657ba6d0932f9a3decbcb2778f ([FUP] Follow-up patch for
  SR-IOV live migration)
* I734cc01dce13f9e75a16639faf890ddb1661b7eb (SR-IOV Live migration
  indirect port support)

Summary of conflicts:

The cherry-pick pulled in many unrelated newer unit tests. Those were
cleaned up and adjusted:
- Removed 2 extra params of check_can_live_migrate_destination
  invocations.
- Adjusted the request_spec variable of unit test
  test_prep_resize_errors_migration.
- Removed extra tab spacing on a unit test.

Closes-bug: #1821755

Change-Id: I32e6214568bb57f7613ddeba2c2c46da0320fabc
(cherry picked from commit 33c8af1f8c46c9c37fcc28fb3409fbd3a78ae39f)
(cherry picked from commit 8b62a4ec9bf617dfb2da046c25a9f76b33516508)
(cherry picked from commit 6ede6df7f41db809de19e124d3d4994180598f19)
(cherry picked from commit bf90a1e06181f6b328b967124e538c6e2579b2e5)
(cherry picked from commit a22d1b04de9e6ebc33b5ab9871b86f8e4022e7a9)
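For illustration only, a minimal sketch of the detection approach described above: serialize on the group UUID, then count both the instances already on the host and the instances with in-progress migrations toward it before admitting another member of an anti-affinity group. The helpers get_instances_on_host and get_inprogress_migration_uuids are hypothetical stand-ins for the InstanceList and MigrationList lookups; the actual change is in nova/compute/manager.py below.

# Illustrative sketch only -- not the Nova code.
import threading
from collections import defaultdict

_group_locks = defaultdict(threading.Lock)   # one lock per group UUID


class AntiAffinityViolation(Exception):
    pass


def validate_anti_affinity(group_uuid, group_members, instance_uuid, host,
                           get_instances_on_host,
                           get_inprogress_migration_uuids,
                           max_server_per_host=1):
    """Raise if placing instance_uuid on host would put too many members
    of the same anti-affinity group on that host."""
    with _group_locks[group_uuid]:
        # Instances already running on the host...
        on_host = set(get_instances_on_host(host))
        # ...plus instances currently being migrated to it both count
        # toward the anti-affinity limit.
        migrating_in = set(get_inprogress_migration_uuids(host))
        total = on_host | migrating_in

        members_on_host = (total & set(group_members)) - {instance_uuid}
        if len(members_on_host) >= max_server_per_host:
            raise AntiAffinityViolation(
                "group %s already has %d member(s) on host %s"
                % (group_uuid, len(members_on_host), host))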
-rw-r--r--  nova/compute/manager.py                                 110
-rw-r--r--  nova/tests/unit/compute/test_compute_mgr.py             112
-rw-r--r--  releasenotes/notes/bug-1821755-7bd03319e34b6b10.yaml     11
3 files changed, 206 insertions(+), 27 deletions(-)
diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index c7a0f37e92..1c29aa278f 100644
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -1474,7 +1474,11 @@ class ComputeManager(manager.Manager):
return [_decode(f) for f in injected_files]
def _validate_instance_group_policy(self, context, instance,
- scheduler_hints):
+ scheduler_hints=None):
+
+ if CONF.workarounds.disable_group_policy_check_upcall:
+ return
+
# NOTE(russellb) Instance group policy is enforced by the scheduler.
# However, there is a race condition with the enforcement of
# the policy. Since more than one instance may be scheduled at the
@@ -1483,29 +1487,63 @@ class ComputeManager(manager.Manager):
# multiple instances with an affinity policy could end up on different
# hosts. This is a validation step to make sure that starting the
# instance here doesn't violate the policy.
- group_hint = scheduler_hints.get('group')
- if not group_hint:
- return
-
- # The RequestSpec stores scheduler_hints as key=list pairs so we need
- # to check the type on the value and pull the single entry out. The
- # API request schema validates that the 'group' hint is a single value.
- if isinstance(group_hint, list):
- group_hint = group_hint[0]
+ if scheduler_hints is not None:
+ # only go through here if scheduler_hints is provided, even if it
+ # is empty.
+ group_hint = scheduler_hints.get('group')
+ if not group_hint:
+ return
+ else:
+ # The RequestSpec stores scheduler_hints as key=list pairs so
+ # we need to check the type on the value and pull the single
+ # entry out. The API request schema validates that
+ # the 'group' hint is a single value.
+ if isinstance(group_hint, list):
+ group_hint = group_hint[0]
+
+ group = objects.InstanceGroup.get_by_hint(context, group_hint)
+ else:
+ # TODO(ganso): a call to DB can be saved by adding request_spec
+ # to rpcapi payload of live_migration, pre_live_migration and
+ # check_can_live_migrate_destination
+ try:
+ group = objects.InstanceGroup.get_by_instance_uuid(
+ context, instance.uuid)
+ except exception.InstanceGroupNotFound:
+ return
- @utils.synchronized(group_hint)
- def _do_validation(context, instance, group_hint):
- group = objects.InstanceGroup.get_by_hint(context, group_hint)
+ @utils.synchronized(group['uuid'])
+ def _do_validation(context, instance, group):
if group.policy and 'anti-affinity' == group.policy:
+
+ # instances on host
instances_uuids = objects.InstanceList.get_uuids_by_host(
context, self.host)
ins_on_host = set(instances_uuids)
+
+ # instance param is just for logging, the nodename obtained is
+ # not actually related to the instance at all
+ nodename = self._get_nodename(instance)
+
+ # instances being migrated to host
+ migrations = (
+ objects.MigrationList.get_in_progress_by_host_and_node(
+ context, self.host, nodename))
+ migration_vm_uuids = set([mig['instance_uuid']
+ for mig in migrations])
+
+ total_instances = migration_vm_uuids | ins_on_host
+
+ # refresh group to get updated members within locked block
+ group = objects.InstanceGroup.get_by_uuid(context,
+ group['uuid'])
members = set(group.members)
# Determine the set of instance group members on this host
# which are not the instance in question. This is used to
# determine how many other members from the same anti-affinity
# group can be on this host.
- members_on_host = ins_on_host & members - set([instance.uuid])
+ members_on_host = (total_instances & members -
+ set([instance.uuid]))
rules = group.rules
if rules and 'max_server_per_host' in rules:
max_server = rules['max_server_per_host']
@@ -1517,6 +1555,12 @@ class ComputeManager(manager.Manager):
raise exception.RescheduledException(
instance_uuid=instance.uuid,
reason=msg)
+
+ # NOTE(ganso): The check for affinity below does not work and it
+ # can easily be violated because the lock happens in different
+ # compute hosts.
+ # The only fix seems to be a DB lock to perform the check whenever
+ # setting the host field to an instance.
elif group.policy and 'affinity' == group.policy:
group_hosts = group.get_hosts(exclude=[instance.uuid])
if group_hosts and self.host not in group_hosts:
@@ -1525,8 +1569,7 @@ class ComputeManager(manager.Manager):
instance_uuid=instance.uuid,
reason=msg)
- if not CONF.workarounds.disable_group_policy_check_upcall:
- _do_validation(context, instance, group_hint)
+ _do_validation(context, instance, group)
def _log_original_error(self, exc_info, instance_uuid):
LOG.error('Error: %s', exc_info[1], instance_uuid=instance_uuid,
@@ -4646,10 +4689,24 @@ class ComputeManager(manager.Manager):
with self._error_out_instance_on_exception(
context, instance, instance_state=instance_state),\
errors_out_migration_ctxt(migration):
+
self._send_prep_resize_notifications(
context, instance, fields.NotificationPhase.START,
instance_type)
try:
+ scheduler_hints = self._get_scheduler_hints(filter_properties,
+ request_spec)
+ # Error out if this host cannot accept the new instance due
+ # to anti-affinity. At this point the migration is already
+ # in-progress, so this is the definitive moment to abort due to
+ # the policy violation. Also, exploding here is covered by the
+ # cleanup methods in except block.
+ try:
+ self._validate_instance_group_policy(context, instance,
+ scheduler_hints)
+ except exception.RescheduledException as e:
+ raise exception.InstanceFaultRollback(inner_exception=e)
+
self._prep_resize(context, image, instance,
instance_type, filter_properties,
node, migration, clean_shutdown)
@@ -6505,6 +6562,20 @@ class ComputeManager(manager.Manager):
if None, ignore disk usage checking
:returns: a dict containing migration info
"""
+
+ # Error out if this host cannot accept the new instance due
+ # to anti-affinity. This check at this moment is not very accurate, as
+ # multiple requests may be happening concurrently and miss the lock,
+ # but when it works it provides a better user experience by failing
+ # earlier. Also, it should be safe to explode here, error becomes
+ # NoValidHost and instance status remains ACTIVE.
+ try:
+ self._validate_instance_group_policy(ctxt, instance)
+ except exception.RescheduledException as e:
+ msg = ("Failed to validate instance group policy "
+ "due to: {}".format(e))
+ raise exception.MigrationPreCheckError(reason=msg)
+
src_compute_info = obj_base.obj_to_primitive(
self._get_compute_info(ctxt, instance.host))
dst_compute_info = obj_base.obj_to_primitive(
@@ -6567,6 +6638,13 @@ class ComputeManager(manager.Manager):
"""
LOG.debug('pre_live_migration data is %s', migrate_data)
+ # Error out if this host cannot accept the new instance due
+ # to anti-affinity. At this point the migration is already in-progress,
+ # so this is the definitive moment to abort due to the policy
+ # violation. Also, it should be safe to explode here. The instance
+ # status remains ACTIVE, migration status failed.
+ self._validate_instance_group_policy(context, instance)
+
migrate_data.old_vol_attachment_ids = {}
bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
context, instance.uuid)
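To summarize how the three call sites touched above surface a violation differently, here is a rough sketch mirroring the comments added in the diff; these wrapper functions are illustrative only and `manager` stands for the ComputeManager instance.

# Sketch only -- not the actual manager methods.
from nova import exception


def prep_resize_policy_check(manager, context, instance, scheduler_hints):
    # Cold migration / resize: wrap the error so the except-block cleanup
    # reverts allocations and the instance state instead of erroring out.
    try:
        manager._validate_instance_group_policy(context, instance,
                                                scheduler_hints)
    except exception.RescheduledException as e:
        raise exception.InstanceFaultRollback(inner_exception=e)


def check_can_live_migrate_destination_policy_check(manager, ctxt, instance):
    # Early live-migration check: convert to MigrationPreCheckError so the
    # request fails as NoValidHost and the instance stays ACTIVE.
    try:
        manager._validate_instance_group_policy(ctxt, instance)
    except exception.RescheduledException as e:
        msg = ("Failed to validate instance group policy "
               "due to: {}".format(e))
        raise exception.MigrationPreCheckError(reason=msg)


def pre_live_migration_policy_check(manager, context, instance):
    # Last-chance check on the destination: let the exception propagate;
    # the migration fails while the instance remains ACTIVE.
    manager._validate_instance_group_policy(context, instance)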
diff --git a/nova/tests/unit/compute/test_compute_mgr.py b/nova/tests/unit/compute/test_compute_mgr.py
index 1c7b5de37a..ef048e1d68 100644
--- a/nova/tests/unit/compute/test_compute_mgr.py
+++ b/nova/tests/unit/compute/test_compute_mgr.py
@@ -2915,14 +2915,36 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
self.context, 'compute_check_can_live_migrate_destination',
CONF.host, instance.uuid)
+ @mock.patch('nova.objects.InstanceGroup.get_by_instance_uuid', mock.Mock(
+ side_effect=exception.InstanceGroupNotFound(group_uuid='')))
def test_check_can_live_migrate_destination_success(self):
self._test_check_can_live_migrate_destination()
+ @mock.patch('nova.objects.InstanceGroup.get_by_instance_uuid', mock.Mock(
+ side_effect=exception.InstanceGroupNotFound(group_uuid='')))
def test_check_can_live_migrate_destination_fail(self):
self.assertRaises(
- test.TestingException,
- self._test_check_can_live_migrate_destination,
- do_raise=True)
+ test.TestingException,
+ self._test_check_can_live_migrate_destination,
+ do_raise=True)
+
+ @mock.patch.object(compute_utils, 'add_instance_fault_from_exc')
+ def test_check_can_live_migrate_destination_fail_group_policy(
+ self, mock_fail_db):
+
+ instance = fake_instance.fake_instance_obj(
+ self.context, host=self.compute.host, vm_state=vm_states.ACTIVE,
+ node='fake-node')
+
+ ex = exception.RescheduledException(
+ instance_uuid=instance.uuid, reason="policy violated")
+
+ with mock.patch.object(self.compute, '_validate_instance_group_policy',
+ side_effect=ex):
+ self.assertRaises(
+ exception.MigrationPreCheckError,
+ self.compute.check_can_live_migrate_destination,
+ self.context, instance, None, None)
@mock.patch('nova.compute.manager.InstanceEvents._lock_name')
def test_prepare_for_instance_event(self, lock_name_mock):
@@ -6447,7 +6469,8 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
def test_validate_policy_honors_workaround_disabled(self, mock_get):
instance = objects.Instance(uuid=uuids.instance)
hints = {'group': 'foo'}
- mock_get.return_value = objects.InstanceGroup(policy=None)
+ mock_get.return_value = objects.InstanceGroup(policy=None,
+ uuid=uuids.group)
self.compute._validate_instance_group_policy(self.context,
instance, hints)
mock_get.assert_called_once_with(self.context, 'foo')
@@ -6473,10 +6496,14 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
instance, hints)
mock_get.assert_called_once_with(self.context, uuids.group_hint)
+ @mock.patch('nova.objects.InstanceGroup.get_by_uuid')
@mock.patch('nova.objects.InstanceList.get_uuids_by_host')
@mock.patch('nova.objects.InstanceGroup.get_by_hint')
- def test_validate_instance_group_policy_with_rules(self, mock_get_by_hint,
- mock_get_by_host):
+ @mock.patch.object(fake_driver.FakeDriver, 'get_available_nodes')
+ @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node')
+ def test_validate_instance_group_policy_with_rules(
+ self, migration_list, nodes, mock_get_by_hint, mock_get_by_host,
+ mock_get_by_uuid):
# Create 2 instance in same host, inst2 created before inst1
instance = objects.Instance(uuid=uuids.inst1)
hints = {'group': [uuids.group_hint]}
@@ -6485,17 +6512,26 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
mock_get_by_host.return_value = existing_insts
# if group policy rules limit to 1, raise RescheduledException
- mock_get_by_hint.return_value = objects.InstanceGroup(
+ group = objects.InstanceGroup(
policy='anti-affinity', rules={'max_server_per_host': '1'},
- hosts=['host1'], members=members_uuids)
+ hosts=['host1'], members=members_uuids,
+ uuid=uuids.group)
+ mock_get_by_hint.return_value = group
+ mock_get_by_uuid.return_value = group
+ nodes.return_value = ['nodename']
+ migration_list.return_value = [objects.Migration(
+ uuid=uuids.migration, instance_uuid=uuids.instance)]
self.assertRaises(exception.RescheduledException,
self.compute._validate_instance_group_policy,
self.context, instance, hints)
# if group policy rules limit change to 2, validate OK
- mock_get_by_hint.return_value = objects.InstanceGroup(
+ group2 = objects.InstanceGroup(
policy='anti-affinity', rules={'max_server_per_host': 2},
- hosts=['host1'], members=members_uuids)
+ hosts=['host1'], members=members_uuids,
+ uuid=uuids.group)
+ mock_get_by_hint.return_value = group2
+ mock_get_by_uuid.return_value = group2
self.compute._validate_instance_group_policy(self.context,
instance, hints)
@@ -7966,6 +8002,8 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase,
else:
mock_log.warning.assert_not_called()
+ @mock.patch('nova.objects.InstanceGroup.get_by_instance_uuid', mock.Mock(
+ side_effect=exception.InstanceGroupNotFound(group_uuid='')))
def test_pre_live_migration_cinder_v3_api(self):
# This tests that pre_live_migration with a bdm with an
# attachment_id, will create a new attachment and update
@@ -8043,6 +8081,8 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase,
_test()
+ @mock.patch('nova.objects.InstanceGroup.get_by_instance_uuid', mock.Mock(
+ side_effect=exception.InstanceGroupNotFound(group_uuid='')))
def test_pre_live_migration_exception_cinder_v3_api(self):
# The instance in this test has 2 attachments. The second attach_create
# will throw an exception. This will test that the first attachment
@@ -8112,6 +8152,8 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase,
self.assertGreater(len(m.mock_calls), 0)
_test()
+ @mock.patch('nova.objects.InstanceGroup.get_by_instance_uuid', mock.Mock(
+ side_effect=exception.InstanceGroupNotFound(group_uuid='')))
def test_pre_live_migration_exceptions_delete_attachments(self):
# The instance in this test has 2 attachments. The call to
# driver.pre_live_migration will raise an exception. This will test
@@ -9297,7 +9339,7 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase,
self.compute.prep_resize,
self.context, mock.sentinel.image,
instance, flavor,
- mock.sentinel.request_spec,
+ objects.RequestSpec(),
{}, 'node', False,
migration, [])
@@ -9375,6 +9417,54 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase,
# (_error_out_instance_on_exception will set to ACTIVE by default).
self.assertEqual(vm_states.STOPPED, instance.vm_state)
+ @mock.patch('nova.compute.utils.notify_usage_exists')
+ @mock.patch('nova.compute.manager.ComputeManager.'
+ '_notify_about_instance_usage')
+ @mock.patch('nova.compute.utils.notify_about_resize_prep_instance')
+ @mock.patch('nova.objects.Instance.save')
+ @mock.patch('nova.compute.manager.ComputeManager._revert_allocation')
+ @mock.patch('nova.compute.manager.ComputeManager.'
+ '_reschedule_resize_or_reraise')
+ @mock.patch('nova.compute.utils.add_instance_fault_from_exc')
+ # this is almost copy-paste from test_prep_resize_fails_rollback
+ def test_prep_resize_fails_group_validation(
+ self, add_instance_fault_from_exc, _reschedule_resize_or_reraise,
+ _revert_allocation, mock_instance_save,
+ notify_about_resize_prep_instance, _notify_about_instance_usage,
+ notify_usage_exists):
+ """Tests that if _validate_instance_group_policy raises
+ InstanceFaultRollback, the instance.vm_state is reset properly in
+ _error_out_instance_on_exception
+ """
+ instance = fake_instance.fake_instance_obj(
+ self.context, host=self.compute.host, vm_state=vm_states.STOPPED,
+ node='fake-node', expected_attrs=['system_metadata', 'flavor'])
+ migration = mock.MagicMock(spec='nova.objects.Migration')
+ request_spec = mock.MagicMock(spec='nova.objects.RequestSpec')
+ ex = exception.RescheduledException(
+ instance_uuid=instance.uuid, reason="policy violated")
+ ex2 = exception.InstanceFaultRollback(
+ inner_exception=ex)
+
+ def fake_reschedule_resize_or_reraise(*args, **kwargs):
+ raise ex2
+
+ _reschedule_resize_or_reraise.side_effect = (
+ fake_reschedule_resize_or_reraise)
+
+ with mock.patch.object(self.compute, '_validate_instance_group_policy',
+ side_effect=ex):
+ self.assertRaises(
+ # _error_out_instance_on_exception should reraise the
+ # RescheduledException inside InstanceFaultRollback.
+ exception.RescheduledException, self.compute.prep_resize,
+ self.context, instance.image_meta, instance, instance.flavor,
+ request_spec, filter_properties={}, node=instance.node,
+ clean_shutdown=True, migration=migration, host_list=[])
+ # The instance.vm_state should remain unchanged
+ # (_error_out_instance_on_exception will set to ACTIVE by default).
+ self.assertEqual(vm_states.STOPPED, instance.vm_state)
+
def test_get_updated_nw_info_with_pci_mapping(self):
old_dev = objects.PciDevice(address='0000:04:00.2')
new_dev = objects.PciDevice(address='0000:05:00.3')
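The recurring pattern in the test changes above is to stub the new group lookup so unrelated migration tests skip the check entirely. Roughly, as a sketch assuming a test class in the existing test module with a hypothetical _run_migration_scenario helper:

# Patch the group lookup to raise InstanceGroupNotFound so
# _validate_instance_group_policy returns early and the rest of the
# scenario runs unchanged.
import mock

from nova import exception


@mock.patch('nova.objects.InstanceGroup.get_by_instance_uuid', mock.Mock(
    side_effect=exception.InstanceGroupNotFound(group_uuid='')))
def test_migration_scenario_unaffected_by_group_check(self):
    self._run_migration_scenario()  # hypothetical helper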
diff --git a/releasenotes/notes/bug-1821755-7bd03319e34b6b10.yaml b/releasenotes/notes/bug-1821755-7bd03319e34b6b10.yaml
new file mode 100644
index 0000000000..4c6135311b
--- /dev/null
+++ b/releasenotes/notes/bug-1821755-7bd03319e34b6b10.yaml
@@ -0,0 +1,11 @@
+---
+fixes:
+ - |
+ Improved detection of anti-affinity policy violation when performing live
+ and cold migrations. Most of the violations caused by race conditions due
+ to performing concurrent live or cold migrations should now be addressed
+ by extra checks in the compute service. Upon detection, cold migration
+ operations are automatically rescheduled, while live migrations have two
+ checks and will be rescheduled if detected by the first one, otherwise the
+ live migration will fail cleanly and revert the instance state back to its
+ previous value.