summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Matt Riedemann <mriedem.os@gmail.com> 2018-08-21 14:24:47 -0400
committer Matt Riedemann <mriedem.os@gmail.com> 2018-08-23 14:44:22 -0400
commit 531407919eee9365d4d6481a69c52fef81ed4e5b (patch)
tree 8ccce7e7c302a515c9e0664887d0193fc73496d9
parent 14d9e9f0e8e06c4fd24da77749083dfd8d6867cc (diff)
download nova-531407919eee9365d4d6481a69c52fef81ed4e5b.tar.gz
libvirt: Don't react to VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED events
The VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED event is sent when live migration completes, but it does not indicate whether the migration succeeded or failed; for that we need to check the actual job status.

For now, to fix the regression introduced in change Ic5cab99944df9e501ba2032eb96911c36304494d in Rocky and mitigate the risk of a change in the Rocky GA, we just don't deal with that event. In the case of live migration failing, things are the same as before Rocky. In the case of live migration success, we activate the destination host port bindings in _post_live_migration, so the network downtime benefit is not realized in that case, but we also don't mess up the network by incorrectly activating the dest host port bindings in ComputeManager.handle_lifecycle_event. If post-copy is in effect, we still get the reduced network downtime benefit from blueprint neutron-new-port-binding-api.

This is a partial fix; the full fix will include checking the job status when we get VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED, but that is likely too risky a change for the Rocky GA.

Change-Id: If6779b633310c46c21bf7b3b3769d17e486f3945
Partial-Fix: #1788014
(cherry picked from commit a92b473e19def0f087dff73fc87ae13181f71d20)
-rw-r--r-- nova/tests/unit/virt/libvirt/test_host.py 7
-rw-r--r-- nova/virt/libvirt/host.py 10
2 files changed, 13 insertions, 4 deletions
diff --git a/nova/tests/unit/virt/libvirt/test_host.py b/nova/tests/unit/virt/libvirt/test_host.py
index 4f04bcc9ae..a1448a66f1 100644
--- a/nova/tests/unit/virt/libvirt/test_host.py
+++ b/nova/tests/unit/virt/libvirt/test_host.py
@@ -251,7 +251,12 @@ class HostTestCase(test.NoDBTestCase):
conn, dom, fakelibvirt.VIR_DOMAIN_EVENT_SUSPENDED,
detail=VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED, opaque=hostimpl)
expected_event = hostimpl._queue_event.call_args[0][0]
- self.assertEqual(event.EVENT_LIFECYCLE_MIGRATION_COMPLETED,
+ # FIXME(mriedem): This should be EVENT_LIFECYCLE_MIGRATION_COMPLETED
+ # once bug 1788014 is fixed and we properly check job status for the
+ # VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED case.
+ # self.assertEqual(event.EVENT_LIFECYCLE_MIGRATION_COMPLETED,
+ # expected_event.transition)
+ self.assertEqual(event.EVENT_LIFECYCLE_PAUSED,
expected_event.transition)
def test_event_emit_delayed_call_delayed(self):
diff --git a/nova/virt/libvirt/host.py b/nova/virt/libvirt/host.py
index 70032bb9b7..65ae0ff2f1 100644
--- a/nova/virt/libvirt/host.py
+++ b/nova/virt/libvirt/host.py
@@ -179,9 +179,13 @@ class Host(object):
if (hasattr(libvirt, 'VIR_DOMAIN_EVENT_SUSPENDED_POSTCOPY') and
detail == libvirt.VIR_DOMAIN_EVENT_SUSPENDED_POSTCOPY):
transition = virtevent.EVENT_LIFECYCLE_POSTCOPY_STARTED
- elif (hasattr(libvirt, 'VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED') and
- detail == libvirt.VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED):
- transition = virtevent.EVENT_LIFECYCLE_MIGRATION_COMPLETED
+ # FIXME(mriedem): VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED is also sent
+ # when live migration of the guest fails, so we cannot simply rely
+ # on the event itself but need to check if the job itself was
+ # successful.
+ # elif (hasattr(libvirt, 'VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED') and
+ # detail == libvirt.VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED):
+ # transition = virtevent.EVENT_LIFECYCLE_MIGRATION_COMPLETED
else:
transition = virtevent.EVENT_LIFECYCLE_PAUSED
elif event == libvirt.VIR_DOMAIN_EVENT_RESUMED: