diff options
author | Matt Riedemann <mriedem.os@gmail.com> | 2018-08-21 14:24:47 -0400 |
---|---|---|
committer | Matt Riedemann <mriedem.os@gmail.com> | 2018-08-23 14:44:22 -0400 |
commit | 531407919eee9365d4d6481a69c52fef81ed4e5b (patch) | |
tree | 8ccce7e7c302a515c9e0664887d0193fc73496d9 | |
parent | 14d9e9f0e8e06c4fd24da77749083dfd8d6867cc (diff) | |
download | nova-531407919eee9365d4d6481a69c52fef81ed4e5b.tar.gz |
libvirt: Don't react to VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED events
The VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED event is sent when live
migration completes but does not indicate whether the migration
succeeded or failed; for that we need to check the actual
job status.
For now, to fix the regression introduced in change
Ic5cab99944df9e501ba2032eb96911c36304494d in Rocky and mitigate
the risk of a change in the Rocky GA, we just don't deal with
that event. In the case of live migration failing, things are the
same as before Rocky. In the case of live migration success, we
activate the destination host port bindings in _post_live_migration
so the network downtime benefit is not realized in that case, but
we also don't mess up the network by incorrectly activating the
dest host port bindings in ComputeManager.handle_lifecycle_event.
If post-copy is in effect, we still get the reduced network downtime
benefit from blueprint neutron-new-port-binding-api.
This is a partial fix. The full fix will include checking the
job status when we get VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED, but that
is likely too risky of a change for the Rocky GA.
Change-Id: If6779b633310c46c21bf7b3b3769d17e486f3945
Partial-Fix: #1788014
(cherry picked from commit a92b473e19def0f087dff73fc87ae13181f71d20)
-rw-r--r-- | nova/tests/unit/virt/libvirt/test_host.py | 7 | ||||
-rw-r--r-- | nova/virt/libvirt/host.py | 10 |
2 files changed, 13 insertions, 4 deletions
diff --git a/nova/tests/unit/virt/libvirt/test_host.py b/nova/tests/unit/virt/libvirt/test_host.py index 4f04bcc9ae..a1448a66f1 100644 --- a/nova/tests/unit/virt/libvirt/test_host.py +++ b/nova/tests/unit/virt/libvirt/test_host.py @@ -251,7 +251,12 @@ class HostTestCase(test.NoDBTestCase): conn, dom, fakelibvirt.VIR_DOMAIN_EVENT_SUSPENDED, detail=VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED, opaque=hostimpl) expected_event = hostimpl._queue_event.call_args[0][0] - self.assertEqual(event.EVENT_LIFECYCLE_MIGRATION_COMPLETED, + # FIXME(mriedem): This should be EVENT_LIFECYCLE_MIGRATION_COMPLETED + # once bug 1788014 is fixed and we properly check job status for the + # VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED case. + # self.assertEqual(event.EVENT_LIFECYCLE_MIGRATION_COMPLETED, + # expected_event.transition) + self.assertEqual(event.EVENT_LIFECYCLE_PAUSED, expected_event.transition) def test_event_emit_delayed_call_delayed(self): diff --git a/nova/virt/libvirt/host.py b/nova/virt/libvirt/host.py index 70032bb9b7..65ae0ff2f1 100644 --- a/nova/virt/libvirt/host.py +++ b/nova/virt/libvirt/host.py @@ -179,9 +179,13 @@ class Host(object): if (hasattr(libvirt, 'VIR_DOMAIN_EVENT_SUSPENDED_POSTCOPY') and detail == libvirt.VIR_DOMAIN_EVENT_SUSPENDED_POSTCOPY): transition = virtevent.EVENT_LIFECYCLE_POSTCOPY_STARTED - elif (hasattr(libvirt, 'VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED') and - detail == libvirt.VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED): - transition = virtevent.EVENT_LIFECYCLE_MIGRATION_COMPLETED + # FIXME(mriedem): VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED is also sent + # when live migration of the guest fails, so we cannot simply rely + # on the event itself but need to check if the job itself was + # successful. 
+ # elif (hasattr(libvirt, 'VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED') and + # detail == libvirt.VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED): + # transition = virtevent.EVENT_LIFECYCLE_MIGRATION_COMPLETED else: transition = virtevent.EVENT_LIFECYCLE_PAUSED elif event == libvirt.VIR_DOMAIN_EVENT_RESUMED: |