author     Lee Yarwood <lyarwood@redhat.com>    2020-08-05 11:40:11 +0100
committer  Lee Yarwood <lyarwood@redhat.com>    2020-11-30 09:43:51 +0000
commit     478be6f4fbbbc7b05becd5dd92a27f0c4e8f8ef8 (patch)
tree       3f1027e42ee7ce502af2110630e741e63dd01a22
parent     a806b1dc4c578f4a53a6c13317b04eb028861301 (diff)
zuul: Replace nova-live-migration with zuulv3 jobs
This change removes the original nova-live-migration job and replaces it
directly with the new Focal based zuulv3 native job. The
nova-dsvm-multinode-base base job is no longer used and so is also removed
as part of this change.

Note that this new nova-live-migration job does not yet contain any ceph
coverage like the original; this is still pending and will be completed
early in the W cycle. This change is being merged ahead of that to resolve
bug #1901739, a known QEMU -drive issue caused by the previous job's use of
libvirt 5.4.0 as provided by Ubuntu Bionic. The fix here is the migration to
Ubuntu Focal based jobs and libvirt 6.0.0, which now defaults to using QEMU
-blockdev.

NOTE(lyarwood): This change squashes the following changes into it to ensure
we end up with a passing zuulv3 Focal based job in stable/victoria. This
includes the reintroduction of nova-dsvm-multinode-base, which was
incorrectly removed by this change on master while still being used.

zuul: Introduce nova-evacuate
(cherry picked from commit f357d8040741b0346c3105fb3d3d1b260f5cb13d)

nova-evacuate: Disable libvirtd service and sockets during negative tests
(cherry picked from commit 226250beb6858bb3094c005fbc335a7378531df1)

zuul: Merge nova-evacuate into nova-multinode-live-migration
(cherry picked from commit c0fe95fcc5aec99a83dd57093dc230ef67b36b39)

zuul: Reintroduce nova-dsvm-multinode-base
(cherry picked from commit be752b8175f0cb3444ee47679753be847c8f8fd2)

nova-live-migration: Disable *all* virt services during negative tests
(cherry picked from commit 76360e566bcd0e203f3e9357ca2b0ca3d7baf4b8)

Closes-Bug: #1901739
Change-Id: Ib342e2d3c395830b4667a60de7e492d3b9de2f0a
(cherry picked from commit 4ac4a04d1843b0450e8d6d80189ce3e85253dcd0)
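For anyone verifying the libvirt/QEMU side of this on a devstack node, a rough check along these lines should work (illustrative only, not part of this change; it assumes an x86 guest so the QEMU process is qemu-system-x86_64):

  # Report the libvirt library/daemon versions in use.
  virsh version

  # Count -blockdev vs -drive occurrences on the running QEMU command lines;
  # libvirt 6.0.0 on Focal should show -blockdev, libvirt 5.4.0 on Bionic -drive.
  ps -o args= -C qemu-system-x86_64 | grep -o -e '-blockdev' -e '-drive' | sort | uniq -c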
-rw-r--r--   .zuul.yaml                                                  23
-rw-r--r--   playbooks/nova-evacuate/run.yaml                             8
-rw-r--r--   playbooks/nova-live-migration/post-run.yaml                 10
-rw-r--r--   roles/run-evacuate-hook/README.rst                           1
-rwxr-xr-x   roles/run-evacuate-hook/files/setup_evacuate_resources.sh  34
-rwxr-xr-x   roles/run-evacuate-hook/files/test_evacuate.sh              55
-rwxr-xr-x   roles/run-evacuate-hook/files/test_negative_evacuate.sh    37
-rw-r--r--   roles/run-evacuate-hook/tasks/main.yaml                     82
8 files changed, 228 insertions, 22 deletions
diff --git a/.zuul.yaml b/.zuul.yaml
index b4f67067e4..785522e964 100644
--- a/.zuul.yaml
+++ b/.zuul.yaml
@@ -56,21 +56,8 @@
       bindep_profile: test py38
     timeout: 3600
 
-# TODO(lyarwood): Remove once the new zuulv3 nova-multinode jobs are voting
 - job:
     name: nova-live-migration
-    parent: nova-dsvm-multinode-base
-    description: |
-      Run tempest live migration tests against both local storage and shared
-      storage using ceph (the environment is reconfigured for ceph after the
-      local storage tests are run). Also runs simple evacuate tests.
-      Config drive is forced on all instances. Runs with python 3.
-    run: playbooks/legacy/nova-live-migration/run.yaml
-    post-run: playbooks/legacy/nova-live-migration/post.yaml
-    irrelevant-files: *dsvm-irrelevant-files
-
-- job:
-    name: nova-multinode-live-migration
     parent: tempest-multinode-full-py3
     description: |
       Run tempest live migration tests against local qcow2 ephemeral storage
@@ -86,6 +73,7 @@
               volume_backed_live_migration: true
               block_migration_for_live_migration: true
               block_migrate_cinder_iscsi: true
+    post-run: playbooks/nova-live-migration/post-run.yaml
 
 # TODO(lyarwood): The following jobs need to be written as part of the
 # migration to zuulv3 before nova-live-migration can be removed:
@@ -95,13 +83,6 @@
 #    description: |
 #      Run tempest live migration tests against ceph ephemeral storage and
 #      cinder volumes.
-#
-#- job:
-#    name: nova-multinode-evacuate
-#    description: |
-#      Verifiy the evacuation of instances with local qcow2 ephemeral disks
-#      from down compute hosts.
-#
 #- job:
 #    name: nova-multinode-evacuate-ceph
 #    description: |
@@ -439,8 +420,6 @@
               - ^(?!nova/network/.*)(?!nova/virt/libvirt/vif.py).*$
         - nova-grenade-multinode
         - nova-live-migration
-        - nova-multinode-live-migration:
-            voting: false
         - nova-lvm
         - nova-multi-cell
         - nova-next
diff --git a/playbooks/nova-evacuate/run.yaml b/playbooks/nova-evacuate/run.yaml
new file mode 100644
index 0000000000..35e330a6de
--- /dev/null
+++ b/playbooks/nova-evacuate/run.yaml
@@ -0,0 +1,8 @@
+---
+- hosts: all
+  roles:
+    - orchestrate-devstack
+
+- hosts: controller
+  roles:
+    - run-evacuate-hook
diff --git a/playbooks/nova-live-migration/post-run.yaml b/playbooks/nova-live-migration/post-run.yaml
new file mode 100644
index 0000000000..845a1b15b2
--- /dev/null
+++ b/playbooks/nova-live-migration/post-run.yaml
@@ -0,0 +1,10 @@
+---
+- hosts: tempest
+  become: true
+  roles:
+    - role: fetch-subunit-output
+      zuul_work_dir: '{{ devstack_base_dir }}/tempest'
+    - role: process-stackviz
+- hosts: controller
+  roles:
+    - run-evacuate-hook
diff --git a/roles/run-evacuate-hook/README.rst b/roles/run-evacuate-hook/README.rst
new file mode 100644
index 0000000000..e423455aee
--- /dev/null
+++ b/roles/run-evacuate-hook/README.rst
@@ -0,0 +1 @@
+Run Nova evacuation tests against a multinode environment.
diff --git a/roles/run-evacuate-hook/files/setup_evacuate_resources.sh b/roles/run-evacuate-hook/files/setup_evacuate_resources.sh
new file mode 100755
index 0000000000..c8c385d7ff
--- /dev/null
+++ b/roles/run-evacuate-hook/files/setup_evacuate_resources.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+source /opt/stack/devstack/openrc admin
+set -x
+set -e
+
+image_id=$(openstack image list -f value -c ID | awk 'NR==1{print $1}')
+flavor_id=$(openstack flavor list -f value -c ID | awk 'NR==1{print $1}')
+network_id=$(openstack network list --no-share -f value -c ID | awk 'NR==1{print $1}')
+
+echo "Creating ephemeral test server on subnode"
+openstack --os-compute-api-version 2.74 server create --image ${image_id} --flavor ${flavor_id} \
+--nic net-id=${network_id} --host $SUBNODE_HOSTNAME --wait evacuate-test
+
+# TODO(lyarwood) Use osc to launch the bfv volume
+echo "Creating boot from volume test server on subnode"
+nova --os-compute-api-version 2.74 boot --flavor ${flavor_id} --poll \
+--block-device id=${image_id},source=image,dest=volume,size=1,bootindex=0,shutdown=remove \
+--nic net-id=${network_id} --host ${SUBNODE_HOSTNAME} evacuate-bfv-test
+
+echo "Forcing down the subnode so we can evacuate from it"
+openstack --os-compute-api-version 2.11 compute service set --down ${SUBNODE_HOSTNAME} nova-compute
+
+count=0
+status=$(openstack compute service list --host ${SUBNODE_HOSTNAME} --service nova-compute -f value -c State)
+while [ "${status}" != "down" ]
+do
+    sleep 1
+    count=$((count+1))
+    if [ ${count} -eq 30 ]; then
+        echo "Timed out waiting for subnode compute service to be marked as down"
+        exit 5
+    fi
+    status=$(openstack compute service list --host ${SUBNODE_HOSTNAME} --service nova-compute -f value -c State)
+done
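The forced-down flag set above is never cleared by the job itself, since the test node is discarded afterwards. As a rough sketch (not part of this change), the state could be reverted on a reused environment with:

  source /opt/stack/devstack/openrc admin
  # Clear the forced-down flag so the subnode can be scheduled to again.
  openstack --os-compute-api-version 2.11 compute service set --up ${SUBNODE_HOSTNAME} nova-compute
  openstack compute service list --host ${SUBNODE_HOSTNAME} --service nova-compute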
diff --git a/roles/run-evacuate-hook/files/test_evacuate.sh b/roles/run-evacuate-hook/files/test_evacuate.sh
new file mode 100755
index 0000000000..bdf8d92441
--- /dev/null
+++ b/roles/run-evacuate-hook/files/test_evacuate.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+# Source tempest to determine the build timeout configuration.
+source /opt/stack/devstack/lib/tempest
+source /opt/stack/devstack/openrc admin
+set -x
+set -e
+
+# Wait for the controller compute service to be enabled.
+count=0
+status=$(openstack compute service list --host ${CONTROLLER_HOSTNAME} --service nova-compute -f value -c Status)
+while [ "${status}" != "enabled" ]
+do
+    sleep 1
+    count=$((count+1))
+    if [ ${count} -eq 30 ]; then
+        echo "Timed out waiting for controller compute service to be enabled"
+        exit 5
+    fi
+    status=$(openstack compute service list --host ${CONTROLLER_HOSTNAME} --service nova-compute -f value -c Status)
+done
+
+function evacuate_and_wait_for_active() {
+    local server="$1"
+
+    nova evacuate ${server}
+    # Wait for the instance to go into ACTIVE state from the evacuate.
+    count=0
+    status=$(openstack server show ${server} -f value -c status)
+    while [ "${status}" != "ACTIVE" ]
+    do
+        sleep 1
+        count=$((count+1))
+        if [ ${count} -eq ${BUILD_TIMEOUT} ]; then
+            echo "Timed out waiting for server ${server} to go to ACTIVE status"
+            exit 6
+        fi
+        status=$(openstack server show ${server} -f value -c status)
+    done
+}
+
+evacuate_and_wait_for_active evacuate-test
+evacuate_and_wait_for_active evacuate-bfv-test
+
+# Make sure the servers moved.
+for server in evacuate-test evacuate-bfv-test; do
+    host=$(openstack server show ${server} -f value -c OS-EXT-SRV-ATTR:host)
+    if [[ ${host} != ${CONTROLLER_HOSTNAME} ]]; then
+        echo "Unexpected host ${host} for server ${server} after evacuate."
+        exit 7
+    fi
+done
+
+# Cleanup test servers
+openstack server delete --wait evacuate-test
+openstack server delete --wait evacuate-bfv-test
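The status polling loop here and the one in test_negative_evacuate.sh below are near-identical. A possible follow-up, sketched only and not part of this change, would be a shared helper along these lines (wait_for_server_status is a hypothetical name; BUILD_TIMEOUT comes from devstack's lib/tempest as above):

  function wait_for_server_status() {
      local server="$1"
      local expected="$2"
      local count=0
      local status
      status=$(openstack server show ${server} -f value -c status)
      while [ "${status}" != "${expected}" ]
      do
          sleep 1
          count=$((count+1))
          if [ ${count} -eq ${BUILD_TIMEOUT} ]; then
              echo "Timed out waiting for server ${server} to reach ${expected} status"
              return 1
          fi
          status=$(openstack server show ${server} -f value -c status)
      done
  }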
diff --git a/roles/run-evacuate-hook/files/test_negative_evacuate.sh b/roles/run-evacuate-hook/files/test_negative_evacuate.sh
new file mode 100755
index 0000000000..b1f5f7a4af
--- /dev/null
+++ b/roles/run-evacuate-hook/files/test_negative_evacuate.sh
@@ -0,0 +1,37 @@
+#!/bin/bash
+# Source tempest to determine the build timeout configuration.
+source /opt/stack/devstack/lib/tempest
+source /opt/stack/devstack/openrc admin
+set -x
+set -e
+
+# Now force the evacuation to the controller; we have to force to bypass the
+# scheduler since we killed libvirtd which will trigger the libvirt compute
+# driver to auto-disable the nova-compute service and then the ComputeFilter
+# would filter out this host and we'd get NoValidHost. Normally forcing a host
+# during evacuate and bypassing the scheduler is a very bad idea, but we're
+# doing a negative test here.
+
+function evacuate_and_wait_for_error() {
+    local server="$1"
+
+    echo "Forcing evacuate of ${server} to local host"
+    # TODO(mriedem): Use OSC when it supports evacuate.
+    nova --os-compute-api-version "2.67" evacuate --force ${server} ${CONTROLLER_HOSTNAME}
+    # Wait for the instance to go into ERROR state from the failed evacuate.
+    count=0
+    status=$(openstack server show ${server} -f value -c status)
+    while [ "${status}" != "ERROR" ]
+    do
+        sleep 1
+        count=$((count+1))
+        if [ ${count} -eq ${BUILD_TIMEOUT} ]; then
+            echo "Timed out waiting for server ${server} to go to ERROR status"
+            exit 4
+        fi
+        status=$(openstack server show ${server} -f value -c status)
+    done
+}
+
+evacuate_and_wait_for_error evacuate-test
+evacuate_and_wait_for_error evacuate-bfv-test
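The 2.67 microversion pin above matters: microversion 2.68 removed the force option from the evacuate API, and since 2.29 a requested destination is validated by the scheduler unless it is forced. An unforced, targeted evacuate, shown only for contrast and not used by this job, would therefore fail with NoValidHost here because the local nova-compute service has been auto-disabled:

  # Unforced targeted evacuate; the scheduler still validates the host.
  nova --os-compute-api-version 2.68 evacuate ${server} ${CONTROLLER_HOSTNAME}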
diff --git a/roles/run-evacuate-hook/tasks/main.yaml b/roles/run-evacuate-hook/tasks/main.yaml
new file mode 100644
index 0000000000..184b9d18f9
--- /dev/null
+++ b/roles/run-evacuate-hook/tasks/main.yaml
@@ -0,0 +1,82 @@
+- name: Setup resources and mark the subnode as forced down
+  become: true
+  become_user: stack
+  shell: "/opt/stack/nova/roles/run-evacuate-hook/files/setup_evacuate_resources.sh"
+  environment:
+    SUBNODE_HOSTNAME: "{{ hostvars['compute1']['ansible_hostname'] }}"
+
+- name: Fence subnode by stopping q-agt and n-cpu
+  delegate_to: compute1
+  become: true
+  systemd:
+    name: "{{ item }}"
+    state: stopped
+  with_items:
+    - devstack@q-agt
+    - devstack@n-cpu
+
+- name: Register running domains on subnode
+  delegate_to: compute1
+  become: true
+  virt:
+    command: list_vms
+    state: running
+  register: subnode_vms
+
+- name: Destroy running domains on subnode
+  delegate_to: compute1
+  become: true
+  virt:
+    name: "{{ item }}"
+    state: destroyed
+  with_items: "{{ subnode_vms.list_vms }}"
+
+- name: Stop libvirtd on "{{ inventory_hostname }}"
+  become: true
+  systemd:
+    name: "{{ item }}"
+    state: stopped
+    enabled: no
+  with_items:
+    - libvirtd.service
+    - libvirtd.socket
+    - libvirtd-admin.socket
+    - libvirtd-ro.socket
+    - virtlogd.service
+    - virtlogd-admin.socket
+    - virtlogd.socket
+    - virtlockd.service
+    - virtlockd-admin.socket
+    - virtlockd.socket
+
+- name: Run negative evacuate tests
+  become: true
+  become_user: stack
+  shell: "/opt/stack/nova/roles/run-evacuate-hook/files/test_negative_evacuate.sh"
+  environment:
+    CONTROLLER_HOSTNAME: "{{ hostvars['controller']['ansible_hostname'] }}"
+
+- name: Start libvirtd on "{{ inventory_hostname }}"
+  become: true
+  systemd:
+    name: "{{ item }}"
+    state: started
+    enabled: yes
+  with_items:
+    - libvirtd.service
+    - libvirtd.socket
+    - libvirtd-admin.socket
+    - libvirtd-ro.socket
+    - virtlogd.service
+    - virtlogd-admin.socket
+    - virtlogd.socket
+    - virtlockd.service
+    - virtlockd-admin.socket
+    - virtlockd.socket
+
+- name: Run evacuate tests
+  become: true
+  become_user: stack
+  shell: "/opt/stack/nova/roles/run-evacuate-hook/files/test_evacuate.sh"
+  environment:
+    CONTROLLER_HOSTNAME: "{{ hostvars['controller']['ansible_hostname'] }}"
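Nothing restarts the fenced subnode services because the job ends once the evacuation checks pass. For completeness, a manual restore on a reused environment would roughly mirror the fencing tasks in reverse (a sketch, not part of the role), plus clearing the forced-down flag as noted after setup_evacuate_resources.sh above:

  # On the subnode: bring back the services stopped by the fencing tasks.
  sudo systemctl start devstack@n-cpu devstack@q-agt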