33 files changed, 683 insertions(+), 562 deletions(-)
diff --git a/.zuul.yaml b/.zuul.yaml index 70784d0fa2..c5df8beb16 100644 --- a/.zuul.yaml +++ b/.zuul.yaml @@ -79,16 +79,48 @@ timeout: 3600 - job: + name: nova-tox-validate-backport + parent: openstack-tox + description: | + Determine whether a backport is ready to be merged by checking whether it + has already been merged to master or more recent stable branches. + + Uses tox with the ``validate-backport`` environment. + vars: + tox_envlist: validate-backport + +- job: name: nova-live-migration - parent: nova-dsvm-multinode-base + parent: tempest-multinode-full-py3 description: | - Run tempest live migration tests against both local storage and shared - storage using ceph (the environment is reconfigured for ceph after the - local storage tests are run). Also runs simple evacuate tests. - Config drive is forced on all instances. Runs with python 3. - run: playbooks/legacy/nova-live-migration/run.yaml - post-run: playbooks/legacy/nova-live-migration/post.yaml + Run tempest live migration tests against local qcow2 ephemeral storage + and shared LVM/iSCSI cinder volumes. irrelevant-files: *dsvm-irrelevant-files + vars: + tox_envlist: all + tempest_test_regex: (^tempest\.api\.compute\.admin\.(test_live_migration|test_migration)) + devstack_local_conf: + test-config: + $TEMPEST_CONFIG: + compute-feature-enabled: + volume_backed_live_migration: true + block_migration_for_live_migration: true + block_migrate_cinder_iscsi: true + post-run: playbooks/nova-live-migration/post-run.yaml + +# TODO(lyarwood): The following jobs need to be written as part of the +# migration to zuulv3 before nova-live-migration can be removed: +# +#- job: +# name: nova-multinode-live-migration-ceph +# description: | +# Run tempest live migration tests against ceph ephemeral storage and +# cinder volumes. +#- job: +# name: nova-multinode-evacuate-ceph +# description: | +# Verifiy the evacuation of instances with ceph ephemeral disks +# from down compute hosts. - job: name: nova-lvm @@ -254,22 +286,24 @@ - job: name: nova-grenade-multinode - parent: nova-dsvm-multinode-base + parent: grenade-multinode description: | - Multi-node grenade job which runs gate/live_migration/hooks tests under - python 3. - In other words, this tests live and cold migration and resize with - mixed-version compute services which is important for things like - rolling upgrade support. + Run a multinode grenade job and run the smoke, cold and live migration + tests with the controller upgraded and the compute on the older release. The former names for this job were "nova-grenade-live-migration" and "legacy-grenade-dsvm-neutron-multinode-live-migration". - run: playbooks/legacy/nova-grenade-multinode/run.yaml - post-run: playbooks/legacy/nova-grenade-multinode/post.yaml - required-projects: - - openstack/grenade - - openstack/devstack-gate - - openstack/nova irrelevant-files: *dsvm-irrelevant-files + vars: + devstack_local_conf: + test-config: + $TEMPEST_CONFIG: + compute-feature-enabled: + live_migration: true + volume_backed_live_migration: true + block_migration_for_live_migration: true + block_migrate_cinder_iscsi: true + tox_envlist: all + tempest_test_regex: ((tempest\.(api\.compute|scenario)\..*smoke.*)|(^tempest\.api\.compute\.admin\.(test_live_migration|test_migration))) - job: name: nova-multi-cell @@ -372,12 +406,13 @@ # code; we don't need to run this on all changes, nor do we run # it in the gate. 
- ^(?!nova/network/.*)(?!nova/virt/libvirt/vif.py).*$ - - nova-grenade-multinode - nova-live-migration - nova-lvm - nova-multi-cell - nova-next - nova-tox-functional-py36 + - nova-tox-validate-backport: + voting: false - tempest-integrated-compute: # NOTE(gmann): Policies changes do not need to run all the # integration test jobs. Running only tempest and grenade @@ -397,7 +432,7 @@ - ^setup.cfg$ - ^tools/.*$ - ^tox.ini$ - - grenade: + - nova-grenade-multinode: irrelevant-files: *policies-irrelevant-files - tempest-ipv6-only: irrelevant-files: *dsvm-irrelevant-files @@ -408,14 +443,14 @@ voting: false gate: jobs: - - nova-grenade-multinode - nova-live-migration - nova-tox-functional-py36 - nova-multi-cell - nova-next + - nova-tox-validate-backport - tempest-integrated-compute: irrelevant-files: *policies-irrelevant-files - - grenade: + - nova-grenade-multinode: irrelevant-files: *policies-irrelevant-files - tempest-ipv6-only: irrelevant-files: *dsvm-irrelevant-files @@ -429,8 +464,6 @@ irrelevant-files: *dsvm-irrelevant-files - barbican-simple-crypto-devstack-tempest: irrelevant-files: *dsvm-irrelevant-files - - legacy-grenade-dsvm-neutron-multinode-zero-downtime: - irrelevant-files: *dsvm-irrelevant-files - devstack-plugin-nfs-tempest-full: irrelevant-files: *dsvm-irrelevant-files - nova-osprofiler-redis @@ -440,8 +473,6 @@ irrelevant-files: *dsvm-irrelevant-files - nova-tempest-full-oslo.versionedobjects: irrelevant-files: *dsvm-irrelevant-files - - legacy-tempest-dsvm-nova-libvirt-kvm-apr: - irrelevant-files: *dsvm-irrelevant-files - nova-tempest-v2-api: irrelevant-files: *dsvm-irrelevant-files - neutron-tempest-dvr-ha-multinode-full: diff --git a/doc/source/admin/networking.rst b/doc/source/admin/networking.rst index 407a43aafe..626bb89592 100644 --- a/doc/source/admin/networking.rst +++ b/doc/source/admin/networking.rst @@ -24,6 +24,18 @@ A full guide on configuring and using SR-IOV is provided in the :neutron-doc:`OpenStack Networking service documentation <admin/config-sriov.html>` +.. note:: + + Nova only supports PCI addresses where the fields are restricted to the + following maximum value: + + * domain - 0xFFFF + * bus - 0xFF + * slot - 0x1F + * function - 0x7 + + Nova will ignore PCI devices reported by the hypervisor if the address is + outside of these ranges. NUMA Affinity ------------- diff --git a/doc/source/admin/pci-passthrough.rst b/doc/source/admin/pci-passthrough.rst index 663fdbaf51..c3eb76e68e 100644 --- a/doc/source/admin/pci-passthrough.rst +++ b/doc/source/admin/pci-passthrough.rst @@ -37,6 +37,18 @@ devices with potentially different capabilities. supported until the 14.0.0 Newton release, see `bug 1512800 <https://bugs.launchpad.net/nova/+bug/1512880>`_ for details. +.. note:: + + Nova only supports PCI addresses where the fields are restricted to the + following maximum value: + + * domain - 0xFFFF + * bus - 0xFF + * slot - 0x1F + * function - 0x7 + + Nova will ignore PCI devices reported by the hypervisor if the address is + outside of these ranges. 
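
As a standalone illustration of the address limits documented in the notes above (this is not Nova's own parser, which lives in nova.pci.devspec and is referenced later in this change; the sample addresses are only examples), a PCI address of the form domain:bus:slot.function can be checked against the documented maxima like this:

    def pci_address_in_range(address):
        """Return True if a domain:bus:slot.function PCI address is within
        the maxima Nova supports: domain 0xFFFF, bus 0xFF, slot 0x1F,
        function 0x7."""
        limits = {'domain': 0xFFFF, 'bus': 0xFF, 'slot': 0x1F, 'function': 0x7}
        domain, bus, rest = address.split(':')
        slot, function = rest.split('.')
        fields = {'domain': int(domain, 16), 'bus': int(bus, 16),
                  'slot': int(slot, 16), 'function': int(function, 16)}
        return all(fields[name] <= limits[name] for name in limits)

    # pci_address_in_range('0000:00:02.0')  -> True, within the limits
    # pci_address_in_range('10000:00:02.0') -> False, 32 bit domain; Nova
    #                                           ignores such devices
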
Configure host (Compute) ------------------------ diff --git a/gate/live_migration/hooks/ceph.sh b/gate/live_migration/hooks/ceph.sh deleted file mode 100755 index 3d596ff0b3..0000000000 --- a/gate/live_migration/hooks/ceph.sh +++ /dev/null @@ -1,208 +0,0 @@ -#!/bin/bash - -function prepare_ceph { - git clone https://opendev.org/openstack/devstack-plugin-ceph /tmp/devstack-plugin-ceph - source /tmp/devstack-plugin-ceph/devstack/settings - source /tmp/devstack-plugin-ceph/devstack/lib/ceph - install_ceph - configure_ceph - #install ceph-common package and additional python3 ceph libraries on compute nodes - $ANSIBLE subnodes --become -f 5 -i "$WORKSPACE/inventory" -m raw -a "executable=/bin/bash - USE_PYTHON3=${USE_PYTHON3:-True} - source $BASE/new/devstack/functions - source $BASE/new/devstack/functions-common - git clone https://opendev.org/openstack/devstack-plugin-ceph /tmp/devstack-plugin-ceph - source /tmp/devstack-plugin-ceph/devstack/lib/ceph - install_ceph_remote - " - - #copy ceph admin keyring to compute nodes - sudo cp /etc/ceph/ceph.client.admin.keyring /tmp/ceph.client.admin.keyring - sudo chown ${STACK_USER}:${STACK_USER} /tmp/ceph.client.admin.keyring - sudo chmod 644 /tmp/ceph.client.admin.keyring - $ANSIBLE subnodes --become -f 5 -i "$WORKSPACE/inventory" -m copy -a "src=/tmp/ceph.client.admin.keyring dest=/etc/ceph/ceph.client.admin.keyring owner=ceph group=ceph" - sudo rm -f /tmp/ceph.client.admin.keyring - #copy ceph.conf to compute nodes - $ANSIBLE subnodes --become -f 5 -i "$WORKSPACE/inventory" -m copy -a "src=/etc/ceph/ceph.conf dest=/etc/ceph/ceph.conf owner=root group=root" - - start_ceph -} - -function _ceph_configure_glance { - GLANCE_API_CONF=${GLANCE_API_CONF:-/etc/glance/glance-api.conf} - sudo ceph -c ${CEPH_CONF_FILE} osd pool create ${GLANCE_CEPH_POOL} ${GLANCE_CEPH_POOL_PG} ${GLANCE_CEPH_POOL_PGP} - sudo ceph -c ${CEPH_CONF_FILE} auth get-or-create client.${GLANCE_CEPH_USER} \ - mon "allow r" \ - osd "allow class-read object_prefix rbd_children, allow rwx pool=${GLANCE_CEPH_POOL}" | \ - sudo tee ${CEPH_CONF_DIR}/ceph.client.${GLANCE_CEPH_USER}.keyring - sudo chown ${STACK_USER}:$(id -g -n $whoami) ${CEPH_CONF_DIR}/ceph.client.${GLANCE_CEPH_USER}.keyring - - $ANSIBLE primary --become -f 5 -i "$WORKSPACE/inventory" -m ini_file -a "dest=${GLANCE_API_CONF} section=DEFAULT option=show_image_direct_url value=True" - $ANSIBLE primary --become -f 5 -i "$WORKSPACE/inventory" -m ini_file -a "dest=${GLANCE_API_CONF} section=glance_store option=default_store value=rbd" - $ANSIBLE primary --become -f 5 -i "$WORKSPACE/inventory" -m ini_file -a "dest=${GLANCE_API_CONF} section=glance_store option=stores value='file, http, rbd'" - $ANSIBLE primary --become -f 5 -i "$WORKSPACE/inventory" -m ini_file -a "dest=${GLANCE_API_CONF} section=glance_store option=rbd_store_ceph_conf value=$CEPH_CONF_FILE" - $ANSIBLE primary --become -f 5 -i "$WORKSPACE/inventory" -m ini_file -a "dest=${GLANCE_API_CONF} section=glance_store option=rbd_store_user value=$GLANCE_CEPH_USER" - $ANSIBLE primary --become -f 5 -i "$WORKSPACE/inventory" -m ini_file -a "dest=${GLANCE_API_CONF} section=glance_store option=rbd_store_pool value=$GLANCE_CEPH_POOL" - - sudo ceph -c ${CEPH_CONF_FILE} osd pool set ${GLANCE_CEPH_POOL} size ${CEPH_REPLICAS} - if [[ $CEPH_REPLICAS -ne 1 ]]; then - sudo ceph -c ${CEPH_CONF_FILE} osd pool set ${GLANCE_CEPH_POOL} crush_ruleset ${RULE_ID} - fi - - #copy glance keyring to compute only node - sudo cp /etc/ceph/ceph.client.glance.keyring /tmp/ceph.client.glance.keyring - 
sudo chown $STACK_USER:$STACK_USER /tmp/ceph.client.glance.keyring - $ANSIBLE subnodes --become -f 5 -i "$WORKSPACE/inventory" -m copy -a "src=/tmp/ceph.client.glance.keyring dest=/etc/ceph/ceph.client.glance.keyring" - sudo rm -f /tmp/ceph.client.glance.keyring -} - -function configure_and_start_glance { - _ceph_configure_glance - echo 'check processes before glance-api stop' - $ANSIBLE primary --become -f 5 -i "$WORKSPACE/inventory" -m shell -a "ps aux | grep glance-api" - - # restart glance - $ANSIBLE primary --become -f 5 -i "$WORKSPACE/inventory" -m shell -a "systemctl restart devstack@g-api" - - echo 'check processes after glance-api stop' - $ANSIBLE primary --become -f 5 -i "$WORKSPACE/inventory" -m shell -a "ps aux | grep glance-api" -} - -function _ceph_configure_nova { - #setup ceph for nova, we don't reuse configure_ceph_nova - as we need to emulate case where cinder is not configured for ceph - sudo ceph -c ${CEPH_CONF_FILE} osd pool create ${NOVA_CEPH_POOL} ${NOVA_CEPH_POOL_PG} ${NOVA_CEPH_POOL_PGP} - NOVA_CONF=${NOVA_CPU_CONF:-/etc/nova/nova.conf} - $ANSIBLE all --become -f 5 -i "$WORKSPACE/inventory" -m ini_file -a "dest=${NOVA_CONF} section=libvirt option=rbd_user value=${CINDER_CEPH_USER}" - $ANSIBLE all --become -f 5 -i "$WORKSPACE/inventory" -m ini_file -a "dest=${NOVA_CONF} section=libvirt option=rbd_secret_uuid value=${CINDER_CEPH_UUID}" - $ANSIBLE all --become -f 5 -i "$WORKSPACE/inventory" -m ini_file -a "dest=${NOVA_CONF} section=libvirt option=inject_key value=false" - $ANSIBLE all --become -f 5 -i "$WORKSPACE/inventory" -m ini_file -a "dest=${NOVA_CONF} section=libvirt option=inject_partition value=-2" - $ANSIBLE all --become -f 5 -i "$WORKSPACE/inventory" -m ini_file -a "dest=${NOVA_CONF} section=libvirt option=disk_cachemodes value='network=writeback'" - $ANSIBLE all --become -f 5 -i "$WORKSPACE/inventory" -m ini_file -a "dest=${NOVA_CONF} section=libvirt option=images_type value=rbd" - $ANSIBLE all --become -f 5 -i "$WORKSPACE/inventory" -m ini_file -a "dest=${NOVA_CONF} section=libvirt option=images_rbd_pool value=${NOVA_CEPH_POOL}" - $ANSIBLE all --become -f 5 -i "$WORKSPACE/inventory" -m ini_file -a "dest=${NOVA_CONF} section=libvirt option=images_rbd_ceph_conf value=${CEPH_CONF_FILE}" - - sudo ceph -c ${CEPH_CONF_FILE} auth get-or-create client.${CINDER_CEPH_USER} \ - mon "allow r" \ - osd "allow class-read object_prefix rbd_children, allow rwx pool=${CINDER_CEPH_POOL}, allow rwx pool=${NOVA_CEPH_POOL},allow rwx pool=${GLANCE_CEPH_POOL}" | \ - sudo tee ${CEPH_CONF_DIR}/ceph.client.${CINDER_CEPH_USER}.keyring > /dev/null - sudo chown ${STACK_USER}:$(id -g -n $whoami) ${CEPH_CONF_DIR}/ceph.client.${CINDER_CEPH_USER}.keyring - - #copy cinder keyring to compute only node - sudo cp /etc/ceph/ceph.client.cinder.keyring /tmp/ceph.client.cinder.keyring - sudo chown stack:stack /tmp/ceph.client.cinder.keyring - $ANSIBLE subnodes --become -f 5 -i "$WORKSPACE/inventory" -m copy -a "src=/tmp/ceph.client.cinder.keyring dest=/etc/ceph/ceph.client.cinder.keyring" - sudo rm -f /tmp/ceph.client.cinder.keyring - - sudo ceph -c ${CEPH_CONF_FILE} osd pool set ${NOVA_CEPH_POOL} size ${CEPH_REPLICAS} - if [[ $CEPH_REPLICAS -ne 1 ]]; then - sudo ceph -c ${CEPH_CONF_FILE} osd pool set ${NOVA_CEPH_POOL} crush_ruleset ${RULE_ID} - fi -} - -function _wait_for_nova_compute_service_state { - source $BASE/new/devstack/openrc admin admin - local status=$1 - local attempt=1 - local max_attempts=24 - local attempt_sleep=5 - local computes_count=$(openstack compute service list | grep -c 
nova-compute) - local computes_ready=$(openstack compute service list | grep nova-compute | grep $status | wc -l) - - echo "Waiting for $computes_count computes to report as $status" - while [ "$computes_ready" -ne "$computes_count" ]; do - if [ "$attempt" -eq "$max_attempts" ]; then - echo "Failed waiting for computes to report as ${status}, ${computes_ready}/${computes_count} ${status} after ${max_attempts} attempts" - exit 4 - fi - echo "Waiting ${attempt_sleep} seconds for ${computes_count} computes to report as ${status}, ${computes_ready}/${computes_count} ${status} after ${attempt}/${max_attempts} attempts" - sleep $attempt_sleep - attempt=$((attempt+1)) - computes_ready=$(openstack compute service list | grep nova-compute | grep $status | wc -l) - done - echo "All computes are now reporting as ${status} after ${attempt} attempts" -} - -function configure_and_start_nova { - - echo "Checking all n-cpu services" - $ANSIBLE all --become -f 5 -i "$WORKSPACE/inventory" -m shell -a "pgrep -u stack -a nova-compute" - - # stop nova-compute - echo "Stopping all n-cpu services" - $ANSIBLE all --become -f 5 -i "$WORKSPACE/inventory" -m shell -a "systemctl stop devstack@n-cpu" - - # Wait for the service to be marked as down - _wait_for_nova_compute_service_state "down" - - _ceph_configure_nova - - #import secret to libvirt - _populate_libvirt_secret - - # start nova-compute - echo "Starting all n-cpu services" - $ANSIBLE all --become -f 5 -i "$WORKSPACE/inventory" -m shell -a "systemctl start devstack@n-cpu" - - echo "Checking all n-cpu services" - # test that they are all running again - $ANSIBLE all --become -f 5 -i "$WORKSPACE/inventory" -m shell -a "pgrep -u stack -a nova-compute" - - # Wait for the service to be marked as up - _wait_for_nova_compute_service_state "up" -} - -function _ceph_configure_cinder { - sudo ceph -c ${CEPH_CONF_FILE} osd pool create ${CINDER_CEPH_POOL} ${CINDER_CEPH_POOL_PG} ${CINDER_CEPH_POOL_PGP} - sudo ceph -c ${CEPH_CONF_FILE} osd pool set ${CINDER_CEPH_POOL} size ${CEPH_REPLICAS} - if [[ $CEPH_REPLICAS -ne 1 ]]; then - sudo ceph -c ${CEPH_CONF_FILE} osd pool set ${CINDER_CEPH_POOL} crush_ruleset ${RULE_ID} - fi - - CINDER_CONF=${CINDER_CONF:-/etc/cinder/cinder.conf} - $ANSIBLE primary --become -f 5 -i "$WORKSPACE/inventory" -m ini_file -a "dest=$CINDER_CONF section=ceph option=volume_backend_name value=ceph" - $ANSIBLE primary --become -f 5 -i "$WORKSPACE/inventory" -m ini_file -a "dest=$CINDER_CONF section=ceph option=volume_driver value=cinder.volume.drivers.rbd.RBDDriver" - $ANSIBLE primary --become -f 5 -i "$WORKSPACE/inventory" -m ini_file -a "dest=$CINDER_CONF section=ceph option=rbd_ceph_conf value=$CEPH_CONF_FILE" - $ANSIBLE primary --become -f 5 -i "$WORKSPACE/inventory" -m ini_file -a "dest=$CINDER_CONF section=ceph option=rbd_pool value=$CINDER_CEPH_POOL" - $ANSIBLE primary --become -f 5 -i "$WORKSPACE/inventory" -m ini_file -a "dest=$CINDER_CONF section=ceph option=rbd_user value=$CINDER_CEPH_USER" - $ANSIBLE primary --become -f 5 -i "$WORKSPACE/inventory" -m ini_file -a "dest=$CINDER_CONF section=ceph option=rbd_uuid value=$CINDER_CEPH_UUID" - $ANSIBLE primary --become -f 5 -i "$WORKSPACE/inventory" -m ini_file -a "dest=$CINDER_CONF section=ceph option=rbd_flatten_volume_from_snapshot value=False" - $ANSIBLE primary --become -f 5 -i "$WORKSPACE/inventory" -m ini_file -a "dest=$CINDER_CONF section=ceph option=rbd_max_clone_depth value=5" - $ANSIBLE primary --become -f 5 -i "$WORKSPACE/inventory" -m ini_file -a "dest=$CINDER_CONF section=DEFAULT 
option=default_volume_type value=ceph" - $ANSIBLE primary --become -f 5 -i "$WORKSPACE/inventory" -m ini_file -a "dest=$CINDER_CONF section=DEFAULT option=enabled_backends value=ceph" - -} - -function configure_and_start_cinder { - _ceph_configure_cinder - - # restart cinder - $ANSIBLE primary --become -f 5 -i "$WORKSPACE/inventory" -m shell -a "systemctl restart devstack@c-vol" - - source $BASE/new/devstack/openrc - - export OS_USERNAME=admin - export OS_PROJECT_NAME=admin - lvm_type=$(cinder type-list | awk -F "|" 'NR==4{ print $2}') - cinder type-delete $lvm_type - openstack volume type create --os-volume-api-version 1 --property volume_backend_name="ceph" ceph -} - -function _populate_libvirt_secret { - cat > /tmp/secret.xml <<EOF -<secret ephemeral='no' private='no'> - <uuid>${CINDER_CEPH_UUID}</uuid> - <usage type='ceph'> - <name>client.${CINDER_CEPH_USER} secret</name> - </usage> -</secret> -EOF - - $ANSIBLE subnodes --become -f 5 -i "$WORKSPACE/inventory" -m copy -a "src=/tmp/secret.xml dest=/tmp/secret.xml" - $ANSIBLE all --become -f 5 -i "$WORKSPACE/inventory" -m shell -a "virsh secret-define --file /tmp/secret.xml" - local secret=$(sudo ceph -c ${CEPH_CONF_FILE} auth get-key client.${CINDER_CEPH_USER}) - # TODO(tdurakov): remove this escaping as https://github.com/ansible/ansible/issues/13862 fixed - secret=${secret//=/'\='} - $ANSIBLE all --become -f 5 -i "$WORKSPACE/inventory" -m shell -a "virsh secret-set-value --secret ${CINDER_CEPH_UUID} --base64 $secret" - $ANSIBLE all --become -f 5 -i "$WORKSPACE/inventory" -m file -a "path=/tmp/secret.xml state=absent" - -} diff --git a/gate/live_migration/hooks/nfs.sh b/gate/live_migration/hooks/nfs.sh deleted file mode 100755 index acadb36d6c..0000000000 --- a/gate/live_migration/hooks/nfs.sh +++ /dev/null @@ -1,50 +0,0 @@ -#!/bin/bash - -function nfs_setup { - if uses_debs; then - module=apt - elif is_fedora; then - module=yum - fi - $ANSIBLE all --become -f 5 -i "$WORKSPACE/inventory" -m $module \ - -a "name=nfs-common state=present" - $ANSIBLE primary --become -f 5 -i "$WORKSPACE/inventory" -m $module \ - -a "name=nfs-kernel-server state=present" - - $ANSIBLE primary --become -f 5 -i "$WORKSPACE/inventory" -m ini_file -a "dest=/etc/idmapd.conf section=Mapping option=Nobody-User value=nova" - - $ANSIBLE primary --become -f 5 -i "$WORKSPACE/inventory" -m ini_file -a "dest=/etc/idmapd.conf section=Mapping option=Nobody-Group value=nova" - - for SUBNODE in $SUBNODES ; do - $ANSIBLE primary --become -f 5 -i "$WORKSPACE/inventory" -m lineinfile -a "dest=/etc/exports line='/opt/stack/data/nova/instances $SUBNODE(rw,fsid=0,insecure,no_subtree_check,async,no_root_squash)'" - done - - $ANSIBLE primary --become -f 5 -i "$WORKSPACE/inventory" -m shell -a "exportfs -a" - $ANSIBLE primary --become -f 5 -i "$WORKSPACE/inventory" -m service -a "name=nfs-kernel-server state=restarted" - GetDistro - if [[ ! 
${DISTRO} =~ (xenial) ]]; then - $ANSIBLE primary --become -f 5 -i "$WORKSPACE/inventory" -m service -a "name=idmapd state=restarted" - fi - $ANSIBLE primary --become -f 5 -i "$WORKSPACE/inventory" -m shell -a "iptables -A INPUT -p tcp --dport 111 -j ACCEPT" - $ANSIBLE primary --become -f 5 -i "$WORKSPACE/inventory" -m shell -a "iptables -A INPUT -p udp --dport 111 -j ACCEPT" - $ANSIBLE primary --become -f 5 -i "$WORKSPACE/inventory" -m shell -a "iptables -A INPUT -p tcp --dport 2049 -j ACCEPT" - $ANSIBLE primary --become -f 5 -i "$WORKSPACE/inventory" -m shell -a "iptables -A INPUT -p udp --dport 2049 -j ACCEPT" - $ANSIBLE subnodes --become -f 5 -i "$WORKSPACE/inventory" -m shell -a "mount -t nfs4 -o proto\=tcp,port\=2049 $primary_node:/ /opt/stack/data/nova/instances/" -} - -function nfs_configure_tempest { - $ANSIBLE primary --become -f 5 -i "$WORKSPACE/inventory" -m ini_file -a "dest=$BASE/new/tempest/etc/tempest.conf section=compute-feature-enabled option=block_migration_for_live_migration value=False" -} - -function nfs_verify_setup { - $ANSIBLE subnodes --become -f 5 -i "$WORKSPACE/inventory" -m file -a "path=/opt/stack/data/nova/instances/test_file state=touch" - if [ ! -e '/opt/stack/data/nova/instances/test_file' ]; then - die $LINENO "NFS configuration failure" - fi -} - -function nfs_teardown { - #teardown nfs shared storage - $ANSIBLE subnodes --become -f 5 -i "$WORKSPACE/inventory" -m shell -a "umount -t nfs4 /opt/stack/data/nova/instances/" - $ANSIBLE primary --become -f 5 -i "$WORKSPACE/inventory" -m service -a "name=nfs-kernel-server state=stopped" -}
\ No newline at end of file diff --git a/gate/live_migration/hooks/run_tests.sh b/gate/live_migration/hooks/run_tests.sh deleted file mode 100755 index 331f2fa204..0000000000 --- a/gate/live_migration/hooks/run_tests.sh +++ /dev/null @@ -1,71 +0,0 @@ -#!/bin/bash -# Live migration dedicated ci job will be responsible for testing different -# environments based on underlying storage, used for ephemerals. -# This hook allows to inject logic of environment reconfiguration in ci job. -# Base scenario for this would be: -# -# 1. test with all local storage (use default for volumes) -# 2. test with NFS for root + ephemeral disks -# 3. test with Ceph for root + ephemeral disks -# 4. test with Ceph for volumes and root + ephemeral disk - -set -xe -cd $BASE/new/tempest - -source $BASE/new/devstack/functions -source $BASE/new/devstack/functions-common -source $BASE/new/devstack/lib/nova -source $WORKSPACE/devstack-gate/functions.sh -source $BASE/new/nova/gate/live_migration/hooks/utils.sh -source $BASE/new/nova/gate/live_migration/hooks/nfs.sh -source $BASE/new/nova/gate/live_migration/hooks/ceph.sh -primary_node=$(cat /etc/nodepool/primary_node_private) -SUBNODES=$(cat /etc/nodepool/sub_nodes_private) -SERVICE_HOST=$primary_node -STACK_USER=${STACK_USER:-stack} - -echo '1. test with all local storage (use default for volumes)' -echo 'NOTE: test_volume_backed_live_migration is skipped due to https://bugs.launchpad.net/nova/+bug/1524898' -run_tempest "block migration test" "^.*test_live_migration(?!.*(test_volume_backed_live_migration))" - -# TODO(mriedem): Run $BASE/new/nova/gate/test_evacuate.sh for local storage - -#all tests bellow this line use shared storage, need to update tempest.conf -echo 'disabling block_migration in tempest' -$ANSIBLE primary --become -f 5 -i "$WORKSPACE/inventory" -m ini_file -a "dest=$BASE/new/tempest/etc/tempest.conf section=compute-feature-enabled option=block_migration_for_live_migration value=False" - -echo '2. NFS testing is skipped due to setup failures with Ubuntu 16.04' -#echo '2. test with NFS for root + ephemeral disks' - -#nfs_setup -#nfs_configure_tempest -#nfs_verify_setup -#run_tempest "NFS shared storage test" "live_migration" -#nfs_teardown - -# The nova-grenade-multinode job also runs resize and cold migration tests -# so we check for a grenade-only variable. -if [[ -n "$GRENADE_NEW_BRANCH" ]]; then - echo '3. test cold migration and resize' - run_tempest "cold migration and resize test" "test_resize_server|test_cold_migration|test_revert_cold_migration" -else - echo '3. cold migration and resize is skipped for non-grenade jobs' -fi - -echo '4. test with Ceph for root + ephemeral disks' -# Discover and set variables for the OS version so the devstack-plugin-ceph -# scripts can find the correct repository to install the ceph packages. -GetOSVersion -USE_PYTHON3=${USE_PYTHON3:-True} -prepare_ceph -GLANCE_API_CONF=${GLANCE_API_CONF:-/etc/glance/glance-api.conf} -configure_and_start_glance - -configure_and_start_nova -run_tempest "Ceph nova&glance test" "^.*test_live_migration(?!.*(test_volume_backed_live_migration))" - -set +e -#echo '5. 
test with Ceph for volumes and root + ephemeral disk' - -#configure_and_start_cinder -#run_tempest "Ceph nova&glance&cinder test" "live_migration" diff --git a/gate/live_migration/hooks/utils.sh b/gate/live_migration/hooks/utils.sh deleted file mode 100755 index 9f98ca2e25..0000000000 --- a/gate/live_migration/hooks/utils.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/bash - -function run_tempest { - local message=$1 - local tempest_regex=$2 - sudo -H -u tempest tox -eall -- $tempest_regex --concurrency=$TEMPEST_CONCURRENCY - exitcode=$? - if [[ $exitcode -ne 0 ]]; then - die $LINENO "$message failure" - fi -} diff --git a/nova/compute/manager.py b/nova/compute/manager.py index cc4f8a58bf..5421bc62e5 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -1627,7 +1627,11 @@ class ComputeManager(manager.Manager): return [_decode(f) for f in injected_files] def _validate_instance_group_policy(self, context, instance, - scheduler_hints): + scheduler_hints=None): + + if CONF.workarounds.disable_group_policy_check_upcall: + return + # NOTE(russellb) Instance group policy is enforced by the scheduler. # However, there is a race condition with the enforcement of # the policy. Since more than one instance may be scheduled at the @@ -1636,29 +1640,63 @@ class ComputeManager(manager.Manager): # multiple instances with an affinity policy could end up on different # hosts. This is a validation step to make sure that starting the # instance here doesn't violate the policy. - group_hint = scheduler_hints.get('group') - if not group_hint: - return - - # The RequestSpec stores scheduler_hints as key=list pairs so we need - # to check the type on the value and pull the single entry out. The - # API request schema validates that the 'group' hint is a single value. - if isinstance(group_hint, list): - group_hint = group_hint[0] + if scheduler_hints is not None: + # only go through here if scheduler_hints is provided, even if it + # is empty. + group_hint = scheduler_hints.get('group') + if not group_hint: + return + else: + # The RequestSpec stores scheduler_hints as key=list pairs so + # we need to check the type on the value and pull the single + # entry out. The API request schema validates that + # the 'group' hint is a single value. 
+ if isinstance(group_hint, list): + group_hint = group_hint[0] + + group = objects.InstanceGroup.get_by_hint(context, group_hint) + else: + # TODO(ganso): a call to DB can be saved by adding request_spec + # to rpcapi payload of live_migration, pre_live_migration and + # check_can_live_migrate_destination + try: + group = objects.InstanceGroup.get_by_instance_uuid( + context, instance.uuid) + except exception.InstanceGroupNotFound: + return - @utils.synchronized(group_hint) - def _do_validation(context, instance, group_hint): - group = objects.InstanceGroup.get_by_hint(context, group_hint) + @utils.synchronized(group['uuid']) + def _do_validation(context, instance, group): if group.policy and 'anti-affinity' == group.policy: + + # instances on host instances_uuids = objects.InstanceList.get_uuids_by_host( context, self.host) ins_on_host = set(instances_uuids) + + # instance param is just for logging, the nodename obtained is + # not actually related to the instance at all + nodename = self._get_nodename(instance) + + # instances being migrated to host + migrations = ( + objects.MigrationList.get_in_progress_by_host_and_node( + context, self.host, nodename)) + migration_vm_uuids = set([mig['instance_uuid'] + for mig in migrations]) + + total_instances = migration_vm_uuids | ins_on_host + + # refresh group to get updated members within locked block + group = objects.InstanceGroup.get_by_uuid(context, + group['uuid']) members = set(group.members) # Determine the set of instance group members on this host # which are not the instance in question. This is used to # determine how many other members from the same anti-affinity # group can be on this host. - members_on_host = ins_on_host & members - set([instance.uuid]) + members_on_host = (total_instances & members - + set([instance.uuid])) rules = group.rules if rules and 'max_server_per_host' in rules: max_server = rules['max_server_per_host'] @@ -1670,6 +1708,12 @@ class ComputeManager(manager.Manager): raise exception.RescheduledException( instance_uuid=instance.uuid, reason=msg) + + # NOTE(ganso): The check for affinity below does not work and it + # can easily be violated because the lock happens in different + # compute hosts. + # The only fix seems to be a DB lock to perform the check whenever + # setting the host field to an instance. elif group.policy and 'affinity' == group.policy: group_hosts = group.get_hosts(exclude=[instance.uuid]) if group_hosts and self.host not in group_hosts: @@ -1678,8 +1722,7 @@ class ComputeManager(manager.Manager): instance_uuid=instance.uuid, reason=msg) - if not CONF.workarounds.disable_group_policy_check_upcall: - _do_validation(context, instance, group_hint) + _do_validation(context, instance, group) def _log_original_error(self, exc_info, instance_uuid): LOG.error('Error: %s', exc_info[1], instance_uuid=instance_uuid, @@ -5220,10 +5263,24 @@ class ComputeManager(manager.Manager): with self._error_out_instance_on_exception( context, instance, instance_state=instance_state),\ errors_out_migration_ctxt(migration): + self._send_prep_resize_notifications( context, instance, fields.NotificationPhase.START, instance_type) try: + scheduler_hints = self._get_scheduler_hints(filter_properties, + request_spec) + # Error out if this host cannot accept the new instance due + # to anti-affinity. At this point the migration is already + # in-progress, so this is the definitive moment to abort due to + # the policy violation. Also, exploding here is covered by the + # cleanup methods in except block. 
+ try: + self._validate_instance_group_policy(context, instance, + scheduler_hints) + except exception.RescheduledException as e: + raise exception.InstanceFaultRollback(inner_exception=e) + self._prep_resize(context, image, instance, instance_type, filter_properties, node, migration, request_spec, @@ -7694,6 +7751,20 @@ class ComputeManager(manager.Manager): :param limits: objects.SchedulerLimits object for this live migration. :returns: a LiveMigrateData object (hypervisor-dependent) """ + + # Error out if this host cannot accept the new instance due + # to anti-affinity. This check at this moment is not very accurate, as + # multiple requests may be happening concurrently and miss the lock, + # but when it works it provides a better user experience by failing + # earlier. Also, it should be safe to explode here, error becomes + # NoValidHost and instance status remains ACTIVE. + try: + self._validate_instance_group_policy(ctxt, instance) + except exception.RescheduledException as e: + msg = ("Failed to validate instance group policy " + "due to: {}".format(e)) + raise exception.MigrationPreCheckError(reason=msg) + src_compute_info = obj_base.obj_to_primitive( self._get_compute_info(ctxt, instance.host)) dst_compute_info = obj_base.obj_to_primitive( @@ -7836,6 +7907,13 @@ class ComputeManager(manager.Manager): """ LOG.debug('pre_live_migration data is %s', migrate_data) + # Error out if this host cannot accept the new instance due + # to anti-affinity. At this point the migration is already in-progress, + # so this is the definitive moment to abort due to the policy + # violation. Also, it should be safe to explode here. The instance + # status remains ACTIVE, migration status failed. + self._validate_instance_group_policy(context, instance) + migrate_data.old_vol_attachment_ids = {} bdms = objects.BlockDeviceMappingList.get_by_instance_uuid( context, instance.uuid) diff --git a/nova/conf/pci.py b/nova/conf/pci.py index b812b39676..b383d0a69f 100644 --- a/nova/conf/pci.py +++ b/nova/conf/pci.py @@ -116,7 +116,13 @@ Possible values: ``address`` PCI address of the device. Both traditional glob style and regular - expression syntax is supported. + expression syntax is supported. Please note that the address fields are + restricted to the following maximum values: + + * domain - 0xFFFF + * bus - 0xFF + * slot - 0x1F + * function - 0x7 ``devname`` Device name of the device (for e.g. interface name). 
Not all PCI devices diff --git a/nova/console/websocketproxy.py b/nova/console/websocketproxy.py index b8e9e99c38..d55ed7f538 100644 --- a/nova/console/websocketproxy.py +++ b/nova/console/websocketproxy.py @@ -19,6 +19,8 @@ Leverages websockify.py by Joel Martin ''' import copy +from http import HTTPStatus +import os import socket import sys @@ -288,6 +290,27 @@ class NovaProxyRequestHandler(websockify.ProxyRequestHandler): def socket(self, *args, **kwargs): return websockifyserver.WebSockifyServer.socket(*args, **kwargs) + def send_head(self): + # This code is copied from this example patch: + # https://bugs.python.org/issue32084#msg306545 + path = self.translate_path(self.path) + if os.path.isdir(path): + parts = urlparse.urlsplit(self.path) + if not parts.path.endswith('/'): + # redirect browser - doing basically what apache does + new_parts = (parts[0], parts[1], parts[2] + '/', + parts[3], parts[4]) + new_url = urlparse.urlunsplit(new_parts) + + # Browsers interpret "Location: //uri" as an absolute URI + # like "http://URI" + if new_url.startswith('//'): + self.send_error(HTTPStatus.BAD_REQUEST, + "URI must not start with //") + return None + + return super(NovaProxyRequestHandler, self).send_head() + class NovaWebSocketProxy(websockify.WebSocketProxy): def __init__(self, *args, **kwargs): diff --git a/nova/network/neutron.py b/nova/network/neutron.py index 4c882e7f96..f3d763044e 100644 --- a/nova/network/neutron.py +++ b/nova/network/neutron.py @@ -820,9 +820,15 @@ class API(base.Base): # TODO(arosen) Should optimize more to do direct query for security # group if len(security_groups) == 1 if len(security_groups): + # NOTE(slaweq): fields other than name and id aren't really needed + # so asking only about those fields will allow Neutron to not + # prepare list of rules for each found security group. That may + # speed processing of this request a lot in case when tenant has + # got many security groups + sg_fields = ['id', 'name'] search_opts = {'tenant_id': instance.project_id} user_security_groups = neutron.list_security_groups( - **search_opts).get('security_groups') + fields=sg_fields, **search_opts).get('security_groups') for security_group in security_groups: name_match = None diff --git a/nova/pci/manager.py b/nova/pci/manager.py index 05930b0beb..17792dded4 100644 --- a/nova/pci/manager.py +++ b/nova/pci/manager.py @@ -19,6 +19,7 @@ import collections from oslo_config import cfg from oslo_log import log as logging from oslo_serialization import jsonutils +import six from nova import exception from nova import objects @@ -117,8 +118,42 @@ class PciDevTracker(object): devices = [] for dev in jsonutils.loads(devices_json): - if self.dev_filter.device_assignable(dev): - devices.append(dev) + try: + if self.dev_filter.device_assignable(dev): + devices.append(dev) + except exception.PciConfigInvalidWhitelist as e: + # The raised exception is misleading as the problem is not with + # the whitelist config but with the host PCI device reported by + # libvirt. The code that matches the host PCI device to the + # withelist spec reuses the WhitelistPciAddress object to parse + # the host PCI device address. That parsing can fail if the + # PCI address has a 32 bit domain. But this should not prevent + # processing the rest of the devices. So we simply skip this + # device and continue. + # Please note that this except block does not ignore the + # invalid whitelist configuration. The whitelist config has + # already been parsed or rejected in case it was invalid. 
At + # this point the self.dev_filter representes the parsed and + # validated whitelist config. + LOG.debug( + 'Skipping PCI device %s reported by the hypervisor: %s', + {k: v for k, v in dev.items() + if k in ['address', 'parent_addr']}, + # NOTE(gibi): this is ugly but the device_assignable() call + # uses the PhysicalPciAddress class to parse the PCI + # addresses and that class reuses the code from + # PciAddressSpec that was originally designed to parse + # whitelist spec. Hence the raised exception talks about + # whitelist config. This is misleading as in our case the + # PCI address that we failed to parse came from the + # hypervisor. + # TODO(gibi): refactor the false abstraction to make the + # code reuse clean from the false assumption that we only + # parse whitelist config with + # devspec.PciAddressSpec._set_pci_dev_info() + six.text_type(e).replace( + 'Invalid PCI devices Whitelist config:', 'The')) + self._set_hvdevs(devices) @staticmethod diff --git a/nova/tests/unit/compute/test_compute_mgr.py b/nova/tests/unit/compute/test_compute_mgr.py index 49d081c301..7d777f7725 100644 --- a/nova/tests/unit/compute/test_compute_mgr.py +++ b/nova/tests/unit/compute/test_compute_mgr.py @@ -3254,12 +3254,16 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase, CONF.host, instance.uuid, graceful_exit=False) return result + @mock.patch('nova.objects.InstanceGroup.get_by_instance_uuid', mock.Mock( + side_effect=exception.InstanceGroupNotFound(group_uuid=''))) def test_check_can_live_migrate_destination_success(self): self.useFixture(std_fixtures.MonkeyPatch( 'nova.network.neutron.API.supports_port_binding_extension', lambda *args: True)) self._test_check_can_live_migrate_destination() + @mock.patch('nova.objects.InstanceGroup.get_by_instance_uuid', mock.Mock( + side_effect=exception.InstanceGroupNotFound(group_uuid=''))) def test_check_can_live_migrate_destination_fail(self): self.useFixture(std_fixtures.MonkeyPatch( 'nova.network.neutron.API.supports_port_binding_extension', @@ -3269,7 +3273,9 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase, self._test_check_can_live_migrate_destination, do_raise=True) - def test_check_can_live_migrate_destination_contins_vifs(self): + @mock.patch('nova.objects.InstanceGroup.get_by_instance_uuid', mock.Mock( + side_effect=exception.InstanceGroupNotFound(group_uuid=''))) + def test_check_can_live_migrate_destination_contains_vifs(self): self.useFixture(std_fixtures.MonkeyPatch( 'nova.network.neutron.API.supports_port_binding_extension', lambda *args: True)) @@ -3277,6 +3283,8 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase, self.assertIn('vifs', migrate_data) self.assertIsNotNone(migrate_data.vifs) + @mock.patch('nova.objects.InstanceGroup.get_by_instance_uuid', mock.Mock( + side_effect=exception.InstanceGroupNotFound(group_uuid=''))) def test_check_can_live_migrate_destination_no_binding_extended(self): self.useFixture(std_fixtures.MonkeyPatch( 'nova.network.neutron.API.supports_port_binding_extension', @@ -3284,18 +3292,40 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase, migrate_data = self._test_check_can_live_migrate_destination() self.assertNotIn('vifs', migrate_data) + @mock.patch('nova.objects.InstanceGroup.get_by_instance_uuid', mock.Mock( + side_effect=exception.InstanceGroupNotFound(group_uuid=''))) def test_check_can_live_migrate_destination_src_numa_lm_false(self): self.useFixture(std_fixtures.MonkeyPatch( 'nova.network.neutron.API.supports_port_binding_extension', lambda *args: True)) 
self._test_check_can_live_migrate_destination(src_numa_lm=False) + @mock.patch('nova.objects.InstanceGroup.get_by_instance_uuid', mock.Mock( + side_effect=exception.InstanceGroupNotFound(group_uuid=''))) def test_check_can_live_migrate_destination_src_numa_lm_true(self): self.useFixture(std_fixtures.MonkeyPatch( 'nova.network.neutron.API.supports_port_binding_extension', lambda *args: True)) self._test_check_can_live_migrate_destination(src_numa_lm=True) + @mock.patch.object(compute_utils, 'add_instance_fault_from_exc') + def test_check_can_live_migrate_destination_fail_group_policy( + self, mock_fail_db): + + instance = fake_instance.fake_instance_obj( + self.context, host=self.compute.host, vm_state=vm_states.ACTIVE, + node='fake-node') + + ex = exception.RescheduledException( + instance_uuid=instance.uuid, reason="policy violated") + + with mock.patch.object(self.compute, '_validate_instance_group_policy', + side_effect=ex): + self.assertRaises( + exception.MigrationPreCheckError, + self.compute.check_can_live_migrate_destination, + self.context, instance, None, None, None, None) + def test_dest_can_numa_live_migrate(self): positive_dest_check_data = objects.LibvirtLiveMigrateData( dst_supports_numa_live_migration=True) @@ -7347,7 +7377,8 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase): def test_validate_policy_honors_workaround_disabled(self, mock_get): instance = objects.Instance(uuid=uuids.instance) hints = {'group': 'foo'} - mock_get.return_value = objects.InstanceGroup(policy=None) + mock_get.return_value = objects.InstanceGroup(policy=None, + uuid=uuids.group) self.compute._validate_instance_group_policy(self.context, instance, hints) mock_get.assert_called_once_with(self.context, 'foo') @@ -7373,10 +7404,14 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase): instance, hints) mock_get.assert_called_once_with(self.context, uuids.group_hint) + @mock.patch('nova.objects.InstanceGroup.get_by_uuid') @mock.patch('nova.objects.InstanceList.get_uuids_by_host') @mock.patch('nova.objects.InstanceGroup.get_by_hint') - def test_validate_instance_group_policy_with_rules(self, mock_get_by_hint, - mock_get_by_host): + @mock.patch.object(fake_driver.FakeDriver, 'get_available_nodes') + @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node') + def test_validate_instance_group_policy_with_rules( + self, migration_list, nodes, mock_get_by_hint, mock_get_by_host, + mock_get_by_uuid): # Create 2 instance in same host, inst2 created before inst1 instance = objects.Instance(uuid=uuids.inst1) hints = {'group': [uuids.group_hint]} @@ -7385,17 +7420,26 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase): mock_get_by_host.return_value = existing_insts # if group policy rules limit to 1, raise RescheduledException - mock_get_by_hint.return_value = objects.InstanceGroup( + group = objects.InstanceGroup( policy='anti-affinity', rules={'max_server_per_host': '1'}, - hosts=['host1'], members=members_uuids) + hosts=['host1'], members=members_uuids, + uuid=uuids.group) + mock_get_by_hint.return_value = group + mock_get_by_uuid.return_value = group + nodes.return_value = ['nodename'] + migration_list.return_value = [objects.Migration( + uuid=uuids.migration, instance_uuid=uuids.instance)] self.assertRaises(exception.RescheduledException, self.compute._validate_instance_group_policy, self.context, instance, hints) # if group policy rules limit change to 2, validate OK - mock_get_by_hint.return_value = objects.InstanceGroup( + group2 = objects.InstanceGroup( 
policy='anti-affinity', rules={'max_server_per_host': 2}, - hosts=['host1'], members=members_uuids) + hosts=['host1'], members=members_uuids, + uuid=uuids.group) + mock_get_by_hint.return_value = group2 + mock_get_by_uuid.return_value = group2 self.compute._validate_instance_group_policy(self.context, instance, hints) @@ -8925,6 +8969,8 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase, manager.ComputeManager() mock_executor.assert_called_once_with() + @mock.patch('nova.objects.InstanceGroup.get_by_instance_uuid', mock.Mock( + side_effect=exception.InstanceGroupNotFound(group_uuid=''))) def test_pre_live_migration_cinder_v3_api(self): # This tests that pre_live_migration with a bdm with an # attachment_id, will create a new attachment and update @@ -9002,6 +9048,8 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase, _test() + @mock.patch('nova.objects.InstanceGroup.get_by_instance_uuid', mock.Mock( + side_effect=exception.InstanceGroupNotFound(group_uuid=''))) def test_pre_live_migration_exception_cinder_v3_api(self): # The instance in this test has 2 attachments. The second attach_create # will throw an exception. This will test that the first attachment @@ -9071,6 +9119,8 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase, self.assertGreater(len(m.mock_calls), 0) _test() + @mock.patch('nova.objects.InstanceGroup.get_by_instance_uuid', mock.Mock( + side_effect=exception.InstanceGroupNotFound(group_uuid=''))) def test_pre_live_migration_exceptions_delete_attachments(self): # The instance in this test has 2 attachments. The call to # driver.pre_live_migration will raise an exception. This will test @@ -10464,6 +10514,54 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase, # (_error_out_instance_on_exception will set to ACTIVE by default). self.assertEqual(vm_states.STOPPED, instance.vm_state) + @mock.patch('nova.compute.utils.notify_usage_exists') + @mock.patch('nova.compute.manager.ComputeManager.' + '_notify_about_instance_usage') + @mock.patch('nova.compute.utils.notify_about_resize_prep_instance') + @mock.patch('nova.objects.Instance.save') + @mock.patch('nova.compute.manager.ComputeManager._revert_allocation') + @mock.patch('nova.compute.manager.ComputeManager.' 
+ '_reschedule_resize_or_reraise') + @mock.patch('nova.compute.utils.add_instance_fault_from_exc') + # this is almost copy-paste from test_prep_resize_fails_rollback + def test_prep_resize_fails_group_validation( + self, add_instance_fault_from_exc, _reschedule_resize_or_reraise, + _revert_allocation, mock_instance_save, + notify_about_resize_prep_instance, _notify_about_instance_usage, + notify_usage_exists): + """Tests that if _validate_instance_group_policy raises + InstanceFaultRollback, the instance.vm_state is reset properly in + _error_out_instance_on_exception + """ + instance = fake_instance.fake_instance_obj( + self.context, host=self.compute.host, vm_state=vm_states.STOPPED, + node='fake-node', expected_attrs=['system_metadata', 'flavor']) + migration = mock.MagicMock(spec='nova.objects.Migration') + request_spec = mock.MagicMock(spec='nova.objects.RequestSpec') + ex = exception.RescheduledException( + instance_uuid=instance.uuid, reason="policy violated") + ex2 = exception.InstanceFaultRollback( + inner_exception=ex) + + def fake_reschedule_resize_or_reraise(*args, **kwargs): + raise ex2 + + _reschedule_resize_or_reraise.side_effect = ( + fake_reschedule_resize_or_reraise) + + with mock.patch.object(self.compute, '_validate_instance_group_policy', + side_effect=ex): + self.assertRaises( + # _error_out_instance_on_exception should reraise the + # RescheduledException inside InstanceFaultRollback. + exception.RescheduledException, self.compute.prep_resize, + self.context, instance.image_meta, instance, instance.flavor, + request_spec, filter_properties={}, node=instance.node, + clean_shutdown=True, migration=migration, host_list=[]) + # The instance.vm_state should remain unchanged + # (_error_out_instance_on_exception will set to ACTIVE by default). + self.assertEqual(vm_states.STOPPED, instance.vm_state) + @mock.patch('nova.compute.rpcapi.ComputeAPI.resize_instance') @mock.patch('nova.compute.resource_tracker.ResourceTracker.resize_claim') @mock.patch('nova.objects.Instance.save') diff --git a/nova/tests/unit/console/test_websocketproxy.py b/nova/tests/unit/console/test_websocketproxy.py index 3c234df891..4ed2d2d4dc 100644 --- a/nova/tests/unit/console/test_websocketproxy.py +++ b/nova/tests/unit/console/test_websocketproxy.py @@ -626,6 +626,40 @@ class NovaProxyRequestHandlerTestCase(test.NoDBTestCase): self.wh.server.top_new_client(conn, address) self.assertIsNone(self.wh._compute_rpcapi) + def test_reject_open_redirect(self): + # This will test the behavior when an attempt is made to cause an open + # redirect. It should be rejected. + mock_req = mock.MagicMock() + mock_req.makefile().readline.side_effect = [ + b'GET //example.com/%2F.. HTTP/1.1\r\n', + b'' + ] + + # Collect the response data to verify at the end. The + # SimpleHTTPRequestHandler writes the response data by calling the + # request socket sendall() method. + self.data = b'' + + def fake_sendall(data): + self.data += data + + mock_req.sendall.side_effect = fake_sendall + + client_addr = ('8.8.8.8', 54321) + mock_server = mock.MagicMock() + # This specifies that the server will be able to handle requests other + # than only websockets. + mock_server.only_upgrade = False + + # Constructing a handler will process the mock_req request passed in. + websocketproxy.NovaProxyRequestHandler( + mock_req, client_addr, mock_server) + + # Verify no redirect happens and instead a 400 Bad Request is returned. 
+ self.data = self.data.decode() + self.assertIn('Error code: 400', self.data) + self.assertIn('Message: URI must not start with //', self.data) + @mock.patch('websockify.websocketproxy.select_ssl_version') def test_ssl_min_version_is_not_set(self, mock_select_ssl): websocketproxy.NovaWebSocketProxy() diff --git a/nova/tests/unit/pci/test_manager.py b/nova/tests/unit/pci/test_manager.py index fe7d918e27..c1b26d9726 100644 --- a/nova/tests/unit/pci/test_manager.py +++ b/nova/tests/unit/pci/test_manager.py @@ -236,6 +236,40 @@ class PciDevTrackerTestCase(test.NoDBTestCase): tracker.update_devices_from_hypervisor_resources(fake_pci_devs_json) self.assertEqual(2, len(tracker.pci_devs)) + @mock.patch("nova.pci.manager.LOG.debug") + def test_update_devices_from_hypervisor_resources_32bit_domain( + self, mock_debug): + self.flags( + group='pci', + passthrough_whitelist=[ + '{"product_id":"2032", "vendor_id":"8086"}']) + # There are systems where 32 bit PCI domain is used. See bug 1897528 + # for example. While nova (and qemu) does not support assigning such + # devices but the existence of such device in the system should not + # lead to an error. + fake_pci = { + 'compute_node_id': 1, + 'address': '10000:00:02.0', + 'product_id': '2032', + 'vendor_id': '8086', + 'request_id': None, + 'status': fields.PciDeviceStatus.AVAILABLE, + 'dev_type': fields.PciDeviceType.STANDARD, + 'parent_addr': None, + 'numa_node': 0} + + fake_pci_devs = [fake_pci] + fake_pci_devs_json = jsonutils.dumps(fake_pci_devs) + tracker = manager.PciDevTracker(self.fake_context) + # We expect that the device with 32bit PCI domain is ignored + tracker.update_devices_from_hypervisor_resources(fake_pci_devs_json) + self.assertEqual(0, len(tracker.pci_devs)) + mock_debug.assert_called_once_with( + 'Skipping PCI device %s reported by the hypervisor: %s', + {'address': '10000:00:02.0', 'parent_addr': None}, + 'The property domain (10000) is greater than the maximum ' + 'allowable value (FFFF).') + def test_set_hvdev_new_dev(self): fake_pci_3 = dict(fake_pci, address='0000:00:00.4', vendor_id='v2') fake_pci_devs = [copy.deepcopy(fake_pci), copy.deepcopy(fake_pci_1), diff --git a/nova/tests/unit/virt/disk/vfs/fakeguestfs.py b/nova/tests/unit/virt/disk/vfs/fakeguestfs.py index 96c97edf79..168400e956 100644 --- a/nova/tests/unit/virt/disk/vfs/fakeguestfs.py +++ b/nova/tests/unit/virt/disk/vfs/fakeguestfs.py @@ -109,7 +109,7 @@ class GuestFS(object): "mode": 0o700 } - return self.files[path]["content"] + return bytes(self.files[path]["content"].encode()) def write(self, path, content): if path not in self.files: diff --git a/nova/virt/disk/api.py b/nova/virt/disk/api.py index c79ae77371..83d3f13ed2 100644 --- a/nova/virt/disk/api.py +++ b/nova/virt/disk/api.py @@ -615,8 +615,8 @@ def _set_passwd(username, admin_passwd, passwd_data, shadow_data): :param username: the username :param admin_passwd: the admin password - :param passwd_data: path to the passwd file - :param shadow_data: path to the shadow password file + :param passwd_data: Data from the passwd file decoded as a string + :param shadow_data: Data from the shadow file decoded as a string :returns: nothing :raises: exception.NovaException(), IOError() diff --git a/nova/virt/disk/vfs/guestfs.py b/nova/virt/disk/vfs/guestfs.py index db260d9a4a..ce5f48794a 100644 --- a/nova/virt/disk/vfs/guestfs.py +++ b/nova/virt/disk/vfs/guestfs.py @@ -308,7 +308,14 @@ class VFSGuestFS(vfs.VFS): def read_file(self, path): LOG.debug("Read file path=%s", path) path = self._canonicalize_path(path) - 
return self.handle.read_file(path) + data = self.handle.read_file(path) + # NOTE(lyarwood): libguestfs v1.41.1 (0ee02e0117527) switched the + # return type of read_file from string to bytes and as such we need to + # handle both here, decoding and returning a string if bytes is + # provided. https://bugzilla.redhat.com/show_bug.cgi?id=1661871 + if isinstance(data, bytes): + return data.decode() + return data def has_file(self, path): LOG.debug("Has file path=%s", path) diff --git a/playbooks/legacy/nova-grenade-multinode/post.yaml b/playbooks/legacy/nova-grenade-multinode/post.yaml deleted file mode 100644 index e07f5510ae..0000000000 --- a/playbooks/legacy/nova-grenade-multinode/post.yaml +++ /dev/null @@ -1,15 +0,0 @@ -- hosts: primary - tasks: - - - name: Copy files from {{ ansible_user_dir }}/workspace/ on node - synchronize: - src: '{{ ansible_user_dir }}/workspace/' - dest: '{{ zuul.executor.log_root }}' - mode: pull - copy_links: true - verify_host: true - rsync_opts: - - --include=/logs/** - - --include=*/ - - --exclude=* - - --prune-empty-dirs diff --git a/playbooks/legacy/nova-grenade-multinode/run.yaml b/playbooks/legacy/nova-grenade-multinode/run.yaml deleted file mode 100644 index 18f7c753eb..0000000000 --- a/playbooks/legacy/nova-grenade-multinode/run.yaml +++ /dev/null @@ -1,65 +0,0 @@ -- hosts: primary - name: nova-grenade-multinode - tasks: - - - name: Ensure legacy workspace directory - file: - path: '{{ ansible_user_dir }}/workspace' - state: directory - - - shell: - cmd: | - set -e - set -x - cat > clonemap.yaml << EOF - clonemap: - - name: openstack/devstack-gate - dest: devstack-gate - EOF - /usr/zuul-env/bin/zuul-cloner -m clonemap.yaml --cache-dir /opt/git \ - https://opendev.org \ - openstack/devstack-gate - executable: /bin/bash - chdir: '{{ ansible_user_dir }}/workspace' - environment: '{{ zuul | zuul_legacy_vars }}' - - - shell: - cmd: | - set -e - set -x - export PROJECTS="openstack/grenade $PROJECTS" - export PYTHONUNBUFFERED=true - export DEVSTACK_GATE_CONFIGDRIVE=0 - export DEVSTACK_GATE_NEUTRON=1 - # NOTE(mriedem): Run tempest smoke tests specific to compute on the - # new side of the grenade environment. The post-test hook script will - # run non-smoke migration tests in a local/block and shared/ceph - # setup. Note that grenade hard-codes "tox -esmoke" for tempest on - # the old side so the regex is not appied there. - export DEVSTACK_GATE_TEMPEST=1 - export DEVSTACK_GATE_TEMPEST_REGEX="tempest\.(api\.compute|scenario)\..*smoke.*" - export DEVSTACK_GATE_GRENADE=pullup - export DEVSTACK_GATE_USE_PYTHON3=True - # By default grenade runs only smoke tests so we need to set - # RUN_SMOKE to False in order to run live migration tests using - # grenade - export DEVSTACK_LOCAL_CONFIG="RUN_SMOKE=False" - # LIVE_MIGRATE_BACK_AND_FORTH will tell Tempest to run a live - # migration of the same instance to one compute node and then back - # to the other, which is mostly only interesting for grenade since - # we have mixed level computes. 
- export DEVSTACK_LOCAL_CONFIG+=$'\n'"LIVE_MIGRATE_BACK_AND_FORTH=True" - export BRANCH_OVERRIDE=default - export DEVSTACK_GATE_TOPOLOGY="multinode" - if [ "$BRANCH_OVERRIDE" != "default" ] ; then - export OVERRIDE_ZUUL_BRANCH=$BRANCH_OVERRIDE - fi - function post_test_hook { - /opt/stack/new/nova/gate/live_migration/hooks/run_tests.sh - } - export -f post_test_hook - cp devstack-gate/devstack-vm-gate-wrap.sh ./safe-devstack-vm-gate-wrap.sh - ./safe-devstack-vm-gate-wrap.sh - executable: /bin/bash - chdir: '{{ ansible_user_dir }}/workspace' - environment: '{{ zuul | zuul_legacy_vars }}' diff --git a/playbooks/legacy/nova-live-migration/post.yaml b/playbooks/legacy/nova-live-migration/post.yaml deleted file mode 100644 index e07f5510ae..0000000000 --- a/playbooks/legacy/nova-live-migration/post.yaml +++ /dev/null @@ -1,15 +0,0 @@ -- hosts: primary - tasks: - - - name: Copy files from {{ ansible_user_dir }}/workspace/ on node - synchronize: - src: '{{ ansible_user_dir }}/workspace/' - dest: '{{ zuul.executor.log_root }}' - mode: pull - copy_links: true - verify_host: true - rsync_opts: - - --include=/logs/** - - --include=*/ - - --exclude=* - - --prune-empty-dirs diff --git a/playbooks/legacy/nova-live-migration/run.yaml b/playbooks/legacy/nova-live-migration/run.yaml deleted file mode 100644 index ef8853135b..0000000000 --- a/playbooks/legacy/nova-live-migration/run.yaml +++ /dev/null @@ -1,60 +0,0 @@ -- hosts: primary - name: nova-live-migration - tasks: - - - name: Ensure legacy workspace directory - file: - path: '{{ ansible_user_dir }}/workspace' - state: directory - - - shell: - cmd: | - set -e - set -x - cat > clonemap.yaml << EOF - clonemap: - - name: openstack/devstack-gate - dest: devstack-gate - EOF - /usr/zuul-env/bin/zuul-cloner -m clonemap.yaml --cache-dir /opt/git \ - https://opendev.org \ - openstack/devstack-gate - executable: /bin/bash - chdir: '{{ ansible_user_dir }}/workspace' - environment: '{{ zuul | zuul_legacy_vars }}' - - - name: Configure devstack - shell: - # Force config drive. 
- cmd: | - set -e - set -x - cat << 'EOF' >>"/tmp/dg-local.conf" - [[local|localrc]] - FORCE_CONFIG_DRIVE=True - - EOF - executable: /bin/bash - chdir: '{{ ansible_user_dir }}/workspace' - environment: '{{ zuul | zuul_legacy_vars }}' - - - shell: - cmd: | - set -e - set -x - export PYTHONUNBUFFERED=true - export DEVSTACK_GATE_CONFIGDRIVE=0 - export DEVSTACK_GATE_TEMPEST=1 - export DEVSTACK_GATE_TEMPEST_NOTESTS=1 - export DEVSTACK_GATE_TOPOLOGY="multinode" - export DEVSTACK_GATE_USE_PYTHON3=True - function post_test_hook { - /opt/stack/new/nova/gate/live_migration/hooks/run_tests.sh - $BASE/new/nova/gate/test_evacuate.sh - } - export -f post_test_hook - cp devstack-gate/devstack-vm-gate-wrap.sh ./safe-devstack-vm-gate-wrap.sh - ./safe-devstack-vm-gate-wrap.sh - executable: /bin/bash - chdir: '{{ ansible_user_dir }}/workspace' - environment: '{{ zuul | zuul_legacy_vars }}' diff --git a/playbooks/nova-evacuate/run.yaml b/playbooks/nova-evacuate/run.yaml new file mode 100644 index 0000000000..35e330a6de --- /dev/null +++ b/playbooks/nova-evacuate/run.yaml @@ -0,0 +1,8 @@ +--- +- hosts: all + roles: + - orchestrate-devstack + +- hosts: controller + roles: + - run-evacuate-hook diff --git a/playbooks/nova-live-migration/post-run.yaml b/playbooks/nova-live-migration/post-run.yaml new file mode 100644 index 0000000000..845a1b15b2 --- /dev/null +++ b/playbooks/nova-live-migration/post-run.yaml @@ -0,0 +1,10 @@ +--- +- hosts: tempest + become: true + roles: + - role: fetch-subunit-output + zuul_work_dir: '{{ devstack_base_dir }}/tempest' + - role: process-stackviz +- hosts: controller + roles: + - run-evacuate-hook diff --git a/releasenotes/notes/bug-1821755-7bd03319e34b6b10.yaml b/releasenotes/notes/bug-1821755-7bd03319e34b6b10.yaml new file mode 100644 index 0000000000..4c6135311b --- /dev/null +++ b/releasenotes/notes/bug-1821755-7bd03319e34b6b10.yaml @@ -0,0 +1,11 @@ +--- +fixes: + - | + Improved detection of anti-affinity policy violation when performing live + and cold migrations. Most of the violations caused by race conditions due + to performing concurrent live or cold migrations should now be addressed + by extra checks in the compute service. Upon detection, cold migration + operations are automatically rescheduled, while live migrations have two + checks and will be rescheduled if detected by the first one, otherwise the + live migration will fail cleanly and revert the instance state back to its + previous value. diff --git a/releasenotes/notes/console-proxy-reject-open-redirect-4ac0a7895acca7eb.yaml b/releasenotes/notes/console-proxy-reject-open-redirect-4ac0a7895acca7eb.yaml new file mode 100644 index 0000000000..ce05b9a867 --- /dev/null +++ b/releasenotes/notes/console-proxy-reject-open-redirect-4ac0a7895acca7eb.yaml @@ -0,0 +1,19 @@ +--- +security: + - | + A vulnerability in the console proxies (novnc, serial, spice) that allowed + open redirection has been `patched`_. The novnc, serial, and spice console + proxies are implemented as websockify servers and the request handler + inherits from the python standard SimpleHTTPRequestHandler. There is a + `known issue`_ in the SimpleHTTPRequestHandler which allows open redirects + by way of URLs in the following format:: + + http://vncproxy.my.domain.com//example.com/%2F.. + + which if visited, will redirect a user to example.com. + + The novnc, serial, and spice console proxies will now reject requests that + pass a redirection URL beginning with "//" with a 400 Bad Request. + + .. 
_patched: https://bugs.launchpad.net/nova/+bug/1927677 + .. _known issue: https://bugs.python.org/issue32084 diff --git a/roles/run-evacuate-hook/README.rst b/roles/run-evacuate-hook/README.rst new file mode 100644 index 0000000000..e423455aee --- /dev/null +++ b/roles/run-evacuate-hook/README.rst @@ -0,0 +1 @@ +Run Nova evacuation tests against a multinode environment. diff --git a/roles/run-evacuate-hook/files/setup_evacuate_resources.sh b/roles/run-evacuate-hook/files/setup_evacuate_resources.sh new file mode 100755 index 0000000000..c8c385d7ff --- /dev/null +++ b/roles/run-evacuate-hook/files/setup_evacuate_resources.sh @@ -0,0 +1,34 @@ +#!/bin/bash +source /opt/stack/devstack/openrc admin +set -x +set -e + +image_id=$(openstack image list -f value -c ID | awk 'NR==1{print $1}') +flavor_id=$(openstack flavor list -f value -c ID | awk 'NR==1{print $1}') +network_id=$(openstack network list --no-share -f value -c ID | awk 'NR==1{print $1}') + +echo "Creating ephemeral test server on subnode" +openstack --os-compute-api-version 2.74 server create --image ${image_id} --flavor ${flavor_id} \ +--nic net-id=${network_id} --host $SUBNODE_HOSTNAME --wait evacuate-test + +# TODO(lyarwood) Use osc to launch the bfv volume +echo "Creating boot from volume test server on subnode" +nova --os-compute-api-version 2.74 boot --flavor ${flavor_id} --poll \ +--block-device id=${image_id},source=image,dest=volume,size=1,bootindex=0,shutdown=remove \ +--nic net-id=${network_id} --host ${SUBNODE_HOSTNAME} evacuate-bfv-test + +echo "Forcing down the subnode so we can evacuate from it" +openstack --os-compute-api-version 2.11 compute service set --down ${SUBNODE_HOSTNAME} nova-compute + +count=0 +status=$(openstack compute service list --host ${SUBNODE_HOSTNAME} --service nova-compute -f value -c State) +while [ "${status}" != "down" ] +do + sleep 1 + count=$((count+1)) + if [ ${count} -eq 30 ]; then + echo "Timed out waiting for subnode compute service to be marked as down" + exit 5 + fi + status=$(openstack compute service list --host ${SUBNODE_HOSTNAME} --service nova-compute -f value -c State) +done diff --git a/roles/run-evacuate-hook/files/test_evacuate.sh b/roles/run-evacuate-hook/files/test_evacuate.sh new file mode 100755 index 0000000000..bdf8d92441 --- /dev/null +++ b/roles/run-evacuate-hook/files/test_evacuate.sh @@ -0,0 +1,55 @@ +#!/bin/bash +# Source tempest to determine the build timeout configuration. +source /opt/stack/devstack/lib/tempest +source /opt/stack/devstack/openrc admin +set -x +set -e + +# Wait for the controller compute service to be enabled. +count=0 +status=$(openstack compute service list --host ${CONTROLLER_HOSTNAME} --service nova-compute -f value -c Status) +while [ "${status}" != "enabled" ] +do + sleep 1 + count=$((count+1)) + if [ ${count} -eq 30 ]; then + echo "Timed out waiting for controller compute service to be enabled" + exit 5 + fi + status=$(openstack compute service list --host ${CONTROLLER_HOSTNAME} --service nova-compute -f value -c Status) +done + +function evacuate_and_wait_for_active() { + local server="$1" + + nova evacuate ${server} + # Wait for the instance to go into ACTIVE state from the evacuate. 
+ count=0 + status=$(openstack server show ${server} -f value -c status) + while [ "${status}" != "ACTIVE" ] + do + sleep 1 + count=$((count+1)) + if [ ${count} -eq ${BUILD_TIMEOUT} ]; then + echo "Timed out waiting for server ${server} to go to ACTIVE status" + exit 6 + fi + status=$(openstack server show ${server} -f value -c status) + done +} + +evacuate_and_wait_for_active evacuate-test +evacuate_and_wait_for_active evacuate-bfv-test + +# Make sure the servers moved. +for server in evacuate-test evacuate-bfv-test; do + host=$(openstack server show ${server} -f value -c OS-EXT-SRV-ATTR:host) + if [[ ${host} != ${CONTROLLER_HOSTNAME} ]]; then + echo "Unexpected host ${host} for server ${server} after evacuate." + exit 7 + fi +done + +# Cleanup test servers +openstack server delete --wait evacuate-test +openstack server delete --wait evacuate-bfv-test diff --git a/roles/run-evacuate-hook/files/test_negative_evacuate.sh b/roles/run-evacuate-hook/files/test_negative_evacuate.sh new file mode 100755 index 0000000000..b1f5f7a4af --- /dev/null +++ b/roles/run-evacuate-hook/files/test_negative_evacuate.sh @@ -0,0 +1,37 @@ +#!/bin/bash +# Source tempest to determine the build timeout configuration. +source /opt/stack/devstack/lib/tempest +source /opt/stack/devstack/openrc admin +set -x +set -e + +# Now force the evacuation to the controller; we have to force to bypass the +# scheduler since we killed libvirtd which will trigger the libvirt compute +# driver to auto-disable the nova-compute service and then the ComputeFilter +# would filter out this host and we'd get NoValidHost. Normally forcing a host +# during evacuate and bypassing the scheduler is a very bad idea, but we're +# doing a negative test here. + +function evacuate_and_wait_for_error() { + local server="$1" + + echo "Forcing evacuate of ${server} to local host" + # TODO(mriedem): Use OSC when it supports evacuate. + nova --os-compute-api-version "2.67" evacuate --force ${server} ${CONTROLLER_HOSTNAME} + # Wait for the instance to go into ERROR state from the failed evacuate. 
+ count=0 + status=$(openstack server show ${server} -f value -c status) + while [ "${status}" != "ERROR" ] + do + sleep 1 + count=$((count+1)) + if [ ${count} -eq ${BUILD_TIMEOUT} ]; then + echo "Timed out waiting for server ${server} to go to ERROR status" + exit 4 + fi + status=$(openstack server show ${server} -f value -c status) + done +} + +evacuate_and_wait_for_error evacuate-test +evacuate_and_wait_for_error evacuate-bfv-test diff --git a/roles/run-evacuate-hook/tasks/main.yaml b/roles/run-evacuate-hook/tasks/main.yaml new file mode 100644 index 0000000000..f6c80bcb6b --- /dev/null +++ b/roles/run-evacuate-hook/tasks/main.yaml @@ -0,0 +1,64 @@ +- name: Setup resources and mark the subnode as forced down + become: true + become_user: stack + shell: "/opt/stack/nova/roles/run-evacuate-hook/files/setup_evacuate_resources.sh" + environment: + SUBNODE_HOSTNAME: "{{ hostvars['compute1']['ansible_hostname'] }}" + +- name: Fence subnode by stopping q-agt and n-cpu + delegate_to: compute1 + become: true + systemd: + name: "{{ item }}" + state: stopped + with_items: + - devstack@q-agt + - devstack@n-cpu + +- name: Register running domains on subnode + delegate_to: compute1 + become: true + virt: + command: list_vms + state: running + register: subnode_vms + +- name: Destroy running domains on subnode + delegate_to: compute1 + become: true + virt: + name: "{{ item }}" + state: destroyed + with_items: "{{ subnode_vms.list_vms }}" + +- name: Stop libvirtd on "{{ inventory_hostname }}" + become: true + systemd: + name: "{{ item }}" + state: stopped + enabled: no + with_items: + - libvirtd + +- name: Run negative evacuate tests + become: true + become_user: stack + shell: "/opt/stack/nova/roles/run-evacuate-hook/files/test_negative_evacuate.sh" + environment: + CONTROLLER_HOSTNAME: "{{ hostvars['controller']['ansible_hostname'] }}" + +- name: Start libvirtd on "{{ inventory_hostname }}" + become: true + systemd: + name: "{{ item }}" + state: started + enabled: yes + with_items: + - libvirtd + +- name: Run evacuate tests + become: true + become_user: stack + shell: "/opt/stack/nova/roles/run-evacuate-hook/files/test_evacuate.sh" + environment: + CONTROLLER_HOSTNAME: "{{ hostvars['controller']['ansible_hostname'] }}" diff --git a/tools/check-cherry-picks.sh b/tools/check-cherry-picks.sh index 5ca6ded203..5a449c520b 100755 --- a/tools/check-cherry-picks.sh +++ b/tools/check-cherry-picks.sh @@ -4,11 +4,6 @@ # to verify that they're all on either master or stable/ branches # -# Allow this script to be disabled by a simple env var -if [ ${DISABLE_CHERRY_PICK_CHECK:-0} -eq 1 ]; then - exit 0 -fi - commit_hash="" # Check if the patch is a merge patch by counting the number of parents. @@ -42,15 +42,12 @@ commands = description = Run style checks. envdir = {toxworkdir}/shared -passenv = - DISABLE_CHERRY_PICK_CHECK commands = bash tools/flake8wrap.sh {posargs} # Check that all JSON files don't have \r\n in line. bash -c "! find doc/ -type f -name *.json | xargs grep -U -n $'\r'" # Check that all included JSON files are valid JSON bash -c '! find doc/ -type f -name *.json | xargs -t -n1 python -m json.tool 2>&1 > /dev/null | grep -B1 -v ^python' - bash tools/check-cherry-picks.sh [testenv:fast8] description = @@ -59,6 +56,15 @@ envdir = {toxworkdir}/shared commands = bash tools/flake8wrap.sh -HEAD +[testenv:validate-backport] +description = + Determine whether a backport is ready to be merged by checking whether it has + already been merged to master or more recent stable branches. 
+deps = +skipsdist = true +commands = + bash tools/check-cherry-picks.sh + +[testenv:functional] description = Run functional tests using python3.
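
A note on the console-proxy hardening described in the security release note above: the fix amounts to refusing any request whose path begins with "//" before the standard library handler can turn it into a redirect. The sketch below is illustrative only; the class name HardenedProxyRequestHandler and the standalone server setup are assumptions for demonstration, not nova's actual websockify handler code::

    # Minimal sketch of the "//" open-redirect rejection described above.
    # HardenedProxyRequestHandler is an assumed name, not nova's class.
    from http.server import HTTPServer, SimpleHTTPRequestHandler

    class HardenedProxyRequestHandler(SimpleHTTPRequestHandler):
        def do_GET(self):
            # A path such as //example.com/%2F.. would otherwise be turned
            # into a redirect to an attacker-controlled host (see
            # https://bugs.python.org/issue32084), so reject it outright
            # with 400 Bad Request.
            if self.path.startswith('//'):
                self.send_error(400, "URI must not start with //")
                return
            super().do_GET()

    if __name__ == '__main__':
        # Serve the current directory on the usual noVNC port to exercise
        # the check; requesting //example.com/ returns 400 instead of a
        # redirect.
        server = HTTPServer(('127.0.0.1', 6080), HardenedProxyRequestHandler)
        server.serve_forever()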