diff options
author | Zuul <zuul@review.opendev.org> | 2021-06-25 19:30:28 +0000 |
---|---|---|
committer | Gerrit Code Review <review@openstack.org> | 2021-06-25 19:30:28 +0000 |
commit | e6d6284563a9b60eeaf4fdffdede3fc0966869ef (patch) | |
tree | 143b679920c02cd7469d6b597dfc6f731bdb85b9 | |
parent | 7acb9fc562b104c0e2d443e2efd6ab364dfffc8f (diff) | |
parent | 794bedf00e6a3dcdf89f07ae3f63deee09138a9a (diff) | |
download | nova-e6d6284563a9b60eeaf4fdffdede3fc0966869ef.tar.gz |
Merge "Add a workaround config toggle to refuse ceph image upload" into stable/train
-rw-r--r-- | nova/conf/workarounds.py | 24 | ||||
-rw-r--r-- | nova/tests/unit/virt/libvirt/test_driver.py | 46 | ||||
-rw-r--r-- | nova/virt/libvirt/driver.py | 15 | ||||
-rw-r--r-- | releasenotes/notes/avoid_muli_ceph_download-4083decf501dba40.yaml | 19 |
4 files changed, 104 insertions, 0 deletions
diff --git a/nova/conf/workarounds.py b/nova/conf/workarounds.py index c9ed0f2e4b..20e4b5c9d6 100644 --- a/nova/conf/workarounds.py +++ b/nova/conf/workarounds.py @@ -266,6 +266,30 @@ Related options: * :oslo.config:option:`image_cache_subdirectory_name` * :oslo.config:option:`update_resources_interval` """), + cfg.BoolOpt( + 'never_download_image_if_on_rbd', + default=False, + help=""" +When booting from an image on a ceph-backed compute node, if the image does not +already reside on the ceph cluster (as would be the case if glance is +also using the same cluster), nova will download the image from glance and +upload it to ceph itself. If using multiple ceph clusters, this may cause nova +to unintentionally duplicate the image in a non-COW-able way in the local +ceph deployment, wasting space. + +For more information, refer to the bug report: + +https://bugs.launchpad.net/nova/+bug/1858877 + +Enabling this option will cause nova to *refuse* to boot an instance if it +would require downloading the image from glance and uploading it to ceph +itself. 
+ +Related options: + +* ``compute_driver`` (libvirt) +* ``[libvirt]/images_type`` (rbd) +"""), ] diff --git a/nova/tests/unit/virt/libvirt/test_driver.py b/nova/tests/unit/virt/libvirt/test_driver.py index d6e241c8e6..a95640c750 100644 --- a/nova/tests/unit/virt/libvirt/test_driver.py +++ b/nova/tests/unit/virt/libvirt/test_driver.py @@ -21884,6 +21884,52 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin): None) self.assertFalse(mock_inject.called) + @mock.patch('nova.virt.libvirt.utils.fetch_image') + @mock.patch('nova.virt.libvirt.storage.rbd_utils.RBDDriver') + @mock.patch.object(imagebackend, 'IMAGE_API') + def test_create_fetch_image_ceph_workaround(self, mock_image, mock_rbd, + mock_fetch): + # Make sure that rbd clone will fail as un-clone-able + mock_rbd.is_cloneable.return_value = False + # Make sure the rbd code thinks the image does not already exist + mock_rbd.return_value.exists.return_value = False + # Make sure the rbd code says the image is small + mock_rbd.return_value.size.return_value = 128 * units.Mi + # Make sure IMAGE_API.get() returns a raw image + mock_image.get.return_value = {'locations': [], 'disk_format': 'raw'} + + instance = self._create_instance() + disk_images = {'image_id': 'foo'} + self.flags(images_type='rbd', group='libvirt') + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) + + def do_create(): + # Reset the fetch mock and run our driver method so we can + # check for called-ness after each attempt + mock_fetch.reset_mock() + drvr._create_and_inject_local_root(self.context, + instance, + False, + '', + disk_images, + get_injection_info(), + None) + + # Do an image create with rbd + do_create() + # Make sure it tried fetch, which implies that it tried and + # failed to clone. + mock_fetch.assert_called() + + # Enable the workaround + self.flags(never_download_image_if_on_rbd=True, + group='workarounds') + # Ensure that we raise the original ImageUnacceptable from the + # failed clone... 
+ self.assertRaises(exception.ImageUnacceptable, do_create) + # ...and ensure that we did _not_ try to fetch + mock_fetch.assert_not_called() + @mock.patch('nova.virt.netutils.get_injected_network_template') @mock.patch('nova.virt.disk.api.inject_data') @mock.patch.object(libvirt_driver.LibvirtDriver, "_conn") diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py index c9e48b3971..5d1caf34f6 100644 --- a/nova/virt/libvirt/driver.py +++ b/nova/virt/libvirt/driver.py @@ -4015,9 +4015,24 @@ class LibvirtDriver(driver.ComputeDriver): backend.create_snap(libvirt_utils.RESIZE_SNAPSHOT_NAME) if backend.SUPPORTS_CLONE: def clone_fallback_to_fetch(*args, **kwargs): + refuse_fetch = ( + CONF.libvirt.images_type == 'rbd' and + CONF.workarounds.never_download_image_if_on_rbd) try: backend.clone(context, disk_images['image_id']) except exception.ImageUnacceptable: + if refuse_fetch: + # Re-raise the exception from the failed + # ceph clone. The compute manager expects + # ImageUnacceptable as a possible result + # of spawn(), from which this is called. + with excutils.save_and_reraise_exception(): + LOG.warning( + 'Image %s is not on my ceph and ' + '[workarounds]/' + 'never_download_image_if_on_rbd=True;' + ' refusing to fetch and upload.', + disk_images['image_id']) libvirt_utils.fetch_image(*args, **kwargs) fetch_func = clone_fallback_to_fetch else: diff --git a/releasenotes/notes/avoid_muli_ceph_download-4083decf501dba40.yaml b/releasenotes/notes/avoid_muli_ceph_download-4083decf501dba40.yaml new file mode 100644 index 0000000000..f79c278119 --- /dev/null +++ b/releasenotes/notes/avoid_muli_ceph_download-4083decf501dba40.yaml @@ -0,0 +1,19 @@ +--- +other: + - | + Nova now has a config option called + ``[workarounds]/never_download_image_if_on_rbd`` which helps to + avoid pathological storage behavior with multiple ceph clusters. + Currently, Nova does *not* support multiple ceph clusters + properly, but Glance can be configured with them. 
If an instance + is booted from an image residing in a ceph cluster other than the + one Nova knows about, it will silently download it from Glance and + re-upload the image to the local ceph privately for that + instance. Unlike the behavior you expect when configuring Nova and + Glance for ceph, Nova will continue to do this over and over for + the same image when subsequent instances are booted, consuming a + large amount of storage unexpectedly. The new workaround option + will cause Nova to refuse to do this download/upload behavior and + instead fail the instance boot. It is simply a stop-gap effort to + prevent unsupported deployments with multiple ceph clusters from + silently consuming large amounts of disk space. |