author    Zuul <zuul@review.opendev.org>            2021-06-25 19:30:28 +0000
committer Gerrit Code Review <review@openstack.org>  2021-06-25 19:30:28 +0000
commit    e6d6284563a9b60eeaf4fdffdede3fc0966869ef (patch)
tree      143b679920c02cd7469d6b597dfc6f731bdb85b9
parent    7acb9fc562b104c0e2d443e2efd6ab364dfffc8f (diff)
parent    794bedf00e6a3dcdf89f07ae3f63deee09138a9a (diff)
Merge "Add a workaround config toggle to refuse ceph image upload" into stable/train
-rw-r--r--  nova/conf/workarounds.py                                            24
-rw-r--r--  nova/tests/unit/virt/libvirt/test_driver.py                         46
-rw-r--r--  nova/virt/libvirt/driver.py                                         15
-rw-r--r--  releasenotes/notes/avoid_muli_ceph_download-4083decf501dba40.yaml   19
4 files changed, 104 insertions, 0 deletions
diff --git a/nova/conf/workarounds.py b/nova/conf/workarounds.py
index c9ed0f2e4b..20e4b5c9d6 100644
--- a/nova/conf/workarounds.py
+++ b/nova/conf/workarounds.py
@@ -266,6 +266,30 @@ Related options:
* :oslo.config:option:`image_cache_subdirectory_name`
* :oslo.config:option:`update_resources_interval`
"""),
+    cfg.BoolOpt(
+        'never_download_image_if_on_rbd',
+        default=False,
+        help="""
+When booting from an image on a ceph-backed compute node, if the image does not
+already reside on the ceph cluster (as would be the case if glance is
+also using the same cluster), nova will download the image from glance and
+upload it to ceph itself. If using multiple ceph clusters, this may cause nova
+to unintentionally duplicate the image in a non-COW-able way in the local
+ceph deployment, wasting space.
+
+For more information, refer to the bug report:
+
+https://bugs.launchpad.net/nova/+bug/1858877
+
+Enabling this option will cause nova to *refuse* to boot an instance if it
+would require downloading the image from glance and uploading it to ceph
+itself.
+
+Related options:
+
+* ``compute_driver`` (libvirt)
+* ``[libvirt]/images_type`` (rbd)
+"""),
]
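
For operators: this toggle lives in the ``[workarounds]`` group of nova.conf
and only has an effect when the libvirt driver stores disks in rbd, per the
related options listed in the help text above. A minimal illustrative
fragment (the option defaults to False):

    [libvirt]
    images_type = rbd

    [workarounds]
    never_download_image_if_on_rbd = True
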
diff --git a/nova/tests/unit/virt/libvirt/test_driver.py b/nova/tests/unit/virt/libvirt/test_driver.py
index d6e241c8e6..a95640c750 100644
--- a/nova/tests/unit/virt/libvirt/test_driver.py
+++ b/nova/tests/unit/virt/libvirt/test_driver.py
@@ -21884,6 +21884,52 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
None)
self.assertFalse(mock_inject.called)
+    @mock.patch('nova.virt.libvirt.utils.fetch_image')
+    @mock.patch('nova.virt.libvirt.storage.rbd_utils.RBDDriver')
+    @mock.patch.object(imagebackend, 'IMAGE_API')
+    def test_create_fetch_image_ceph_workaround(self, mock_image, mock_rbd,
+                                                mock_fetch):
+        # Make sure rbd clone will fail because the image is not cloneable
+        mock_rbd.is_cloneable.return_value = False
+        # Make sure the rbd code thinks the image does not already exist
+        mock_rbd.return_value.exists.return_value = False
+        # Make sure the rbd code says the image is small
+        mock_rbd.return_value.size.return_value = 128 * units.Mi
+        # Make sure IMAGE_API.get() returns a raw image
+        mock_image.get.return_value = {'locations': [], 'disk_format': 'raw'}
+
+        instance = self._create_instance()
+        disk_images = {'image_id': 'foo'}
+        self.flags(images_type='rbd', group='libvirt')
+        drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
+
+        def do_create():
+            # Reset the fetch mock and run our driver method so we can
+            # check for called-ness after each attempt
+            mock_fetch.reset_mock()
+            drvr._create_and_inject_local_root(self.context,
+                                               instance,
+                                               False,
+                                               '',
+                                               disk_images,
+                                               get_injection_info(),
+                                               None)
+
+        # Do an image create with rbd
+        do_create()
+        # Make sure it tried to fetch, which implies that it tried and
+        # failed to clone.
+        mock_fetch.assert_called()
+
+        # Enable the workaround
+        self.flags(never_download_image_if_on_rbd=True,
+                   group='workarounds')
+        # Ensure that we raise the original ImageUnacceptable from the
+        # failed clone...
+        self.assertRaises(exception.ImageUnacceptable, do_create)
+        # ...and ensure that we did _not_ try to fetch
+        mock_fetch.assert_not_called()
+
@mock.patch('nova.virt.netutils.get_injected_network_template')
@mock.patch('nova.virt.disk.api.inject_data')
@mock.patch.object(libvirt_driver.LibvirtDriver, "_conn")
diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py
index c9e48b3971..5d1caf34f6 100644
--- a/nova/virt/libvirt/driver.py
+++ b/nova/virt/libvirt/driver.py
@@ -4015,9 +4015,24 @@ class LibvirtDriver(driver.ComputeDriver):
                 backend.create_snap(libvirt_utils.RESIZE_SNAPSHOT_NAME)
             if backend.SUPPORTS_CLONE:
                 def clone_fallback_to_fetch(*args, **kwargs):
+                    refuse_fetch = (
+                        CONF.libvirt.images_type == 'rbd' and
+                        CONF.workarounds.never_download_image_if_on_rbd)
                     try:
                         backend.clone(context, disk_images['image_id'])
                     except exception.ImageUnacceptable:
+                        if refuse_fetch:
+                            # Re-raise the exception from the failed
+                            # ceph clone. The compute manager expects
+                            # ImageUnacceptable as a possible result
+                            # of spawn(), from which this is called.
+                            with excutils.save_and_reraise_exception():
+                                LOG.warning(
+                                    'Image %s is not on my ceph and '
+                                    '[workarounds]/'
+                                    'never_download_image_if_on_rbd=True;'
+                                    ' refusing to fetch and upload.',
+                                    disk_images['image_id'])
                         libvirt_utils.fetch_image(*args, **kwargs)
                     fetch_func = clone_fallback_to_fetch
             else:
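
Condensed, the control flow added above amounts to the following standalone
sketch (placeholder names, not the actual nova internals; ``backend``,
``fetch`` and ``conf`` stand in for the real objects):

    class ImageUnacceptable(Exception):
        """Stand-in for nova.exception.ImageUnacceptable."""

    def clone_fallback_to_fetch(backend, context, image_id, fetch, conf):
        # Mirrors the guarded clone-or-fetch fallback from the hunk above.
        refuse_fetch = (conf.libvirt.images_type == 'rbd' and
                        conf.workarounds.never_download_image_if_on_rbd)
        try:
            # Fast path: COW-clone the image already present in ceph.
            backend.clone(context, image_id)
        except ImageUnacceptable:
            if refuse_fetch:
                # Workaround enabled: surface the clone failure instead
                # of downloading from glance and re-uploading to ceph.
                raise
            # Default behavior: fall back to download-and-upload.
            fetch(context, image_id)

With the workaround on, the ImageUnacceptable from the failed clone
propagates up through spawn() and fails the boot, exactly as the test
above asserts.
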
diff --git a/releasenotes/notes/avoid_muli_ceph_download-4083decf501dba40.yaml b/releasenotes/notes/avoid_muli_ceph_download-4083decf501dba40.yaml
new file mode 100644
index 0000000000..f79c278119
--- /dev/null
+++ b/releasenotes/notes/avoid_muli_ceph_download-4083decf501dba40.yaml
@@ -0,0 +1,19 @@
+---
+other:
+  - |
+    Nova now has a config option called
+    ``[workarounds]/never_download_image_if_on_rbd`` which helps to
+    avoid pathological storage behavior with multiple ceph clusters.
+    Currently, Nova does *not* support multiple ceph clusters
+    properly, but Glance can be configured with them. If an instance
+    is booted from an image residing in a ceph cluster other than
+    the one Nova knows about, Nova will silently download the image
+    from Glance and re-upload it to the local ceph privately for
+    that instance. Unlike the behavior you would expect when
+    configuring Nova and Glance for ceph, Nova repeats this download
+    and re-upload for the same image each time another instance is
+    booted, unexpectedly consuming a large amount of storage. The
+    new workaround option causes Nova to refuse this download/upload
+    behavior and instead fail the instance boot. It is simply a
+    stop-gap effort to keep unsupported deployments with multiple
+    ceph clusters from silently consuming large amounts of disk space.
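
To make the cost concrete with hypothetical numbers: if a 10 GiB raw image
lives only in Glance's ceph cluster, booting 20 instances from it on a
compute node backed by a different ceph cluster stores 20 private copies,
roughly 20 × 10 GiB = 200 GiB, where the supported shared-cluster setup
would store a single 10 GiB parent plus thin COW clones.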