author     Dan Smith <dansmith@redhat.com>    2015-02-04 10:10:25 -0800
committer  Dan Smith <dms@danplanet.com>      2015-02-04 23:01:18 +0000
commit     5a542e770648469b0fbb638f6ba53f95424252ec
tree       570fa06095a5f0d305104d1c1f73164f87a33fa9
parent     b501e57990d2e4308bb798da6006085f987e4d1a
Add max_concurrent_builds limit configuration
Right now, nova-compute will attempt to build an infinite number of instances, if asked to do so. This won't work on any machine, regardless of its resources, if the number of instances is too large. We could default this to zero to retain the current behavior, but the current behavior is really not sane in any case, so I think we should default to something. Ten instances for a single compute node seems like a reasonable default; if you can do more than ten at a time, you're definitely not running a cloud based on the default config.

DocImpact: Adds a new configuration variable
Closes-Bug: #1418155
Change-Id: I412d2849fd16430e6926fc983c031babb7ad04d0
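For operators, the new knob is set in nova.conf; a minimal sketch, assuming the option is registered in the [DEFAULT] group like the rest of compute_opts:

    [DEFAULT]
    # Cap how many instance builds a single nova-compute runs concurrently
    # (the patch defaults this to 10).
    max_concurrent_builds = 10
    # Setting it to 0 restores the old unthrottled behavior.
    #max_concurrent_builds = 0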
-rw-r--r--  nova/compute/manager.py                       16
-rw-r--r--  nova/compute/utils.py                         12
-rw-r--r--  nova/tests/unit/compute/test_compute_mgr.py   36
3 files changed, 63 insertions(+), 1 deletion(-)
diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index 5ca46c4bab..326b0b000a 100644
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -37,6 +37,7 @@ import uuid
from cinderclient import exceptions as cinder_exception
import eventlet.event
from eventlet import greenthread
+import eventlet.semaphore
import eventlet.timeout
from keystoneclient import exceptions as keystone_exception
from oslo.config import cfg
@@ -122,6 +123,9 @@ compute_opts = [
cfg.IntOpt('network_allocate_retries',
default=0,
help="Number of times to retry network allocation on failures"),
+ cfg.IntOpt('max_concurrent_builds',
+ default=10,
+ help='Maximum number of instance builds to run concurrently'),
cfg.IntOpt('block_device_allocate_retries',
default=60,
help='Number of times to retry block device'
@@ -619,6 +623,11 @@ class ComputeManager(manager.Manager):
self.instance_events = InstanceEvents()
self._sync_power_pool = eventlet.GreenPool()
self._syncs_in_progress = {}
+ if CONF.max_concurrent_builds != 0:
+ self._build_semaphore = eventlet.semaphore.Semaphore(
+ CONF.max_concurrent_builds)
+ else:
+ self._build_semaphore = compute_utils.UnlimitedSemaphore()
super(ComputeManager, self).__init__(service_name="compute",
*args, **kwargs)
@@ -2023,7 +2032,12 @@ class ComputeManager(manager.Manager):
@utils.synchronized(instance.uuid)
def _locked_do_build_and_run_instance(*args, **kwargs):
- self._do_build_and_run_instance(*args, **kwargs)
+ # NOTE(danms): We grab the semaphore with the instance uuid
+ # locked because we could wait in line to build this instance
+ # for a while and we want to make sure that nothing else tries
+ # to do anything with this instance while we wait.
+ with self._build_semaphore:
+ self._do_build_and_run_instance(*args, **kwargs)
# NOTE(danms): We spawn here to return the RPC worker thread back to
# the pool. Since what follows could take a really long time, we don't
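As an aside, not part of the change itself: the pattern above boils down to a bounded eventlet semaphore gating work spawned into a green thread pool. A standalone sketch of that behavior (the names here are illustrative, not Nova's):

    import eventlet
    import eventlet.semaphore

    MAX_CONCURRENT = 2                  # stand-in for CONF.max_concurrent_builds
    build_semaphore = eventlet.semaphore.Semaphore(MAX_CONCURRENT)
    pool = eventlet.GreenPool()

    def build(instance_id):
        # At most MAX_CONCURRENT green threads run this block at once;
        # the others queue up on the semaphore.
        with build_semaphore:
            print('building', instance_id)
            eventlet.sleep(0.1)         # placeholder for the real build work

    for i in range(5):
        pool.spawn_n(build, i)
    pool.waitall()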
diff --git a/nova/compute/utils.py b/nova/compute/utils.py
index e5d5298b2a..ccfb600055 100644
--- a/nova/compute/utils.py
+++ b/nova/compute/utils.py
@@ -480,3 +480,15 @@ class EventReporter(object):
self.context, uuid, self.event_name, exc_val=exc_val,
exc_tb=exc_tb, want_result=False)
return False
+
+
+class UnlimitedSemaphore(object):
+ def __enter__(self):
+ pass
+
+ def __exit__(self, exc_type, exc_val, exc_tb):
+ pass
+
+ @property
+ def balance(self):
+ return 0
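For illustration only (not in the patch): both semaphore flavors speak the same context-manager protocol and expose a balance property, so the caller's with block never has to special-case the unlimited configuration. A sketch using a local stand-in class:

    import eventlet.semaphore

    class NoopSemaphore(object):
        # Local stand-in mirroring UnlimitedSemaphore above.
        def __enter__(self):
            pass

        def __exit__(self, exc_type, exc_val, exc_tb):
            pass

        @property
        def balance(self):
            return 0

    def pick_semaphore(max_concurrent_builds):
        # Same selection logic as ComputeManager.__init__ in the first hunk.
        if max_concurrent_builds != 0:
            return eventlet.semaphore.Semaphore(max_concurrent_builds)
        return NoopSemaphore()

    for limit in (10, 0):
        sem = pick_semaphore(limit)
        with sem:                       # identical call site either way
            pass                        # build work would go here
        print(limit, sem.balance)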
diff --git a/nova/tests/unit/compute/test_compute_mgr.py b/nova/tests/unit/compute/test_compute_mgr.py
index f15a7d0f90..b9b36085b0 100644
--- a/nova/tests/unit/compute/test_compute_mgr.py
+++ b/nova/tests/unit/compute/test_compute_mgr.py
@@ -306,6 +306,42 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase):
self.assertEqual(final_result, res)
self.assertEqual(1, sleep.call_count)
+ @mock.patch('nova.utils.spawn_n')
+ @mock.patch('nova.compute.manager.ComputeManager.'
+ '_do_build_and_run_instance')
+ def _test_max_concurrent_builds(self, mock_dbari, mock_spawn):
+ mock_spawn.side_effect = lambda f, *a, **k: f(*a, **k)
+
+ with mock.patch.object(self.compute,
+ '_build_semaphore') as mock_sem:
+ instance = objects.Instance(uuid=str(uuid.uuid4()))
+ for i in (1, 2, 3):
+ self.compute.build_and_run_instance(self.context, instance,
+ mock.sentinel.image,
+ mock.sentinel.request_spec,
+ {})
+ self.assertEqual(3, mock_sem.__enter__.call_count)
+
+ def test_max_concurrent_builds_limited(self):
+ self.flags(max_concurrent_builds=2)
+ self._test_max_concurrent_builds()
+
+ def test_max_concurrent_builds_unlimited(self):
+ self.flags(max_concurrent_builds=0)
+ self._test_max_concurrent_builds()
+
+ def test_max_concurrent_builds_semaphore_limited(self):
+ self.flags(max_concurrent_builds=123)
+ self.assertEqual(123,
+ manager.ComputeManager()._build_semaphore.balance)
+
+ def test_max_concurrent_builds_semaphore_unlimited(self):
+ self.flags(max_concurrent_builds=0)
+ compute = manager.ComputeManager()
+ self.assertEqual(0, compute._build_semaphore.balance)
+ self.assertIsInstance(compute._build_semaphore,
+ compute_utils.UnlimitedSemaphore)
+
def test_init_host(self):
our_host = self.compute.host
fake_context = 'fake-context'
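A closing note on the test technique (a sketch, not part of the patch): giving the mocked nova.utils.spawn_n a side_effect that calls its target inline makes build_and_run_instance run synchronously, so the semaphore's __enter__ count can be asserted right after the loop. The same trick in isolation:

    import mock

    # A fake spawn whose side_effect runs the target inline instead of in a
    # green thread, mirroring mock_spawn.side_effect in the test above.
    fake_spawn = mock.Mock(side_effect=lambda f, *a, **k: f(*a, **k))
    assert fake_spawn(sorted, [3, 1, 2]) == [1, 2, 3]
    assert fake_spawn.call_count == 1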