author     Dan Smith <dansmith@redhat.com>    2015-02-04 10:10:25 -0800
committer  Dan Smith <dms@danplanet.com>      2015-02-04 23:01:18 +0000
commit     5a542e770648469b0fbb638f6ba53f95424252ec
tree       570fa06095a5f0d305104d1c1f73164f87a33fa9
parent     b501e57990d2e4308bb798da6006085f987e4d1a
Add max_concurrent_builds limit configuration
Right now, nova-compute will attempt to build an infinite number of instances, if asked to do so. This won't work on any machine, regardless of its resources, if the number of instances is too large. We could default this to zero to retain the current behavior, but the current behavior is really not sane in any case, so I think we should default to something. Ten instances for a single compute node seems like a reasonable default; if you can do more than ten at a time, you're definitely not running a cloud based on the default config.

DocImpact: Adds a new configuration variable
Closes-Bug: #1418155
Change-Id: I412d2849fd16430e6926fc983c031babb7ad04d0
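For operators, the new knob is set in nova.conf; a minimal sketch, assuming the option is registered in the [DEFAULT] group like the rest of compute_opts:

    [DEFAULT]
    # Cap how many instance builds a single nova-compute runs concurrently
    # (the patch defaults this to 10).
    max_concurrent_builds = 10
    # Setting it to 0 restores the old unthrottled behavior.
    #max_concurrent_builds = 0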
-rw-r--r--  nova/compute/manager.py                       16
-rw-r--r--  nova/compute/utils.py                         12
-rw-r--r--  nova/tests/unit/compute/test_compute_mgr.py   36
3 files changed, 63 insertions(+), 1 deletion(-)
diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index 5ca46c4bab..326b0b000a 100644
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -37,6 +37,7 @@ import uuid
from cinderclient import exceptions as cinder_exception
import eventlet.event
from eventlet import greenthread
+import eventlet.semaphore
import eventlet.timeout
from keystoneclient import exceptions as keystone_exception
from oslo.config import cfg
@@ -122,6 +123,9 @@ compute_opts = [
cfg.IntOpt('network_allocate_retries',
default=0,
help="Number of times to retry network allocation on failures"),
+ cfg.IntOpt('max_concurrent_builds',
+ default=10,
+ help='Maximum number of instance builds to run concurrently'),
cfg.IntOpt('block_device_allocate_retries',
default=60,
help='Number of times to retry block device'
@@ -619,6 +623,11 @@ class ComputeManager(manager.Manager):
self.instance_events = InstanceEvents()
self._sync_power_pool = eventlet.GreenPool()
self._syncs_in_progress = {}
+ if CONF.max_concurrent_builds != 0:
+ self._build_semaphore = eventlet.semaphore.Semaphore(
+ CONF.max_concurrent_builds)
+ else:
+ self._build_semaphore = compute_utils.UnlimitedSemaphore()
super(ComputeManager, self).__init__(service_name="compute",
*args, **kwargs)
@@ -2023,7 +2032,12 @@ class ComputeManager(manager.Manager):
@utils.synchronized(instance.uuid)
def _locked_do_build_and_run_instance(*args, **kwargs):
- self._do_build_and_run_instance(*args, **kwargs)
+ # NOTE(danms): We grab the semaphore with the instance uuid
+ # locked because we could wait in line to build this instance
+ # for a while and we want to make sure that nothing else tries
+ # to do anything with this instance while we wait.
+ with self._build_semaphore:
+ self._do_build_and_run_instance(*args, **kwargs)
# NOTE(danms): We spawn here to return the RPC worker thread back to
# the pool. Since what follows could take a really long time, we don't
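As an aside, not part of the change itself: the pattern above boils down to a bounded eventlet semaphore gating work spawned into a green thread pool. A standalone sketch of that behavior (the names here are illustrative, not Nova's):

    import eventlet
    import eventlet.semaphore

    MAX_CONCURRENT = 2                  # stand-in for CONF.max_concurrent_builds
    build_semaphore = eventlet.semaphore.Semaphore(MAX_CONCURRENT)
    pool = eventlet.GreenPool()

    def build(instance_id):
        # At most MAX_CONCURRENT green threads run this block at once;
        # the others queue up on the semaphore.
        with build_semaphore:
            print('building', instance_id)
            eventlet.sleep(0.1)         # placeholder for the real build work

    for i in range(5):
        pool.spawn_n(build, i)
    pool.waitall()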
diff --git a/nova/compute/utils.py b/nova/compute/utils.py
index e5d5298b2a..ccfb600055 100644
--- a/nova/compute/utils.py
+++ b/nova/compute/utils.py
@@ -480,3 +480,15 @@ class EventReporter(object):
self.context, uuid, self.event_name, exc_val=exc_val,
exc_tb=exc_tb, want_result=False)
return False
+
+
+class UnlimitedSemaphore(object):
+ def __enter__(self):
+ pass
+
+ def __exit__(self, exc_type, exc_val, exc_tb):
+ pass
+
+ @property
+ def balance(self):
+ return 0
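For illustration only (not in the patch): both semaphore flavors speak the same context-manager protocol and expose a balance property, so the caller's with block never has to special-case the unlimited configuration. A sketch using a local stand-in class:

    import eventlet.semaphore

    class NoopSemaphore(object):
        # Local stand-in mirroring UnlimitedSemaphore above.
        def __enter__(self):
            pass

        def __exit__(self, exc_type, exc_val, exc_tb):
            pass

        @property
        def balance(self):
            return 0

    def pick_semaphore(max_concurrent_builds):
        # Same selection logic as ComputeManager.__init__ in the first hunk.
        if max_concurrent_builds != 0:
            return eventlet.semaphore.Semaphore(max_concurrent_builds)
        return NoopSemaphore()

    for limit in (10, 0):
        sem = pick_semaphore(limit)
        with sem:                       # identical call site either way
            pass                        # build work would go here
        print(limit, sem.balance)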
diff --git a/nova/tests/unit/compute/test_compute_mgr.py b/nova/tests/unit/compute/test_compute_mgr.py
index f15a7d0f90..b9b36085b0 100644
--- a/nova/tests/unit/compute/test_compute_mgr.py
+++ b/nova/tests/unit/compute/test_compute_mgr.py
@@ -306,6 +306,42 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase):
self.assertEqual(final_result, res)
self.assertEqual(1, sleep.call_count)
+ @mock.patch('nova.utils.spawn_n')
+ @mock.patch('nova.compute.manager.ComputeManager.'
+ '_do_build_and_run_instance')
+ def _test_max_concurrent_builds(self, mock_dbari, mock_spawn):
+ mock_spawn.side_effect = lambda f, *a, **k: f(*a, **k)
+
+ with mock.patch.object(self.compute,
+ '_build_semaphore') as mock_sem:
+ instance = objects.Instance(uuid=str(uuid.uuid4()))
+ for i in (1, 2, 3):
+ self.compute.build_and_run_instance(self.context, instance,
+ mock.sentinel.image,
+ mock.sentinel.request_spec,
+ {})
+ self.assertEqual(3, mock_sem.__enter__.call_count)
+
+ def test_max_concurrent_builds_limited(self):
+ self.flags(max_concurrent_builds=2)
+ self._test_max_concurrent_builds()
+
+ def test_max_concurrent_builds_unlimited(self):
+ self.flags(max_concurrent_builds=0)
+ self._test_max_concurrent_builds()
+
+ def test_max_concurrent_builds_semaphore_limited(self):
+ self.flags(max_concurrent_builds=123)
+ self.assertEqual(123,
+ manager.ComputeManager()._build_semaphore.balance)
+
+ def test_max_concurrent_builds_semaphore_unlimited(self):
+ self.flags(max_concurrent_builds=0)
+ compute = manager.ComputeManager()
+ self.assertEqual(0, compute._build_semaphore.balance)
+ self.assertIsInstance(compute._build_semaphore,
+ compute_utils.UnlimitedSemaphore)
+
def test_init_host(self):
our_host = self.compute.host
fake_context = 'fake-context'
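A closing note on the test technique (a sketch, not part of the patch): giving the mocked nova.utils.spawn_n a side_effect that calls its target inline makes build_and_run_instance run synchronously, so the semaphore's __enter__ count can be asserted right after the loop. The same trick in isolation:

    import mock

    # A fake spawn whose side_effect runs the target inline instead of in a
    # green thread, mirroring mock_spawn.side_effect in the test above.
    fake_spawn = mock.Mock(side_effect=lambda f, *a, **k: f(*a, **k))
    assert fake_spawn(sorted, [3, 1, 2]) == [1, 2, 3]
    assert fake_spawn.call_count == 1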