Eliminate client races in legacy operations

Wait for the legacy stack to move to the IN_PROGRESS state before returning from the API call in the stack update, suspend, resume, check, and restore operations. For the stack delete operation, do the same provided that we can acquire the stack lock immediately, and thus don't need to wait for existing operations to be cancelled before we can change the state to IN_PROGRESS. In other cases there is still a race. Change-Id: Id94d009d69342f311a00ed3859f4ca8ac6b0af09 Story: #1669608 Task: 23175
author: Zane Bitter <zbitter@redhat.com> 2018-07-30 20:24:34 -0400
committer: Zane Bitter <zbitter@redhat.com> 2018-07-30 20:48:34 -0400
commit: 2d2da745931bceeb784984a44d747fcaf6264a30 (patch)
tree: 27faa2f34f813e1e42d12570cfd11bd3f59f0406 /heat/engine/stack.py
parent: e63778efc91869efbd41e2f96b0c554aef1c80e5 (diff)
download: heat-2d2da745931bceeb784984a44d747fcaf6264a30.tar.gz
1 files changed, 39 insertions, 13 deletions
diff --git a/heat/engine/stack.py b/heat/engine/stack.py
index 7dab91a7d..66eba3cbe 100644
--- a/heat/engine/stack.py
+++ b/heat/engine/stack.py
@@ -1122,7 +1122,8 @@ class Stack(collections.Mapping):
 
     @scheduler.wrappertask
     def stack_task(self, action, reverse=False, post_func=None,
-                   aggregate_exceptions=False, pre_completion_func=None):
+                   aggregate_exceptions=False, pre_completion_func=None,
+                   notify=None):
         """A task to perform an action on the stack.
 
         All of the resources are traversed in forward or reverse dependency
@@ -1147,9 +1148,13 @@ class Stack(collections.Mapping):
                            'Failed stack pre-ops: %s' % six.text_type(e))
             if callable(post_func):
                 post_func()
+            # No need to call notify.signal(), because persistence of the
+            # state is always deferred here.
             return
         self.state_set(action, self.IN_PROGRESS,
                        'Stack %s started' % action)
+        if notify is not None:
+            notify.signal()
 
         stack_status = self.COMPLETE
         reason = 'Stack %s completed successfully' % action
@@ -1208,12 +1213,13 @@ class Stack(collections.Mapping):
 
     @profiler.trace('Stack.check', hide_args=False)
     @reset_state_on_error
-    def check(self):
+    def check(self, notify=None):
         self.updated_time = oslo_timeutils.utcnow()
         checker = scheduler.TaskRunner(
             self.stack_task, self.CHECK,
             post_func=self.supports_check_action,
-            aggregate_exceptions=True)
+            aggregate_exceptions=True,
+            notify=notify)
         checker()
 
     def supports_check_action(self):
@@ -1281,7 +1287,7 @@ class Stack(collections.Mapping):
 
     @profiler.trace('Stack.update', hide_args=False)
     @reset_state_on_error
-    def update(self, newstack, msg_queue=None):
+    def update(self, newstack, msg_queue=None, notify=None):
         """Update the stack.
 
         Compare the current stack with newstack,
@@ -1296,7 +1302,7 @@ class Stack(collections.Mapping):
         """
         self.updated_time = oslo_timeutils.utcnow()
         updater = scheduler.TaskRunner(self.update_task, newstack,
-                                       msg_queue=msg_queue)
+                                       msg_queue=msg_queue, notify=notify)
         updater()
 
     @profiler.trace('Stack.converge_stack', hide_args=False)
@@ -1540,11 +1546,14 @@ class Stack(collections.Mapping):
         self.state_set(self.action, self.FAILED, six.text_type(reason))
 
     @scheduler.wrappertask
-    def update_task(self, newstack, action=UPDATE, msg_queue=None):
+    def update_task(self, newstack, action=UPDATE,
+                    msg_queue=None, notify=None):
         if action not in (self.UPDATE, self.ROLLBACK, self.RESTORE):
             LOG.error("Unexpected action %s passed to update!", action)
             self.state_set(self.UPDATE, self.FAILED,
                            "Invalid action %s" % action)
+            if notify is not None:
+                notify.signal()
             return
 
         try:
@@ -1553,6 +1562,8 @@ class Stack(collections.Mapping):
         except Exception as e:
             self.state_set(action, self.FAILED, e.args[0] if e.args else
                            'Failed stack pre-ops: %s' % six.text_type(e))
+            if notify is not None:
+                notify.signal()
             return
         if self.status == self.IN_PROGRESS:
             if action == self.ROLLBACK:
@@ -1561,6 +1572,8 @@ class Stack(collections.Mapping):
                 reason = _('Attempted to %s an IN_PROGRESS '
                            'stack') % action
                 self.reset_stack_and_resources_in_progress(reason)
+                if notify is not None:
+                    notify.signal()
                 return
 
         # Save a copy of the new template.  To avoid two DB writes
@@ -1574,6 +1587,10 @@ class Stack(collections.Mapping):
         self.status_reason = 'Stack %s started' % action
         self._send_notification_and_add_event()
         self.store()
+        # Notify the caller that the state is stored
+        if notify is not None:
+            notify.signal()
+
         if prev_tmpl_id is not None:
             raw_template_object.RawTemplate.delete(self.context, prev_tmpl_id)
 
@@ -1842,7 +1859,7 @@ class Stack(collections.Mapping):
 
     @profiler.trace('Stack.delete', hide_args=False)
     @reset_state_on_error
-    def delete(self, action=DELETE, backup=False, abandon=False):
+    def delete(self, action=DELETE, backup=False, abandon=False, notify=None):
         """Delete all of the resources, and then the stack itself.
 
         The action parameter is used to differentiate between a user
@@ -1858,12 +1875,16 @@ class Stack(collections.Mapping):
             LOG.error("Unexpected action %s passed to delete!", action)
             self.state_set(self.DELETE, self.FAILED,
                            "Invalid action %s" % action)
+            if notify is not None:
+                notify.signal()
             return
 
         stack_status = self.COMPLETE
         reason = 'Stack %s completed successfully' % action
         self.state_set(action, self.IN_PROGRESS, 'Stack %s started' %
                        action)
+        if notify is not None:
+            notify.signal()
 
         backup_stack = self._backup_stack(False)
         if backup_stack:
@@ -1927,7 +1948,7 @@ class Stack(collections.Mapping):
 
     @profiler.trace('Stack.suspend', hide_args=False)
     @reset_state_on_error
-    def suspend(self):
+    def suspend(self, notify=None):
         """Suspend the stack.
 
         Invokes handle_suspend for all stack resources.
@@ -1938,6 +1959,7 @@ class Stack(collections.Mapping):
         other than move to SUSPEND_COMPLETE, so the resources must implement
         handle_suspend for this to have any effect.
         """
+        LOG.debug("Suspending stack %s", self)
         # No need to suspend if the stack has been suspended
         if self.state == (self.SUSPEND, self.COMPLETE):
             LOG.info('%s is already suspended', self)
@@ -1947,12 +1969,13 @@ class Stack(collections.Mapping):
         sus_task = scheduler.TaskRunner(
             self.stack_task,
             action=self.SUSPEND,
-            reverse=True)
+            reverse=True,
+            notify=notify)
         sus_task(timeout=self.timeout_secs())
 
     @profiler.trace('Stack.resume', hide_args=False)
     @reset_state_on_error
-    def resume(self):
+    def resume(self, notify=None):
         """Resume the stack.
 
         Invokes handle_resume for all stack resources.
@@ -1963,6 +1986,7 @@ class Stack(collections.Mapping):
         other than move to RESUME_COMPLETE, so the resources must implement
         handle_resume for this to have any effect.
         """
+        LOG.debug("Resuming stack %s", self)
         # No need to resume if the stack has been resumed
         if self.state == (self.RESUME, self.COMPLETE):
             LOG.info('%s is already resumed', self)
@@ -1972,7 +1996,8 @@ class Stack(collections.Mapping):
         sus_task = scheduler.TaskRunner(
             self.stack_task,
             action=self.RESUME,
-            reverse=False)
+            reverse=False,
+            notify=notify)
         sus_task(timeout=self.timeout_secs())
 
     @profiler.trace('Stack.snapshot', hide_args=False)
@@ -2034,16 +2059,17 @@ class Stack(collections.Mapping):
         return newstack, template
 
     @reset_state_on_error
-    def restore(self, snapshot):
+    def restore(self, snapshot, notify=None):
         """Restore the given snapshot.
 
         Invokes handle_restore on all resources.
         """
+        LOG.debug("Restoring stack %s", self)
         self.updated_time = oslo_timeutils.utcnow()
         newstack = self.restore_data(snapshot)[0]
 
         updater = scheduler.TaskRunner(self.update_task, newstack,
-                                       action=self.RESTORE)
+                                       action=self.RESTORE, notify=notify)
         updater()
 
     def get_availability_zones(self):
author	Zane Bitter <zbitter@redhat.com>	2018-07-30 20:24:34 -0400
committer	Zane Bitter <zbitter@redhat.com>	2018-07-30 20:48:34 -0400
commit	2d2da745931bceeb784984a44d747fcaf6264a30 (patch)
tree	27faa2f34f813e1e42d12570cfd11bd3f59f0406 /heat/engine/stack.py
parent	e63778efc91869efbd41e2f96b0c554aef1c80e5 (diff)
download	heat-2d2da745931bceeb784984a44d747fcaf6264a30.tar.gz