summaryrefslogtreecommitdiff
path: root/buildstream/_scheduler/scheduler.py
diff options
context:
space:
mode:
authorTristan Van Berkom <tristan.vanberkom@codethink.co.uk>2017-06-13 23:48:38 +0900
committerTristan Van Berkom <tristan.vanberkom@codethink.co.uk>2017-06-13 23:52:18 +0900
commit90cab3878a3080c3901fb7cbc11bd152564d5085 (patch)
tree43de1d3eb117cb1b23c1b3b14960df150796173d /buildstream/_scheduler/scheduler.py
parent0683f1ff55fd6097375046817e7b93b8d02e1359 (diff)
downloadbuildstream-90cab3878a3080c3901fb7cbc11bd152564d5085.tar.gz
scheduler.py: Terminate jobs in an idle callback.
It turns out that process termination consistently fails when running in a callback from when a job completes, but consistently works when running from a SIGINT handler instead. Now we just unconditionally run process termination in an idle callback so we can ensure consistent termination.
Diffstat (limited to 'buildstream/_scheduler/scheduler.py')
-rw-r--r--buildstream/_scheduler/scheduler.py58
1 files changed, 37 insertions, 21 deletions
diff --git a/buildstream/_scheduler/scheduler.py b/buildstream/_scheduler/scheduler.py
index 0437d9559..d0505acf5 100644
--- a/buildstream/_scheduler/scheduler.py
+++ b/buildstream/_scheduler/scheduler.py
@@ -145,30 +145,25 @@ class Scheduler():
#
# Forcefully terminates all ongoing jobs.
#
+ # For this to be effective, one needs to return to
+ # the scheduler loop first and allow the scheduler
+ # to complete gracefully.
+ #
+ # NOTE: This will block SIGINT so that graceful process
+ # termination is not interrupted, and SIGINT will
+ # remain blocked after Scheduler.run() returns.
+ #
def terminate_jobs(self):
- # 20 seconds is a long time, but sometimes cleaning up man GB
- # in a build directory can take a long time
- wait_start = datetime.datetime.now()
- wait_limit = 20.0
-
- with _signals.blocked([signal.SIGINT]):
-
- # First tell all jobs to terminate
- for queue in self.queues:
- for job in queue.active_jobs:
- job.terminate()
-
- # Now wait for them to really terminate
- for queue in self.queues:
- for job in queue.active_jobs:
- elapsed = datetime.datetime.now() - wait_start
- timeout = max(wait_limit - elapsed.total_seconds(), 0.0)
- if not job.terminate_wait(timeout):
- job.kill()
+ # Set this right away, the frontend will check this
+ # attribute to decide whether or not to print status info
+ # etc and the following code block will trigger some callbacks.
+ self.terminated = True
+ self.loop.call_soon(self.terminate_jobs_real)
- self.loop.stop()
- self.terminated = True
+ # Block this until we're finished terminating jobs,
+ # this will remain blocked forever.
+ signal.pthread_sigmask(signal.SIG_BLOCK, [signal.SIGINT])
# suspend_jobs()
#
@@ -268,6 +263,27 @@ class Scheduler():
break
return failed
+ def terminate_jobs_real(self):
+ # 20 seconds is a long time, it can take a while and sometimes
+ # we still fail, need to look deeper into this again.
+ wait_start = datetime.datetime.now()
+ wait_limit = 20.0
+
+ # First tell all jobs to terminate
+ for queue in self.queues:
+ for job in queue.active_jobs:
+ job.terminate()
+
+ # Now wait for them to really terminate
+ for queue in self.queues:
+ for job in queue.active_jobs:
+ elapsed = datetime.datetime.now() - wait_start
+ timeout = max(wait_limit - elapsed.total_seconds(), 0.0)
+ if not job.terminate_wait(timeout):
+ job.kill()
+
+ self.loop.stop()
+
# get_job_token():
#
# Used by the Queue object to obtain a token for