summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLars Wirzenius <lars.wirzenius@codethink.co.uk>2014-06-26 15:41:27 +0000
committerLars Wirzenius <lars.wirzenius@codethink.co.uk>2014-09-04 16:25:45 +0000
commitfcce17aa00de4c8a638ee631048dd8b89f1aa65d (patch)
tree04d5e1e3d119f96ec430645bea7480ca49684241
parent94d69740dde87b33848c4183cf2f03fbb5861e35 (diff)
downloadlorry-controller-fcce17aa00de4c8a638ee631048dd8b89f1aa65d.tar.gz
Implement /1.0/remove-ghost-jobs
-rw-r--r--lorrycontroller/__init__.py1
-rw-r--r--lorrycontroller/jobupdate.py4
-rw-r--r--lorrycontroller/removeghostjobs.py65
-rw-r--r--lorrycontroller/statedb.py41
4 files changed, 100 insertions, 11 deletions
diff --git a/lorrycontroller/__init__.py b/lorrycontroller/__init__.py
index bc51b88..a65ff02 100644
--- a/lorrycontroller/__init__.py
+++ b/lorrycontroller/__init__.py
@@ -32,6 +32,7 @@ from movetopbottom import MoveToTop, MoveToBottom
from stopjob import StopJob
from listjobs import ListAllJobs, ListAllJobsHTML
from showjob import ShowJob, ShowJobHTML, JobShower
+from removeghostjobs import RemoveGhostJobs
from removejob import RemoveJob
from lstroves import LsTroves, ForceLsTrove
from pretendtime import PretendTime
diff --git a/lorrycontroller/jobupdate.py b/lorrycontroller/jobupdate.py
index 3bd0e81..efc9ce1 100644
--- a/lorrycontroller/jobupdate.py
+++ b/lorrycontroller/jobupdate.py
@@ -44,11 +44,13 @@ class JobUpdate(lorrycontroller.LorryControllerRoute):
if stderr:
statedb.append_to_job_output(job_id, stderr)
+ now = statedb.get_current_time()
+ statedb.set_job_updated(job_id, now)
+
path = statedb.find_lorry_running_job(job_id)
lorry_info = statedb.get_lorry_info(path)
if exit is not None and exit != 'no':
- now = statedb.get_current_time()
statedb.set_lorry_last_run(path, int(now))
statedb.set_running_job(path, None)
statedb.set_job_exit(job_id, exit, int(now), disk_usage)
diff --git a/lorrycontroller/removeghostjobs.py b/lorrycontroller/removeghostjobs.py
new file mode 100644
index 0000000..2b2760c
--- /dev/null
+++ b/lorrycontroller/removeghostjobs.py
@@ -0,0 +1,65 @@
+# Copyright (C) 2014 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+import logging
+import time
+
+import bottle
+
+import lorrycontroller
+
+
+class RemoveGhostJobs(lorrycontroller.LorryControllerRoute):
+
+ http_method = 'POST'
+ path = '/1.0/remove-ghost-jobs'
+
+ def run(self, **kwargs):
+ logging.info('%s %s called', self.http_method, self.path)
+
+ ghost_timeout = self.app_settings['ghost-timeout']
+ ghosts = []
+ with self.open_statedb() as statedb:
+ for job_id in statedb.get_running_jobs():
+ if self.is_ghost_job(statedb, job_id, ghost_timeout):
+ self.exorcise_ghost_job(statedb, job_id)
+ ghosts.append(statedb.get_job_info(job_id))
+ return {
+ 'killed-ghost-jobs': ghosts,
+ }
+
+ def is_ghost_job(self, statedb, job_id, ghost_timeout):
+ updated = statedb.get_job_updated(job_id)
+ return self.now(statedb) - updated >= ghost_timeout
+
+ def now(self, statedb):
+ return statedb.get_current_time()
+
+ def exorcise_ghost_job(self, statedb, job_id):
+ logging.info('Job %s is a ghost job', job_id)
+ self.mark_job_to_be_killed_in_case_minion_appears(statedb, job_id)
+ self.mark_job_as_terminated(statedb, job_id)
+
+ def mark_job_to_be_killed_in_case_minion_appears(self, statedb, job_id):
+ statedb.set_kill_job(job_id, True)
+
+ def mark_job_as_terminated(self, statedb, job_id):
+ statedb.append_to_job_output(
+ job_id, '\nTERMINATED DUE TO GHOST TIMEOUT\n')
+ statedb.set_job_exit(job_id, 127, self.now(statedb), -1)
+
+ job_info = statedb.get_job_info(job_id)
+ statedb.set_running_job(job_info['path'], None)
diff --git a/lorrycontroller/statedb.py b/lorrycontroller/statedb.py
index 2d223e0..fd7857d 100644
--- a/lorrycontroller/statedb.py
+++ b/lorrycontroller/statedb.py
@@ -129,6 +129,7 @@ class StateDB(object):
'pid INT, '
'started INT, '
'ended INT, '
+ 'updated INT, '
'kill INT, '
'path TEXT, '
'exit TEXT, '
@@ -454,8 +455,8 @@ class StateDB(object):
def get_job_info(self, job_id):
c = self.get_cursor()
c.execute(
- 'SELECT job_id, host, pid, started, ended, kill, path, exit, '
- 'disk_usage, output FROM jobs WHERE job_id=?',
+ 'SELECT job_id, host, pid, started, ended, updated, kill, '
+ 'path, exit, disk_usage, output FROM jobs WHERE job_id=?',
(job_id,))
row = c.fetchone()
return {
@@ -464,11 +465,12 @@ class StateDB(object):
'pid': row[2],
'started': row[3],
'ended': row[4],
- 'kill': row[5],
- 'path': row[6],
- 'exit': row[7],
- 'disk_usage': row[8],
- 'output': row[9],
+ 'updated': row[5],
+ 'kill': row[6],
+ 'path': row[7],
+ 'exit': row[8],
+ 'disk_usage': row[9],
+ 'output': row[10],
}
def add_new_job(self, job_id, host, pid, path, started):
@@ -478,9 +480,10 @@ class StateDB(object):
assert self.in_transaction
c = self.get_cursor()
c.execute(
- 'INSERT INTO jobs (job_id, host, pid, path, started, kill) '
- 'VALUES (?, ?, ?, ?, ?, ?)',
- (job_id, host, pid, path, started, 0))
+ 'INSERT INTO jobs (job_id, host, pid, path, started, '
+ 'updated, kill) '
+ 'VALUES (?, ?, ?, ?, ?, ?, ?)',
+ (job_id, host, pid, path, started, started, 0))
def get_job_minion_host(self, job_id):
c = self.get_cursor()
@@ -514,6 +517,24 @@ class StateDB(object):
row = c.fetchone()
return row[0], row[1]
+ def get_job_updated(self, job_id):
+ c = self.get_cursor()
+ c.execute(
+ 'SELECT updated FROM jobs WHERE job_id IS ?',
+ (job_id,))
+ row = c.fetchone()
+ return row[0]
+
+ def set_job_updated(self, job_id, updated):
+ logging.debug(
+ 'StateDB.set_job_updated(%r, %r) called',
+ job_id, updated)
+ assert self.in_transaction
+ c = self.get_cursor()
+ c.execute(
+ 'UPDATE jobs SET updated=? WHERE job_id IS ?',
+ (updated, job_id))
+
def get_job_exit(self, job_id):
c = self.get_cursor()
c.execute(