summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPedro Alvarez <pedro.alvarez@codethink.co.uk>2014-10-23 09:49:37 +0100
committerPedro Alvarez <pedro.alvarez@codethink.co.uk>2014-10-23 09:49:37 +0100
commitd7ca3f2f14ad28437f7332ca387a790fecb8b081 (patch)
tree90d364dc107b0c032ecc5befc4f36393d9b655b0
parentf9ea6b7761a5dbfe7b5e5b2adcabf70525184d7e (diff)
parentd91b652951cf9203c8e8cd255a0e0a81af6d2591 (diff)
downloadlorry-controller-d7ca3f2f14ad28437f7332ca387a790fecb8b081.tar.gz
Merge branch 'baserock/pedroalvarez/old-jobs-removal'
Reviewed-by: Richard Maw Reviewed-by: Pedro Alvarez
-rw-r--r--ARCH16
-rwxr-xr-xlorry-controller-remove-old-jobs153
-rw-r--r--lorrycontroller/showjob.py2
-rw-r--r--setup.py3
-rw-r--r--units/lorry-controller-remove-old-jobs.service12
-rw-r--r--units/lorry-controller-remove-old-jobs.timer8
-rw-r--r--yarns.webapp/040-running-jobs.yarn56
-rw-r--r--yarns.webapp/900-implementations.yarn13
8 files changed, 260 insertions, 3 deletions
diff --git a/ARCH b/ARCH
index a815c99..6eb8376 100644
--- a/ARCH
+++ b/ARCH
@@ -282,9 +282,12 @@ Running job management:
MINION gets around to it.) This request returns as soon as the
STATEDB change is done.
-* `GET /1.0/list-all-jobs` causes WEBAPP to return a JSON list of ids
+* `GET /1.0/list-jobs` causes WEBAPP to return a JSON list of ids
of all jobs, running or finished, that it knows about. (RQ/ALLJOBS)
+* `GET /1.0/list-jobs-html` is the same as `list-jobs`, but returns an
+ HTML page instead.
+
* `POST /1.0/remove-job` with `job_id=jobid` in the body, removes a
stopped job from the state database.
@@ -369,6 +372,17 @@ The MINION
to WEBAPP.
* Go back to top to request new job.
+
+Old job removal
+---------------
+
+To avoid the STATEDB filling up with logs of old jobs, a systemd timer
+unit will run occasionally to remove jobs so old that nobody cares about
+them anymore. To make it easier to experiment with the logic of
+choosing what to remove (age only? keep failed ones? something else?)
+the removal is kept outside the WEBAPP.
+
+
STATEDB
-------
diff --git a/lorry-controller-remove-old-jobs b/lorry-controller-remove-old-jobs
new file mode 100755
index 0000000..1448649
--- /dev/null
+++ b/lorry-controller-remove-old-jobs
@@ -0,0 +1,153 @@
+#!/usr/bin/env python
+#
+# Copyright (C) 2014 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+import json
+import logging
+import time
+import urllib2
+import urlparse
+import contextlib
+
+import cliapp
+
+
+class JobInfo(object):
+
+    '''What the remover needs to know about a single job.
+
+    Stores the job id, the exit code and the time the job ended,
+    exactly as they were given to the constructor.
+
+    '''
+
+    def __init__(self, job_id, exit_code, exit_timestamp):
+        self.job_id = job_id
+        self.exit_code = exit_code
+        self.exit_timestamp = exit_timestamp
+
+    def __repr__(self):
+        # Same compact, comma-separated form for logs and debugging.
+        fields = (self.job_id, self.exit_code, self.exit_timestamp)
+        return 'JobInfo(%s,%s,%s)' % fields
+
+
+class OldJobRemover(cliapp.Application):
+
+    '''Remove old finished jobs from the Lorry Controller STATEDB.
+
+    The program talks to WEBAPP over HTTP: it lists all jobs, fetches
+    the details of each one, selects those that ended at least
+    --max-age-in-seconds ago, and asks WEBAPP to remove them.
+
+    '''
+
+    def add_settings(self):
+        '''Declare the settings this program accepts.'''
+
+        self.settings.string(
+            ['webapp-host'],
+            'address of WEBAPP',
+            default='localhost')
+
+        self.settings.integer(
+            ['webapp-port'],
+            'port of WEBAPP',
+            default=12765)
+
+        ONE_MINUTE = 60
+        ONE_HOUR = 60 * ONE_MINUTE
+        ONE_DAY = 24 * ONE_HOUR
+        ONE_YEAR = 365 * ONE_DAY
+
+        self.settings.integer(
+            ['max-age-in-seconds', 'max-age'],
+            'maximum age of a finished job in seconds',
+            metavar='SECONDS',
+            default=ONE_YEAR)
+
+        self.settings.integer(
+            ['debug-now'],
+            'for tests and debugging, '
+            'set current time to SECONDS since the epoch '
+            '(set to 0 to use real time)',
+            metavar='SECONDS')
+
+    def process_args(self, args):
+        '''Find the old jobs and remove them.
+
+        The positional arguments in args are not used.
+
+        '''
+
+        logging.info('Removing old jobs from Lorry Controller STATEDB')
+
+        job_ids = self.list_jobs()
+        job_infos = self.get_job_infos(job_ids)
+        # select_for_removal returns JobInfo objects, not bare ids.
+        jobs_to_remove = self.select_for_removal(job_infos)
+        self.remove_jobs(jobs_to_remove)
+
+    def list_jobs(self):
+        '''Return the value of 'job_ids' from GET /1.0/list-jobs.'''
+        data = self.get('/1.0/list-jobs')
+        obj = json.loads(data)
+        return obj['job_ids']
+
+    def get(self, path):
+        '''GET path from WEBAPP and return the response body.'''
+        url = self.make_url(path)
+        with contextlib.closing(urllib2.urlopen(url)) as f:
+            return f.read()
+
+    def make_url(self, path):
+        '''Return an http URL for path on the configured WEBAPP.'''
+        scheme = 'http'
+        netloc = '%s:%s' % (
+            self.settings['webapp-host'], self.settings['webapp-port'])
+        query = None
+        fragment = None
+        parts = (scheme, netloc, path, query, fragment)
+        return urlparse.urlunsplit(parts)
+
+    def get_job_infos(self, job_ids):
+        '''Return a list of JobInfo objects for the given job ids.
+
+        A job whose details cannot be fetched because of an HTTP
+        error is logged as a warning and left out of the result.
+
+        '''
+
+        job_infos = []
+        for job_id in job_ids:
+            try:
+                job_infos.append(self.get_job_info(job_id))
+            except urllib2.HTTPError as e:
+                logging.warning(
+                    'Trouble getting job info for job %s: %s', job_id, e)
+        return job_infos
+
+    def get_job_info(self, job_id):
+        '''Fetch /1.0/job/<job_id> and return it as a JobInfo.
+
+        An empty 'job_ended' field means the job has not ended yet;
+        this is recorded as an exit_timestamp of None.
+
+        '''
+
+        data = self.get('/1.0/job/%s' % job_id)
+        obj = json.loads(data)
+        exit_code = obj['exit']
+        if obj['job_ended']:
+            exit_timestamp = self.parse_timestamp(obj['job_ended'])
+        else:
+            exit_timestamp = None
+        return JobInfo(job_id, exit_code, exit_timestamp)
+
+    def parse_timestamp(self, timestamp):
+        '''Parse a 'YYYY-MM-DD HH:MM:SS UTC' timestamp to epoch seconds.
+
+        The timestamp is in UTC, so it is converted with
+        calendar.timegm. time.mktime would interpret the fields as
+        local time and give a result that is off by the local UTC
+        offset on any machine not running in UTC.
+
+        '''
+
+        import calendar
+
+        parsed = time.strptime(timestamp, '%Y-%m-%d %H:%M:%S UTC')
+        return calendar.timegm(parsed)
+
+    def select_for_removal(self, job_infos):
+        '''Return the JobInfo objects in job_infos that are old.'''
+        return [job_info for job_info in job_infos if self.is_old(job_info)]
+
+    def is_old(self, job_info):
+        '''Return True if the job ended at least max-age-in-seconds ago.
+
+        A job that has not ended (exit_timestamp is None) is never old.
+
+        '''
+
+        if job_info.exit_timestamp is None:
+            return False
+        current_time = self.get_current_time()
+        age_in_seconds = current_time - job_info.exit_timestamp
+        return age_in_seconds >= self.settings['max-age-in-seconds']
+
+    def get_current_time(self):
+        '''Return the current time in seconds since the epoch.
+
+        If --debug-now is set to a non-zero value, that value is
+        returned instead of the real time.
+
+        '''
+
+        if self.settings['debug-now']:
+            return self.settings['debug-now']
+        return time.time()
+
+    def remove_jobs(self, job_infos):
+        '''Ask WEBAPP to remove every job in job_infos.'''
+        for job_info in job_infos:
+            self.remove_job(job_info.job_id)
+
+    def remove_job(self, job_id):
+        '''Ask WEBAPP to remove one job via POST /1.0/remove-job.'''
+        logging.info('Removing job %s', job_id)
+        self.post('/1.0/remove-job', 'job_id=%s' % job_id)
+
+    def post(self, path, data):
+        '''POST data to path on WEBAPP and return the response body.
+
+        The response is closed even if reading it fails.
+
+        '''
+
+        url = self.make_url(path)
+        with contextlib.closing(urllib2.urlopen(url, data)) as f:
+            return f.read()
+
+
+# Only start the application when this file is run as a script, so
+# that loading it as a module has no side effects.
+if __name__ == '__main__':
+    OldJobRemover().run()
diff --git a/lorrycontroller/showjob.py b/lorrycontroller/showjob.py
index 951ad22..bc82bfe 100644
--- a/lorrycontroller/showjob.py
+++ b/lorrycontroller/showjob.py
@@ -39,7 +39,7 @@ class JobShower(object):
'path': statedb.get_job_path(job_id),
'exit': 'no' if exit is None else exit,
'disk_usage': disk_usage,
- 'disk_usage_nice': self.format_bytesize(disk_usage),
+ 'disk_usage_nice': self.format_bytesize(disk_usage or 0),
'output': output,
'job_started': self.format_time(started),
'job_ended': '' if ended is None else self.format_time(ended),
diff --git a/setup.py b/setup.py
index e214c33..5fc49d2 100644
--- a/setup.py
+++ b/setup.py
@@ -12,7 +12,8 @@ setup(name='lorry-controller',
author='Codethink Ltd',
author_email='baserock-dev@baserock.org',
url='http://www.baserock.com/',
- scripts=['lorry-controller-webapp', 'lorry-controller-minion'],
+ scripts=['lorry-controller-webapp', 'lorry-controller-minion',
+ 'lorry-controller-remove-old-jobs'],
packages=['lorrycontroller'],
data_files=[
('share/lorry-controller/templates', glob.glob('templates/*')),
diff --git a/units/lorry-controller-remove-old-jobs.service b/units/lorry-controller-remove-old-jobs.service
new file mode 100644
index 0000000..d1372b4
--- /dev/null
+++ b/units/lorry-controller-remove-old-jobs.service
@@ -0,0 +1,12 @@
+[Unit]
+Description=Lorry Controller remove old jobs
+After=lighttpd-lorry-controller-webapp.service
+
+[Install]
+WantedBy=multi-user.target
+
+[Service]
+ExecStart=/usr/bin/lorry-controller-remove-old-jobs
+Restart=no
+User=lorry
+Group=lorry
diff --git a/units/lorry-controller-remove-old-jobs.timer b/units/lorry-controller-remove-old-jobs.timer
new file mode 100644
index 0000000..508a43f
--- /dev/null
+++ b/units/lorry-controller-remove-old-jobs.timer
@@ -0,0 +1,8 @@
+[Unit]
+Description=Lorry Controller remove old jobs
+
+[Install]
+WantedBy=multi-user.target
+
+[Timer]
+OnUnitInactiveSec=60
diff --git a/yarns.webapp/040-running-jobs.yarn b/yarns.webapp/040-running-jobs.yarn
index 571afd6..cbc8f75 100644
--- a/yarns.webapp/040-running-jobs.yarn
+++ b/yarns.webapp/040-running-jobs.yarn
@@ -359,3 +359,59 @@ Remove it.
Cleanup.
FINALLY WEBAPP terminates
+
+
+Remove old terminated jobs with helper program
+--------------------------
+
+There is a helper program to remove old jobs automatically.
+
+ SCENARIO remove old terminated jobs
+
+Setup.
+
+ GIVEN a new git repository in CONFGIT
+ AND an empty lorry-controller.conf in CONFGIT
+ AND lorry-controller.conf in CONFGIT adds lorries *.lorry using prefix upstream
+ AND WEBAPP uses CONFGIT as its configuration directory
+ AND a running WEBAPP
+ GIVEN Lorry file CONFGIT/foo.lorry with {"foo":{"type":"git","url":"git://foo"}}
+ WHEN admin makes request POST /1.0/read-configuration
+
+Start job 1. We start it at a known time of 100, so that we can control
+when jobs become old.
+
+ WHEN admin makes request POST /1.0/pretend-time with now=100
+ AND admin makes request POST /1.0/give-me-job with host=testhost&pid=123
+ THEN response has job_id set to 1
+
+Remove old jobs while job 1 is running, still pretending time is 100
+seconds since epoch. This should leave job 1 running.
+
+ WHEN admin removes old jobs at 100
+ AND admin makes request GET /1.0/list-jobs
+ THEN response has job_ids set to [1]
+
+Finish the job.
+
+ WHEN MINION makes request POST /1.0/job-update with job_id=1&exit=0
+ WHEN admin makes request GET /1.0/list-jobs
+ THEN response has job_ids set to [1]
+
+Remove old jobs, still at 100 seconds. Job 1 should still remain, as
+it just finished.
+
+ WHEN admin removes old jobs at 100
+ AND admin makes request GET /1.0/list-jobs
+ THEN response has job_ids set to [1]
+
+Let a long time pass, and remove old jobs again. Job 1 should now go
+away.
+
+ WHEN admin removes old jobs at 100000000000
+ AND admin makes request GET /1.0/list-jobs
+ THEN response has job_ids set to []
+
+Cleanup.
+
+ FINALLY WEBAPP terminates
diff --git a/yarns.webapp/900-implementations.yarn b/yarns.webapp/900-implementations.yarn
index 54a82a4..455a993 100644
--- a/yarns.webapp/900-implementations.yarn
+++ b/yarns.webapp/900-implementations.yarn
@@ -406,6 +406,19 @@ Some responses are just plain text, so we match them with a regexp.
grep "$MATCH_1" "$DATADIR/response.body"
+Running the "remove old jobs" helper program
+--------------------------------------------
+
+Lorry Controller comes with a helper program to remove old jobs from
+STATEDB. Tests need to be able to run it.
+
+ IMPLEMENTS WHEN admin removes old jobs at (\d+)
+ "$SRCDIR/lorry-controller-remove-old-jobs" \
+ --log "$DATADIR/remove-old-jobs.log" \
+ --webapp-host=127.0.0.1 \
+ --webapp-port="$(cat "$DATADIR/webapp.port")" \
+ --debug-now="$MATCH_1"
+
Status web page
---------------