From e96aeb1260382cdaa297b22d12e37bab69ac6444 Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Fri, 17 Oct 2014 15:27:13 +0300 Subject: Fix job listing HTTP queries in ARCH --- ARCH | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ARCH b/ARCH index a815c99..1588dfc 100644 --- a/ARCH +++ b/ARCH @@ -282,9 +282,12 @@ Running job management: MINION gets around to it.) This request returns as soon as the STATEDB change is done. -* `GET /1.0/list-all-jobs` causes WEBAPP to return a JSON list of ids +* `GET /1.0/list-jobs` causes WEBAPP to return a JSON list of ids of all jobs, running or finished, that it knows about. (RQ/ALLJOBS) +* `GET /1.0/list-jobs-html` is the same as `list-jobs`, but returns an + HTML page instead. + * `POST /1.0/remove-job` with `job_id=jobid` in the body, removes a stopped job from the state database. -- cgit v1.2.1 From 4efbdabd77d2ec71766d2ab140198964202884e1 Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Fri, 17 Oct 2014 15:27:14 +0300 Subject: Document how old job removal is to work --- ARCH | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/ARCH b/ARCH index 1588dfc..6eb8376 100644 --- a/ARCH +++ b/ARCH @@ -372,6 +372,17 @@ The MINION to WEBAPP. * Go back to top to request new job. + +Old job removal +--------------- + +To avoid the STATEDB filling up with logs of old jobs, a systemd timer +unit will run occasionally to remove jobs so old, nobody cares about +them anymore. To make it easier to experiment with the logic of +choosing what to remove (age only? keep failed ones? something else?) +the removal is kept outside the WEBAPP. + + STATEDB ------- -- cgit v1.2.1 From 58fdfa14e8148e19fe27036eaa2b3e9e55a93887 Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Fri, 17 Oct 2014 15:27:15 +0300 Subject: Handle case when there is no disk usage info Use 0 instead. This was exposed by a test suite change: test suite creates a dummy job, but doesn't fill in all the fields. 
With this change, WEBAPP doesn't crash when it tries to report a job's information when not all fields are filled in. --- lorrycontroller/showjob.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lorrycontroller/showjob.py b/lorrycontroller/showjob.py index 951ad22..bc82bfe 100644 --- a/lorrycontroller/showjob.py +++ b/lorrycontroller/showjob.py @@ -39,7 +39,7 @@ class JobShower(object): 'path': statedb.get_job_path(job_id), 'exit': 'no' if exit is None else exit, 'disk_usage': disk_usage, - 'disk_usage_nice': self.format_bytesize(disk_usage), + 'disk_usage_nice': self.format_bytesize(disk_usage or 0), 'output': output, 'job_started': self.format_time(started), 'job_ended': '' if ended is None else self.format_time(ended), -- cgit v1.2.1 From 883825d7c6d66bc49ed140482453aad3e98edc8a Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Fri, 17 Oct 2014 15:27:16 +0300 Subject: Add scenario for automatically removing old jobs --- yarns.webapp/040-running-jobs.yarn | 56 +++++++++++++++++++++++++++++++++++ yarns.webapp/900-implementations.yarn | 13 ++++++++ 2 files changed, 69 insertions(+) diff --git a/yarns.webapp/040-running-jobs.yarn b/yarns.webapp/040-running-jobs.yarn index 571afd6..cbc8f75 100644 --- a/yarns.webapp/040-running-jobs.yarn +++ b/yarns.webapp/040-running-jobs.yarn @@ -359,3 +359,59 @@ Remove it. Cleanup. FINALLY WEBAPP terminates + + +Remove old terminated jobs with helper program +-------------------------- + +There is a helper program to remove old jobs automatically. + + SCENARIO remove old terminated jobs + +Setup. + + GIVEN a new git repository in CONFGIT + AND an empty lorry-controller.conf in CONFGIT + AND lorry-controller.conf in CONFGIT adds lorries *.lorry using prefix upstream + AND WEBAPP uses CONFGIT as its configuration directory + AND a running WEBAPP + GIVEN Lorry file CONFGIT/foo.lorry with {"foo":{"type":"git","url":"git://foo"}} + WHEN admin makes request POST /1.0/read-configuration + +Start job 1. 
We start it at a known time of 100, so that we can control +when jobs become old. + + WHEN admin makes request POST /1.0/pretend-time with now=100 + AND admin makes request POST /1.0/give-me-job with host=testhost&pid=123 + THEN response has job_id set to 1 + +Remove old jobs while job 1 is running, still pretending time is 100 +seconds since epoch. This should leave job 1 running. + + WHEN admin removes old jobs at 100 + AND admin makes request GET /1.0/list-jobs + THEN response has job_ids set to [1] + +Finish the job. + + WHEN MINION makes request POST /1.0/job-update with job_id=1&exit=0 + WHEN admin makes request GET /1.0/list-jobs + THEN response has job_ids set to [1] + +Remove old jobs, still at 100 seconds. Job 1 should still remain, as +it just finished. + + WHEN admin removes old jobs at 100 + AND admin makes request GET /1.0/list-jobs + THEN response has job_ids set to [1] + +Let a long time pass, and remove old jobs again. Job 1 should now go +away. + + WHEN admin removes old jobs at 100000000000 + AND admin makes request GET /1.0/list-jobs + THEN response has job_ids set to [] + +Cleanup. + + FINALLY WEBAPP terminates diff --git a/yarns.webapp/900-implementations.yarn b/yarns.webapp/900-implementations.yarn index 54a82a4..455a993 100644 --- a/yarns.webapp/900-implementations.yarn +++ b/yarns.webapp/900-implementations.yarn @@ -406,6 +406,19 @@ Some responses are just plain text, so we match them with a regexp. grep "$MATCH_1" "$DATADIR/response.body" +Running the "remove old jobs" helper program +-------------------------------------------- + +Lorry Controller comes with a helper program to remove old jobs from +STATEDB. Tests need to be able to run it.
+ + IMPLEMENTS WHEN admin removes old jobs at (\d+) + "$SRCDIR/lorry-controller-remove-old-jobs" \ + --log "$DATADIR/remove-old-jobs.log" \ + --webapp-host=127.0.0.1 \ + --webapp-port="$(cat "$DATADIR/webapp.port")" \ + --debug-now="$MATCH_1" + Status web page --------------- -- cgit v1.2.1 From 07c2d2ec50801051af4406f3fa24a1be58a90e51 Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Fri, 17 Oct 2014 15:27:17 +0300 Subject: Add helper for removing old jobs --- lorry-controller-remove-old-jobs | 153 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 153 insertions(+) create mode 100755 lorry-controller-remove-old-jobs diff --git a/lorry-controller-remove-old-jobs b/lorry-controller-remove-old-jobs new file mode 100755 index 0000000..1448649 --- /dev/null +++ b/lorry-controller-remove-old-jobs @@ -0,0 +1,153 @@ +#!/usr/bin/env python +# +# Copyright (C) 2014 Codethink Limited +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + +import json +import logging +import time +import urllib2 +import urlparse +import contextlib + +import cliapp + + +class JobInfo(object): + + def __init__(self, job_id, exit_code, exit_timestamp): + self.job_id = job_id + self.exit_code = exit_code + self.exit_timestamp = exit_timestamp + + def __repr__(self): + return 'JobInfo(%s,%s,%s)' % ( + self.job_id, self.exit_code, self.exit_timestamp) + + +class OldJobRemover(cliapp.Application): + + def add_settings(self): + self.settings.string( + ['webapp-host'], + 'address of WEBAPP', + default='localhost') + + self.settings.integer( + ['webapp-port'], + 'port of WEBAPP', + default=12765) + + ONE_MINUTE = 60 + ONE_HOUR = 60 * ONE_MINUTE + ONE_DAY = 24 * ONE_HOUR + ONE_YEAR = 365 * ONE_DAY + + self.settings.integer( + ['max-age-in-seconds', 'max-age'], + 'maximum age of a finished job in seconds', + metavar='SECONDS', + default=ONE_YEAR) + + self.settings.integer( + ['debug-now'], + 'for tests and debugging, ' + 'set current time to SECONDS since the epoch ' + '(set to 0 to use real time', + metavar='SECONDS') + + def process_args(self, args): + logging.info('Removing old jobs from Lorry Controller STATEDB') + + job_ids = self.list_jobs() + job_infos = self.get_job_infos(job_ids) + ids_of_jobs_to_remove = self.select_for_removal(job_infos) + self.remove_jobs(ids_of_jobs_to_remove) + + def list_jobs(self): + data = self.get('/1.0/list-jobs') + obj = json.loads(data) + return obj['job_ids'] + + def get(self, path): + url = self.make_url(path) + with contextlib.closing(urllib2.urlopen(url)) as f: + return f.read() + + def make_url(self, path): + scheme = 'http' + netloc = '%s:%s' % ( + self.settings['webapp-host'], self.settings['webapp-port']) + query = None + fragment = None + parts = (scheme, netloc, path, query, fragment) + return urlparse.urlunsplit(parts) + + def get_job_infos(self, job_ids): + job_infos = [] + for job_id in job_ids: + try: + job_infos.append(self.get_job_info(job_id)) + except urllib2.HTTPError 
as e: + logging.warning( + 'Trouble getting job info for job %s: %s' % + (job_id, str(e))) + return job_infos + + def get_job_info(self, job_id): + data = self.get('/1.0/job/%s' % job_id) + obj = json.loads(data) + exit_code = obj['exit'] + if obj['job_ended']: + exit_timestamp = self.parse_timestamp(obj['job_ended']) + else: + exit_timestamp = None + return JobInfo(job_id, exit_code, exit_timestamp) + + def parse_timestamp(self, timestamp): + return time.mktime(time.strptime(timestamp, '%Y-%m-%d %H:%M:%S UTC')) + + def select_for_removal(self, job_infos): + return [job_info for job_info in job_infos if self.is_old(job_info)] + + def is_old(self, job_info): + if job_info.exit_timestamp is None: + return False + current_time = self.get_current_time() + age_in_seconds = current_time - job_info.exit_timestamp + return age_in_seconds >= self.settings['max-age-in-seconds'] + + def get_current_time(self): + if self.settings['debug-now']: + return self.settings['debug-now'] + return time.time() + + def remove_jobs(self, job_infos): + for job_info in job_infos: + self.remove_job(job_info.job_id) + + def remove_job(self, job_id): + logging.info('Removing job %s', job_id) + self.post('/1.0/remove-job', 'job_id=%s' % job_id) + + def post(self, path, data): + url = self.make_url(path) + f = urllib2.urlopen(url, data) + result = f.read() + f.close() + + +OldJobRemover().run() -- cgit v1.2.1 From 6cee48214c995b4ff720d96b290bc6e761823cde Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Fri, 17 Oct 2014 15:27:18 +0300 Subject: Install new script --- setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index e214c33..5fc49d2 100644 --- a/setup.py +++ b/setup.py @@ -12,7 +12,8 @@ setup(name='lorry-controller', author='Codethink Ltd', author_email='baserock-dev@baserock.org', url='http://www.baserock.com/', - scripts=['lorry-controller-webapp', 'lorry-controller-minion'], + scripts=['lorry-controller-webapp', 'lorry-controller-minion', + 
'lorry-controller-remove-old-jobs'], packages=['lorrycontroller'], data_files=[ ('share/lorry-controller/templates', glob.glob('templates/*')), -- cgit v1.2.1 From d91b652951cf9203c8e8cd255a0e0a81af6d2591 Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Fri, 17 Oct 2014 15:27:19 +0300 Subject: Add systemd units to trigger old job removal --- units/lorry-controller-remove-old-jobs.service | 12 ++++++++++++ units/lorry-controller-remove-old-jobs.timer | 8 ++++++++ 2 files changed, 20 insertions(+) create mode 100644 units/lorry-controller-remove-old-jobs.service create mode 100644 units/lorry-controller-remove-old-jobs.timer diff --git a/units/lorry-controller-remove-old-jobs.service b/units/lorry-controller-remove-old-jobs.service new file mode 100644 index 0000000..d1372b4 --- /dev/null +++ b/units/lorry-controller-remove-old-jobs.service @@ -0,0 +1,12 @@ +[Unit] +Description=Lorry Controller remove old jobs +After=lighttpd-lorry-controller-webapp.service + +[Install] +WantedBy=multi-user.target + +[Service] +ExecStart=/usr/bin/lorry-controller-remove-old-jobs +Restart=no +User=lorry +Group=lorry diff --git a/units/lorry-controller-remove-old-jobs.timer b/units/lorry-controller-remove-old-jobs.timer new file mode 100644 index 0000000..508a43f --- /dev/null +++ b/units/lorry-controller-remove-old-jobs.timer @@ -0,0 +1,8 @@ +[Unit] +Description=Lorry Controller remove old jobs + +[Install] +WantedBy=multi-user.target + +[Timer] +OnUnitInactiveSec=60 -- cgit v1.2.1