summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLars Wirzenius <lars.wirzenius@codethink.co.uk>2014-04-23 16:38:47 +0000
committerLars Wirzenius <lars.wirzenius@codethink.co.uk>2014-04-23 16:38:47 +0000
commit5e2584a7936af5413588232be1f0df7b4cef18c1 (patch)
tree54f30c0282eba421ee720bafd23e0bfe1f8d61ff
parent627c50633977804e6a10bafad81e72ae376b1cf6 (diff)
parentca8eaccfd8c0d89c91873dbfe4f5d33d479d2913 (diff)
downloadlorry-controller-5e2584a7936af5413588232be1f0df7b4cef18c1.tar.gz
Merge branch 'baserock/liw/new-lc-2'
Reviewed by Daniel and Richard on the mailing list, and then my changes based on review feedback by Richard on IRC. Reviewed-by: Daniel Silverstone Reviewed-by: Richard Maw
-rw-r--r--ARCH341
-rw-r--r--README96
-rwxr-xr-xcheck5
-rw-r--r--etc/lighttpd/lorry-controller-webapp-httpd.conf33
-rwxr-xr-xlorry-controller355
-rwxr-xr-xlorry-controller-minion308
-rwxr-xr-xlorry-controller-webapp223
-rw-r--r--lorry-controller.conf20
-rw-r--r--lorry-controller.morph14
-rw-r--r--lorrycontroller/__init__.py36
-rw-r--r--lorrycontroller/confparser.py335
-rw-r--r--lorrycontroller/gitano.py153
-rw-r--r--lorrycontroller/givemejob.py128
-rw-r--r--lorrycontroller/htmlstatus.py286
-rw-r--r--lorrycontroller/jobupdate.py76
-rw-r--r--lorrycontroller/listjobs.py63
-rw-r--r--lorrycontroller/listqueue.py33
-rw-r--r--lorrycontroller/listrunningjobs.py34
-rw-r--r--lorrycontroller/lstroves.py217
-rw-r--r--lorrycontroller/maxjobs.py55
-rw-r--r--lorrycontroller/movetopbottom.py58
-rw-r--r--lorrycontroller/pretendtime.py42
-rw-r--r--lorrycontroller/proxy.py51
-rw-r--r--lorrycontroller/readconf.py351
-rw-r--r--lorrycontroller/removejob.py44
-rw-r--r--lorrycontroller/route.py45
-rw-r--r--lorrycontroller/showjob.py83
-rw-r--r--lorrycontroller/showlorry.py86
-rw-r--r--lorrycontroller/startstopqueue.py55
-rw-r--r--lorrycontroller/statedb.py581
-rw-r--r--lorrycontroller/static.py36
-rw-r--r--lorrycontroller/status.py162
-rw-r--r--lorrycontroller/stopjob.py41
-rw-r--r--lorrycontroller/workingstate.py127
-rw-r--r--setup.py18
-rw-r--r--static/style.css18
-rw-r--r--templates/job.tpl20
-rw-r--r--templates/list-jobs.tpl32
-rw-r--r--templates/lorry.tpl44
-rw-r--r--templates/status.tpl113
-rwxr-xr-xtest-wait-for-port40
-rw-r--r--units/lighttpd-lorry-controller-webapp.service12
-rw-r--r--units/lorry-controller-ls-troves.service8
-rw-r--r--units/lorry-controller-ls-troves.timer6
-rw-r--r--units/lorry-controller-minion@.service9
-rw-r--r--units/lorry-controller-readconf.service8
-rw-r--r--units/lorry-controller-readconf.timer6
-rw-r--r--units/lorry-controller-status.service9
-rw-r--r--units/lorry-controller-status.timer6
-rw-r--r--yarns.webapp/010-introduction.yarn77
-rw-r--r--yarns.webapp/020-status.yarn27
-rw-r--r--yarns.webapp/030-queue-management.yarn106
-rw-r--r--yarns.webapp/040-running-jobs.yarn260
-rw-r--r--yarns.webapp/050-troves.yarn76
-rw-r--r--yarns.webapp/060-validation.yarn237
-rw-r--r--yarns.webapp/900-implementations.yarn442
-rw-r--r--yarns.webapp/yarn.sh122
57 files changed, 5131 insertions, 1138 deletions
diff --git a/ARCH b/ARCH
new file mode 100644
index 0000000..d6199bf
--- /dev/null
+++ b/ARCH
@@ -0,0 +1,341 @@
+% Architecture of daemonised Lorry Controller
+% Codethink Ltd
+
+Introduction
+============
+
+This is an architecture document for Lorry Controller. It is aimed at
+those who develop the software.
+
+Lorry is a tool in Baserock for mirroring code from whatever format
+upstream provides it into git repositories, converting them to git as
+needed. Lorry Controller is a service, running on a Trove, which runs
+Lorry against all configured upstreams, including other Troves.
+
+Lorry Controller reads a configuration from a git repository. That
+configuration includes specifications of which upstreams to
+mirror/convert. This includes what upstream Troves to mirror. Lorry
+Controller instructs Lorry to push to a Trove's git repositories.
+
+Lorry specifications, and upstream Trove specifications, may include
+scheduling information, which the Lorry Controller uses to decide when
+to execute which specification.
+
+Requirements
+============
+
+Some concepts/terminology:
+
+* CONFGIT is the git repository the Lorry Controller instance uses for
+ its configuration.
+* Lorry specification: which upstream version control repository or
+ tarball to mirror.
+* Trove specification: which upstream Trove to mirror. This gets
+ broken into generated Lorry specifications, one per git repository
+ on the upstream Trove. There can be many Trove specifications to
+ mirror many Troves.
+* job: An instance of executing a Lorry specification. Each job has an
+ identifier and associated data (such as the output provided by the
+ running job, and whether it succeeded).
+* run queue: all the Lorry specifications (from CONFGIT or generated
+ from the Trove specifications) a Lorry Controller knows about; this
+ is the set of things that get scheduled. The queue has a linear
+ order (first job in the queue is the next job to execute).
+* admin: a person who can control or reconfigure a Lorry Controller
+ instance.
+
+Original set of requirements, which have been broken down and detailed
+below:
+
+* Lorry Controller should be capable of being reconfigured at runtime
+ to allow new tasks to be added and old tasks to be removed.
+ (RC/ADD, RC/RM, RC/START)
+* Lorry Controller should not allow all tasks to become stuck if one
+ task is taking a long time. (RR/MULTI)
+* Lorry Controller should not allow stuck tasks to remain stuck
+ forever. (Configurable timeout? monitoring of disk usage or CPU to
+ see if work is being done?) (RR/TIMEOUT)
+* Lorry Controller should be able to be controlled at runtime to allow:
+ - Querying of the current task set (RQ/SPECS, RQ/SPEC)
+ - Querying of currently running tasks (RQ/RUNNING)
+ - Promotion or demotion of a task in the queue (RT/TOP, RT/BOT)
+ - Supporting of the health monitoring to allow appropriate alerts
+ to be sent out (MON/STATIC, MON/DU)
+
+The detailed requirements (prefixed by a unique identifier, which is
+used elsewhere to refer to the exact requirement):
+
+* (FW) Lorry Controller can access upstream Troves from behind firewalls.
+ * (FW/H) Lorry Controller can access the upstream Trove using HTTP or
+ HTTPS only, without using ssh, in order to get a list of
+ repositories to mirror. (Lorry itself also needs to be able to
+ access the upstream Trove using HTTP or HTTPS only, bypassing
+ ssh, but that's a Lorry problem and outside the scope of Lorry
+ Controller, so it'll need to be dealt with separately.)
+ * (FW/C) Lorry Controller does not verify SSL/TLS certificates
+ when accessing the upstream Trove.
+* (RC) Lorry Controller can be reconfigured at runtime.
+ * (RC/ADD) A new Lorry specification can be added to CONFGIT, and
+ a running Lorry Controller will add it to its run queue as
+ soon as it is notified of the change.
+ * (RC/RM) A Lorry specification can be removed from CONFGIT, and a
+ running Lorry Controller will remove it from its run queue as
+ soon as it is notified of the change.
+ * (RC/START) A Lorry Controller reads CONFGIT when it starts,
+ updating its run queue if anything has changed.
+* (RT) Lorry Controller can be controlled at runtime.
+ * (RT/KILL) An admin can get their Lorry Controller to stop a running job.
+ * (RT/TOP) An admin can get their Lorry Controller to move a Lorry spec to
+ the beginning of the run queue.
+ * (RT/BOT) An admin can get their Lorry Controller to move a Lorry
+ spec to the end of the run queue.
+ * (RT/QSTOP) An admin can stop their Lorry Controller from scheduling any new
+ jobs.
+ * (RT/QSTART) An admin can get their Lorry Controller to start
+ scheduling jobs again.
+* (RQ) Lorry Controller can be queried at runtime.
+ * (RQ/RUNNING) An admin can list all currently running jobs.
+ * (RQ/ALLJOBS) An admin can list all finished jobs that the Lorry
+ Controller still remembers.
+ * (RQ/SPECS) An admin can list all existing Lorry specifications
+ in the run queue.
+ * (RQ/SPEC) An admin can query existing Lorry specifications in
+ the run queue for any information the Lorry Controller holds for
+ them, such as the last time they successfully finished running.
+* (RR) Lorry Controller is reasonably robust.
+ * (RR/CONF) Lorry Controller ignores any broken Lorry or Trove
+ specifications in CONFGIT, and runs without them.
+ * (RR/TIMEOUT) Lorry Controller stops a job that runs for too
+ long.
+ * (RR/MULTI) Lorry Controller can run multiple jobs at the same
+ time, and lets the maximal number of such jobs be configured by
+ the admin.
+ * (RR/DU) Lorry Controller (and the way it runs Lorry) is
+ designed to be frugal about disk space usage.
+ * (RR/CERT) Lorry Controller tells Lorry to not worry about
+ unverifiable SSL/TLS certificates and to continue even if the
+ certificate can't be verified or the verification fails.
+* (RS) Lorry Controller is reasonably scalable.
+ * (RS/SPECS) Lorry Controller works for the number of Lorry
+ specifications we have on git.baserock.org (a number that will
+ increase, and is currently about 500).
+ * (RS/GITS) Lorry Controller works for mirroring git.baserock.org
+ (about 500 git repositories).
+ * (RS/HW) Lorry Controller may assume that CPU, disk, and
+ bandwidth are sufficient, though not to be needlessly wasted.
+* (MON) Lorry Controller can be monitored from the outside.
+ * (MON/STATIC) Lorry Controller updates at least once a minute a
+ static HTML file, which shows its current status with sufficient
+ detail that an admin knows if things get stuck or break.
+ * (MON/DU) Lorry Controller measures, at least, the disk usage of
+ each job and Lorry specification.
+* (SEC) Lorry Controller is reasonably secure.
+ * (SEC/API) Access to the Lorry Controller run-time query and
+ controller interfaces is managed with iptables (for now).
+ * (SEC/CONF) Access to CONFGIT is managed by the git server that
+ hosts it. (Gitano on Trove.)
+
+Architecture design
+===================
+
+Constraints
+-----------
+
+Python is not good at multiple threads (partly due to the global
+interpreter lock), and mixing threads and executing subprocesses is
+quite tricky to get right in general. Thus, this design avoids using
+threads.
+
+Entities
+--------
+
+* An admin is a human being that communicates with the Lorry
+ Controller using an HTTP API. They might do it using a command line
+ client.
+* Lorry Controller runs Lorry appropriately, and consists of several
+ components described below.
+* The local Trove is where Lorry Controller tells its Lorry to push
+ the results.
+* Upstream Trove is a Trove that Lorry Controller mirrors to the local
+ Trove. There can be multiple upstream Troves.
+
+Components of Lorry Controller
+------------------------------
+
+* CONFGIT is a git repository for Lorry Controller configuration,
+ which the Lorry Controller can access and pull from. Pushing is not
+ required and should be prevented by Gitano. CONFGIT is hosted on the
+ local Trove.
+* STATEDB is persistent storage for the Lorry Controller's state: what
+ Lorry specs it knows about (provided by the admin, or generated from
+ a Trove spec by Lorry Controller itself), their ordering, jobs that
+ have been run or are being run, information about the jobs, etc.
+ The idea is that the Lorry Controller process can terminate (cleanly
+ or by crashing), and be restarted, and continue approximately where
+ it was. Also, a persistent storage is useful if there are multiple
+ processes involved due to how bottle.py and WSGI work. STATEDB is
+ implemented using sqlite3.
+* WEBAPP is the controlling part of Lorry Controller, which maintains
+ the run queue, and provides an HTTP API for monitoring and
+ controlling Lorry Controller. WEBAPP is implemented as a bottle.py
+ application.
+* MINION runs jobs (external processes) on behalf of WEBAPP. It
+ communicates with WEBAPP over HTTP, and requests a job to run,
+ starts it, and while it waits, sends partial output to the WEBAPP,
+ and asks the WEBAPP whether the job should be aborted or not. MINION
+ may eventually run on a different host than WEBAPP, for added
+ scalability.
+
+Components external to Lorry Controller
+---------------------------------------
+
+* A web server. This runs the Lorry Controller WEBAPP, using WSGI so
+ that multiple instances (processes) can run at once, and thus serve
+ many clients.
+* bottle.py is a Python microframework for web applications. We
+ already have it in Baserock, where we use it for morph-cache-server,
+ and it seems to be acceptable.
+* systemd is the operating system component that starts services and
+ processes.
+
+How the components work together
+--------------------------------
+
+* Each WEBAPP instance is started by the web server, when a request
+ comes in. The web server is started by a systemd unit.
+* Each MINION instance is started by a systemd unit. Each MINION
+ handles one job at a time, and doesn't block other MINIONs from
+ running other jobs. The admins decide how many MINIONs run at once,
+ depending on hardware resources and other considerations. (RR/MULTI)
+* An admin communicates with the WEBAPP only, by making HTTP requests.
+ Each request is either a query (GET) or a command (POST). Queries
+ report state as stored in STATEDB. Commands cause the WEBAPP
+ instance to do something and alter STATEDB accordingly.
+* When an admin makes changes to CONFGIT, and pushes them to the local
+ Trove, the Trove's git post-update hook makes an HTTP request to
+ WEBAPP to update STATEDB from CONFGIT. (RC/ADD, RC/RM)
+* Each MINION likewise communicates only with the WEBAPP using HTTP
+ requests. MINION requests a job to run (which triggers WEBAPP's job
+ scheduling), and then reports results to the WEBAPP (which causes
+ WEBAPP to store them in STATEDB), which tells MINION whether to
+ continue running the job or not (RT/KILL). There is no separate
+ scheduling process: all scheduling happens when there is a MINION
+ available.
+* At system start up, a systemd unit makes an HTTP request to WEBAPP
+ to make it refresh STATEDB from CONFGIT. (RC/START)
+* A timer unit for systemd makes an HTTP request to get WEBAPP to
+ refresh the static HTML status page. (MON/STATIC)
+
+In summary: systemd starts WEBAPP and MINIONs, and whenever a
+MINION can do work, it asks WEBAPP for something to do, and reports
+back results. Meanwhile, admin can query and control via HTTP requests
+to WEBAPP, and WEBAPP instances communicate via STATEDB.
+
+The WEBAPP
+----------
+
+The WEBAPP provides an HTTP API as described below.
+
+Requests for admins:
+
+* `GET /1.0/status` causes WEBAPP to return a JSON object that
+ describes the state of Lorry Controller. This information is meant
+ to be programmatically useable and may or may not be the same as in
+ the HTML page.
+* `POST /1.0/stop-queue` causes WEBAPP to stop scheduling new jobs to
+ run. Any currently running jobs are not affected. (RT/QSTOP)
+* `POST /1.0/start-queue` causes WEBAPP to start scheduling jobs
+ again. (RT/QSTART)
+
+* `GET /1.0/list-queue` causes WEBAPP to return a JSON list of ids of
+ all Lorry specifications in the run queue, in the order they are in
+ the run queue. (RQ/SPECS)
+* `GET /1.0/lorry/<lorryspecid>` causes WEBAPP to return a JSON map
+ (dict) with all the information about the specified Lorry
+ specification. (RQ/SPEC)
+* `POST /1.0/move-to-top/<lorryspecid>` where `lorryspecid` is the id
+ of a Lorry specification in the run queue, causes WEBAPP to move the
+ specified spec to the head of the run queue, and store this in
+ STATEDB. It doesn't affect currently running jobs. (RT/TOP)
+* `POST /1.0/move-to-bottom/<lorryspecid>` is like `/move-to-top`, but
+ moves the job to the end of the run queue. (RT/BOT)
+
+* `GET /1.0/list-running-jobs` causes WEBAPP to return a JSON list of
+ ids of all currently running jobs. (RQ/RUNNING)
+* `GET /1.0/job/<jobid>` causes WEBAPP to return a JSON map (dict)
+ with all the information about the specified job.
+* `POST /1.0/stop-job/<jobid>` where `jobid` is an id of a running job,
+ causes WEBAPP to record in STATEDB that the job is to be killed, and
+ waits for it to be killed. (Killing to be done when MINION gets
+ around to it.) This request returns as soon as the STATEDB change is
+ done.
+* `GET /1.0/list-all-jobs` causes WEBAPP to return a JSON list of ids
+ of all jobs, running or finished, that it knows about. (RQ/ALLJOBS)
+
+Requests for MINION:
+
+* `GET /1.0/give-me-job` is used by MINION to get a new job to run.
+ WEBAPP will either return a JSON object describing the job to run,
+ or return a status code indicating that there is nothing to do.
+ WEBAPP will respond immediately, even if there is nothing for MINION
+ to do, and MINION will then sleep for a while before it tries again.
+ WEBAPP updates STATEDB to record that the job is allocated to a
+ MINION.
+* `POST /1.0/job-update` is used by MINION to push updates about the
+ job it is running to WEBAPP. The body is a JSON object containing
+ additional information about the job, such as data from its
+ stdout/stderr, and current resource usage. There MUST be at least
+ one `job-update` call, which indicates the job has terminated.
+ WEBAPP responds with a status indicating whether the job should
+ continue to run or be terminated (RR/TIMEOUT). WEBAPP records the
+ job as terminated only after MINION tells it the job has been
+ terminated. MINION makes the `job-update` request frequently, even
+ if the job has produced no output, so that WEBAPP can update a
+ timestamp in STATEDB to indicate the job is still alive.
+
+Other requests:
+
+* `POST /1.0/read-configuration` causes WEBAPP to update its copy of
+ CONFGIT and update STATEDB based on the new configuration, if it has
+ changed. Returns OK/ERROR status. (RC/ADD, RC/RM, RC/START)
+* `GET /1.0/status-html` causes WEBAPP to return an HTML page that
+ describes the state of Lorry Controller. This also updates an
+ on-disk copy of the HTML page, which the web server is configured to
+ serve using a normal HTTP request. (MON/STATIC)
+
+The MINION
+----------
+
+* Do `GET /1.0/give-me-job` to WEBAPP.
+* If didn't get a job, sleep a while and try again.
+* If did get job, fork and exec that.
+* In a loop: wait for output, for a suitably short period of time,
+ from job (or its termination), with `select` or similar mechanism,
+ and send anything (if anything) you get to WEBAPP. If the WEBAPP
+ told us to kill the job, kill it, then send an update to that effect
+ to WEBAPP.
+* Go back to top to request new job.
+
+STATEDB
+-------
+
+The STATEDB has several tables. This section explains them.
+
+The `running_queue` table has a single column (`running`) and a single
+row, and is used to store a single boolean value that specifies
+whether WEBAPP is giving out jobs to run from the run-queue. This
+value is controlled by `/1.0/start-queue` and `/1.0/stop-queue`
+requests.
+
+The `lorries` table implements the run-queue: all the Lorry specs that
+WEBAPP knows about. It has the following columns:
+
+* `path` is the path of the git repository on the local Trove, i.e.,
+ the git repository to which Lorry will push. This is a unique
+ identifier. It is used, for example, to determine if a Lorry spec
+ is obsolete after a CONFGIT update.
+* `text` has the text of the Lorry spec. This may be read from a file
+ or generated by Lorry Controller itself. This text will be given to
+ Lorry when a job is run.
+* `generated` is set to 0 or 1, depending on if the Lorry came from an
+ actual `.lorry` file or was generated by Lorry Controller.
diff --git a/README b/README
index b6b2662..ea3059f 100644
--- a/README
+++ b/README
@@ -1,8 +1,96 @@
README for lorry-controller
===========================
-Notes
------
+Lorry Controller mirrors Troves and mirrors or converts upstream
+projects into git repositories on the local Trove. Lorry Controller
+reads a configuration file (see below) and runs the Lorry program
+against the intended targets at suitable intervals.
-The lorry-controller.conf here is an example starting config for any customer
-configuration. It is a single trove instance pointing at git.baserock.org.
+Lorry Controller configuration
+------------------------------
+
+Lorry Controller reads a configuration file of the following format.
+
+* The file uses JSON syntax.
+* The file is a list.
+* Each item in the list is a mapping specifying a Trove or a set of
+ `.lorry` files.
+* A Trove spec specifies another Trove to mirror completely.
+* A Lorry spec specifies a set of Lorry specifications (`.lorry` files)
+ for individual project repositories to convert or mirror to git on
+ the local Trove.
+
+Each spec (mapping) has a number of key/value pairs. The following are
+shared between Trove and Lorry specs:
+
+* `type` is the type of the spec; value MUST be either `trove` or
+ `lorries`.
+* `interval` specifies how often Lorry Controller should mirror the
+ repositories in the spec. See below for INTERVAL.
+* `protocol`: specifies how Lorry Controller (and Lorry) should talk
+ to remote Troves. Allowed values are `ssh`, `https`, `http`. This
+ field is mandatory.
+* `auth`: Specifies how to authenticate to the remote Trove over
+ https. This is an optional field. If present, it should be a
+ dictionary with the fields `username` and `password`.
+* Additionally, the following seem to be supported by an old version
+ of Lorry Controller, but are ignored by the new Lorry Controller:
+ `uuid`, `serial`, `create`, `destroy`, `stagger`, `tarball`.
+
+Trove specs have the following keys:
+
+* `trovehost` is the other Trove to mirror; a domain name or IP
+ address. It is mandatory.
+* `ls-interval` determines how often Lorry Controller should query the
+ other Trove for a list of repositories it may mirror. See below for
+ INTERVAL. `ls-interval` is mandatory.
+* `prefixmap` maps repository path prefixes from the other Trove to
+ the local Trove. It is mandatory in a Trove spec. If the remote
+ prefix is `foo`, and the local prefix is `bar`, then remote
+ repository `foo/baserock/yeehaa` gets mirrored to local repository
+ `bar/baserock/yeehaa`. If the remote Trove has a repository that
+ does not match a prefix, that repository gets ignored.
+* `ignore` is a list of git repositories from the other Trove that
+ should NOT be mirrored. Each list element is a path to the git
+ repository (not including leading slash). `ignore` is optional.
+
+An INTERVAL value (for `interval` or `ls-interval`) is a number and a
+unit to indicate a time interval. Allowed units are minutes (`m`),
+hours (`h`), and days (`d`), expressed as single-letter codes in upper
+or lower case.
+
+Lorry specs have the following keys:
+
+* `prefix` is a path prefix to be prepended to all repositories
+ created from the `.lorry` files from this spec. It is mandatory.
+* `globs` is a list of globs (as strings) for `.lorry` files to use.
+ The glob is matched in the directory containing the configuration
+ file in which this spec is. It is OK for the globs to not match
+ anything. A `globs` entry is mandatory, however.
+
+A fairly minimal example for mirroring `git.baserock.org` and using
+local `.lorry` files.
+
+ [
+ {
+ "type": "trove",
+ "trovehost": "git.baserock.org",
+ "ls-interval": "4d",
+ "interval": "2h",
+ "prefixmap": {
+ "baserock": "baserock",
+ "delta": "delta"
+ }
+ },
+ {
+ "type": "lorries",
+ "interval": "1h",
+ "prefix": "delta",
+ "globs": [
+ "open-source-lorries/*.lorry"
+ ]
+ }
+ ]
+
+The syntax of `.lorry` files is specified by the Lorry program; see
+its documentation for details.
diff --git a/check b/check
new file mode 100755
index 0000000..b449f2b
--- /dev/null
+++ b/check
@@ -0,0 +1,5 @@
+#!/bin/sh
+
+set -eu
+
+yarn -s yarns.webapp/yarn.sh yarns.webapp/*.yarn --env PYTHONPATH="$PYTHONPATH" "$@"
diff --git a/etc/lighttpd/lorry-controller-webapp-httpd.conf b/etc/lighttpd/lorry-controller-webapp-httpd.conf
new file mode 100644
index 0000000..ec92783
--- /dev/null
+++ b/etc/lighttpd/lorry-controller-webapp-httpd.conf
@@ -0,0 +1,33 @@
+server.document-root = "/var/www/htdocs"
+
+server.port = 12765
+
+server.username = "lorry"
+server.groupname = "lorry"
+
+server.modules += ("mod_cgi", "mod_fastcgi")
+
+mimetype.assign = (
+ ".html" => "text/html",
+ ".txt" => "text/plain",
+ ".jpg" => "image/jpeg",
+ ".png" => "image/png",
+ ".css" => "text/css"
+)
+
+$SERVER["socket"] == ":12765" {
+ server.username = "lorry"
+ server.groupname = "lorry"
+ fastcgi.server = (
+ "" =>
+ (
+ "python-fcgi" =>
+ (
+ "socket" => "/run/lighttpd-lorry/lorry-controller-webapp.socket",
+ "bin-path" => "/usr/bin/lorry-controller-webapp --config=/etc/lorry-controller/webapp.conf",
+ "check-local" => "disable",
+ "max-procs" => 16,
+ )
+ )
+ )
+}
diff --git a/lorry-controller b/lorry-controller
deleted file mode 100755
index 0ae4ceb..0000000
--- a/lorry-controller
+++ /dev/null
@@ -1,355 +0,0 @@
-#!/usr/bin/env python
-#
-# Copyright (C) 2013 Codethink Limited
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; version 2 of the License.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License along
-# with this program; if not, write to the Free Software Foundation, Inc.,
-# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-
-import cliapp
-import json
-import logging
-import os
-import time
-import re
-import urllib
-import urllib2
-
-
-from lorrycontroller.confparser import LorryControllerConfig
-from lorrycontroller.workingstate import WorkingStateManager
-from lorrycontroller.htmlstatus import HTMLStatusManager
-
-
-defaults = {
- 'work-area': '/home/lorry/controller-area',
- 'config-name': 'lorry-controller.conf',
- 'lorry': 'lorry',
-}
-
-
-token_finder = re.compile("([0-9a-f]{40})")
-
-
-class LorryController(cliapp.Application):
-
- def add_settings(self):
- self.settings.string(['work-area'],
- 'path to the area for the controller to work in',
- metavar='PATH',
- default=defaults['work-area'])
- self.settings.boolean(['dry-run'],
- "do a dry-run and don't actually do anything "
- "beyond updating the git tree",
- default=False)
- self.settings.string(['lorry'],
- 'path to the lorry binary to use',
- metavar='LORRY',
- default=defaults['lorry'])
- self.settings.string(['config-name'],
- 'configuration leafname. Defaults to '
- 'lorry-controller.conf',
- metavar='CONFNAME',
- default=defaults['config-name'])
- self.settings.boolean(['lorry-verbose'],
- 'Whether to pass --verbose to lorry',
- default=False)
- self.settings.string(['lorry-log'],
- 'Log file name for lorry if wanted',
- metavar='LORRYLOG',
- default=None)
- self.settings.string(['html-file'],
- 'HTML filename for lorry controller status',
- metavar='HTMLFILE',
- default=None)
-
- def process_args(self, args):
- logging.info("Starting to control lorry")
- try:
- os.chdir(self.settings['work-area'])
- except OSError, e:
- logging.error("Unable to chdir() to %s" %
- self.settings['work-area'])
- raise SystemExit(2)
- if not os.path.isdir("git"):
- logging.error("Unable to find git checkout")
- raise SystemExit(3)
- if not os.path.isdir("work"):
- os.mkdir("work")
-
- logging.info("Updating configuration checkout")
- self.rungit(['remote', 'update', 'origin'])
- self.rungit(['reset', '--hard', 'origin/master'])
- self.rungit(['clean', '-fdx'])
-
- self.lorrycmd=[self.settings['lorry']]
- if self.settings['lorry-verbose']:
- self.lorrycmd += ["--verbose"]
- if self.settings['lorry-log'] is not None:
- self.lorrycmd += ["--log", self.settings['lorry-log']]
-
- if not os.path.exists(os.path.join('git',
- self.settings['config-name'])):
- logging.error("Unable to find lorry-controller.conf in git")
- raise SystemExit(4)
-
- if os.path.isfile('git/proxy.conf'):
- self.set_proxy('git/proxy.conf')
- logging.info('Loaded proxy information')
- self.conf = LorryControllerConfig(self, 'git/lorry-controller.conf')
- self.html = HTMLStatusManager(self)
- if self.settings['dry-run']:
- self.html.series = 0
- self.html.write_out_status()
- self.conf.parse_config()
-
- with WorkingStateManager(self) as mgr:
- # Update any troves
- self.html.set_mgr(mgr)
- self.html.bump_state()
- self.conf.update_troves(mgr)
- prev_lorries = set(mgr.lorry_state.keys())
- cur_lorries = set(self.conf.lorries.keys())
- logging.info("Starting processing. Previously %d lorries "
- "were handled. We currently have %d defined." % (
- len(prev_lorries), len(cur_lorries)))
-
- # 1. Handle deletes for any old lorries we no longer want
- self.html.bump_state()
- logging.info("Delete any old lorries...")
- for dead_lorry in prev_lorries - cur_lorries:
- self.html.set_processing(dead_lorry)
- logging.info("Dead lorry: %s" % dead_lorry)
- conf_uuid = mgr.lorry_state[dead_lorry]['conf']
- if conf_uuid in self.conf.configs:
- should_delete = self.conf.configs[conf_uuid]['destroy']
- else:
- # Could not find UUID in config, switch to 'never'
- should_delete = "never"
- want_destroy = (should_delete == "always")
- if should_delete == "unchanged":
- exit, out, err = self.maybe_runcmd(
- ['git', 'ls-remote', 'ssh://git@localhost/%s.git' %
- dead_lorry], dry=True)
- if exit != 0:
- logging.error("Unable to ls-remote to decide if "
- "unchanged. Assuming it is changed.")
- else:
- logging.debug("TODO: Should decide if unchanged!")
-
- if want_destroy:
- exit, out, err = self.maybe_runcmd(['ssh', 'git@localhost',
- 'destroy', dead_lorry],
- dry=True)
- if exit != 0:
- logging.error("Unable to destroy %s" % dead_lorry)
- else:
- token = token_finder.match(out).group(1)
- exit, out, err = self.maybe_runcmd(
- ['ssh', 'git@localhost', 'destroy', dead_lorry,
- token])
- if exit != 0:
- logging.error("Unable to destroy %s despite having"
- " the token %s" %
- (dead_lorry, token))
- else:
- logging.debug("Destroyed")
- del mgr.lorry_state[dead_lorry]
-
- # 2. Handle creates for any new lorries we now want
- self.html.bump_state()
- logging.info("Create any new lorries...")
- for new_lorry in cur_lorries - prev_lorries:
- self.html.set_processing(new_lorry)
- logging.info("New lorry: %s" % new_lorry)
- lorry = self.conf.lorries[new_lorry]
- conf_uuid = lorry['controller-uuid']
- conf = self.conf.configs[conf_uuid]
- nextdue = self.conf.duetimes[new_lorry]
- # Make new lorries overdue.
- nextdue -= conf['interval-parsed']
- should_create = conf['create'] == "always"
- store_state = True
- if should_create:
- exit, out, err = self.maybe_runcmd(["ssh", "git@localhost",
- "create", new_lorry])
- if exit != 0:
- if ' already exists' in err:
- logging.warn("Repository %s already exists" %
- new_lorry)
- else:
- logging.error("Unable to create repository %s" %
- new_lorry)
- logging.error(err)
- store_state = False
- if store_state:
- self.maybe_runcmd(["ssh", "git@localhost", "set-head",
- new_lorry, lorry['source-HEAD']])
- mgr.lorry_state[new_lorry] = {
- 'destroy': conf['destroy'],
- 'conf': conf_uuid,
- 'lorry': lorry,
- 'next-due': nextdue,
- }
- else:
- # Remove this from cur_lorries so we don't run it
- cur_lorries.remove(new_lorry)
-
- # 3. For every lorry we have, update the settings if necessary.
- # and reset the next-due as appropriate.
- self.html.bump_state()
- logging.info("Update active lorry configurations...")
- updated_count = 0
- for upd_lorry in cur_lorries:
- if mgr.lorry_state[upd_lorry]['lorry'] != \
- self.conf.lorries[upd_lorry]:
- lorry = self.conf.lorries[upd_lorry]
- old_lorry = mgr.lorry_state[upd_lorry]["lorry"]
- if lorry["source-HEAD"] != \
- old_lorry.get("source-HEAD", "refs/heads/master"):
- self.maybe_runcmd(['ssh', 'git@localhost', 'set-head',
- upd_lorry, lorry["source-HEAD"]])
- conf_uuid = lorry['controller-uuid']
- conf = self.conf.configs[conf_uuid]
- nextdue = self.conf.duetimes[upd_lorry]
- mgr.lorry_state[upd_lorry] = {
- 'destroy': conf['destroy'],
- 'conf': conf_uuid,
- 'lorry': lorry,
- 'next-due': nextdue,
- }
- updated_count += 1
- logging.info("Result: %d/%d lorries needed updating" % (
- updated_count, len(cur_lorries)))
-
- # 3. Iterate all active lorries and see if they're due
- logging.info("Iterate active lorries looking for work...")
- now = time.time()
- lorried = 0
- earliest_due = None
- what_early_due = ""
- lorries_to_run = []
- for lorry in cur_lorries:
- state = mgr.lorry_state[lorry]
- conf_uuid = state['conf']
- conf = self.conf.configs[conf_uuid]
- due = state['next-due']
- if now >= due:
- lorries_to_run.append(lorry)
- lorries_to_run.sort()
- for lorry in lorries_to_run:
- state = mgr.lorry_state[lorry]
- conf_uuid = state['conf']
- conf = self.conf.configs[conf_uuid]
- due = state['next-due']
- lorried += 1
- logging.info("Running %d/%d. Lorrying: %s" % (
- lorried, len(lorries_to_run),lorry))
- self.html.set_processing(lorry)
- # Before we run lorry, make sure that Git doesn't verify
- # SSL certificates. This is a workaround for the fact that
- # we don't yet have a solution for proper SSL certificates
- # in Trove yet.
- os.environ['GIT_SSL_NO_VERIFY'] = 'true'
- with mgr.runner(lorry) as runner:
- runner.run_lorry(*self.lorrycmd)
- while state['next-due'] <= now:
- state['next-due'] += conf['interval-parsed']
-
- for lorry in cur_lorries:
- state = mgr.lorry_state[lorry]
- due = state['next-due']
- if earliest_due is None or due < earliest_due:
- earliest_due = due
- what_early_due = lorry
-
- if earliest_due is None:
- logging.info("Lorried %d. No idea what's next." % lorried)
- else:
- logging.info("Lorried %d. %s due in %d seconds" % (
- lorried, what_early_due, int(earliest_due - now)))
- logging.info("All done.")
- self.html.bump_state()
-
- def rungit(self, args):
- self.runcmd(['git']+args, cwd=os.path.join(self.settings['work-area'],
- 'git'))
-
- def maybe_http_request(self, url, auth=None, dry=False):
- """If not a dry run, make an HTTP request and return its output."""
- if (not self.settings['dry-run']) or dry:
- return self.http_request(url, auth)
- else:
- logging.debug('DRY-RUN: Not sending a request to %s' % url)
- return 0, 'DRY-RUN', 'DRY-RUN'
-
- def maybe_runcmd(self, cmdline, dry=False, *args, **kwargs):
- if (not self.settings['dry-run']) or dry:
- return self.runcmd_unchecked(cmdline, *args, **kwargs)
- else:
- logging.debug("DRY-RUN: Not running %r" % cmdline)
- return 0, 'DRY-RUN', 'DRY-RUN'
-
- def http_request(self, url, auth=None):
- """Make an HTTP request to the given url, return the output.
-
- Make an HTTP request to `url`. If the request succeeds (response code
- 200) then return an exit code 0, the data from the response and the
- response code. Otherwise return the response code, any data in the
- repsonse and a string containing the response code.
-
- """
- request = urllib2.Request(url, None, {})
- if auth:
- password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
- password_mgr.add_password(
- None, url, auth['username'], auth['password'])
- auth_handler = urllib2.HTTPBasicAuthHandler(password_mgr)
- opener = urllib2.build_opener(auth_handler)
- response = opener.open(url)
- else:
- response = urllib2.urlopen(request)
- code = response.getcode()
- if code == 200:
- return 0, response.read(), '200'
- else:
- return code, response.read(), str(code)
-
- def set_proxy(self, proxy_def):
- """Tell urllib2 to use a proxy for http action by lorry-controller.
-
- Load the proxy information from the JSON file given by proxy_def, then
- set urllib2's url opener to open urls via an authenticated proxy.
-
- """
- with open(proxy_def, 'r') as proxy_info:
- proxy = json.load(proxy_info)
-
- # set the required environment variables
- hostname = urllib.quote(proxy['hostname'])
- user = '%s:%s' % (proxy['username'], proxy['password'])
- url = '%s:%s' % (hostname, proxy['port'])
- os.environ['http_proxy'] = 'http://%s@%s' % (user, url)
- os.environ['https_proxy'] = 'https://%s@%s' % (user, url)
-
- # create a ProxyHandler
- proxies = {'http_proxy': 'http://%s@%s' % (user, url),
- 'https_proxy': 'https://%s@%s' % (user, url)}
- proxy_handler = urllib2.ProxyHandler(proxies)
-
- # install an opener to use the proxy
- opener = urllib2.build_opener(proxy_handler)
- urllib2.install_opener(opener)
-
-if __name__ == '__main__':
- LorryController(version='1').run()
diff --git a/lorry-controller-minion b/lorry-controller-minion
new file mode 100755
index 0000000..c269645
--- /dev/null
+++ b/lorry-controller-minion
@@ -0,0 +1,308 @@
+#!/usr/bin/env python
+#
+# Copyright (C) 2014 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+import fcntl
+import httplib
+import json
+import logging
+import os
+import platform
+import random
+import select
+import subprocess
+import tempfile
+import time
+import urllib
+
+import cliapp
+
+import lorrycontroller
+
+
+class WEBAPPError(Exception):
+
+    '''WEBAPP answered an HTTP request with a non-OK status.
+
+    The exception message contains the HTTP status code, the reason
+    phrase, and the response body, so logging the exception is enough
+    to see what WEBAPP said.
+
+    '''
+
+    def __init__(self, status, reason, body):
+        # The format string used to end in '%sbody', which glued the
+        # literal word "body" onto the end of every message.
+        Exception.__init__(
+            self, 'WEBAPP returned %s %s:\n%s' % (status, reason, body))
+
+
+class MINION(cliapp.Application):
+
+    def add_settings(self):
+        '''Declare the settings MINION accepts.'''
+
+        add_string = self.settings.string
+        add_integer = self.settings.integer
+
+        # How to reach WEBAPP.
+        add_string(
+            ['webapp-host'],
+            'address of WEBAPP',
+            default='localhost')
+        add_integer(
+            ['webapp-port'],
+            'port of WEBAPP',
+            default=80)
+        add_integer(
+            ['webapp-timeout'],
+            'how long to wait for an HTTP response from WEBAPP (in seconds)',
+            default=10)
+
+        # What to do when WEBAPP has no work for us.
+        add_integer(
+            ['sleep'],
+            'do nothing for this long if there is no new job available '
+            '(0 for random 30..60 s)',
+            default=0)
+
+        # How to run Lorry.
+        add_string(
+            ['lorry-cmd'],
+            'run CMD as argv0 instead of lorry '
+            '(args will be added as for lorry)',
+            metavar='CMD',
+            default='lorry')
+        add_string(
+            ['lorry-working-area'],
+            'where will Lorry put its files?',
+            metavar='DIR',
+            default='/home/lorry/working-area')
+
+        add_string(
+            ['proxy-config'],
+            'read HTTP proxy config from FILENAME',
+            metavar='FILENAME')
+
+    def process_args(self, args):
+        '''Main loop: keep asking WEBAPP for jobs and run them.
+
+        This never returns; when WEBAPP has no job, MINION sleeps for
+        the configured time and asks again.
+
+        '''
+
+        logging.info('Starting MINION')
+
+        # A sleep setting of zero means "pick a random pause".
+        if self.settings['sleep'] == 0:
+            self.settings['sleep'] = random.randint(30, 60)
+
+        proxy_config = self.settings['proxy-config']
+        if proxy_config:
+            lorrycontroller.setup_proxy(proxy_config)
+
+        while True:
+            job_spec = self.get_job_spec()
+            if not job_spec:
+                logging.info(
+                    'Got no job from WEBAPP, sleeping for %s s',
+                    self.settings['sleep'])
+                time.sleep(self.settings['sleep'])
+                continue
+            self.run_job(job_spec)
+
+    def get_job_spec(self):
+        '''Ask WEBAPP for a new job.
+
+        Return the decoded JSON object describing the job, or None if
+        WEBAPP returned an error or the response carries no (or a
+        false) job_id.
+
+        '''
+
+        # The host and port are only needed for the log message here;
+        # webapp_request looks up the connection settings itself.
+        logging.debug(
+            'Requesting job from WEBAPP (%s:%s)',
+            self.settings['webapp-host'],
+            int(self.settings['webapp-port']))
+
+        # Identify this MINION to WEBAPP by host name and process id.
+        params = urllib.urlencode({
+            'host': platform.node(),
+            'pid': os.getpid(),
+        })
+
+        try:
+            body = self.webapp_request('POST', '/1.0/give-me-job', params)
+        except WEBAPPError as e:
+            logging.error(str(e))
+            return None
+
+        obj = json.loads(body)
+        if obj.get('job_id', None):
+            return obj
+        return None
+
+    def run_job(self, job_spec):
+        '''Run one job to completion, keeping WEBAPP informed.
+
+        Start the job, then repeatedly poll it and send its output to
+        WEBAPP. Stop when the subprocess has exited, or kill it when
+        WEBAPP asks for that.
+
+        '''
+
+        self.start_job(job_spec)
+        while True:
+            stdout, stderr, exit = self.poll_job()
+            kill_job = self.update_webapp_about_job(
+                job_spec, stdout, stderr, exit)
+            if exit is not None:
+                break
+            if kill_job:
+                # FIXME: The job may have produced more output while
+                # we were talking to WEBAPP. We are not polling the
+                # process again, here. This should be fixed. However,
+                # since the process may be in an unkillable state (D
+                # state, for example), we also can't wait here until
+                # it actually dies. Thus, this needs thinking.
+                exit = self.kill_job()
+
+                # poll_job only releases the output pipe and the
+                # temporary lorry file when it sees the process exit
+                # by itself, so a killed job must be cleaned up here
+                # or every killed job leaks a descriptor and a file.
+                if self.stdout_fd is not None:
+                    os.close(self.stdout_fd)
+                    self.stdout_fd = None
+                if os.path.exists(self.temp_lorry_filename):
+                    os.remove(self.temp_lorry_filename)
+
+                self.update_webapp_about_job(
+                    job_spec, '', '', exit)
+                break
+
+    def start_job(self, job_spec):
+        '''Start the Lorry subprocess for a job.
+
+        The lorry specification text from the job is written to a
+        temporary file, whose name is given to the lorry command as
+        its only argument. The subprocess reads stdin from /dev/null
+        and writes both stdout and stderr to a pipe, whose read end
+        is kept, in non-blocking mode, in self.stdout_fd.
+
+        '''
+
+        logging.info(
+            'Running job %s: %s on %s',
+            job_spec['job_id'],
+            self.settings['lorry-cmd'],
+            job_spec['path'])
+
+        # The text comes out of a JSON decoder, so it may be a unicode
+        # string. Encode it explicitly, rather than rely on the
+        # implicit ASCII conversion, which fails on non-ASCII text.
+        text = job_spec['text']
+        if isinstance(text, unicode):
+            text = text.encode('utf-8')
+
+        # Use a file object so that the whole text gets written and
+        # the descriptor is closed even if writing fails.
+        fd, self.temp_lorry_filename = tempfile.mkstemp()
+        with os.fdopen(fd, 'w') as f:
+            f.write(text)
+
+        argv = [
+            self.settings['lorry-cmd'],
+            self.temp_lorry_filename,
+        ]
+
+        read_fd, write_fd = os.pipe()
+        self.stdout_fd = read_fd
+        self.set_nonblocking(self.stdout_fd)
+
+        # Our copies of the write end and of /dev/null must be closed
+        # whether or not the subprocess could be started: the child
+        # has its own copies, and we only see end-of-file on the pipe
+        # once every write end is closed.
+        try:
+            with open('/dev/null') as devnull:
+                self.process = subprocess.Popen(
+                    argv,
+                    stdin=devnull,
+                    stdout=write_fd,
+                    stderr=subprocess.STDOUT)
+        finally:
+            os.close(write_fd)
+
+    def set_nonblocking(self, fd):
+        '''Add O_NONBLOCK to the status flags of a file descriptor.'''
+        old_flags = fcntl.fcntl(fd, fcntl.F_GETFL, 0)
+        fcntl.fcntl(fd, fcntl.F_SETFL, old_flags | os.O_NONBLOCK)
+
+    def poll_job(self):
+        '''Check on the running job and collect its output.
+
+        Return a tuple (stdout, stderr, exit). While the subprocess
+        is running, exit is None and stdout holds at most one chunk
+        of output, after waiting up to ten seconds for some to
+        appear. Once the subprocess has exited, exit is its exit
+        code, stdout holds all output still in the pipe, and the
+        pipe and the temporary lorry file are released. stderr is
+        always empty.
+
+        '''
+
+        import errno
+
+        read_size = 1024
+        stderr = ''
+
+        exit = self.process.poll()
+        if exit is None:
+            # Process is still running.
+            wait_for_output = 10.0
+            r, w, x = select.select([self.stdout_fd], [], [], wait_for_output)
+            stdout = ''
+            if r:
+                stdout = os.read(self.stdout_fd, read_size)
+            return stdout, stderr, exit
+
+        # Finished.
+        if exit != 0:
+            logging.error('Subprocess failed')
+
+        stdout_parts = []
+        try:
+            while True:
+                try:
+                    data = os.read(self.stdout_fd, read_size)
+                except OSError as e:
+                    # The pipe is non-blocking. If some descendant of
+                    # the subprocess still holds the write end open,
+                    # there is no end-of-file, and reading an empty
+                    # pipe fails with EAGAIN. Treat that as "no more
+                    # output" instead of crashing MINION.
+                    if e.errno not in (errno.EAGAIN, errno.EWOULDBLOCK):
+                        raise
+                    break
+                if not data:
+                    break
+                stdout_parts.append(data)
+        finally:
+            # Release the pipe and the temporary file even if
+            # reading the remaining output failed.
+            os.close(self.stdout_fd)
+            self.stdout_fd = None
+            os.remove(self.temp_lorry_filename)
+
+        return ''.join(stdout_parts), stderr, exit
+
+    def kill_job(self):
+        '''Kill the job's subprocess, wait for it, return its exit code.'''
+        child = self.process
+        child.kill()
+        return child.wait()
+
+    def update_webapp_about_job(self, job_spec, stdout, stderr, exit):
+        '''Send the latest output and state of a job to WEBAPP.
+
+        exit is None while the job is running, and is then reported
+        to WEBAPP as 'no'. Return the kill_job value from WEBAPP's
+        response, or None if WEBAPP returned an error.
+
+        '''
+
+        logging.debug(
+            'Updating WEBAPP about running job %s', job_spec['job_id'])
+
+        finished = exit is not None
+
+        # Disk usage is only measured once the job has ended.
+        disk_usage = self.get_lorry_disk_usage(job_spec) if finished else None
+
+        fields = {
+            'job_id': job_spec['job_id'],
+            'exit': exit if finished else 'no',
+            'stdout': stdout,
+            'stderr': stderr,
+            'disk_usage': disk_usage,
+        }
+        params = urllib.urlencode(fields)
+
+        try:
+            body = self.webapp_request('POST', '/1.0/job-update', params)
+        except WEBAPPError as e:
+            logging.error(str(e))
+            return
+
+        return json.loads(body)['kill_job']
+
+    def webapp_request(self, method, path, body):
+        '''Make an HTTP request to WEBAPP and return the response body.
+
+        body is sent as a URL-encoded form, if it is not empty. Raise
+        WEBAPPError if the response status is anything but 200 OK.
+
+        '''
+
+        logging.debug(
+            'Making HTTP request to WEBAPP: method=%r path=%r body=%r',
+            method, path, body)
+
+        host = self.settings['webapp-host']
+        port = int(self.settings['webapp-port'])
+        timeout = self.settings['webapp-timeout']
+        conn = httplib.HTTPConnection(host, port=port, timeout=timeout)
+
+        headers = {}
+        if body:
+            headers['Content-type'] = 'application/x-www-form-urlencoded'
+
+        # Close the connection even if sending the request or reading
+        # the response fails (e.g., on a timeout), so that a MINION
+        # that survives such errors does not accumulate open sockets.
+        try:
+            conn.request(method, path, body=body, headers=headers)
+            response = conn.getresponse()
+            response_body = response.read()
+        finally:
+            conn.close()
+
+        if response.status != httplib.OK:
+            raise WEBAPPError(response.status, response.reason, response_body)
+
+        return response_body
+
+    def get_lorry_disk_usage(self, job_spec):
+        '''Return the disk usage of the job's directory in the working area.'''
+        working_area = self.settings['lorry-working-area']
+        basename = self.escape_lorry_area_basename(job_spec['path'])
+        return self.disk_usage_by_dir(os.path.join(working_area, basename))
+
+    def escape_lorry_area_basename(self, basename):
+        '''Turn a lorry path into a single directory name.'''
+
+        # FIXME: This code should be kept in sync with the respective
+        # code in lorry, or, better, we would import the code from
+        # Lorry directly.
+
+        assert '\0' not in basename
+        # Slashes are escaped as underscores.
+        return basename.replace('/', '_')
+
+    def disk_usage_by_dir(self, dirname):
+        '''Return the disk usage of a directory tree, in bytes.
+
+        The usage is measured by running "du -sk". Any failure to run
+        du, or to make sense of its output, is logged and results in
+        a return value of 0.
+
+        '''
+
+        exit, out, err = cliapp.runcmd_unchecked(['du', '-sk', dirname])
+        if exit:
+            logging.error('du -sk %s failed: %r', dirname, err)
+            return 0
+
+        lines = out.splitlines()
+        if not lines:
+            logging.warning('no output from du')
+            return 0
+
+        # The summary is on the last line: the size, then the name.
+        words = lines[-1].split()
+        if not words:
+            logging.warning('last line of du output is empty')
+            return 0
+
+        # du -k reports in units of 1024 bytes.
+        kibibyte = 1024
+        try:
+            return int(words[0]) * kibibyte
+        except ValueError:
+            # The message used to say "to string", but the failed
+            # conversion is to an integer.
+            logging.warning('error converting %r to integer', words[0])
+            return 0
+
+
+MINION().run()
diff --git a/lorry-controller-webapp b/lorry-controller-webapp
new file mode 100755
index 0000000..9234498
--- /dev/null
+++ b/lorry-controller-webapp
@@ -0,0 +1,223 @@
+#!/usr/bin/env python
+#
+# Copyright (C) 2014 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+import logging
+import os
+import wsgiref.simple_server
+
+import bottle
+import cliapp
+from flup.server.fcgi import WSGIServer
+
+
+import lorrycontroller
+
+
+class WEBAPP(cliapp.Application):
+
+    def add_settings(self):
+        '''Declare the settings WEBAPP accepts.'''
+
+        self.settings.string(
+            ['statedb'],
+            'use FILE as the state database',
+            metavar='FILE')
+
+        self.settings.string(
+            ['configuration-directory'],
+            'use DIR as the configuration directory',
+            metavar='DIR',
+            default='.')
+
+        self.settings.string(
+            ['confgit-url'],
+            'get CONFGIT from URL',
+            metavar='URL')
+
+        # The metavar must match the name used in the help text; it
+        # used to be URL, copied from confgit-url above.
+        self.settings.string(
+            ['confgit-branch'],
+            'get git branch BRANCH in CONFGIT',
+            metavar='BRANCH',
+            default='master')
+
+        self.settings.boolean(
+            ['debug-real-confgit'],
+            'if true, do real git operations on the configuration directory; '
+            'if false, do no git operations on it and just use what is there',
+            default=True)
+
+        self.settings.string(
+            ['status-html'],
+            'write a static HTML page to FILE to describe overall status',
+            metavar='FILE',
+            default='/dev/null')
+
+        self.settings.boolean(
+            ['wsgi'],
+            'run in wsgi mode (default is debug mode, for development)')
+
+        self.settings.integer(
+            ['debug-port'],
+            'use PORT in debugging mode '
+            '(i.e., when not running under WSGI); '
+            'note that setting this to non-zero disables --debug-port-file',
+            metavar='PORT',
+            default=0)
+
+        self.settings.string(
+            ['debug-port-file'],
+            'write listening port to FILE when in debug mode '
+            '(i.e., not running under WSGI)',
+            metavar='FILE',
+            default='webapp.port')
+
+        self.settings.string(
+            ['debug-host'],
+            'listen on HOST when in debug mode (i.e., not running under WSGI)',
+            metavar='HOST',
+            default='0.0.0.0')
+
+        self.settings.string_list(
+            ['debug-fake-trove'],
+            'fake access to remote Troves (to do gitano ls, etc) '
+            'using local files: get ls listing for TROVE from PATH, '
+            'where PATH names a file in JSON with the necessary info; '
+            'may be used multiple times',
+            metavar='TROVE=PATH')
+
+        self.settings.string(
+            ['templates'],
+            'find HTML page templates (*.tpl) in DIR',
+            metavar='DIR',
+            default='/usr/share/lorry-controller/templates')
+
+        self.settings.string(
+            ['static-files'],
+            'serve static files from DIR',
+            metavar='DIR',
+            default='/usr/share/lorry-controller/static')
+
+    def find_routes(self):
+        '''Generate all the API route classes.
+
+        A route class is any class in the lorrycontroller package
+        namespace that is a proper subclass of LorryControllerRoute.
+        Finding them by introspection saves us from maintaining a
+        list of them by hand.
+
+        '''
+
+        base = lorrycontroller.LorryControllerRoute
+
+        for name in dir(lorrycontroller):
+            candidate = getattr(lorrycontroller, name)
+
+            # issubclass only accepts classes, so weed out everything
+            # else (functions, modules, ...) first.
+            if type(candidate) != type:
+                continue
+            if not issubclass(candidate, base):
+                continue
+
+            # The base class itself is not a route.
+            if candidate != base:
+                yield candidate
+
+    def process_args(self, args):
+        '''Set up the web application and start serving requests.
+
+        Every route class found by find_routes is instantiated and
+        registered with a new Bottle application, which is then run
+        under WSGI or in the debug server, depending on the wsgi
+        setting.
+
+        '''
+
+        self.settings.require('statedb')
+
+        self.setup_proxy()
+
+        templates = self.load_templates()
+
+        webapp = bottle.Bottle()
+        for route_class in self.find_routes():
+            route = route_class(self.settings, templates)
+            webapp.route(
+                path=route.path,
+                method=route.http_method,
+                callback=route.run)
+
+        logging.info('Starting server')
+        if not self.settings['wsgi']:
+            self.run_debug_server(webapp)
+        else:
+            self.run_wsgi_server(webapp)
+
+    def load_templates(self):
+        '''Read all the *.tpl files in the templates directory.
+
+        Return a dict that maps the name of each template (its
+        filename without the .tpl suffix) to the contents of the file.
+
+        '''
+
+        suffix = '.tpl'
+        dirname = self.settings['templates']
+
+        templates = {}
+        for basename in os.listdir(dirname):
+            if not basename.endswith(suffix):
+                continue
+            name = basename[:-len(suffix)]
+            with open(os.path.join(dirname, basename)) as f:
+                templates[name] = f.read()
+        return templates
+
+    def run_wsgi_server(self, webapp):
+        '''Serve the application with flup's WSGIServer.'''
+        server = WSGIServer(webapp)
+        server.run()
+
+    def run_debug_server(self, webapp):
+        '''Serve the application in debug mode.
+
+        Listen on the port given by the debug-port setting, or on a
+        random port if that setting is zero.
+
+        '''
+        if not self.settings['debug-port']:
+            self.run_debug_server_on_random_port(webapp)
+        else:
+            self.run_debug_server_on_given_port(webapp)
+
+    def run_debug_server_on_given_port(self, webapp):
+        '''Serve the application on the debug-host and debug-port settings.'''
+        options = {
+            'host': self.settings['debug-host'],
+            'port': self.settings['debug-port'],
+            'quiet': True,
+            'debug': True,
+        }
+        bottle.run(webapp, **options)
+
+    def run_debug_server_on_random_port(self, webapp):
+        '''Serve the application on a port chosen by the operating system.
+
+        The chosen port number is written to the file named by the
+        debug-port-file setting.
+
+        '''
+
+        port_filename = self.settings['debug-port-file']
+        base_class = wsgiref.simple_server.WSGIServer
+
+        class DebugServer(base_class):
+            '''WSGI-like server that uses an ephemeral port.
+
+            Whatever port is asked for, the DebugServer binds to
+            port 0 instead, so that it gets an ephemeral port, and
+            then writes the actual port number to debug-port-file.
+            This lets the caller use a temporary port without a race.
+
+            '''
+
+            def __init__(self, server_address, *args, **kwargs):
+                # Keep the host, but ignore the requested port.
+                host = server_address[0]
+                base_class.__init__(self, (host, 0), *args, **kwargs)
+                with open(port_filename, 'w') as f:
+                    f.write(str(self.server_port) + '\n')
+
+        options = {
+            'host': self.settings['debug-host'],
+            'server_class': DebugServer,
+            'quiet': True,
+            'debug': True,
+        }
+        bottle.run(webapp, **options)
+
+    def setup_proxy(self):
+        '''Set up HTTP proxying for WEBAPP.
+
+        The proxy configuration is expected in the file proxy.conf in
+        the configuration directory. The actual work is delegated to
+        lorrycontroller.setup_proxy, which is given the name of that
+        file.
+
+        '''
+
+        confdir = self.settings['configuration-directory']
+        lorrycontroller.setup_proxy(os.path.join(confdir, 'proxy.conf'))
+
+
+WEBAPP().run()
diff --git a/lorry-controller.conf b/lorry-controller.conf
deleted file mode 100644
index 3d46b23..0000000
--- a/lorry-controller.conf
+++ /dev/null
@@ -1,20 +0,0 @@
-[
- {
- "type": "trove",
- "uuid": "default-staggered-short",
- "trovehost": "git.baserock.org",
- "protocol": "ssh",
- "ls-interval": "1H",
- "prefixmap": {
- "baserock": "baserock"
- },
- "ignore": [
- "baserock/lorries",
- "baserock/tests/*"
- ],
- "create": "never",
- "destroy": "unchanged",
- "interval": "30M",
- "stagger": true
- }
-]
diff --git a/lorry-controller.morph b/lorry-controller.morph
new file mode 100644
index 0000000..9f1623a
--- /dev/null
+++ b/lorry-controller.morph
@@ -0,0 +1,14 @@
+build-system: python-distutils
+kind: chunk
+name: lorry-controller
+post-install-commands:
+# The mode must be given with -m: with "install -d", every operand is
+# a directory to create, so a bare 0755 would become a stray directory.
+- install -d -m 0755 "$DESTDIR/etc/lighttpd"
+- install -m 0644 -D etc/lighttpd/*.conf "$DESTDIR/etc/lighttpd/."
+- |
+  TGT="$DESTDIR/usr/lib/systemd/system"
+  install -d "$TGT/multi-user.target.wants"
+  install -m 0644 units/*.service units/*.timer "$TGT/."
+  cd "$TGT/multi-user.target.wants"
+  # Ignore template units for symlinking. The template units will be
+  # instantiated at deploy time by trove.configure.
+  ln -s ../*[^@].service ../*[^@].timer .
diff --git a/lorrycontroller/__init__.py b/lorrycontroller/__init__.py
index 0fe0b33..bc51b88 100644
--- a/lorrycontroller/__init__.py
+++ b/lorrycontroller/__init__.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2013 Codethink Limited
+# Copyright (C) 2014 Codethink Limited
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -14,5 +14,35 @@
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-import confparser
-import workingstate
+from statedb import (
+ StateDB,
+ LorryNotFoundError,
+ WrongNumberLorriesRunningJob,
+ TroveNotFoundError)
+from route import LorryControllerRoute
+from readconf import ReadConfiguration
+from status import Status, StatusHTML, StatusRenderer
+from listqueue import ListQueue
+from showlorry import ShowLorry, ShowLorryHTML
+from startstopqueue import StartQueue, StopQueue
+from givemejob import GiveMeJob
+from jobupdate import JobUpdate
+from listrunningjobs import ListRunningJobs
+from movetopbottom import MoveToTop, MoveToBottom
+from stopjob import StopJob
+from listjobs import ListAllJobs, ListAllJobsHTML
+from showjob import ShowJob, ShowJobHTML, JobShower
+from removejob import RemoveJob
+from lstroves import LsTroves, ForceLsTrove
+from pretendtime import PretendTime
+from maxjobs import GetMaxJobs, SetMaxJobs
+from gitano import (
+ GitanoCommand,
+ LocalTroveGitanoCommand,
+ GitanoCommandFailure,
+ new_gitano_command)
+from static import StaticFile
+from proxy import setup_proxy
+
+
+__all__ = locals()
diff --git a/lorrycontroller/confparser.py b/lorrycontroller/confparser.py
deleted file mode 100644
index 403b768..0000000
--- a/lorrycontroller/confparser.py
+++ /dev/null
@@ -1,335 +0,0 @@
-# Copyright (C) 2013 Codethink Limited
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; version 2 of the License.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License along
-# with this program; if not, write to the Free Software Foundation, Inc.,
-# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-
-import json
-import logging
-import re
-import glob
-import os
-import time
-import fnmatch
-import urllib
-
-default_values = [
- ( u'create', u'never' ),
- ( u'destroy', u'never' ),
- ( u'interval', u'1m' ),
- ( u'stagger', False ),
- ( u'tarball', u'never' ),
- ( u'type', u'invalid_type' ),
-]
-
-valid_interval = re.compile(r"^([1-9][0-9]*)([mhd])?$")
-interval_mults = {
- None: 1,
- 'm': 60,
- 'h': 60 * 60,
- 'd': 60 * 60 * 24,
-}
-class LorryControllerConfig(object):
- '''This encapsulates the configuration for lorry-controller.'''
-
- def __init__(self, app, confpath):
- self.app = app
- self.confpath = confpath
- self.lorries = {}
- self.configs = {}
- self.duetimes = {}
- self.troves = []
-
- def parse_config(self):
- confpath = os.path.join(self.app.settings['work-area'], self.confpath)
- logging.info("Parsing configuration: %s" % confpath)
- try:
- with open(confpath, "r") as fh:
- self._raw_conf = json.load(fh)
- except Exception, e:
- logging.error("Unable to parse: %r" % e)
- raise
- logging.debug("Validating configuration semantics")
- self._validate__raw_conf()
- logging.info("Configuration loaded")
-
- def _validate__raw_conf(self):
- '''Validate the entire raw config.'''
- if type(self._raw_conf) != list:
- self._give_up("Configuration was not a list.")
- for entry in self._raw_conf:
- if type(entry) != dict:
- self._give_up("Configuration entry was not a dict.")
- if type(entry.get('type', None)) != unicode:
- self._give_up("Configuration entry lacked a suitable 'type' "
- "field.")
- # Set the defaults
- for key, defval in default_values:
- entry[key] = entry.get(key, defval)
- # And validate the generic values
- self._validate__generics(entry)
- # Now validate the rest
- validator = getattr(self, '_validate_' + entry['type'], None)
- if validator is None:
- self._give_up("Configuration entry had unknown type: %s" %
- entry['type'])
- validator(entry)
-
- def _validate__generics(self, entry):
- '''Validate the generic entries such as 'uuid'.'''
- if type(entry.get('uuid', None)) != unicode:
- self._give_up("UUID missing, cannot reconcile without it!")
- if entry['uuid'] in self.configs:
- self._give_up("UUID is not unique")
- self.configs[entry['uuid']] = entry
- for key, defval in default_values:
- if type(defval) != type(entry[key]):
- self._give_up("Invalid type for '%s': %r" % (key, entry[key]))
- self._validate__when(entry, 'create', ["always", "never"])
- self._validate__when(entry, 'destroy',
- ["always", "never", "unchanged"])
- self._validate__when(entry, 'tarball', ["always", "never", "first"])
- entry['interval-parsed'] = self._parse_interval(entry['interval'])
- if 'ls-interval' in entry:
- entry['ls-interval-parsed'] = \
- self._parse_interval(entry['ls-interval'])
-
- def _validate__when(self, entry, key, valid_whens):
- if entry[key] not in valid_whens:
- self._give_up("Invalid value for %s: %s" % (key, entry[key]))
-
- def _parse_interval(self, interval):
- m = valid_interval.match(interval.lower())
- if m is None:
- self._give_up("Unable to parse '%s' as an interval" % interval)
- num, mult = m.groups()
- num = int(num)
- mult = interval_mults.get(mult, None)
- if mult is None:
- self._give_up("Somehow, '%s' managed to appear as a multiplier!" %
- m.group(2))
- logging.debug("Converted interval %r to %r", interval, (num * mult))
- return num * mult
-
- def _validate_lorries(self, entry):
- '''Validate a 'lorries' stanza.'''
- if type(entry.get('globs', None)) != list:
- self._give_up("Lorries stanzas need lists for their 'globs'")
- if entry.get('prefix', None) is None:
- entry['prefix'] = u""
- if type(entry['prefix']) != unicode:
- self._give_up("Lorry prefixes should be strings.")
- my_lorries = set()
- git_base = os.path.join(self.app.settings['work-area'], 'git')
- for glob_entry in entry['globs']:
- if type(glob_entry) != unicode:
- self._give_up("Lorries globs should be strings")
- fullglob = os.path.join(git_base, glob_entry)
- my_lorries = my_lorries.union(set(glob.iglob(fullglob)))
- for lorry in my_lorries:
- if not lorry.startswith(git_base):
- self._give_up("Glob found %s which is outside the git base")
-
- logging.debug("Expanded globs in entry to %d lorry files" %
- len(my_lorries))
- logging.debug("Loading lorries into memory, please wait...")
-
- my_lorry_names = set()
- for lorry in my_lorries:
- try:
- with open(lorry, "r") as fh:
- lorry_json = json.load(fh)
- for name, content in lorry_json.iteritems():
- fullname = os.path.join(entry['prefix'], name)
- if self.lorries.get(fullname, None) is not None:
- self._give_up("Lorry repeated: %s" % fullname)
- content['controller-uuid'] = entry['uuid']
- if not content.has_key('source-HEAD'):
- content['source-HEAD'] = 'refs/heads/master'
- my_lorry_names.add(fullname)
- self.lorries[fullname] = content
- except Exception, e:
- logging.warning("Unable to parse %s, because of %s. "
- "Moving on" % (lorry, e))
-
- # Now calculate the 'next due' time for every lorry we just parsed
- starttime = time.time() - 1
- endtime = starttime + entry['interval-parsed']
- step = 0
- if entry['stagger']:
- step = (endtime - starttime) / (len(my_lorry_names) + 1)
- for lorry_name in my_lorry_names:
- self.duetimes[lorry_name] = starttime
- starttime += step
-
- logging.debug("Now loaded %d lorries" % len(self.lorries.keys()))
-
- def _validate_trove(self, entry):
- # Validate top levels
- if type(entry.get('trovehost', None)) != unicode:
- self._give_up("Trove host %r is not a string" %
- entry.get('trovehost', None))
- if 'ls-interval-parsed' not in entry:
- self._give_up("No ls-interval specified for %s" %
- entry['trovehost'])
- if type(entry.get('prefixmap', None)) != dict:
- self._give_up("Prefixmap not a dict for %s" %
- entry['trovehost'])
- if type(entry.get('ignore', [])) != list:
- self._give_up("Ignore is not a list for %s" %
- entry['trovehost'])
- protocol = entry.get('protocol')
- auth = entry.get('auth')
- if protocol == 'https' and not auth:
- self._give_up('Trove access protocol requires authorisation '
- 'details but none were defined.')
- elif not protocol:
- self._give_up('Trove access protocol not defined.')
- # Validate prefixmap
- for local, remote in entry['prefixmap'].iteritems():
- if type(local) != unicode:
- self._give_up("Local part of prefixmap is not a string: %r" %
- local)
- if type(remote) != unicode:
- self._give_up("Remote part of prefixmap is not a string: %r" %
- remote)
- # Validate ignore
- for ign in entry.get('ignore', []):
- if type(ign) != unicode:
- self._give_up("Part of ignore list is not a string: %r" % ign)
-
- self.troves.append(entry)
-
- def update_trove(self, trove, state):
- logging.info("Processing trove %s (%s)" % (trove['trovehost'],
- trove['uuid']))
- # 1. Ensure that if we need to 'ls' the trove, we do it
- now = time.time()
- state['next-vls'] = state.get('next-vls', now - 1)
- if state['next-vls'] < now:
- exit, out, err = self.run_gitano_command(trove, True, 'ls', '--verbose')
- if exit == 0:
- repo_info = {}
- for entry in [x for x in out.split("\n") if x != ""]:
- while entry.find(" ") > -1:
- entry = entry.replace(" ", " ")
- elems = entry.split(" ")
- this_repo = {
- "perm": elems[0],
- "name": elems[1],
- "head": elems[2],
- "desc": " ".join(elems[3:]),
- }
- repo_info[elems[1]] = this_repo
- state['last-ls-output'] = repo_info
- logging.info("ls interval %d" % trove['ls-interval-parsed'])
- logging.info("next-vls was %s" % time.asctime(time.gmtime(state['next-vls'])))
- while state['next-vls'] < now:
- state['next-vls'] += trove['ls-interval-parsed']
- logging.info("next-vls now %s" % time.asctime(time.gmtime(state['next-vls'])))
- else:
- # Pass through unchanged
- state['last-ls-output'] = state.get('last-ls-output', {})
-
- def ignored(reponame):
- for pattern in trove['ignore']:
- if fnmatch.fnmatch(reponame, pattern):
- return True
- return False
-
- # 2. For every entry in last-ls-output, construct a lorry if we want it
- lorries_made = set()
- for remotereponame, info in state['last-ls-output'].iteritems():
- localreponame = None
- for local, remote in trove['prefixmap'].iteritems():
- if remotereponame.startswith(remote+"/"):
- localreponame = "%s/%s" % (local,
- remotereponame[len(remote)+1:])
- if ((not ignored(remotereponame)) and (localreponame is not None)):
- # Make the url in the correct form for the given protocol
- if trove['protocol'] == 'ssh':
- url = 'ssh://git@%s/%s.git' % (trove['trovehost'],
- remotereponame)
- elif trove['protocol'] == 'https':
- auth = trove['auth']
- url = 'https://%s:%s@%s/git/%s.git' % (auth['username'],
- auth['password'],
- trove['trovehost'],
- remotereponame)
- else:
- url = 'http://%s/git/%s.git' % (trove['trovehost'],
- remotereponame)
- # Construct a lorry for this one.
- lorry = {
- "type": "git",
- "url": url,
- "controller-uuid": trove['uuid'],
- "source-HEAD": info["head"],
- "refspecs": [ "+refs/heads/*:refs/heads/*",
- "+refs/tags/*:refs/tags/*" ]
- }
- if localreponame in self.lorries:
- logging.warn("Skipping %s (%s from %s) because we already "
- "have something for that." % (
- localreponame, remotereponame, trove['trovehost']))
- else:
- self.lorries[localreponame] = lorry
- lorries_made.add(localreponame)
-
- # 3. Now schedule all those lorries in case they're new
- starttime = time.time() - 1
- endtime = starttime + trove['interval-parsed']
- step = 0
- if trove['stagger']:
- step = (endtime - starttime) / (len(lorries_made)+1)
- for lorry_name in lorries_made:
- self.duetimes[lorry_name] = starttime
- starttime += step
-
- logging.debug("Generated %d lorries from that trove" %
- len(lorries_made))
-
- def update_troves(self, statemgr):
- # Now that we have a state manager we can look at the trove data.
- for trove in self.troves:
- self.app.html.set_processing(trove['uuid'])
- trove_state = statemgr.get_trove(trove['uuid'])
- self.update_trove(trove, trove_state)
-
- def run_gitano_command(self, trove, dry, command, *args):
- """Run a gitano command on the trove, and return the output."""
- if trove['protocol'] == 'ssh':
- # construct list to run command over ssh
- cmdargs = ['ssh',
- '-oStrictHostKeyChecking=no',
- '-oBatchMode=yes',
- 'git@%s' % trove['trovehost'],
- command]
- cmdargs.extend(args)
- # run the command
- exit, out, err = self.app.maybe_runcmd(cmdargs, dry=dry)
- else:
- # construct a url which will return the command output
- query_string = '%s %s' % (command, ' '.join(args))
- query_string = urllib.quote(query_string)
- trovehost = urllib.quote(trove['trovehost'])
- url = '%s://%s/gitano-command.cgi?cmd=%s' % (
- trove['protocol'], trovehost, query_string)
- auth = trove.get('auth', None)
- # make an http request to the url
- exit, out, err = self.app.maybe_http_request(url, auth=auth, dry=dry)
- return exit, out, err
-
- def _give_up(self, *args, **kwargs):
- logging.error(*args, **kwargs)
- raise SystemExit(5)
diff --git a/lorrycontroller/gitano.py b/lorrycontroller/gitano.py
new file mode 100644
index 0000000..2de291c
--- /dev/null
+++ b/lorrycontroller/gitano.py
@@ -0,0 +1,153 @@
+# Copyright (C) 2014 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+import collections
+import logging
+import re
+import urllib2
+import urlparse
+
+import cliapp
+
+import lorrycontroller
+
+
+class GitanoCommandFailure(Exception):
+
+    '''Raised when a Gitano command could not be run on a Trove.'''
+
+    def __init__(self, trovehost, command, stderr):
+        # The message names the command first, then the host, and ends
+        # with the error text that was handed to us.
+        message = 'Failed to run "%s" on Gitano on %s\n%s' % (
+            command, trovehost, stderr)
+        Exception.__init__(self, message)
+
+
+class GitanoCommand(object):
+
+    '''Run a Gitano command on a Trove.
+
+    The transport is chosen once, in the constructor, from ``protocol``:
+    ``ssh`` runs the command through the ``ssh`` program, while ``http``
+    and ``https`` fetch ``/gitano-command.cgi`` from the Trove. Both
+    transports raise GitanoCommandFailure when the command fails.
+
+    '''
+
+    def __init__(self, trovehost, protocol, username, password):
+        '''Remember the connection details and pick the transport.
+
+        Raises GitanoCommandFailure if ``protocol`` is not one of
+        ``ssh``, ``http`` or ``https``.
+
+        '''
+
+        self.trovehost = trovehost
+        self.protocol = protocol
+        self.username = username
+        self.password = password
+
+        if protocol == 'ssh':
+            self._command = self._ssh_command
+        elif protocol in ('http', 'https'):
+            self._command = self._http_command
+        else:
+            raise GitanoCommandFailure(
+                self.trovehost, '__init__', 'unknown protocol %s' % protocol)
+
+    def whoami(self):
+        '''Return the output of the Gitano "whoami" command.'''
+        return self._command(['whoami'])
+
+    def create(self, repo_path):
+        '''Run the Gitano "create" command for repo_path.'''
+        self._command(['create', repo_path])
+
+    def get_gitano_config(self, repo_path):
+        '''Return the Gitano configuration of a repository.
+
+        The result is a collections.defaultdict(str), so looking up a
+        key that Gitano did not report yields the empty string.
+
+        '''
+
+        stdout = self._command(['config', repo_path, 'show'])
+
+        # "config REPO show" outputs a sequence of lines of the form "key: value".
+        # Extract those into a collections.defaultdict.
+
+        result = collections.defaultdict(str)
+        for line in stdout.splitlines():
+            # Group 1 is everything up to the first colon (the key),
+            # group 2 is the rest of the line (the value). The "+"
+            # must be inside the group: outside it, the group only
+            # captures the last character of the key.
+            m = re.match(r'^([^:]+):\s*(.*)$', line)
+            if m:
+                result[m.group(1)] = m.group(2).strip()
+
+        return result
+
+    def set_gitano_config(self, path, key, value):
+        '''Run the Gitano "config PATH set KEY VALUE" command.'''
+        self._command(['config', path, 'set', key, value])
+
+    def ls(self):
+        '''Return the raw output of the Gitano "ls" command.'''
+        return self._command(['ls'])
+
+    def _ssh_command(self, gitano_args):
+        '''Run a Gitano command over ssh and return its stdout.
+
+        Raises GitanoCommandFailure, carrying stdout and stderr, if ssh
+        exits with a non-zero status.
+
+        '''
+
+        quoted_args = [cliapp.shell_quote(x) for x in gitano_args]
+
+        base_argv = [
+            'ssh',
+            '-oStrictHostKeyChecking=no',
+            '-oBatchMode=yes',
+            'git@%s' % self.trovehost,
+            ]
+
+        exit, stdout, stderr = cliapp.runcmd_unchecked(
+            base_argv + quoted_args)
+
+        if exit != 0:
+            # The format string has three placeholders: the command,
+            # the host, and the combined output.
+            logging.error(
+                'Failed to run "%s" for %s:\n%s',
+                ' '.join(gitano_args), self.trovehost, stdout + stderr)
+            raise GitanoCommandFailure(
+                self.trovehost,
+                ' '.join(gitano_args),
+                stdout + stderr)
+
+        return stdout
+
+    def _http_command(self, gitano_args):
+        '''Run a Gitano command via gitano-command.cgi; return the body.
+
+        HTTP basic authentication is used only when both a username and
+        a password were given. Raises GitanoCommandFailure if urllib2
+        reports a URLError.
+
+        '''
+
+        quoted_args = urllib2.quote(' '.join(gitano_args))
+        url = urlparse.urlunsplit((
+            self.protocol,
+            self.trovehost,
+            '/gitano-command.cgi',
+            'cmd=%s' % quoted_args,
+            ''))
+        logging.debug('url=%r', url)
+
+        try:
+            request = urllib2.Request(url, None, {})
+            logging.debug('request=%r', request.get_full_url())
+            if self.username and self.password:
+                password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
+                password_mgr.add_password(None, url, self.username, self.password)
+                auth_handler = urllib2.HTTPBasicAuthHandler(password_mgr)
+                opener = urllib2.build_opener(auth_handler)
+                response = opener.open(url)
+            else:
+                response = urllib2.urlopen(request)
+        except urllib2.URLError as e:
+            raise GitanoCommandFailure(
+                self.trovehost, ' '.join(gitano_args), str(e))
+
+        return response.read()
+
+
+class LocalTroveGitanoCommand(GitanoCommand):
+
+    '''GitanoCommand preconfigured for the local Trove.
+
+    Commands are sent to the Gitano on ``localhost`` over ssh, with an
+    empty username and password.
+
+    '''
+
+    def __init__(self):
+        host, protocol = 'localhost', 'ssh'
+        GitanoCommand.__init__(self, host, protocol, '', '')
+
+
+
+def new_gitano_command(statedb, trovehost):
+    '''Create a GitanoCommand for trovehost from its stored trove info.
+
+    The protocol, username and password are read from the dict that
+    statedb.get_trove_info returns for the host.
+
+    '''
+
+    info = statedb.get_trove_info(trovehost)
+    protocol = info['protocol']
+    username = info['username']
+    password = info['password']
+    return lorrycontroller.GitanoCommand(
+        trovehost, protocol, username, password)
diff --git a/lorrycontroller/givemejob.py b/lorrycontroller/givemejob.py
new file mode 100644
index 0000000..755def0
--- /dev/null
+++ b/lorrycontroller/givemejob.py
@@ -0,0 +1,128 @@
+# Copyright (C) 2014 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+import collections
+import logging
+import re
+import time
+
+import bottle
+import cliapp
+
+import lorrycontroller
+
+
+class GiveMeJob(lorrycontroller.LorryControllerRoute):
+
+ '''Route handler for POST /1.0/give-me-job.
+
+ Picks the first lorry that is ready to run, prepares its repository
+ on the local Trove, records a new job for it and returns the lorry
+ info (with ``job_id`` set). Returns ``{ 'job_id': None }`` when no
+ job is handed out.
+
+ '''
+
+ http_method = 'POST'
+ path = '/1.0/give-me-job'
+
+ def run(self, **kwargs):
+ '''Hand out at most one job to the requesting MINION.'''
+ logging.info('%s %s called', self.http_method, self.path)
+
+ statedb = self.open_statedb()
+ with statedb:
+ # Jobs are only handed out while the queue is running and the
+ # configured maximum number of jobs has not been reached.
+ if statedb.get_running_queue() and not self.max_jobs_reached(statedb):
+ lorry_infos = statedb.get_all_lorries_info()
+ now = statedb.get_current_time()
+ for lorry_info in lorry_infos:
+ if self.ready_to_run(lorry_info, now):
+ self.create_repository_in_local_trove(
+ statedb, lorry_info)
+ # Only lorries that record a source Trove have metadata
+ # to copy.
+ if lorry_info['from_trovehost']:
+ self.copy_repository_metadata(statedb, lorry_info)
+ self.give_job_to_minion(statedb, lorry_info, now)
+ logging.info(
+ 'Giving job %s to lorry %s to MINION %s:%s',
+ lorry_info['job_id'],
+ lorry_info['path'],
+ bottle.request.forms.host,
+ bottle.request.forms.pid)
+ return lorry_info
+
+ logging.info('No job to give MINION')
+ return { 'job_id': None }
+
+ def max_jobs_reached(self, statedb):
+ '''Return True if as many jobs are running as max_jobs allows.
+
+ A max_jobs of None means there is no limit.
+
+ '''
+ max_jobs = statedb.get_max_jobs()
+ if max_jobs is None:
+ return False
+ running_jobs = statedb.get_running_jobs()
+ return len(running_jobs) >= max_jobs
+
+ def ready_to_run(self, lorry_info, now):
+ '''Return True if the lorry has no running job and is due.
+
+ A lorry is due when its last run plus its interval is not later
+ than now.
+
+ '''
+ due = lorry_info['last_run'] + lorry_info['interval']
+ return (lorry_info['running_job'] is None and due <= now)
+
+ def create_repository_in_local_trove(self, statedb, lorry_info):
+ # Create repository on local Trove. If it fails, assume
+ # it failed because the repository already existed, and
+ # ignore the failure (but log message).
+
+ local = lorrycontroller.LocalTroveGitanoCommand()
+ try:
+ local.create(lorry_info['path'])
+ except lorrycontroller.GitanoCommandFailure as e:
+ logging.debug(
+ 'Ignoring error creating %s on local Trove: %s',
+ lorry_info['path'], e)
+ else:
+ logging.info('Created %s on local repo', lorry_info['path'])
+
+ def copy_repository_metadata(self, statedb, lorry_info):
+ '''Copy project.head and project.description to the local Trove.'''
+
+ assert lorry_info['from_trovehost']
+ assert lorry_info['from_path']
+
+ remote = lorrycontroller.new_gitano_command(statedb, lorry_info['from_trovehost'])
+ local = lorrycontroller.LocalTroveGitanoCommand()
+
+ try:
+ remote_config = remote.get_gitano_config(lorry_info['from_path'])
+ local_config = local.get_gitano_config(lorry_info['path'])
+
+ # project.head is overwritten whenever the two sides differ.
+ if remote_config['project.head'] != local_config['project.head']:
+ local.set_gitano_config(
+ lorry_info['path'],
+ 'project.head',
+ remote_config['project.head'])
+
+ # project.description is only set while the local one is empty;
+ # the copied text is prefixed with the source Trove host.
+ if not local_config['project.description']:
+ desc = '{host}: {desc}'.format(
+ host=lorry_info['from_trovehost'],
+ desc=remote_config['project.description'])
+ local.set_gitano_config(
+ lorry_info['path'],
+ 'project.description',
+ desc)
+ except lorrycontroller.GitanoCommandFailure as e:
+ logging.error('ERROR: %s' % str(e))
+ # FIXME: We need a good way to report these errors to the
+ # user. However, we probably don't want to fail the
+ # request, so that's not the way to do this. Needs
+ # thinking.
+
+ def give_job_to_minion(self, statedb, lorry_info, now):
+ '''Record a new running job for the lorry and return lorry_info.
+
+ The MINION's host and pid are read from the POSTed form. The new
+ job id is stored both in the state database and in
+ lorry_info['job_id'].
+
+ '''
+ path = lorry_info['path']
+ minion_host = bottle.request.forms.host
+ minion_pid = bottle.request.forms.pid
+ running_job = statedb.get_next_job_id()
+ statedb.set_running_job(path, running_job)
+ statedb.add_new_job(
+ running_job, minion_host, minion_pid, path, int(now))
+ lorry_info['job_id'] = running_job
+ return lorry_info
diff --git a/lorrycontroller/htmlstatus.py b/lorrycontroller/htmlstatus.py
deleted file mode 100644
index 30b52f0..0000000
--- a/lorrycontroller/htmlstatus.py
+++ /dev/null
@@ -1,286 +0,0 @@
-# Copyright (C) 2013 Codethink Limited
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; version 2 of the License.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License along
-# with this program; if not, write to the Free Software Foundation, Inc.,
-# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-
-import os
-import time
-from cgi import escape
-
-state_names = [
- "Initialisation",
- "Load Troves",
- "Remove old repos",
- "Create new repos",
- "Process Lorries",
- "Finished"
- ]
-
-def format_time(time_t):
- return time.strftime("%Y-%m-%d %H:%M:%S UTC", time.gmtime(time_t))
-
-class HTMLStatusManager(object):
- '''Manage the HTML status page for lorry-controller.'''
-
-
- def __init__(self, app):
- self.app = app
- self.state = 0
- self.series = None
- self.filename = self.app.settings['html-file']
- self.mgr = None
- self.all_processing = set()
- self.processing = None
- self.processing_time = time.time()
- self.failing = None
- self.all_lorries_ever = set()
- self.bump_time = time.time()
-
- def set_failing(self, failmsg):
- self.failing = failmsg
- self.write_out_status()
-
- def set_mgr(self, mgr):
- self.mgr = mgr
-
- def set_processing(self, proc):
- if self.processing is not None:
- self.all_processing.add(self.processing)
- self.processing = proc
- self.processing_time = time.time()
- self.write_out_status()
-
- def bump_state(self):
- self.state = self.state + 1
- self.bump_time = time.time()
- self.processing = None
- self.write_out_status()
-
- def write_out_status(self):
- if self.filename is None: return
- try:
- with open(self.filename + ".new", "w") as ofh:
- ofh.write("<!DOCTYPE html>\n")
- ofh.write(self.gen_html())
- ofh.write("\n")
- target = self.filename
- if self.series is not None:
- target += ".%d" % self.series
- self.series += 1
- os.rename(self.filename + ".new", target)
- except:
- os.unlink(self.filename + ".new")
- raise
-
- def gen_html(self):
- head = self.gen_head()
- body = self.gen_body()
- return self.tag("html", content=head+"\n"+body, gap=True)
-
- def gen_head(self):
- title = self.tag("title", content="Lorry Controller")
- css = self.tag("link", href="trove.css", rel="stylesheet",
- type="text/css")
- script = self.tag(
- "script", type="text/javascript", src="/table.js", content="")
- return self.tag("head", content=title+css+script)
-
- def gen_body(self):
- # 1. Rough header, branded as trove
- curtime = format_time(time.time())
- link = "/"
- if self.series is not None:
- link = self.filename + ".%d" % (self.series + 1)
- header = '''
-<table id='header'><tr><td class='logo' rowspan='2'>
-<a href='%s'><img src='trove.png' alt='trove logo'/></a></td>
-<td class='main'>Status of Lorry Controller</td></tr>
-<tr><td class='sub'>Updated at %s</td></tr></table>
-''' % (link, curtime)
- # 2. List of steps and where we are currently
- steps = self.gen_steps()
-
- # 4. Main content
- content = self.gen_content()
-
- # 5. footer
- footer = self.gen_footer()
-
- # magic args
- return self.tag("body", content=self.tag(
- "div", content=(header+steps+content+footer),
- Class="lorrycontroller"))
-
- def gen_content(self):
- if self.failing is not None:
- return self.tag("div", Class="failure", content=self.failing)
- # 1. Troves known
- troves = self.gen_troves()
- # 2. Lorries known
- lorries = self.gen_lorries()
-
- return self.tag("div", Class="content", content=
- self.tag("div", id="troves", content=troves) +
- self.tag("div", id="lorries", content=lorries))
-
- def gen_troves(self):
- troves = []
- now = time.time()
- for trove in self.app.conf.troves:
- troveinfo = {}
- if self.mgr is not None:
- troveinfo = self.mgr.trove_state.get(trove['uuid'], {})
- uuid = self.tag("td", content=escape(trove['uuid']))
- state = "Up to date"
- if self.processing == trove['uuid']:
- state = "Processing since " + \
- format_time(self.processing_time)
- elif troveinfo.get('next-vls', now - 1) < now:
- if self.state < len(state_names) - 1:
- state = "Due to be checked this run."
- else:
- state = "Due to be checked on the next run"
- state = self.tag("td", content=escape(state))
- nextdue = self.tag("td", content=escape(format_time(
- troveinfo.get('next-vls', now - 1))))
- lorrycount = len([l for l in self.app.conf.lorries.itervalues()
- if l['controller-uuid'] == trove['uuid']])
- lorrycount = self.tag("td", content=str(lorrycount))
-
- troves.append(self.tag("tr", content=
- uuid+state+nextdue+lorrycount))
- if len(troves) == 0:
- content = "No troves detected"
- else:
- header = self.tag("tr", content=
- self.tag("th", content="Trove UUID") +
- self.tag("th", content="Status") +
- self.tag("th", content="Next due") +
- self.tag("th", content="Lorries created"))
- content = self.tag("table", content=
- header + "".join(troves))
-
- return content
-
- def gen_lorries(self):
- lorries = []
- now = time.time()
- all_lorry_names = set(self.app.conf.lorries.keys())
- if self.mgr is not None:
- all_lorry_names.update(set(self.mgr.lorry_state.keys()))
- self.all_lorries_ever.update(all_lorry_names)
- all_lorry_names = list(self.all_lorries_ever)
- all_lorry_names.sort()
- for lorry_name in all_lorry_names:
- lorry = self.app.conf.lorries.get(lorry_name, None)
- dead_lorry = False
- dead_and_gone = False
- if lorry is None:
- lorrystate = self.mgr.lorry_state.get(lorry_name, None)
- if lorrystate is None:
- dead_and_gone = True
- lorry = {}
- else:
- lorry = lorrystate['lorry']
- dead_lorry = True
- lorryinfo = {}
- if self.mgr is not None:
- lorryinfo = self.mgr.lorry_state.get(lorry_name, {})
- uuid = self.tag("td", content=
- escape(lorry.get('controller-uuid', 'Dead')))
- state = "Waiting "
- if self.processing == lorry_name:
- state = "Processing since " + \
- format_time(self.processing_time)
- elif lorryinfo.get('next-due', now - 1) < self.bump_time:
- if self.state < len(state_names) - 1:
- state = "Due to be checked this run."
- else:
- state = "Due to be checked on the next run"
- if self.mgr is not None:
- if self.mgr.lorry_state.get(lorry_name, None) is None:
- state = "Needs creating"
- elif lorry_name in self.all_processing:
- state = "Processed"
- if dead_lorry:
- state = "Dead - To be removed"
- if dead_and_gone:
- state = "Dead"
- if self.processing == lorry_name:
- state = "Removing since " + \
- format_time(self.processing_time)
- state = self.tag("td", content=escape(state))
- lastresult = self.tag("td", content=self.tag(
- "pre", content=escape(lorryinfo.get('result', '-'))))
- nextdue = self.tag("td", content=escape(format_time(
- lorryinfo.get('next-due', now - 1))))
- lorryname = self.tag("td", content=escape(lorry_name))
- lorries.append(self.tag("tr", content=
- lorryname+uuid+state+lastresult+nextdue))
- if len(lorries) == 0:
- content = "No lorries detected yet"
- else:
- header = self.tag("tr", content=
- self.tag("th",
- Class="table-sortable:alphanumeric",
- content="Lorry Name") +
- self.tag("th",
- Class="table-sortable:alphanumeric",
- content="Comes From") +
- self.tag("th",
- Class="table-sortable:alphanumeric",
- content="Status") +
- self.tag("th",
- Class="table-sortable:alphanumeric",
- content="Last result") +
- self.tag("th",
- Class="table-sortable:alphanumeric",
- content="Next due"))
- header = self.tag("thead", content=header)
- content = self.tag("table", Class="table-autosort:4", content=
- header + "\n" + "\n".join(lorries))
-
- return content
-
-
- def gen_footer(self):
- curtime = format_time(time.time())
- return self.tag("div", Class="footer", content=
- "Generated by Lorry Controller at " + curtime)
-
- def gen_steps(self):
- steps = []
- for idx in xrange(len(state_names)):
- if idx < self.state:
- Class = "donestep"
- elif idx == self.state:
- Class = "activestep"
- else:
- Class = "pendingstep"
- steps.append(self.tag("span", Class=Class,
- content=state_names[idx]))
- return self.tag("table", Class="steps", content=
- self.tag("tr", content=
- self.tag("td", content=
- "".join(steps))))
-
- def tag(self, tagname, content=None, gap=False, **kwargs):
- tagval = " ".join([tagname] +
- ["%s=%r" % (k.lower(), v) for k, v in kwargs.iteritems()])
- gap = "\n" if gap else ""
- if content is None:
- return "<%s />" % tagval
- else:
- return "<%s>%s%s%s</%s>" % (tagval, gap, content, gap, tagname)
-
diff --git a/lorrycontroller/jobupdate.py b/lorrycontroller/jobupdate.py
new file mode 100644
index 0000000..24a3c4a
--- /dev/null
+++ b/lorrycontroller/jobupdate.py
@@ -0,0 +1,76 @@
+# Copyright (C) 2014 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+import logging
+import time
+
+import bottle
+
+import lorrycontroller
+
+
+class JobUpdate(lorrycontroller.LorryControllerRoute):
+
+ '''Route handler for POST /1.0/job-update.
+
+ Records output and, once the job has exited, the exit status and
+ disk usage reported for a job. A job that is still running but has
+ exceeded its lorry's timeout is marked to be killed.
+
+ '''
+
+ http_method = 'POST'
+ path = '/1.0/job-update'
+
+ def run(self, **kwargs):
+ '''Apply the POSTed job update and return the lorry's info.'''
+ logging.info('%s %s called', self.http_method, self.path)
+
+ job_id = int(bottle.request.forms.job_id)
+ exit = bottle.request.forms.exit
+ stdout = bottle.request.forms.stdout
+ stderr = bottle.request.forms.stderr
+ disk_usage = bottle.request.forms.disk_usage
+
+ logging.info('Job %s updated (exit=%s)', job_id, exit)
+
+ with self.open_statedb() as statedb:
+ # stdout and stderr are appended to the same job output, stdout
+ # first.
+ if stdout:
+ statedb.append_to_job_output(job_id, stdout)
+ if stderr:
+ statedb.append_to_job_output(job_id, stderr)
+
+ path = statedb.find_lorry_running_job(job_id)
+ lorry_info = statedb.get_lorry_info(path)
+
+ # An exit value of 'no' means the job has not finished yet.
+ # NOTE(review): bottle form attributes presumably yield '' rather
+ # than None for a missing field, which this test would treat as
+ # "finished" -- confirm against the MINION's requests.
+ if exit is not None and exit != 'no':
+ now = statedb.get_current_time()
+ statedb.set_lorry_last_run(path, int(now))
+ statedb.set_running_job(path, None)
+ statedb.set_job_exit(job_id, exit, int(now), disk_usage)
+ statedb.set_lorry_disk_usage(path, disk_usage)
+ elif self.time_to_die(statedb, job_id, lorry_info):
+ logging.warning(
+ 'Job %r has been running too long, '
+ 'marking it to be exterminated', job_id)
+ statedb.set_kill_job(path, True)
+
+ # Re-read the lorry info so the response reflects the changes
+ # made above.
+ obj = statedb.get_lorry_info(path)
+ logging.debug('obj=%r', obj)
+ return obj
+
+ def time_to_die(self, statedb, job_id, lorry_info):
+ '''Return True if the job has run for at least lorry_timeout.
+
+ The age is the state database's current time minus the job's
+ start time.
+
+ '''
+ started, ended = statedb.get_job_started_and_ended(job_id)
+ lorry_timeout = lorry_info['lorry_timeout']
+ now = statedb.get_current_time()
+ age = now - started
+ logging.debug('started=%r', started)
+ logging.debug('ended=%r', ended)
+ logging.debug('lorry_timeout=%r', lorry_timeout)
+ logging.debug('now=%r', now)
+ logging.debug('age=%r', age)
+ return age >= lorry_timeout
diff --git a/lorrycontroller/listjobs.py b/lorrycontroller/listjobs.py
new file mode 100644
index 0000000..eaffeef
--- /dev/null
+++ b/lorrycontroller/listjobs.py
@@ -0,0 +1,63 @@
+# Copyright (C) 2014 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+import logging
+import time
+
+import bottle
+
+import lorrycontroller
+
+
+class ListAllJobs(lorrycontroller.LorryControllerRoute):
+
+    '''Route handler for GET /1.0/list-jobs.'''
+
+    http_method = 'GET'
+    path = '/1.0/list-jobs'
+
+    def run(self, **kwargs):
+        '''Return a dict holding every job id in the state database.'''
+        logging.info('%s %s called', self.http_method, self.path)
+
+        job_ids = self.open_statedb().get_job_ids()
+        return {'job_ids': job_ids}
+
+
+class ListAllJobsHTML(lorrycontroller.LorryControllerRoute):
+
+    '''Route handler for GET /1.0/list-jobs-html.'''
+
+    http_method = 'GET'
+    path = '/1.0/list-jobs-html'
+
+    def run(self, **kwargs):
+        '''Render the 'list-jobs' template with all jobs and a timestamp.'''
+        logging.info('%s %s called', self.http_method, self.path)
+        statedb = self.open_statedb()
+        now = statedb.get_current_time()
+        job_infos = self.get_jobs(statedb)
+        timestamp = time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime(now))
+        return bottle.template(
+            self._templates['list-jobs'],
+            job_infos=job_infos,
+            timestamp=timestamp)
+
+    def get_jobs(self, statedb):
+        '''Return one dict (job_id, exit, path) per job in statedb.
+
+        A job without an exit status is reported with exit 'no'.
+
+        '''
+
+        def describe(job_id):
+            status = statedb.get_job_exit(job_id)
+            return {
+                'job_id': job_id,
+                'exit': str(status) if status is not None else 'no',
+                'path': statedb.get_job_path(job_id),
+            }
+
+        return [describe(job_id) for job_id in statedb.get_job_ids()]
diff --git a/lorrycontroller/listqueue.py b/lorrycontroller/listqueue.py
new file mode 100644
index 0000000..5d68b83
--- /dev/null
+++ b/lorrycontroller/listqueue.py
@@ -0,0 +1,33 @@
+# Copyright (C) 2014 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+import logging
+
+import lorrycontroller
+
+
+class ListQueue(lorrycontroller.LorryControllerRoute):
+
+    '''Route handler for GET /1.0/list-queue.'''
+
+    http_method = 'GET'
+    path = '/1.0/list-queue'
+
+    def run(self, **kwargs):
+        '''Return the paths of all lorries, in state database order.'''
+        logging.info('%s %s called', self.http_method, self.path)
+        statedb = self.open_statedb()
+        paths = []
+        for lorry_info in statedb.get_all_lorries_info():
+            paths.append(lorry_info['path'])
+        return {'queue': paths}
diff --git a/lorrycontroller/listrunningjobs.py b/lorrycontroller/listrunningjobs.py
new file mode 100644
index 0000000..1f44743
--- /dev/null
+++ b/lorrycontroller/listrunningjobs.py
@@ -0,0 +1,34 @@
+# Copyright (C) 2014 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+import logging
+
+import lorrycontroller
+
+
+class ListRunningJobs(lorrycontroller.LorryControllerRoute):
+
+    '''Route handler for GET /1.0/list-running-jobs.'''
+
+    http_method = 'GET'
+    path = '/1.0/list-running-jobs'
+
+    def run(self, **kwargs):
+        '''Return a dict with the state database's running jobs.'''
+        logging.info('%s %s called', self.http_method, self.path)
+
+        return {'running_jobs': self.open_statedb().get_running_jobs()}
diff --git a/lorrycontroller/lstroves.py b/lorrycontroller/lstroves.py
new file mode 100644
index 0000000..e69dce2
--- /dev/null
+++ b/lorrycontroller/lstroves.py
@@ -0,0 +1,217 @@
+# Copyright (C) 2014 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+import json
+import logging
+import time
+
+import bottle
+import cliapp
+
+import lorrycontroller
+
+
+class GitanoLsError(Exception):
+
+    '''Raised when a remote host's git repositories cannot be listed.
+
+    The host is kept in the ``trovehost`` attribute.
+
+    '''
+
+    def __init__(self, trovehost, output):
+        message = (
+            'Failed to get list of git repositories '
+            'on remote host %s:\n%s' % (trovehost, output))
+        Exception.__init__(self, message)
+        self.trovehost = trovehost
+
+
+class TroveRepositoryLister(object):
+
+    '''List a remote Trove's repositories and sync lorries for them.
+
+    The repository list comes from Gitano's "ls" command, or from a
+    JSON file when the 'debug-fake-trove' setting is given. Each remote
+    path is mapped to a local path through the Trove's prefixmap, and a
+    lorry is added to the state database for every mapped repository.
+
+    '''
+
+    def __init__(self, app_settings, route):
+        self.app_settings = app_settings
+        self.route = route
+
+    def list_trove_into_statedb(self, statedb, trove_info):
+        '''List one Trove and update its lorries in statedb.
+
+        Also records the current time as the Trove's last ls run.
+        Returns nothing.
+
+        '''
+
+        remote_paths = self.ls(statedb, trove_info)
+        remote_paths = self.skip_ignored_repos(trove_info, remote_paths)
+        repo_map = self.map_remote_repos_to_local_ones(
+            trove_info, remote_paths)
+
+        with statedb:
+            self.update_lorries_for_trove(statedb, trove_info, repo_map)
+            now = statedb.get_current_time()
+            statedb.set_trove_ls_last_run(trove_info['trovehost'], now)
+
+    def ls(self, statedb, trove_info):
+        '''Return the Trove's repository paths (fake or real).'''
+        if self.app_settings['debug-fake-trove']:
+            repo_paths = self.get_fake_ls_output(trove_info)
+        else:
+            repo_paths = self.get_real_ls_output(statedb, trove_info)
+
+        return repo_paths
+
+    def get_fake_ls_output(self, trove_info):
+        '''Return the 'ls-output' value of the fake file for this Trove.
+
+        Each 'debug-fake-trove' item has the form HOST=PATH, where PATH
+        names a JSON file. Returns None if no item matches the host.
+
+        '''
+
+        trovehost = trove_info['trovehost']
+        for item in self.app_settings['debug-fake-trove']:
+            host, path = item.split('=', 1)
+            if host == trovehost:
+                with open(path) as f:
+                    obj = json.load(f)
+                    return obj['ls-output']
+        return None
+
+    def get_real_ls_output(self, statedb, trove_info):
+        '''Run Gitano "ls" on the Trove and return the parsed paths.'''
+        gitano = lorrycontroller.new_gitano_command(statedb, trove_info['trovehost'])
+        output = gitano.ls()
+        return self.parse_ls_output(output)
+
+    def parse_ls_output(self, ls_output):
+        '''Return the repository paths found in Gitano "ls" output.
+
+        A line contributes a path when it consists of two
+        whitespace-separated fields and the first field starts with
+        'R'. All other lines, including blank ones, are skipped.
+
+        '''
+
+        repo_paths = []
+        for line in ls_output.splitlines():
+            words = line.split(None, 1)
+            # Check the length first: a blank line splits into an empty
+            # list, and indexing it would raise IndexError.
+            if len(words) == 2 and words[0].startswith('R'):
+                repo_paths.append(words[1])
+        return repo_paths
+
+    def skip_ignored_repos(self, trovehost, repo_paths):
+        '''Return repo_paths without the Trove's ignored paths.
+
+        Despite its name, ``trovehost`` is the trove info dict; its
+        'ignore' value is a JSON-encoded collection of paths.
+
+        '''
+
+        ignored_paths = json.loads(trovehost['ignore'])
+        return [x for x in repo_paths if x not in ignored_paths]
+
+    def map_remote_repos_to_local_ones(self, trove_info, remote_paths):
+        '''Return a dict that maps each remote repo path to a local one.'''
+        prefixmap = self.parse_prefixmap(trove_info['prefixmap'])
+        repo_map = {}
+        for remote_path in remote_paths:
+            local_path = self.map_one_remote_repo_to_local_one(
+                remote_path, prefixmap)
+            if local_path:
+                repo_map[remote_path] = local_path
+            else:
+                logging.debug('Remote repo %r not in prefixmap', remote_path)
+        return repo_map
+
+    def parse_prefixmap(self, prefixmap_string):
+        '''Decode the JSON-encoded prefixmap.'''
+        return json.loads(prefixmap_string)
+
+    def map_one_remote_repo_to_local_one(self, remote_path, prefixmap):
+        '''Return the local path for remote_path, or None if unmapped.
+
+        The first matching remote prefix is replaced by its local
+        prefix; the rest of the path is kept.
+
+        '''
+
+        for remote_prefix in prefixmap:
+            if self.path_starts_with_prefix(remote_path, remote_prefix):
+                local_prefix = prefixmap[remote_prefix]
+                relative_path = remote_path[len(remote_prefix):]
+                local_path = local_prefix + relative_path
+                return local_path
+        return None
+
+    def path_starts_with_prefix(self, path, prefix):
+        '''Return True if path is prefix followed by a '/'.'''
+        return path.startswith(prefix) and path[len(prefix):].startswith('/')
+
+    def update_lorries_for_trove(self, statedb, trove_info, repo_map):
+        '''Add a lorry per mapped repo and remove lorries no longer wanted.'''
+        trovehost = trove_info['trovehost']
+        for remote_path, local_path in repo_map.items():
+            lorry = self.construct_lorry(trove_info, local_path, remote_path)
+            statedb.add_to_lorries(
+                path=local_path,
+                text=json.dumps(lorry, indent=4),
+                from_trovehost=trovehost,
+                from_path=remote_path,
+                interval=trove_info['lorry_interval'],
+                timeout=trove_info['lorry_timeout'])
+
+        # Anything the state database holds for this Trove that is not
+        # in the current mapping gets removed.
+        all_local_paths = set(statedb.get_lorries_for_trove(trovehost))
+        wanted_local_paths = set(repo_map.values())
+        delete_local_paths = all_local_paths.difference(wanted_local_paths)
+        for local_path in delete_local_paths:
+            statedb.remove_lorry(local_path)
+
+    def construct_lorry(self, trove_info, local_path, remote_path):
+        '''Return the lorry specification dict for one git repository.'''
+        return {
+            local_path: {
+                'type': 'git',
+                'url': self.construct_lorry_url(trove_info, remote_path),
+                'refspecs': [
+                    "+refs/heads/*",
+                    "+refs/tags/*",
+                ],
+            }
+        }
+
+    def construct_lorry_url(self, trove_info, remote_path):
+        '''Return the git URL of remote_path for the Trove's protocol.
+
+        Raises KeyError if the protocol is not ssh, https or http.
+
+        '''
+
+        # Copy so the caller's dict is not modified; named so that it
+        # does not shadow the vars() builtin.
+        url_vars = dict(trove_info)
+        url_vars['remote_path'] = remote_path
+
+        patterns = {
+            'ssh': 'ssh://git@{trovehost}/{remote_path}',
+            'https':
+                'https://{username}:{password}@{trovehost}/git/{remote_path}',
+            'http': 'http://{trovehost}/git/{remote_path}',
+        }
+
+        return patterns[trove_info['protocol']].format(**url_vars)
+
+
+class ForceLsTrove(lorrycontroller.LorryControllerRoute):
+
+    '''Route handler for POST /1.0/force-ls-trove.
+
+    Lists the Trove named by the POSTed ``trovehost`` form field right
+    away and updates its lorries in the state database.
+
+    '''
+
+    http_method = 'POST'
+    path = '/1.0/force-ls-trove'
+
+    def run(self, **kwargs):
+        '''List the requested Trove; return the hosts that were updated.
+
+        Responds with HTTP status 500 if listing raises GitanoLsError.
+
+        '''
+
+        logging.info('%s %s called', self.http_method, self.path)
+
+        trovehost = bottle.request.forms.trovehost
+
+        statedb = self.open_statedb()
+        lister = TroveRepositoryLister(self.app_settings, self)
+        trove_info = statedb.get_trove_info(trovehost)
+        try:
+            lister.list_trove_into_statedb(statedb, trove_info)
+        except GitanoLsError as e:
+            # bottle.abort raises by itself; no "raise" is needed.
+            bottle.abort(500, str(e))
+
+        # list_trove_into_statedb returns nothing, so report the host
+        # ourselves, as a list like the ls-troves route does.
+        return { 'updated-troves': [trovehost] }
+
+
+class LsTroves(lorrycontroller.LorryControllerRoute):
+
+    '''Route handler for POST /1.0/ls-troves: list every due Trove.'''
+
+    http_method = 'POST'
+    path = '/1.0/ls-troves'
+
+    def run(self, **kwargs):
+        '''List all due Troves and return their hosts.
+
+        Responds with HTTP status 500 if a listing raises GitanoLsError.
+
+        '''
+
+        logging.info('%s %s called', self.http_method, self.path)
+
+        statedb = self.open_statedb()
+        lister = TroveRepositoryLister(self.app_settings, self)
+
+        due_troves = self.get_due_troves(statedb)
+        for info in due_troves:
+            logging.info('Trove %r is due an ls', info['trovehost'])
+            try:
+                lister.list_trove_into_statedb(statedb, info)
+            except GitanoLsError as e:
+                bottle.abort(500, str(e))
+
+        hosts = []
+        for info in due_troves:
+            hosts.append(info['trovehost'])
+        return {'updated-troves': hosts}
+
+    def get_due_troves(self, statedb):
+        '''Return the trove info dicts of all Troves that are due an ls.'''
+        candidates = []
+        for trovehost in statedb.get_troves():
+            candidates.append(statedb.get_trove_info(trovehost))
+
+        now = statedb.get_current_time()
+
+        due = []
+        for info in candidates:
+            if self.is_due(info, now):
+                due.append(info)
+        return due
+
+    def is_due(self, trove_info, now):
+        '''Return True if last ls run plus ls interval is not after now.'''
+        return trove_info['ls_last_run'] + trove_info['ls_interval'] <= now
diff --git a/lorrycontroller/maxjobs.py b/lorrycontroller/maxjobs.py
new file mode 100644
index 0000000..ce594c2
--- /dev/null
+++ b/lorrycontroller/maxjobs.py
@@ -0,0 +1,55 @@
+# Copyright (C) 2014 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+import logging
+import os
+import time
+
+import bottle
+
+import lorrycontroller
+
+
+class GetMaxJobs(lorrycontroller.LorryControllerRoute):
+
+    '''Route that reports the max_jobs value stored in STATEDB.'''
+
+    http_method = 'GET'
+    path = '/1.0/get-max-jobs'
+
+    def run(self, **kwargs):
+        logging.info('%s %s called', self.http_method, self.path)
+
+        current = self.open_statedb().get_max_jobs()
+        return { 'max_jobs': current }
+
+
+class SetMaxJobs(lorrycontroller.LorryControllerRoute):
+
+    '''Route that stores a new max_jobs value in STATEDB.
+
+    The new value is taken from the ``max_jobs`` form field and the
+    value read back from STATEDB is returned to the caller.
+
+    '''
+
+    http_method = 'POST'
+    path = '/1.0/set-max-jobs'
+
+    def run(self, **kwargs):
+        logging.info('%s %s called', self.http_method, self.path)
+
+        statedb = self.open_statedb()
+        requested = bottle.request.forms.max_jobs
+
+        with statedb:
+            statedb.set_max_jobs(requested)
+
+        stored = statedb.get_max_jobs()
+        return { 'max_jobs': stored }
diff --git a/lorrycontroller/movetopbottom.py b/lorrycontroller/movetopbottom.py
new file mode 100644
index 0000000..dcb79a4
--- /dev/null
+++ b/lorrycontroller/movetopbottom.py
@@ -0,0 +1,58 @@
+# Copyright (C) 2014 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+import logging
+
+import bottle
+
+import lorrycontroller
+
+
+class MoveToTop(lorrycontroller.LorryControllerRoute):
+
+    '''Route that moves a Lorry to the top of the run-queue.
+
+    The Lorry is named by the ``path`` form field. Its last_run is set
+    to the smaller of zero and one less than the last_run of the first
+    Lorry returned by STATEDB.
+
+    '''
+
+    http_method = 'POST'
+    path = '/1.0/move-to-top'
+
+    def run(self, **kwargs):
+        logging.info('%s %s called', self.http_method, self.path)
+        path = bottle.request.forms.path
+        statedb = self.open_statedb()
+        with statedb:
+            queue = statedb.get_all_lorries_info()
+            if queue:
+                just_before_first = queue[0]['last_run'] - 1
+                statedb.set_lorry_last_run(path, min(0, just_before_first))
+        return 'Lorry %s moved to top of run-queue' % path
+
+
+class MoveToBottom(lorrycontroller.LorryControllerRoute):
+
+    '''Route that moves a Lorry to the bottom of the run-queue.
+
+    The Lorry is named by the ``path`` form field. Its last_run is set
+    to one more than last_run + interval of the last Lorry returned by
+    STATEDB. Nothing is changed if STATEDB has no Lorries.
+
+    '''
+
+    http_method = 'POST'
+    path = '/1.0/move-to-bottom'
+
+    def run(self, **kwargs):
+        logging.info('%s %s called', self.http_method, self.path)
+        path = bottle.request.forms.path
+        statedb = self.open_statedb()
+        with statedb:
+            lorry_infos = statedb.get_all_lorries_info()
+            if lorry_infos:
+                bottommost = lorry_infos[-1]
+                timestamp = (
+                    bottommost['last_run'] + bottommost['interval'] + 1)
+                statedb.set_lorry_last_run(path, timestamp)
+        # The message used to misspell "bottom" as "bototm".
+        return 'Lorry %s moved to bottom of run-queue' % path
diff --git a/lorrycontroller/pretendtime.py b/lorrycontroller/pretendtime.py
new file mode 100644
index 0000000..3fd1a70
--- /dev/null
+++ b/lorrycontroller/pretendtime.py
@@ -0,0 +1,42 @@
+# Copyright (C) 2014 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+import errno
+import glob
+import json
+import logging
+import os
+import re
+
+import bottle
+import cliapp
+
+import lorrycontroller
+
+
+class PretendTime(lorrycontroller.LorryControllerRoute):
+
+    '''Route that stores a pretend time in STATEDB.
+
+    The value is taken verbatim from the ``now`` form field.
+
+    '''
+
+    http_method = 'POST'
+    path = '/1.0/pretend-time'
+
+    def run(self, **kwargs):
+        logging.info('%s %s called', self.http_method, self.path)
+
+        pretend_now = bottle.request.forms.now
+
+        statedb = self.open_statedb()
+        with statedb:
+            statedb.set_pretend_time(pretend_now)
diff --git a/lorrycontroller/proxy.py b/lorrycontroller/proxy.py
new file mode 100644
index 0000000..44749c9
--- /dev/null
+++ b/lorrycontroller/proxy.py
@@ -0,0 +1,51 @@
+# Copyright (C) 2014 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+import json
+import os
+import urllib
+import urllib2
+
+
+def setup_proxy(config_filename):
+    """Tell urllib2 to use a proxy for HTTP(S) requests.
+
+    Load the proxy information from the JSON file named by
+    ``config_filename``. The file must contain an object with the keys
+    ``hostname``, ``username``, ``password`` and ``port``. If the file
+    does not exist, nothing is changed.
+
+    Otherwise the ``http_proxy`` and ``https_proxy`` environment
+    variables are set, and a global urllib2 opener that goes via the
+    authenticated proxy is installed.
+
+    """
+
+    if not os.path.exists(config_filename):
+        return
+
+    with open(config_filename, 'r') as f:
+        proxy = json.load(f)
+
+    # set the required environment variables
+    hostname = urllib.quote(proxy['hostname'])
+    user = '%s:%s' % (proxy['username'], proxy['password'])
+    url = '%s:%s' % (hostname, proxy['port'])
+    http_proxy = 'http://%s@%s' % (user, url)
+    https_proxy = 'https://%s@%s' % (user, url)
+    os.environ['http_proxy'] = http_proxy
+    os.environ['https_proxy'] = https_proxy
+
+    # create a ProxyHandler; its dict is keyed by URL scheme ('http',
+    # 'https'), not by environment variable name, otherwise the
+    # handler never matches any request.
+    proxies = {'http': http_proxy,
+               'https': https_proxy}
+    proxy_handler = urllib2.ProxyHandler(proxies)
+
+    # install an opener to use the proxy
+    opener = urllib2.build_opener(proxy_handler)
+    urllib2.install_opener(opener)
diff --git a/lorrycontroller/readconf.py b/lorrycontroller/readconf.py
new file mode 100644
index 0000000..d060067
--- /dev/null
+++ b/lorrycontroller/readconf.py
@@ -0,0 +1,351 @@
+# Copyright (C) 2014 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+import errno
+import glob
+import json
+import logging
+import os
+import re
+
+import bottle
+import cliapp
+
+import lorrycontroller
+
+
+class LorryControllerConfParseError(Exception):
+
+    '''The configuration file could not be parsed.'''
+
+    def __init__(self, filename, exc):
+        message = 'ERROR reading %s: %s' % (filename, str(exc))
+        super(LorryControllerConfParseError, self).__init__(message)
+
+
+class ReadConfiguration(lorrycontroller.LorryControllerRoute):
+
+    '''Route that reads the configuration file into STATEDB.
+
+    The configuration is a JSON list of sections. Sections of type
+    ``lorries`` name .lorry files via globs; sections of type
+    ``trove`` or ``troves`` describe a remote Trove. Lorries and
+    Troves that are in STATEDB but no longer in the configuration are
+    removed from STATEDB.
+
+    The route returns a plain text message; problems with the
+    configuration are reported as a message starting with ``ERROR``.
+
+    '''
+
+    http_method = 'POST'
+    path = '/1.0/read-configuration'
+
+    DEFAULT_LORRY_TIMEOUT = 3600 # in seconds
+
+    def run(self, **kwargs):
+        logging.info('%s %s called', self.http_method, self.path)
+
+        self.get_confgit()
+
+        try:
+            conf_obj = self.read_config_file()
+        except LorryControllerConfParseError as e:
+            return str(e)
+
+        error = self.validate_config(conf_obj)
+        if error:
+            return 'ERROR: %s: %r' % (error, conf_obj)
+
+        self.fix_up_parsed_fields(conf_obj)
+
+        statedb = self.open_statedb()
+        with statedb:
+            # Start by assuming everything is to be removed, then
+            # strike out whatever the configuration still mentions.
+            lorries_to_remove = set(statedb.get_lorries_paths())
+            troves_to_remove = set(statedb.get_troves())
+
+            for section in conf_obj:
+                if 'type' not in section:
+                    return 'ERROR: no type field in section'
+                if section['type'] == 'lorries':
+                    added = self.add_matching_lorries_to_statedb(
+                        statedb, section)
+                    lorries_to_remove = lorries_to_remove.difference(added)
+                elif section['type'] in ('trove', 'troves'):
+                    self.add_trove(statedb, section)
+                    trovehost = section['trovehost']
+                    if trovehost in troves_to_remove:
+                        troves_to_remove.remove(trovehost)
+                    lorries_to_remove = lorries_to_remove.difference(
+                        statedb.get_lorries_for_trove(trovehost))
+                else:
+                    logging.error(
+                        'Unknown section in configuration: %r', section)
+                    return (
+                        'ERROR: Unknown section type in configuration: %r' %
+                        section)
+
+            for path in lorries_to_remove:
+                statedb.remove_lorry(path)
+
+            for trovehost in troves_to_remove:
+                statedb.remove_trove(trovehost)
+                statedb.remove_lorries_for_trovehost(trovehost)
+
+        if 'redirect' in bottle.request.forms:
+            bottle.redirect(bottle.request.forms.redirect)
+
+        return 'Configuration has been updated.'
+
+    def get_confgit(self):
+        '''Clone or update CONFGIT, if the debug-real-confgit setting is on.'''
+        if self.app_settings['debug-real-confgit']:
+            confdir = self.app_settings['configuration-directory']
+            if not os.path.exists(confdir):
+                self.git_clone_confgit(confdir)
+            else:
+                self.git_pull_confgit(confdir)
+
+    def git_clone_confgit(self, confdir):
+        '''Clone the configured branch of CONFGIT into confdir.'''
+        url = self.app_settings['confgit-url']
+        branch = self.app_settings['confgit-branch']
+        logging.info('Cloning %s to %s', url, confdir)
+        cliapp.runcmd(['git', 'clone', '-b', branch, url, confdir])
+
+    def git_pull_confgit(self, confdir):
+        '''Run "git pull" in an existing CONFGIT checkout.'''
+        logging.info('Updating CONFGIT in %s', confdir)
+        cliapp.runcmd(['git', 'pull'], cwd=confdir)
+
+    @property
+    def config_file_name(self):
+        '''Full path of lorry-controller.conf in the configuration dir.'''
+        return os.path.join(
+            self.app_settings['configuration-directory'],
+            'lorry-controller.conf')
+
+    def read_config_file(self):
+        '''Read the configuration file, return as Python object.
+
+        A missing file is treated as an empty configuration. Other
+        I/O errors abort the request with HTTP status 500. A file
+        that is not valid JSON raises LorryControllerConfParseError.
+
+        '''
+
+        filename = self.config_file_name
+        logging.debug('Reading configuration file %s', filename)
+
+        try:
+            with open(filename) as f:
+                return json.load(f)
+        except IOError as e:
+            if e.errno == errno.ENOENT:
+                logging.debug(
+                    '%s: does not exist, returning empty config', filename)
+                return []
+            bottle.abort(500, 'Error reading %s: %s' % (filename, e))
+        except ValueError as e:
+            logging.error('Error parsing configuration: %s', e)
+            raise LorryControllerConfParseError(filename, e)
+
+    def validate_config(self, obj):
+        '''Return an error message for obj, or None if it is valid.'''
+        validator = LorryControllerConfValidator()
+        return validator.validate_config(obj)
+
+    def fix_up_parsed_fields(self, obj):
+        '''Replace interval specs in every section with seconds.'''
+        for item in obj:
+            item['interval'] = self.fix_up_interval(item.get('interval'))
+            item['ls-interval'] = self.fix_up_interval(item.get('ls-interval'))
+
+    def fix_up_interval(self, value):
+        '''Convert an interval spec such as "30m" into seconds.
+
+        The spec is a number optionally followed by one of the
+        suffixes s, m, h or d (case-insensitive); no suffix means
+        seconds. A missing or empty value, or one that does not start
+        with digits, gives the default of one day.
+
+        '''
+
+        default_interval = 86400 # 1 day
+        if not value:
+            return default_interval
+        m = re.match(r'(\d+)\s*(s|m|h|d)?', value, re.I)
+        if not m:
+            # This used to return the undefined name ``default_value``,
+            # which raised NameError for any unparseable interval.
+            return default_interval
+
+        number, factor = m.groups()
+        factors = {
+            's': 1,
+            'm': 60,
+            'h': 60*60,
+            'd': 60*60*24,
+            }
+        if factor is None:
+            factor = 's'
+        factor = factors.get(factor.lower(), 1)
+        return int(number) * factor
+
+    def add_matching_lorries_to_statedb(self, statedb, section):
+        '''Add the Lorry specs matched by a lorries section to STATEDB.
+
+        Return the set of repository paths that were added.
+
+        '''
+
+        logging.debug('Adding matching lorries to STATEDB')
+
+        added_paths = set()
+
+        filenames = self.find_lorry_files_for_section(section)
+        logging.debug('filenames=%r', filenames)
+        lorry_specs = []
+        for filename in sorted(filenames):
+            logging.debug('Reading .lorry: %s', filename)
+            for subpath, obj in self.get_valid_lorry_specs(filename):
+                self.add_refspecs_if_missing(obj)
+                lorry_specs.append((subpath, obj))
+
+        for subpath, obj in sorted(lorry_specs):
+            path = self.deduce_repo_path(section, subpath)
+            text = self.serialise_lorry_spec(path, obj)
+            interval = section['interval']
+            timeout = section.get(
+                'lorry-timeout', self.DEFAULT_LORRY_TIMEOUT)
+
+            # NOTE(review): the looked-up info is not used below; the
+            # lookup is kept so the calls made on STATEDB are unchanged.
+            try:
+                old_lorry_info = statedb.get_lorry_info(path)
+            except lorrycontroller.LorryNotFoundError:
+                old_lorry_info = None
+
+            statedb.add_to_lorries(
+                path=path, text=text, from_trovehost='', from_path='',
+                interval=interval, timeout=timeout)
+
+            added_paths.add(path)
+
+        return added_paths
+
+    def find_lorry_files_for_section(self, section):
+        '''Expand the section's globs relative to the config file's dir.'''
+        result = []
+        dirname = os.path.dirname(self.config_file_name)
+        for base_pattern in section['globs']:
+            pattern = os.path.join(dirname, base_pattern)
+            result.extend(glob.glob(pattern))
+        return result
+
+    def get_valid_lorry_specs(self, filename):
+        '''Return list of (key, spec) pairs for the usable specs in a file.'''
+
+        # We do some basic validation of the .lorry file and the Lorry
+        # specs contained within it. We silently ignore anything that
+        # doesn't look OK. We don't have a reasonable mechanism to
+        # communicate any problems to the user, but we do log them to
+        # the log file.
+
+        try:
+            with open(filename) as f:
+                obj = json.load(f)
+        except ValueError as e:
+            logging.error('JSON problem in %s', filename)
+            return []
+
+        if type(obj) != dict:
+            logging.error('%s: does not contain a dict', filename)
+            return []
+
+        items = []
+        for key in obj:
+            if type(obj[key]) != dict:
+                logging.error(
+                    '%s: key %s does not map to a dict', filename, key)
+                continue
+
+            if 'type' not in obj[key]:
+                logging.error(
+                    '%s: key %s does not have type field', filename, key)
+                continue
+
+            logging.debug('Happy with Lorry spec %r: %r', key, obj[key])
+            items.append((key, obj[key]))
+
+        return items
+
+    def add_refspecs_if_missing(self, obj):
+        '''Give a Lorry spec the default refspecs if it has none.'''
+        if 'refspecs' not in obj:
+            obj['refspecs'] = [
+                '+refs/heads/*',
+                '+refs/tags/*',
+                ]
+
+    def deduce_repo_path(self, section, subpath):
+        '''Join the section's prefix and a spec's key with a slash.'''
+        return '%s/%s' % (section['prefix'], subpath)
+
+    def serialise_lorry_spec(self, path, obj):
+        '''Return the JSON text of a one-entry dict mapping path to obj.'''
+        new_obj = { path: obj }
+        return json.dumps(new_obj, indent=4)
+
+    def add_trove(self, statedb, section):
+        '''Add the Trove described by a trove section to STATEDB.'''
+        username = None
+        password = None
+        if 'auth' in section:
+            auth = section['auth']
+            username = auth.get('username')
+            password = auth.get('password')
+
+        statedb.add_trove(
+            trovehost=section['trovehost'],
+            protocol=section['protocol'],
+            username=username,
+            password=password,
+            lorry_interval=section['interval'],
+            lorry_timeout=section.get(
+                'lorry-timeout', self.DEFAULT_LORRY_TIMEOUT),
+            ls_interval=section['ls-interval'],
+            prefixmap=json.dumps(section['prefixmap']),
+            # The validator does not require an ignore field, so a
+            # section without one must not raise KeyError here.
+            ignore=json.dumps(section.get('ignore', [])))
+
+
+class ValidationError(Exception):
+
+    '''A problem found while validating the configuration.'''
+
+    def __init__(self, msg):
+        super(ValidationError, self).__init__(msg)
+
+
+class LorryControllerConfValidator(object):
+
+    '''Check the structure of a parsed configuration object.'''
+
+    def validate_config(self, conf_obj):
+        '''Return an error message string, or None if conf_obj is OK.'''
+        try:
+            self._check_is_list(conf_obj)
+            self._check_is_list_of_dicts(conf_obj)
+            for section in conf_obj:
+                self._check_section(section)
+        except ValidationError as e:
+            return str(e)
+
+        return None
+
+    def _check_section(self, section):
+        # Dispatch on the section's type field.
+        if 'type' not in section:
+            raise ValidationError(
+                'section without type: %r' % section)
+
+        section_type = section['type']
+        if section_type in ('trove', 'troves'):
+            self._check_troves_section(section)
+        elif section_type == 'lorries':
+            self._check_lorries_section(section)
+        else:
+            raise ValidationError(
+                'unknown section type %r' % section_type)
+
+    def _check_is_list(self, conf_obj):
+        actual_type = type(conf_obj)
+        if actual_type is not list:
+            raise ValidationError(
+                'type %r is not a JSON list' % actual_type)
+
+    def _check_is_list_of_dicts(self, conf_obj):
+        if any(type(item) is not dict for item in conf_obj):
+            raise ValidationError('all items must be dicts')
+
+    def _check_troves_section(self, section):
+        required = [
+            'trovehost', 'protocol', 'interval', 'ls-interval', 'prefixmap']
+        self._check_has_required_fields(section, required)
+        self._check_protocol(section)
+        self._check_prefixmap(section)
+
+    def _check_protocol(self, section):
+        valid = ('ssh', 'http', 'https')
+        protocol = section['protocol']
+        if protocol not in valid:
+            raise ValidationError(
+                'protocol field has value "%s", but valid ones are %s' %
+                (protocol, ', '.join(valid)))
+
+    def _check_prefixmap(self, section):
+        # FIXME: We should be checking the prefixmap for things like
+        # mapping to a prefix that starts with the local Trove ID, but
+        # since we don't have easy access to that, we don't do that
+        # yet. This should be fixed later.
+        pass
+
+    def _check_lorries_section(self, section):
+        self._check_has_required_fields(
+            section, ['interval', 'prefix', 'globs'])
+
+    def _check_has_required_fields(self, section, fields):
+        # Only the first missing field, in the order given, is reported.
+        missing = [field for field in fields if field not in section]
+        if missing:
+            raise ValidationError(
+                'mandatory field %s missing in section %r' %
+                (missing[0], section))
diff --git a/lorrycontroller/removejob.py b/lorrycontroller/removejob.py
new file mode 100644
index 0000000..5de65ba
--- /dev/null
+++ b/lorrycontroller/removejob.py
@@ -0,0 +1,44 @@
+# Copyright (C) 2014 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+import logging
+
+import bottle
+
+import lorrycontroller
+
+
+class RemoveJob(lorrycontroller.LorryControllerRoute):
+
+    '''Route that removes a job from STATEDB.
+
+    The job is named by the ``job_id`` form field. The job is not
+    removed while STATEDB can still find a Lorry running it.
+
+    '''
+
+    http_method = 'POST'
+    path = '/1.0/remove-job'
+
+    def run(self, **kwargs):
+        logging.info('%s %s called', self.http_method, self.path)
+
+        job_id = bottle.request.forms.job_id
+
+        statedb = self.open_statedb()
+        with statedb:
+            try:
+                statedb.find_lorry_running_job(job_id)
+            except lorrycontroller.WrongNumberLorriesRunningJob:
+                still_running = False
+            else:
+                still_running = True
+
+            if still_running:
+                return { 'job_id': None, 'reason': 'still running' }
+
+            statedb.remove_job(job_id)
+            return { 'job_id': job_id }
diff --git a/lorrycontroller/route.py b/lorrycontroller/route.py
new file mode 100644
index 0000000..91a406e
--- /dev/null
+++ b/lorrycontroller/route.py
@@ -0,0 +1,45 @@
+# Copyright (C) 2014 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+import lorrycontroller
+
+
+class LorryControllerRoute(object):
+
+    '''Base class for Lorry Controller HTTP API routes.
+
+    A route is an HTTP request that the Bottle web application
+    recognises as satisfied by a particular callback. Each callback
+    is written as a subclass of this class, which makes the routes
+    easier to implement and lets them be added automatically to a
+    Bottle instance.
+
+    Subclasses MUST define the attributes ``http_method`` and
+    ``path``, which are given to the bottle.Bottle.route method as
+    the arguments ``method`` and ``path``, respectively. They must
+    also override ``run``.
+
+    '''
+
+    def __init__(self, app_settings, templates):
+        self._statedb = None
+        self._templates = templates
+        self.app_settings = app_settings
+
+    def open_statedb(self):
+        '''Return a new StateDB for the file named by the statedb setting.'''
+        filename = self.app_settings['statedb']
+        return lorrycontroller.StateDB(filename)
+
+    def run(self, **kwargs):
+        '''Handle the request; subclasses must override this.'''
+        raise NotImplementedError()
diff --git a/lorrycontroller/showjob.py b/lorrycontroller/showjob.py
new file mode 100644
index 0000000..6f73ed6
--- /dev/null
+++ b/lorrycontroller/showjob.py
@@ -0,0 +1,83 @@
+# Copyright (C) 2014 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+import logging
+import time
+
+import bottle
+
+import lorrycontroller
+
+
+class JobShower(object):
+
+    '''Collect the STATEDB information about one job for display.'''
+
+    def get_job_as_json(self, statedb, job_id):
+        '''Return a JSON-serialisable dict describing job job_id.
+
+        Times are formatted with format_time and the disk usage is
+        given both raw and via format_bytesize. An exit value of None
+        is reported as the string 'no'.
+
+        '''
+
+        path = statedb.get_job_path(job_id)
+        exit = statedb.get_job_exit(job_id)
+        output = statedb.get_job_output(job_id)
+        started, ended = statedb.get_job_started_and_ended(job_id)
+        disk_usage = statedb.get_job_disk_usage(job_id)
+        now = statedb.get_current_time()
+
+        return {
+            'job_id': job_id,
+            'host': statedb.get_job_minion_host(job_id),
+            'pid': statedb.get_job_minion_pid(job_id),
+            # Reuse the path fetched above instead of asking STATEDB
+            # for it a second time.
+            'path': path,
+            'exit': 'no' if exit is None else exit,
+            'disk_usage': disk_usage,
+            'disk_usage_nice': self.format_bytesize(disk_usage),
+            'output': output,
+            'job_started': self.format_time(started),
+            'job_ended': self.format_time(ended),
+            'timestamp': self.format_time(now),
+            }
+
+    def format_time(self, timestamp):
+        '''Format seconds since the epoch as a UTC date and time.'''
+        return time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime(timestamp))
+
+    def format_bytesize(self, num_bytes):
+        '''Format a byte count in MiB, or 'unknown' if it is None.'''
+        if num_bytes is None:
+            return 'unknown'
+        mebibyte = 2**20
+        return '%.1f MiB' % (float(num_bytes) / float(mebibyte))
+
+
+class ShowJob(lorrycontroller.LorryControllerRoute):
+
+    '''Route that returns the description of one job as JSON.'''
+
+    http_method = 'GET'
+    path = '/1.0/job/<job_id:int>'
+
+    def run(self, **kwargs):
+        logging.info('%s %s called', self.http_method, self.path)
+        wanted_id = int(kwargs['job_id'])
+
+        statedb = self.open_statedb()
+        shower = JobShower()
+        return shower.get_job_as_json(statedb, wanted_id)
+
+
+class ShowJobHTML(lorrycontroller.LorryControllerRoute):
+
+    '''Route that renders the description of one job with the job template.'''
+
+    http_method = 'GET'
+    path = '/1.0/job-html/<job_id:int>'
+
+    def run(self, **kwargs):
+        logging.info('%s %s called', self.http_method, self.path)
+        wanted_id = int(kwargs['job_id'])
+
+        statedb = self.open_statedb()
+        shower = JobShower()
+        job_fields = shower.get_job_as_json(statedb, wanted_id)
+        return bottle.template(self._templates['job'], **job_fields)
diff --git a/lorrycontroller/showlorry.py b/lorrycontroller/showlorry.py
new file mode 100644
index 0000000..fc336a5
--- /dev/null
+++ b/lorrycontroller/showlorry.py
@@ -0,0 +1,86 @@
+# Copyright (C) 2014 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+import json
+import logging
+import time
+import urlparse
+
+import bottle
+
+import lorrycontroller
+
+
+class ShowLorry(lorrycontroller.LorryControllerRoute):
+
+    '''Route that returns STATEDB's information about one Lorry.
+
+    Responds with HTTP status 404 if the Lorry is not in STATEDB.
+
+    '''
+
+    http_method = 'GET'
+    path = '/1.0/lorry/<path:path>'
+
+    def run(self, **kwargs):
+        logging.info('%s %s called', self.http_method, self.path)
+        statedb = self.open_statedb()
+        try:
+            lorry_info = statedb.get_lorry_info(kwargs['path'])
+        except lorrycontroller.LorryNotFoundError as e:
+            bottle.abort(404, str(e))
+        return lorry_info
+
+
+class ShowLorryHTML(lorrycontroller.LorryControllerRoute):
+
+    '''Route that renders one Lorry's information with the lorry template.
+
+    Responds with HTTP status 404 if the Lorry is not in STATEDB.
+
+    '''
+
+    http_method = 'GET'
+    path = '/1.0/lorry-html/<path:path>'
+
+    def run(self, **kwargs):
+        logging.info('%s %s called', self.http_method, self.path)
+        statedb = self.open_statedb()
+        try:
+            lorry_info = statedb.get_lorry_info(kwargs['path'])
+        except lorrycontroller.LorryNotFoundError as e:
+            bottle.abort(404, str(e))
+
+        renderer = lorrycontroller.StatusRenderer()
+        shower = lorrycontroller.JobShower()
+
+        # The stored text is a JSON dict with a single entry, mapping
+        # the Lorry's path to its spec.
+        lorry_obj = json.loads(lorry_info['text']).values()[0]
+        lorry_info['url'] = lorry_obj['url']
+
+        lorry_info['interval_nice'] = renderer.format_secs_nicely(
+            lorry_info['interval'])
+
+        lorry_info['last_run_nice'] = time.strftime(
+            '%Y-%m-%d %H:%M:%S UTC',
+            time.gmtime(lorry_info['last_run']))
+
+        lorry_info['disk_usage_nice'] = shower.format_bytesize(
+            lorry_info['disk_usage'])
+
+        now = statedb.get_current_time()
+
+        due = lorry_info['last_run'] + lorry_info['interval']
+        lorry_info['due_nice'] = renderer.format_due_nicely(due, now)
+
+        timestamp = time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime(now))
+
+        parts = urlparse.urlparse(bottle.request.url)
+        # Strip the port, if there is one. Unpacking the result of
+        # split() into two names failed with ValueError when the
+        # request URL had no explicit port.
+        host = parts.netloc.split(':', 1)[0]
+        http_server_root = urlparse.urlunparse(
+            (parts.scheme, host, '', '', '', ''))
+
+        return bottle.template(
+            self._templates['lorry'],
+            http_server_root=http_server_root,
+            lorry=lorry_info,
+            timestamp=timestamp)
diff --git a/lorrycontroller/startstopqueue.py b/lorrycontroller/startstopqueue.py
new file mode 100644
index 0000000..fe36a43
--- /dev/null
+++ b/lorrycontroller/startstopqueue.py
@@ -0,0 +1,55 @@
+# Copyright (C) 2014 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+import logging
+
+import bottle
+
+import lorrycontroller
+
+
+class StartQueue(lorrycontroller.LorryControllerRoute):
+
+    '''Route that marks the run-queue as running in STATEDB.
+
+    If the request has a ``redirect`` form field, the client is
+    redirected there instead of getting the plain text reply.
+
+    '''
+
+    http_method = 'POST'
+    path = '/1.0/start-queue'
+
+    def run(self, **kwargs):
+        logging.info('%s %s called', self.http_method, self.path)
+        statedb = self.open_statedb()
+        with statedb:
+            statedb.set_running_queue(True)
+
+        forms = bottle.request.forms
+        if 'redirect' in forms:
+            bottle.redirect(forms.redirect)
+
+        return 'Queue set to run'
+
+
+class StopQueue(lorrycontroller.LorryControllerRoute):
+
+    '''Route that marks the run-queue as not running in STATEDB.
+
+    If the request has a ``redirect`` form field, the client is
+    redirected there instead of getting the plain text reply.
+
+    '''
+
+    http_method = 'POST'
+    path = '/1.0/stop-queue'
+
+    def run(self, **kwargs):
+        logging.info('%s %s called', self.http_method, self.path)
+        statedb = self.open_statedb()
+        with statedb:
+            statedb.set_running_queue(False)
+
+        forms = bottle.request.forms
+        if 'redirect' in forms:
+            bottle.redirect(forms.redirect)
+
+        return 'Queue set to not run'
diff --git a/lorrycontroller/statedb.py b/lorrycontroller/statedb.py
new file mode 100644
index 0000000..1f18189
--- /dev/null
+++ b/lorrycontroller/statedb.py
@@ -0,0 +1,581 @@
+# Copyright (C) 2014 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+import logging
+import os
+import sqlite3
+import time
+
+import lorrycontroller
+
+
+class LorryNotFoundError(Exception):
+
+    '''Raised when STATEDB has no Lorry spec with the requested path.'''
+
+    def __init__(self, path):
+        # The argument is wrapped in a one-element tuple so that a
+        # tuple-valued path is shown with %r instead of being unpacked
+        # as several format arguments (which would raise TypeError).
+        Exception.__init__(
+            self, 'Lorry with path %r not found in STATEDB' % (path,))
+
+
+class WrongNumberLorriesRunningJob(Exception):
+
+    '''Raised when a job id is not attached to exactly one Lorry spec.'''
+
+    def __init__(self, job_id, row_count):
+        message = (
+            'STATEDB has %d Lorry specs running job %r, should be 1' %
+            (row_count, job_id))
+        Exception.__init__(self, message)
+
+
+class TroveNotFoundError(Exception):
+
+    '''Raised when STATEDB has no row for the requested Trove host.'''
+
+    def __init__(self, trovehost):
+        # The argument is wrapped in a one-element tuple so that a
+        # tuple-valued trovehost cannot be unpacked as several format
+        # arguments (which would raise TypeError).
+        Exception.__init__(
+            self, 'Trove %s not known in STATEDB' % (trovehost,))
+
+
+class StateDB(object):
+
+    '''A wrapper around raw Sqlite for STATEDB.
+
+    The object is a context manager: ``with statedb:`` starts a
+    transaction, which is committed if the block ends normally and
+    rolled back if an exception escapes from it. Every method that
+    modifies the database asserts that it is called inside such a
+    block. The methods that only read do not make that check.
+
+    '''
+
+    def __init__(self, filename):
+        '''Remember the database filename; nothing is opened yet.'''
+        logging.debug('Creating StateDB instance for %r', filename)
+        self._filename = filename
+        self._conn = None
+        self._transaction_started = None
+
+    def _open(self):
+        '''Connect to the database unless already connected.
+
+        If the database file did not exist before connecting, all the
+        tables are created.
+
+        '''
+
+        # Column names and SQL types of the lorries table, in table
+        # order. make_lorry_info_from_row relies on this order matching
+        # the order of columns returned by SELECT *.
+        self.lorries_fields = [
+            ('path', 'TEXT PRIMARY KEY'),
+            ('text', 'TEXT'),
+            ('from_trovehost', 'TEXT'),
+            ('from_path', 'TEXT'),
+            ('running_job', 'INT'),
+            ('kill_job', 'INT'),
+            ('last_run', 'INT'),
+            ('interval', 'INT'),
+            ('lorry_timeout', 'INT'),
+            ('disk_usage', 'INT'),
+        ]
+        # Columns stored as integers but returned to callers as bool.
+        self.lorries_booleans = [
+            'kill_job',
+        ]
+
+        if self._conn is None:
+            existed = os.path.exists(self._filename)
+            logging.debug(
+                'Connecting to %r (existed=%r)', self._filename, existed)
+            self._conn = sqlite3.connect(
+                self._filename,
+                timeout=100000,
+                isolation_level="IMMEDIATE")
+            logging.debug('New connection is %r', self._conn)
+            if not existed:
+                self._initialise_tables()
+
+    def _initialise_tables(self):
+        '''Create all the tables of a new, empty database.'''
+        logging.debug('Initialising tables in database')
+        c = self._conn.cursor()
+
+        # Table for storing database schema version.
+        c.execute('CREATE TABLE version (running INT)')
+        c.execute('INSERT INTO version VALUES (1)')
+
+        # Table for holding the "are we scheduling jobs" value.
+        c.execute('CREATE TABLE running_queue (running INT)')
+        c.execute('INSERT INTO running_queue VALUES (1)')
+
+        # Table for known remote Troves.
+
+        c.execute(
+            'CREATE TABLE troves ('
+            'trovehost TEXT PRIMARY KEY, '
+            'protocol TEXT, '
+            'username TEXT, '
+            'password TEXT, '
+            'lorry_interval INT, '
+            'lorry_timeout INT, '
+            'ls_interval INT, '
+            'ls_last_run INT, '
+            'prefixmap TEXT, '
+            'ignore TEXT '
+            ')')
+
+        # Table for all the known lorries (the "run queue").
+
+        fields_sql = ', '.join(
+            '%s %s' % (name, info) for name, info in self.lorries_fields
+        )
+
+        c.execute('CREATE TABLE lorries (%s)' % fields_sql)
+
+        # Table for the next available job id.
+        c.execute('CREATE TABLE next_job_id (job_id INT)')
+        c.execute('INSERT INTO next_job_id VALUES (1)')
+
+        # Table of all jobs (running or not), and their info.
+        c.execute(
+            'CREATE TABLE jobs ('
+            'job_id INT PRIMARY KEY, '
+            'host TEXT, '
+            'pid INT, '
+            'started INT, '
+            'ended INT, '
+            'path TEXT, '
+            'exit TEXT, '
+            'disk_usage INT, '
+            'output TEXT)')
+
+        # Table for holding max number of jobs running at once. If no
+        # rows, there is no limit. Otherwise, there is exactly one
+        # row.
+        c.execute('CREATE TABLE max_jobs (max_jobs INT)')
+
+        # A table to give the current pretended time, if one is set.
+        # This table is either empty, in which case time.time() is
+        # used, or has one row, which is used for the current time.
+        c.execute('CREATE TABLE time (now INT)')
+
+        # Stupid table we can always write to to trigger the start of
+        # a transaction.
+        c.execute('CREATE TABLE stupid (value INT)')
+
+        # Done.
+        self._conn.commit()
+        logging.debug('Finished initialising tables in STATEDB')
+
+    @property
+    def in_transaction(self):
+        '''True between __enter__ and __exit__.'''
+        return self._transaction_started is not None
+
+    def __enter__(self):
+        '''Start a transaction and return this object.'''
+        logging.debug('Entering context manager (%r)', self)
+        assert not self.in_transaction
+        self._transaction_started = time.time()
+        try:
+            self._open()
+            c = self._conn.cursor()
+            # Writing to the database is what makes the transaction
+            # actually start.
+            c.execute('INSERT INTO stupid VALUES (1)')
+        except BaseException:
+            # __exit__ is not called when __enter__ raises, so the
+            # "in transaction" marker must be cleared here; otherwise
+            # this object would claim to be in a transaction forever.
+            self._transaction_started = None
+            raise
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        '''Commit or roll back the transaction, then disconnect.
+
+        Returns False, so an exception raised inside the with block
+        is propagated to the caller after the rollback.
+
+        '''
+
+        logging.debug('Exiting context manager (%r)', self)
+        assert self.in_transaction
+        if exc_type is None:
+            logging.debug(
+                'Committing transaction in __exit__ (%r)', self._conn)
+            c = self._conn.cursor()
+            c.execute('DELETE FROM stupid')
+            self._conn.commit()
+        else:
+            logging.error(
+                'Rolling back transaction in __exit__ (%r)',
+                self._conn,
+                exc_info=(exc_type, exc_val, exc_tb))
+            self._conn.rollback()
+        self._conn.close()
+        self._conn = None
+        logging.debug(
+            'Transaction duration: %r',
+            time.time() - self._transaction_started)
+        self._transaction_started = None
+        return False
+
+    def get_cursor(self):
+        '''Return a new cursor.'''
+        self._open()
+        return self._conn.cursor()
+
+    def get_running_queue(self):
+        '''Return whether new jobs are being scheduled.
+
+        Returns None if the running_queue table has no rows.
+
+        '''
+
+        c = self.get_cursor()
+        for (running,) in c.execute('SELECT running FROM running_queue'):
+            return bool(running)
+
+    def set_running_queue(self, new_status):
+        '''Set whether new jobs are being scheduled.'''
+        logging.debug('StateDB.set_running_queue(%r) called', new_status)
+        assert self.in_transaction
+        if new_status:
+            new_value = 1
+        else:
+            new_value = 0
+        # The parameters must be a sequence with one item per
+        # placeholder, hence the one-element tuple.
+        self.get_cursor().execute(
+            'UPDATE running_queue SET running = ?', (new_value,))
+
+    def get_trove_info(self, trovehost):
+        '''Return a dict with everything known about a Trove.
+
+        Raises lorrycontroller.TroveNotFoundError if there is no row
+        for trovehost.
+
+        '''
+
+        c = self.get_cursor()
+        c.execute(
+            'SELECT protocol, username, password, lorry_interval, '
+            'lorry_timeout, ls_interval, ls_last_run, '
+            'prefixmap, ignore '
+            'FROM troves WHERE trovehost IS ?',
+            (trovehost,))
+        row = c.fetchone()
+        if row is None:
+            raise lorrycontroller.TroveNotFoundError(trovehost)
+        return {
+            'trovehost': trovehost,
+            'protocol': row[0],
+            'username': row[1],
+            'password': row[2],
+            'lorry_interval': row[3],
+            'lorry_timeout': row[4],
+            'ls_interval': row[5],
+            'ls_last_run': row[6],
+            'prefixmap': row[7],
+            'ignore': row[8],
+        }
+
+    def add_trove(self, trovehost=None, protocol=None, username=None,
+                  password=None, lorry_interval=None,
+                  lorry_timeout=None, ls_interval=None,
+                  prefixmap=None, ignore=None):
+        '''Add a Trove, or update the settings of a known one.
+
+        All arguments except username and password are required, even
+        though they are passed as keyword arguments. A new Trove gets
+        ls_last_run set to 0.
+
+        '''
+
+        logging.debug(
+            'StateDB.add_trove(%r,%r,%r,%r,%r,%r) called',
+            trovehost, lorry_interval, lorry_timeout, ls_interval,
+            prefixmap, ignore)
+
+        assert trovehost is not None
+        assert protocol is not None
+        assert lorry_interval is not None
+        assert lorry_timeout is not None
+        assert ls_interval is not None
+        assert prefixmap is not None
+        assert ignore is not None
+        assert self.in_transaction
+
+        try:
+            self.get_trove_info(trovehost)
+        except lorrycontroller.TroveNotFoundError:
+            c = self.get_cursor()
+            c.execute(
+                'INSERT INTO troves '
+                '(trovehost, protocol, username, password, '
+                'lorry_interval, lorry_timeout, '
+                'ls_interval, ls_last_run, '
+                'prefixmap, ignore) '
+                'VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
+                (trovehost, protocol, username, password,
+                 lorry_interval, lorry_timeout, ls_interval, 0,
+                 prefixmap, ignore))
+        else:
+            # NOTE(review): username and password of an already known
+            # Trove are left as they were -- confirm this is intended.
+            c = self.get_cursor()
+            c.execute(
+                'UPDATE troves '
+                'SET lorry_interval=?, lorry_timeout=?, ls_interval=?, '
+                'prefixmap=?, ignore=?, protocol=? '
+                'WHERE trovehost IS ?',
+                (lorry_interval, lorry_timeout, ls_interval, prefixmap,
+                 ignore, protocol, trovehost))
+
+    def remove_trove(self, trovehost):
+        '''Forget a Trove. Its Lorry specs are not touched.'''
+        logging.debug('StateDB.remove_trove(%r) called', trovehost)
+        assert self.in_transaction
+        c = self.get_cursor()
+        c.execute('DELETE FROM troves WHERE trovehost=?', (trovehost,))
+
+    def get_troves(self):
+        '''Return list of the hostnames of all known Troves.'''
+        c = self.get_cursor()
+        c.execute('SELECT trovehost FROM troves')
+        return [row[0] for row in c.fetchall()]
+
+    def set_trove_ls_last_run(self, trovehost, ls_last_run):
+        '''Record when the repositories of a Trove were last listed.'''
+        logging.debug(
+            'StateDB.set_trove_ls_last_run(%r,%r) called',
+            trovehost, ls_last_run)
+        assert self.in_transaction
+        c = self.get_cursor()
+        c.execute(
+            'UPDATE troves SET ls_last_run=? WHERE trovehost=?',
+            (ls_last_run, trovehost))
+
+    def make_lorry_info_from_row(self, row):
+        '''Turn a full row of the lorries table into a dict.
+
+        The keys are the column names in self.lorries_fields; the
+        columns in self.lorries_booleans are converted to bool.
+
+        '''
+
+        result = dict((t[0], row[i]) for i, t in enumerate(self.lorries_fields))
+        for field in self.lorries_booleans:
+            result[field] = bool(result[field])
+        return result
+
+    def get_lorry_info(self, path):
+        '''Return a dict describing the Lorry spec with the given path.
+
+        Raises lorrycontroller.LorryNotFoundError if there is none.
+
+        '''
+
+        c = self.get_cursor()
+        c.execute('SELECT * FROM lorries WHERE path IS ?', (path,))
+        row = c.fetchone()
+        if row is None:
+            raise lorrycontroller.LorryNotFoundError(path)
+        return self.make_lorry_info_from_row(row)
+
+    def get_all_lorries_info(self):
+        '''Return info dicts for all Lorry specs, earliest due first.'''
+        c = self.get_cursor()
+        c.execute('SELECT * FROM lorries ORDER BY (last_run + interval)')
+        return [self.make_lorry_info_from_row(row) for row in c.fetchall()]
+
+    def get_lorries_paths(self):
+        '''Return the paths of all Lorry specs, earliest due first.'''
+        c = self.get_cursor()
+        return [
+            row[0]
+            for row in c.execute(
+                'SELECT path FROM lorries ORDER BY (last_run + interval)')]
+
+    def get_lorries_for_trove(self, trovehost):
+        '''Return the paths of the Lorry specs that come from a Trove.'''
+        c = self.get_cursor()
+        c.execute(
+            'SELECT path FROM lorries WHERE from_trovehost IS ?', (trovehost,))
+        return [row[0] for row in c.fetchall()]
+
+    def add_to_lorries(self, path=None, text=None, from_trovehost=None,
+                       from_path=None, interval=None, timeout=None):
+        '''Add a Lorry spec, or update a known one.
+
+        All arguments are required, even though they are passed as
+        keyword arguments. A new spec gets last_run and kill_job set
+        to 0 and has no running job; for a known spec those values are
+        kept.
+
+        '''
+
+        logging.debug(
+            'StateDB.add_to_lorries('
+            'path=%r, text=%r, from_trovehost=%r, from_path=%r, '
+            'interval=%r, timeout=%r) called',
+            path,
+            text,
+            from_trovehost,
+            from_path,
+            interval,
+            timeout)
+
+        assert path is not None
+        assert text is not None
+        assert from_trovehost is not None
+        assert from_path is not None
+        assert interval is not None
+        assert timeout is not None
+        assert self.in_transaction
+
+        try:
+            self.get_lorry_info(path)
+        except lorrycontroller.LorryNotFoundError:
+            c = self.get_cursor()
+            c.execute(
+                'INSERT INTO lorries '
+                '(path, text, from_trovehost, from_path, last_run, interval, '
+                'lorry_timeout, running_job, kill_job) '
+                'VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)',
+                (path, text, from_trovehost, from_path, 0,
+                 interval, timeout, None, 0))
+        else:
+            c = self.get_cursor()
+            c.execute(
+                'UPDATE lorries '
+                'SET text=?, from_trovehost=?, from_path=?, interval=?, '
+                'lorry_timeout=? '
+                'WHERE path IS ?',
+                (text, from_trovehost, from_path, interval, timeout, path))
+
+    def remove_lorry(self, path):
+        '''Remove the Lorry spec with the given path, if any.'''
+        logging.debug('StateDB.remove_lorry(%r) called', path)
+        assert self.in_transaction
+        c = self.get_cursor()
+        c.execute('DELETE FROM lorries WHERE path IS ?', (path,))
+
+    def remove_lorries_for_trovehost(self, trovehost):
+        '''Remove all Lorry specs that come from the given Trove.'''
+        logging.debug(
+            'StateDB.remove_lorries_for_trovehost(%r) called', trovehost)
+        assert self.in_transaction
+        c = self.get_cursor()
+        c.execute('DELETE FROM lorries WHERE from_trovehost IS ?', (trovehost,))
+
+    def set_running_job(self, path, job_id):
+        '''Record which job (or None) is running a Lorry spec.'''
+        logging.debug(
+            'StateDB.set_running_job(%r, %r) called', path, job_id)
+        assert self.in_transaction
+        c = self.get_cursor()
+        c.execute(
+            'UPDATE lorries SET running_job=? WHERE path=?',
+            (job_id, path))
+
+    def find_lorry_running_job(self, job_id):
+        '''Return the path of the Lorry spec that job_id is running.
+
+        Raises lorrycontroller.WrongNumberLorriesRunningJob unless
+        exactly one Lorry spec has job_id as its running job.
+
+        '''
+
+        c = self.get_cursor()
+        c.execute(
+            'SELECT path FROM lorries WHERE running_job IS ?',
+            (job_id,))
+        rows = c.fetchall()
+        if len(rows) != 1:
+            raise lorrycontroller.WrongNumberLorriesRunningJob(job_id, len(rows))
+        return rows[0][0]
+
+    def get_running_jobs(self):
+        '''Return the ids of all jobs currently attached to a spec.'''
+        c = self.get_cursor()
+        c.execute(
+            'SELECT running_job FROM lorries WHERE running_job IS NOT NULL')
+        return [row[0] for row in c.fetchall()]
+
+    def set_kill_job(self, path, value):
+        '''Set or clear the kill_job flag of a Lorry spec.'''
+        logging.debug('StateDB.set_kill_job(%r, %r) called', path, value)
+        assert self.in_transaction
+        # The flag is stored as an integer, 1 or 0.
+        if value:
+            value = 1
+        else:
+            value = 0
+        c = self.get_cursor()
+        c.execute(
+            'UPDATE lorries SET kill_job=? WHERE path=?',
+            (value, path))
+
+    def set_lorry_last_run(self, path, last_run):
+        '''Record when a Lorry spec was last run.'''
+        logging.debug(
+            'StateDB.set_lorry_last_run(%r, %r) called', path, last_run)
+        assert self.in_transaction
+        c = self.get_cursor()
+        c.execute(
+            'UPDATE lorries SET last_run=? WHERE path=?',
+            (last_run, path))
+
+    def set_lorry_disk_usage(self, path, disk_usage):
+        '''Record the disk usage of a Lorry spec.'''
+        logging.debug(
+            'StateDB.set_lorry_disk_usage(%r, %r) called', path, disk_usage)
+        assert self.in_transaction
+        c = self.get_cursor()
+        c.execute(
+            'UPDATE lorries SET disk_usage=? WHERE path=?',
+            (disk_usage, path))
+
+    def get_next_job_id(self):
+        '''Return an unused job id and advance the stored counter.'''
+        logging.debug('StateDB.get_next_job_id called')
+        assert self.in_transaction
+        c = self.get_cursor()
+        c.execute('SELECT job_id FROM next_job_id')
+        row = c.fetchone()
+        job_id = row[0]
+        c.execute('UPDATE next_job_id SET job_id=?', (job_id + 1,))
+        return job_id
+
+    def get_job_ids(self):
+        '''Return the ids of all jobs in the jobs table.'''
+        c = self.get_cursor()
+        c.execute('SELECT job_id FROM jobs')
+        return [row[0] for row in c.fetchall()]
+
+    def add_new_job(self, job_id, host, pid, path, started):
+        '''Add a row for a new job to the jobs table.
+
+        The columns not given here (ended, exit, disk_usage, output)
+        are left unset.
+
+        '''
+
+        logging.debug(
+            'StateDB.add_new_job(%r, %r, %r, %r, %r) called',
+            job_id, host, pid, path, started)
+        assert self.in_transaction
+        c = self.get_cursor()
+        c.execute(
+            'INSERT INTO jobs (job_id, host, pid, path, started) '
+            'VALUES (?, ?, ?, ?, ?)',
+            (job_id, host, pid, path, started))
+
+    # NOTE(review): the get_job_* methods below index the fetched row
+    # without checking it for None, so an unknown job_id results in a
+    # TypeError rather than a dedicated exception.
+
+    def get_job_minion_host(self, job_id):
+        '''Return the host column of a job.'''
+        c = self.get_cursor()
+        c.execute(
+            'SELECT host FROM jobs WHERE job_id IS ?',
+            (job_id,))
+        row = c.fetchone()
+        return row[0]
+
+    def get_job_minion_pid(self, job_id):
+        '''Return the pid column of a job.'''
+        c = self.get_cursor()
+        c.execute(
+            'SELECT pid FROM jobs WHERE job_id IS ?',
+            (job_id,))
+        row = c.fetchone()
+        return row[0]
+
+    def get_job_path(self, job_id):
+        '''Return the path column of a job.'''
+        c = self.get_cursor()
+        c.execute(
+            'SELECT path FROM jobs WHERE job_id IS ?',
+            (job_id,))
+        row = c.fetchone()
+        return row[0]
+
+    def get_job_started_and_ended(self, job_id):
+        '''Return the (started, ended) columns of a job as a tuple.'''
+        c = self.get_cursor()
+        c.execute(
+            'SELECT started, ended FROM jobs WHERE job_id IS ?',
+            (job_id,))
+        row = c.fetchone()
+        return row[0], row[1]
+
+    def get_job_exit(self, job_id):
+        '''Return the exit column of a job.'''
+        c = self.get_cursor()
+        c.execute(
+            'SELECT exit FROM jobs WHERE job_id IS ?',
+            (job_id,))
+        row = c.fetchone()
+        return row[0]
+
+    def set_job_exit(self, job_id, exit, ended, disk_usage):
+        '''Record the exit, ended and disk_usage columns of a job.'''
+        logging.debug(
+            'StateDB.set_job_exit(%r, %r, %r, %r) called',
+            job_id, exit, ended, disk_usage)
+        assert self.in_transaction
+        c = self.get_cursor()
+        c.execute(
+            'UPDATE jobs SET exit=?, ended=?, disk_usage=? '
+            'WHERE job_id IS ?',
+            (exit, ended, disk_usage, job_id))
+
+    def get_job_disk_usage(self, job_id):
+        '''Return the disk_usage column of a job.'''
+        c = self.get_cursor()
+        c.execute('SELECT disk_usage FROM jobs WHERE job_id IS ?', (job_id,))
+        row = c.fetchone()
+        return row[0]
+
+    def get_job_output(self, job_id):
+        '''Return the output column of a job (None if never set).'''
+        c = self.get_cursor()
+        c.execute(
+            'SELECT output FROM jobs WHERE job_id IS ?',
+            (job_id,))
+        row = c.fetchone()
+        return row[0]
+
+    def append_to_job_output(self, job_id, more_output):
+        '''Append more_output to the stored output of a job.'''
+        logging.debug('StateDB.append_to_job_output(%r,..) called', job_id)
+        assert self.in_transaction
+
+        # A job with no output yet has NULL in the column.
+        output = self.get_job_output(job_id) or ''
+
+        c = self.get_cursor()
+        c.execute(
+            'UPDATE jobs SET output=? WHERE job_id=?',
+            (output + more_output, job_id))
+
+    def remove_job(self, job_id):
+        '''Remove a job from the jobs table.'''
+        logging.debug('StateDB.remove_job(%r) called', job_id)
+        assert self.in_transaction
+        c = self.get_cursor()
+        c.execute('DELETE FROM jobs WHERE job_id = ?', (job_id,))
+
+    def set_pretend_time(self, now):
+        '''Make get_current_time return int(now) from here on.'''
+        logging.debug('StateDB.set_pretend_time(%r) called', now)
+        assert self.in_transaction
+        c = self.get_cursor()
+        c.execute('DELETE FROM time')
+        c.execute('INSERT INTO time (now) VALUES (?)', (int(now),))
+
+    def get_current_time(self):
+        '''Return the pretended time if one is set, else time.time().'''
+        c = self.get_cursor()
+        c.execute('SELECT now FROM time')
+        row = c.fetchone()
+        if row:
+            return row[0]
+        else:
+            return time.time()
+
+    def get_max_jobs(self):
+        '''Return the maximum number of jobs, or None for no limit.'''
+        c = self.get_cursor()
+        c.execute('SELECT max_jobs FROM max_jobs')
+        row = c.fetchone()
+        if row:
+            logging.info('returning max_jobs as %r', row[0])
+            return row[0]
+        logging.info('returning max_jobs as None')
+        return None
+
+    def set_max_jobs(self, max_jobs):
+        '''Set the maximum number of jobs; None removes the limit.'''
+        logging.debug('StateDB.set_max_jobs(%r) called', max_jobs)
+        assert self.in_transaction
+        c = self.get_cursor()
+        c.execute('DELETE FROM max_jobs')
+        if max_jobs is not None:
+            c.execute(
+                'INSERT INTO max_jobs (max_jobs) VALUES (?)', (max_jobs,))
diff --git a/lorrycontroller/static.py b/lorrycontroller/static.py
new file mode 100644
index 0000000..a8ba938
--- /dev/null
+++ b/lorrycontroller/static.py
@@ -0,0 +1,36 @@
+# Copyright (C) 2014 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+import logging
+
+import bottle
+
+import lorrycontroller
+
+
+class StaticFile(lorrycontroller.LorryControllerRoute):
+
+    '''Route that serves one file from the static files directory.'''
+
+    # Note that the path below must match what lighttpd (running on a
+    # different port than us) would accept.
+
+    http_method = 'GET'
+    path = '/lc-static/<filename>'
+
+    def run(self, **kwargs):
+        '''Return the response for the file named in the URL.
+
+        The file is looked up, via bottle.static_file, under the
+        directory given by the 'static-files' application setting.
+
+        '''
+
+        logging.info('%s %s called', self.http_method, self.path)
+        requested = kwargs['filename']
+        static_root = self.app_settings['static-files']
+        return bottle.static_file(requested, static_root)
diff --git a/lorrycontroller/status.py b/lorrycontroller/status.py
new file mode 100644
index 0000000..5e011d5
--- /dev/null
+++ b/lorrycontroller/status.py
@@ -0,0 +1,162 @@
+# Copyright (C) 2014 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+import logging
+import os
+import time
+
+import bottle
+
+import lorrycontroller
+
+
+class StatusRenderer(object):
+
+    '''Helper class for rendering service status as JSON/HTML'''
+
+    def get_status_as_dict(self, statedb, work_directory):
+        '''Collect the service status into a dict.
+
+        statedb is the state database to query; work_directory is a
+        path on the filesystem whose free space is reported.
+
+        '''
+
+        now = statedb.get_current_time()
+        status = {
+            'running_queue': statedb.get_running_queue(),
+            'timestamp':
+                time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime(now)),
+            'run_queue': self.get_run_queue(statedb),
+            'troves': self.get_troves(statedb),
+            'warning_msg': '',
+            'max_jobs': self.get_max_jobs(statedb),
+        }
+        status.update(self.get_free_disk_space(work_directory))
+        return status
+
+    def render_status_as_html(self, template, status):
+        '''Render the status dict with the given bottle template.'''
+        return bottle.template(template, **status)
+
+    def write_status_as_html(self, template, status, filename):
+        '''Render the status as HTML and write it to filename.
+
+        A failure to write is not raised; it is reported by setting
+        status['warning_msg'] instead.
+
+        '''
+
+        html = self.render_status_as_html(template, status)
+        try:
+            with open(filename, 'w') as f:
+                f.write(html)
+        except (OSError, IOError) as e:
+            status['warning_msg'] = (
+                'ERROR WRITING STATUS HTML TO DISK: %s' % str(e))
+
+    def get_free_disk_space(self, dirname):
+        '''Return free space on the filesystem holding dirname.
+
+        The result is a dict giving the space in bytes, in whole MiB
+        and in whole GiB.
+
+        '''
+
+        result = os.statvfs(dirname)
+        free_bytes = result.f_bavail * result.f_bsize
+        # Floor division, so the values are whole numbers regardless
+        # of how the interpreter treats the / operator.
+        return {
+            'disk_free': free_bytes,
+            'disk_free_mib': free_bytes // 1024**2,
+            'disk_free_gib': free_bytes // 1024**3,
+        }
+
+    def get_run_queue(self, statedb):
+        '''Return all Lorry infos, with human readable fields added.
+
+        Each dict gets 'interval_nice' and 'due_nice' in addition to
+        what statedb.get_all_lorries_info returns.
+
+        '''
+
+        lorries = statedb.get_all_lorries_info()
+        now = statedb.get_current_time()
+        for lorry in lorries:
+            due = lorry['last_run'] + lorry['interval']
+            lorry['interval_nice'] = self.format_secs_nicely(lorry['interval'])
+            lorry['due_nice'] = self.format_due_nicely(due, now)
+        return lorries
+
+    def format_due_nicely(self, due, now):
+        '''Describe when due happens relative to now.
+
+        Returns 'now' if due is not in the future, otherwise 'in '
+        followed by the nicely formatted remaining time.
+
+        '''
+
+        now = int(now)
+        if due <= now:
+            return 'now'
+        else:
+            nice = self.format_secs_nicely(due - now)
+            return 'in %s' % nice
+
+    def format_secs_nicely(self, secs):
+        '''Format a duration in seconds for humans.
+
+        At most the two most significant units are shown: hours and
+        minutes, or minutes and seconds, or just seconds. A duration
+        that is zero or negative is formatted as 'now'.
+
+        '''
+
+        if secs <= 0:
+            return 'now'
+
+        result = []
+
+        # divmod floors, so hours and mins are whole numbers even if
+        # secs is a float; plain / would give fractional hours, which
+        # would then be shown as '0 h'.
+        hours, secs = divmod(secs, 3600)
+        mins, secs = divmod(secs, 60)
+
+        if hours > 0:
+            result.append('%d h' % hours)
+            if mins > 0:
+                result.append('%d min' % mins)
+        elif mins > 0:
+            result.append('%d min' % mins)
+            if secs > 0:
+                result.append('%d s' % secs)
+        else:
+            result.append('%d s' % secs)
+
+        return ' '.join(result)
+
+    def get_troves(self, statedb):
+        '''Return info dicts for all Troves, with nice fields added.
+
+        Each dict gets 'ls_interval_nice' and 'ls_due_nice' in addition
+        to what statedb.get_trove_info returns.
+
+        '''
+
+        troves = []
+        for trovehost in statedb.get_troves():
+            trove_info = statedb.get_trove_info(trovehost)
+
+            trove_info['ls_interval_nice'] = self.format_secs_nicely(
+                trove_info['ls_interval'])
+
+            ls_due = trove_info['ls_last_run'] + trove_info['ls_interval']
+            now = int(statedb.get_current_time())
+            trove_info['ls_due_nice'] = self.format_due_nicely(ls_due, now)
+
+            troves.append(trove_info)
+        return troves
+
+    def get_max_jobs(self, statedb):
+        '''Return the job limit, or 'unlimited' if there is none.'''
+        max_jobs = statedb.get_max_jobs()
+        if max_jobs is None:
+            return 'unlimited'
+        return max_jobs
+
+
+class Status(lorrycontroller.LorryControllerRoute):
+
+    '''Route that returns the service status as a dict.
+
+    As a side effect the HTML status page on disk is rewritten.
+
+    '''
+
+    http_method = 'GET'
+    path = '/1.0/status'
+
+    def run(self, **kwargs):
+        '''Gather the status, refresh the HTML file, return the dict.'''
+        logging.info('%s %s called', self.http_method, self.path)
+
+        renderer = StatusRenderer()
+        db = self.open_statedb()
+
+        # NOTE(review): the 'statedb' setting is what gets passed as
+        # the directory to measure free disk space for -- confirm that
+        # is the filesystem meant to be reported.
+        status = renderer.get_status_as_dict(
+            db, self.app_settings['statedb'])
+
+        template = self._templates['status']
+        html_filename = self.app_settings['status-html']
+        renderer.write_status_as_html(template, status, html_filename)
+
+        return status
+
+
+class StatusHTML(lorrycontroller.LorryControllerRoute):
+
+    '''Route that returns the service status as an HTML page.
+
+    As a side effect the HTML status page on disk is rewritten.
+
+    '''
+
+    http_method = 'GET'
+    path = '/1.0/status-html'
+
+    def run(self, **kwargs):
+        '''Gather the status, refresh the HTML file, return the HTML.'''
+        logging.info('%s %s called', self.http_method, self.path)
+
+        renderer = StatusRenderer()
+        db = self.open_statedb()
+
+        # NOTE(review): the 'statedb' setting is what gets passed as
+        # the directory to measure free disk space for -- confirm that
+        # is the filesystem meant to be reported.
+        status = renderer.get_status_as_dict(
+            db, self.app_settings['statedb'])
+
+        template = self._templates['status']
+        html_filename = self.app_settings['status-html']
+        renderer.write_status_as_html(template, status, html_filename)
+
+        # Rendered again rather than reusing the text written to disk,
+        # so that a warning set by a failed write is part of the page.
+        return renderer.render_status_as_html(template, status)
diff --git a/lorrycontroller/stopjob.py b/lorrycontroller/stopjob.py
new file mode 100644
index 0000000..947f733
--- /dev/null
+++ b/lorrycontroller/stopjob.py
@@ -0,0 +1,41 @@
+# Copyright (C) 2014 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+import logging
+
+import bottle
+
+import lorrycontroller
+
+
+class StopJob(lorrycontroller.LorryControllerRoute):
+
+    '''Route that requests a running job to be killed.'''
+
+    http_method = 'POST'
+    path = '/1.0/stop-job'
+
+    def run(self, **kwargs):
+        '''Set the kill flag on the Lorry spec the job is running.
+
+        The job id is taken from the ``job_id`` form field. Returns
+        the info dict of the Lorry spec. If the job is not attached to
+        exactly one Lorry spec, the request is aborted with HTTP
+        status 409.
+
+        '''
+
+        logging.info('%s %s called', self.http_method, self.path)
+
+        db = self.open_statedb()
+        with db:
+            job_id = bottle.request.forms.job_id
+
+            try:
+                lorry_path = db.find_lorry_running_job(job_id)
+            except lorrycontroller.WrongNumberLorriesRunningJob:
+                logging.warning(
+                    "Tried to kill job %s which isn't running", job_id)
+                bottle.abort(409, 'Job is not currently running')
+
+            db.set_kill_job(lorry_path, True)
+            return db.get_lorry_info(lorry_path)
diff --git a/lorrycontroller/workingstate.py b/lorrycontroller/workingstate.py
deleted file mode 100644
index b8dc751..0000000
--- a/lorrycontroller/workingstate.py
+++ /dev/null
@@ -1,127 +0,0 @@
-# Copyright (C) 2013 Codethink Limited
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; version 2 of the License.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License along
-# with this program; if not, write to the Free Software Foundation, Inc.,
-# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-
-import json
-import os
-import logging
-import string
-
-class LorryFileRunner(object):
- def __init__(self, mgr, lorryname):
- self.mgr = mgr
- self.lorryname = lorryname
- self.lorryfile = os.path.join(self.mgr.workdir,
- self._esc(lorryname) + ".lorry")
-
- def _esc(self, name):
- valid_chars = string.digits + string.letters + '%_'
- transl = lambda x: x if x in valid_chars else '_'
- return ''.join([transl(x) for x in name])
-
- def __enter__(self):
- lorry_obj = { self.lorryname:
- self.mgr.lorry_state[self.lorryname]['lorry'] }
- with open(self.lorryfile, "w") as fh:
- json.dump(lorry_obj, fh)
- fh.write("\n")
- return self
-
- def __exit__(self, exctype, excvalue, exctraceback):
- os.unlink(self.lorryfile)
-
- def run_lorry(self, *args):
- cmdargs = list(args)
- cmdargs.append(self.lorryfile)
- conf_uuid = self.mgr.lorry_state[self.lorryname]['conf']
- conf = self.mgr.app.conf.configs[conf_uuid]
- cmdargs.append("--tarball=%s" % conf['tarball'])
- exit, out, err = self.mgr.app.maybe_runcmd(cmdargs)
- if exit == 0:
- logging.debug("Lorry of %s succeeded: %s" % (self.lorryname, out))
- self.mgr.lorry_state[self.lorryname]['result'] = "OK"
- else:
- logging.warn("Lorry of %s failed: %s" % (self.lorryname, err))
- self.mgr.lorry_state[self.lorryname]['result'] = err
-
-class WorkingStateManager(object):
- '''Manage the working state of lorry-controller'''
-
- def __init__(self, app):
- self.app = app
- self.workdir = os.path.join(self.app.settings['work-area'], 'work')
-
- def __enter__(self):
- self._load_state()
- return self
-
- def __exit__(self, exctype, excvalue, exctraceback):
- self.purge_dead_troves()
- if not self.app.settings['dry-run']:
- self.save_state()
- else:
- logging.debug("DRY-RUN: Not saving state again")
-
- def purge_dead_troves(self):
- old_trove_count = len(self.trove_state.keys())
- all_troves = self.trove_state
- self.trove_state = {}
- new_trove_count = 0
- for uuid, trove in all_troves.iteritems():
- self.trove_state[uuid] = trove
- new_trove_count += 1
- if old_trove_count != new_trove_count:
- trove_diff = old_trove_count - new_trove_count
- logging.info("Purged %d dead trove entr%s from the state file" % (
- trove_diff, ("y" if trove_diff == 1 else "ies")))
-
- def _load_state(self):
- self.lorry_state_file = os.path.join(self.workdir,
- "last-lorry-state.json")
- self.trove_state_file = os.path.join(self.workdir,
- "last-trove-state.json")
- if os.path.exists(self.lorry_state_file):
- logging.info("Loading lorry state file: %s" %
- self.lorry_state_file)
- with open(self.lorry_state_file, "r") as fh:
- self.lorry_state = json.load(fh)
- else:
- self.lorry_state = dict()
-
- if os.path.exists(self.trove_state_file):
- logging.info("Loading trove state file: %s" %
- self.trove_state_file)
- with open(self.trove_state_file, "r") as fh:
- self.trove_state = json.load(fh)
- else:
- self.trove_state = dict()
-
- def save_state(self):
- logging.info("Serialising lorry state: %s" % self.lorry_state_file)
- with open(self.lorry_state_file, "w") as fh:
- json.dump(self.lorry_state, fh, sort_keys=True, indent=4)
- fh.write("\n")
- logging.info("Serialising trove state: %s" % self.trove_state_file)
- with open(self.trove_state_file, "w") as fh:
- json.dump(self.trove_state, fh, sort_keys=True, indent=4)
- fh.write("\n")
-
- def get_trove(self, troveuuid):
- if troveuuid not in self.trove_state:
- self.trove_state[troveuuid] = {}
- return self.trove_state[troveuuid]
-
- def runner(self, lorryname):
- return LorryFileRunner(self, lorryname)
diff --git a/setup.py b/setup.py
index b27b9d5..e214c33 100644
--- a/setup.py
+++ b/setup.py
@@ -1,19 +1,21 @@
#!/usr/bin/python
#
-# Copyright (C) 2012 Codethink Limited
+# Copyright (C) 2012-2014 Codethink Limited
from distutils.core import setup
+import glob
setup(name='lorry-controller',
- description='FIXME',
- long_description='''\
-FIXME
-''',
- author='Daniel Silverstone',
- author_email='daniel.silverstne@codethink.co.uk',
+ description='Run Lorry on remote repositories in a controller manner',
+ author='Codethink Ltd',
+ author_email='baserock-dev@baserock.org',
url='http://www.baserock.com/',
- scripts=['lorry-controller'],
+ scripts=['lorry-controller-webapp', 'lorry-controller-minion'],
packages=['lorrycontroller'],
+ data_files=[
+ ('share/lorry-controller/templates', glob.glob('templates/*')),
+ ('share/lorry-controller/static', glob.glob('static/*')),
+ ],
)
diff --git a/static/style.css b/static/style.css
new file mode 100644
index 0000000..8a6937d
--- /dev/null
+++ b/static/style.css
@@ -0,0 +1,18 @@
+/* Stylesheet for the Lorry Controller HTML pages. */
+
+/* Outer border around every table. */
+table {
+ border: 1px solid black;
+}
+
+/* Horizontal gap between columns. */
+th, td {
+ padding-right: 2em;
+}
+
+/* Header cells: bold and left-aligned. */
+th {
+ font-weight: bold;
+ text-align: left;
+}
+
+/* Data cells: monospace, with a rule line above each row. */
+td {
+ font-family: monospace;
+ border-top: 1px solid black;
+ text-align: left;
+}
diff --git a/templates/job.tpl b/templates/job.tpl
new file mode 100644
index 0000000..33fe909
--- /dev/null
+++ b/templates/job.tpl
@@ -0,0 +1,20 @@
+<!DOCTYPE HTML>
+<html>
+ <head>
+ <title>Lorry Controller: Job {{job_id}}</title>
+ <link rel="stylesheet" href="/lc-static/style.css" type="text/css" />
+ </head>
+ <body>
+<h1>Status of job {{job_id}}</h1>
+<p>Path of git repo: <code>{{path}}</code></p>
+<p>Started: {{job_started}}</p>
+<p>Ended: {{job_ended}}</p>
+<p>MINION: <code>{{host}}:{{pid}}</code></p>
+<p>Exit code: <code>{{exit}}</code></p>
+<p>Lorry disk usage (after job's finished): {{disk_usage_nice}}</p>
+<p>Output:</p>
+<pre>{{output}}</pre>
+<hr />
+<p>Updated: {{timestamp}}</p>
+ </body>
+</html>
diff --git a/templates/list-jobs.tpl b/templates/list-jobs.tpl
new file mode 100644
index 0000000..1d530aa
--- /dev/null
+++ b/templates/list-jobs.tpl
@@ -0,0 +1,32 @@
+<!DOCTYPE HTML>
+<html>
+ <head>
+ <title>Lorry Controller: ALL the jobs</title>
+ <link rel="stylesheet" href="/lc-static/style.css" type="text/css" />
+ </head>
+ <body>
+ % import json
+
+ <h1>ALL the jobs</h1>
+
+<table>
+<tr>
+<th>Job ID</th>
+<th>path</th>
+<th>exit?</th>
+</tr>
+% for job in job_infos:
+<tr>
+<td><a href="/1.0/job-html/{{job['job_id']}}">{{job['job_id']}}</a></td>
+<td><a href="/1.0/lorry-html/{{job['path']}}">{{job['path']}}</a></td>
+<td>{{job['exit']}}</td>
+</tr>
+% end
+</table>
+
+ <hr />
+
+ <p>Updated: {{timestamp}}</p>
+
+ </body>
+</html>
diff --git a/templates/lorry.tpl b/templates/lorry.tpl
new file mode 100644
index 0000000..fad85cd
--- /dev/null
+++ b/templates/lorry.tpl
@@ -0,0 +1,44 @@
+<!DOCTYPE HTML>
+<html>
+ <head>
+ <title>Lorry {{lorry['path']}}</title>
+ <link rel="stylesheet" href="/lc-static/style.css" type="text/css" />
+ </head>
+ <body>
+ % import json
+
+ <h1>Lorry {{lorry['path']}}</h1>
+
+<table>
+
+<tr> <th>Path</th>
+ <td><a href="{{http_server_root}}/cgi-bin/cgit.cgi/{{lorry['path']}}.git/">{{lorry['path']}}</a></td> </tr>
+<tr> <th>URL</th> <td>{{lorry['url']}}</td> </tr>
+<tr> <th>Interval</th> <td>{{lorry['interval_nice']}} ({{lorry['interval']}} s)</td> </tr>
+<tr> <th>Last run</th> <td>{{lorry['last_run_nice']}}</td> </tr>
+<tr> <th>Due</th> <td>{{lorry['due_nice']}}</td> </tr>
+<tr> <th>From Trove</th> <td>{{lorry['from_trovehost']}}</td> </tr>
+<tr> <th>Disk usage</th> <td>{{lorry['disk_usage_nice']}}</td> </tr>
+
+<tr> <th>Job?</th>
+% if lorry['running_job']:
+<td><a href="/1.0/job/{{lorry['running_job']}}">{{lorry['running_job']}}</a></td>
+% else:
+<td></td>
+% end
+</tr>
+
+</table>
+
+<h2>Full Lorry text</h2>
+
+<blockquote>
+<pre>{{lorry['text']}}</pre>
+</blockquote>
+
+ <hr />
+
+ <p>Updated: {{timestamp}}</p>
+
+ </body>
+</html>
diff --git a/templates/status.tpl b/templates/status.tpl
new file mode 100644
index 0000000..e583883
--- /dev/null
+++ b/templates/status.tpl
@@ -0,0 +1,113 @@
+<!DOCTYPE HTML>
+<html>
+ <head>
+ <title>Lorry Controller status</title>
+ <link rel="stylesheet" href="/lc-static/style.css" type="text/css" />
+ </head>
+ <body>
+ % import json
+
+ <p>{{warning_msg}}</p>
+
+ <h1>Status of Lorry Controller</h1>
+
+% if running_queue:
+<form method="POST" action="/1.0/stop-queue">
+ <p>New jobs are allowed.
+ <input type="submit" name="submit" value="Don't allow new jobs" />
+ <input type="hidden" name="redirect" value="/1.0/status-html" />
+ </p>
+</form>
+% else:
+<form method="POST" action="/1.0/start-queue">
+ <p>New jobs are NOT allowed.
+ <input type="submit" name="submit" value="Allow new jobs" />
+ <input type="hidden" name="redirect" value="/1.0/status-html" />
+ </p>
+</form>
+% end
+
+<form method="POST" action="/1.0/read-configuration">
+ <p>
+ <input type="submit" name="submit" value="Re-read configuration" />
+ <input type="hidden" name="redirect" value="/1.0/status-html" />
+ </p>
+</form>
+
+<p>Maximum number of jobs: {{max_jobs}}.</p>
+
+ <p>Free disk space: {{disk_free_gib}} GiB.</p>
+
+<h2>Remote Troves</h2>
+
+<table>
+<tr>
+<th>Trove host</th>
+<th>Due for re-scan of remote repositories</th>
+</tr>
+% for trove_info in troves:
+<tr>
+<td>{{trove_info['trovehost']}}</td>
+<td>{{trove_info['ls_due_nice']}}</td>
+</tr>
+% end
+</table>
+
+ <h2>Currently running jobs</h2>
+
+% if len(run_queue) == 0:
+<p>There are no jobs running at this time.</p>
+% else:
+<table>
+<tr>
+<th>Job ID</th>
+<th>path</th>
+</tr>
+% for spec in run_queue:
+% if spec['running_job'] is not None:
+<tr>
+<td><a href="/1.0/job-html/{{spec['running_job']}}">{{spec['running_job']}}</a></td>
+<td><a href="/1.0/lorry-html/{{spec['path']}}">{{spec['path']}}</a></td>
+</tr>
+% end
+% end
+</table>
+% end
+
+<p>See separate list of <a href="/1.0/list-jobs-html">all jobs that
+ have ever been started</a>.</p>
+
+ <h2>Run-queue</h2>
+
+<table>
+<tr>
+<th>Pos</th>
+<th>Path</th>
+<th>Interval</th>
+<th>Due</th>
+<th>Job?</th>
+</tr>
+% for i, spec in enumerate(run_queue):
+% obj = json.loads(spec['text'])
+% name = obj.keys()[0]
+% fields = obj[name]
+<tr>
+<td>{{i+1}}</td>
+<td><a href="/1.0/lorry-html/{{spec['path']}}">{{spec['path']}}</a></td>
+<td>{{spec['interval_nice']}}</td>
+<td>{{spec['due_nice']}}</td>
+% if spec['running_job']:
+<td><a href="/1.0/job-html/{{spec['running_job']}}">{{spec['running_job']}}</a></td>
+% else:
+<td></td>
+% end
+</tr>
+% end
+</table>
+
+ <hr />
+
+ <p>Updated: {{timestamp}}</p>
+
+ </body>
+</html>
diff --git a/test-wait-for-port b/test-wait-for-port
new file mode 100755
index 0000000..22e07be
--- /dev/null
+++ b/test-wait-for-port
@@ -0,0 +1,40 @@
+#!/usr/bin/env python
+#
+# Copyright (C) 2014 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+'''Wait for a given port to be open.
+
+WARNING: This may wait for quite a long time. There is no timeout. Or
+spoon.
+
+'''
+
+import sys, socket, errno
+
+host = sys.argv[1]
+port = int(sys.argv[2])
+
+while True:
+ print "Trying %s port %s" % (host, port)
+ s = socket.socket()
+ try:
+ s.connect((host, port))
+ except socket.error as e:
+ if e.errno == errno.ECONNREFUSED:
+ continue
+ raise
+ s.close()
+ break
diff --git a/units/lighttpd-lorry-controller-webapp.service b/units/lighttpd-lorry-controller-webapp.service
new file mode 100644
index 0000000..0c04fac
--- /dev/null
+++ b/units/lighttpd-lorry-controller-webapp.service
@@ -0,0 +1,12 @@
+[Unit]
+Description=Lighttpd Web Server for Lorry Controller WEBAPP
+After=network.target
+
+[Service]
+PermissionsStartOnly=true
+User=lorry
+Group=lorry
+ExecStartPre=/usr/bin/install -d -o lorry -g lorry /run/lighttpd-lorry
+ExecStart=/usr/sbin/lighttpd -f /etc/lighttpd/lorry-controller-webapp-httpd.conf -D
+ExecStopPost=/bin/rm -r /run/lighttpd-lorry
+Restart=always
diff --git a/units/lorry-controller-ls-troves.service b/units/lorry-controller-ls-troves.service
new file mode 100644
index 0000000..fe97811
--- /dev/null
+++ b/units/lorry-controller-ls-troves.service
@@ -0,0 +1,8 @@
+[Unit]
+Description=Lorry Controller ls-troves
+After=lighttpd-lorry-controller-webapp.service
+
+[Service]
+ExecStart=/usr/bin/curl -o /dev/null -X POST --data "" http://localhost:12765/1.0/ls-troves
+User=lorry
+Group=lorry
diff --git a/units/lorry-controller-ls-troves.timer b/units/lorry-controller-ls-troves.timer
new file mode 100644
index 0000000..dbd157d
--- /dev/null
+++ b/units/lorry-controller-ls-troves.timer
@@ -0,0 +1,6 @@
+[Unit]
+Description=Lorry Controller ls-troves
+After=lighttpd-lorry-controller-webapp.service
+
+[Timer]
+OnUnitInactiveSec=60
diff --git a/units/lorry-controller-minion@.service b/units/lorry-controller-minion@.service
new file mode 100644
index 0000000..b63d996
--- /dev/null
+++ b/units/lorry-controller-minion@.service
@@ -0,0 +1,9 @@
+[Unit]
+Description=Lorry Controller MINION %i
+After=lighttpd-lorry-controller-webapp.service
+
+[Service]
+ExecStart=/usr/bin/lorry-controller-minion --config /etc/lorry-controller/minion.conf
+Restart=always
+User=lorry
+Group=lorry
diff --git a/units/lorry-controller-readconf.service b/units/lorry-controller-readconf.service
new file mode 100644
index 0000000..1f73b46
--- /dev/null
+++ b/units/lorry-controller-readconf.service
@@ -0,0 +1,8 @@
+[Unit]
+Description=Lorry Controller read config at startup
+After=lighttpd-lorry-controller-webapp.service
+
+[Service]
+ExecStart=/usr/bin/curl -o /dev/null -X POST --data "" http://localhost:12765/1.0/read-configuration
+User=lorry
+Group=lorry
diff --git a/units/lorry-controller-readconf.timer b/units/lorry-controller-readconf.timer
new file mode 100644
index 0000000..7e4f04e
--- /dev/null
+++ b/units/lorry-controller-readconf.timer
@@ -0,0 +1,6 @@
+[Unit]
+Description=Lorry Controller read config at startup
+After=lighttpd-lorry-controller-webapp.service
+
+[Timer]
+OnUnitInactiveSec=60
diff --git a/units/lorry-controller-status.service b/units/lorry-controller-status.service
new file mode 100644
index 0000000..381677b
--- /dev/null
+++ b/units/lorry-controller-status.service
@@ -0,0 +1,9 @@
+[Unit]
+Description=Lorry Controller Status update
+After=lighttpd-lorry-controller-webapp.service
+
+[Service]
+ExecStart=/usr/bin/curl -o /dev/null http://localhost:12765/1.0/status
+Restart=no
+User=lorry
+Group=lorry
diff --git a/units/lorry-controller-status.timer b/units/lorry-controller-status.timer
new file mode 100644
index 0000000..1528b8c
--- /dev/null
+++ b/units/lorry-controller-status.timer
@@ -0,0 +1,6 @@
+[Unit]
+Description=Lorry Controller Status update
+After=lighttpd-lorry-controller-webapp.service
+
+[Timer]
+OnUnitInactiveSec=60
diff --git a/yarns.webapp/010-introduction.yarn b/yarns.webapp/010-introduction.yarn
new file mode 100644
index 0000000..ae3af58
--- /dev/null
+++ b/yarns.webapp/010-introduction.yarn
@@ -0,0 +1,77 @@
+% Lorry Controller WEBAPP integration test suite
+% Codethink Ltd
+
+
+Introduction
+============
+
+This is an integration test suite for the WEBAPP component of Lorry
+Controller. It is implemented using the [yarn] tool and uses a style
+of automated testing called "scenario testing" by the tool authors.
+
+[yarn]: http://liw.fi/cmdtest/README.yarn/
+
+As an example, here is a scenario that verifies that the Lorry
+Controller WEBAPP can be started at all:
+
+ SCENARIO WEBAPP can be started at all
+ WHEN WEBAPP --help is requested
+ THEN WEBAPP --help exited with a zero exit code
+
+A scenario consists of a sequence of steps that can be executed by a
+computer. The steps are then defined using IMPLEMENTS:
+
+ IMPLEMENTS WHEN WEBAPP --help is requested
+ if "$SRCDIR/lorry-controller-webapp" --help
+ then
+ exit=0
+ else
+ exit=$?
+ fi
+ echo "$exit" > "$DATADIR/webapp.exit"
+
+And another:
+
+ IMPLEMENTS THEN WEBAPP --help exited with a zero exit code
+ grep -Fx 0 "$DATADIR/webapp.exit"
+
+Yarn will run each scenario in the order it finds them. If all steps
+in a scenario succeed, the scenario succeeds.
+
+Scenarios, though not their implementations, are intended to be
+understandable by people who aren't programmers, though some
+understanding of the technology is required.
+
+For more information, see the documentation for yarn.
+
+
+Test environment and setup
+==========================
+
+In this chapter, we discuss how the environment is set up for tests to
+run in. Yarn provides a temporary directory in which tests can create
+temporary directories, and sets the environment variable `$DATADIR` to
+point at that directory. Yarn also deletes the directory and all of
+its contents at the end, so the test suite itself does not need to do
+that.
+
+We put several files into `$DATADIR`.
+
+* The WEBAPP STATEDB database file.
+* Responses from HTTP queries to WEBAPP.
+* PID of the running WEBAPP.
+
+The purpose of each file is documented with the IMPLEMENTS sections
+that use it, typically with the one that creates it.
+
+Since many scenarios will start an instance of WEBAPP, they also need
+to make sure it gets killed. There are steps for these (`GIVEN a
+running WEBAPP` and `FINALLY WEBAPP terminates`), which MUST be
+used as a pair in each scenario: having only one of these steps is
+always a bug in the scenario, whereas having neither is OK.
+
+WEBAPP stores its persistent state in STATEDB, which is an SQLite
+database on disk. Our tests do _not_ touch it directly, only via WEBAPP,
+so that we do not encode in our tests internals of the database, such
+as the database schema. We do not care: we only care that WEBAPP
+works, and the database schema of STATEDB is _not_ a public interface.
diff --git a/yarns.webapp/020-status.yarn b/yarns.webapp/020-status.yarn
new file mode 100644
index 0000000..5749920
--- /dev/null
+++ b/yarns.webapp/020-status.yarn
@@ -0,0 +1,27 @@
+WEBAPP status reporting
+=======================
+
+WEBAPP reports its status via an HTTP request. We verify that when it
+starts up, the status is that it is doing nothing: there are no jobs,
+it has no Lorry or Trove specs.
+
+ SCENARIO WEBAPP is idle when it starts
+ GIVEN a running WEBAPP
+ WHEN admin makes request GET /1.0/status
+ THEN response is application/json
+ AND response has running_queue set to true
+ AND response has disk_free set
+ AND response has disk_free_mib set
+ AND response has disk_free_gib set
+ AND static status page got updated
+ FINALLY WEBAPP terminates
+
+As an alternative, we can request the HTML rendering of the status
+directly with `/1.0/status-html`.
+
+ SCENARIO WEBAPP provide HTML status directly
+ GIVEN a running WEBAPP
+ WHEN admin makes request GET /1.0/status-html
+ THEN response is text/html
+ AND static status page got updated
+ FINALLY WEBAPP terminates
diff --git a/yarns.webapp/030-queue-management.yarn b/yarns.webapp/030-queue-management.yarn
new file mode 100644
index 0000000..9fca4fb
--- /dev/null
+++ b/yarns.webapp/030-queue-management.yarn
@@ -0,0 +1,106 @@
+Run queue management
+====================
+
+This chapter contains tests meant for managing the run-queue in
+WEBAPP.
+
+Start and stop job scheduling
+-----------------------------
+
+The administrator needs to be able to stop WEBAPP from scheduling any
+new jobs, and later to start it again.
+
+ SCENARIO admin can start and stop WEBAPP job scheduling
+ GIVEN a running WEBAPP
+ WHEN admin makes request GET /1.0/status
+ THEN response has running_queue set to true
+
+ WHEN admin makes request POST /1.0/stop-queue
+ AND admin makes request GET /1.0/status
+ THEN response has running_queue set to false
+
+Further, the state change needs to be persistent across WEBAPP
+instances, so we kill the WEBAPP that's currently running, and start a
+new one, and verify that the `running_queue` status is still `false`.
+
+ WHEN WEBAPP is terminated
+ THEN WEBAPP isn't running
+
+ WHEN WEBAPP is started
+ WHEN admin makes request GET /1.0/status
+ THEN response has running_queue set to false
+
+Start the queue again.
+
+ WHEN admin makes request POST /1.0/start-queue
+ AND admin makes request GET /1.0/status
+ THEN response has running_queue set to true
+
+Finally, clean up.
+
+ FINALLY WEBAPP terminates
+
+
+Read CONFGIT
+------------
+
+We need to be able to get Lorry Controller, specifically WEBAPP, to
+update its configuration and run-queue from CONFGIT using the
+`/1.0/read-configuration` HTTP API request.
+
+First, set up WEBAPP.
+
+ SCENARIO WEBAPP updates its configuration from CONFGIT
+ GIVEN a new git repository in CONFGIT
+ AND WEBAPP uses CONFGIT as its configuration directory
+ AND a running WEBAPP
+
+We'll start with an empty configuration. This is the default state
+when WEBAPP has never read its configuration.
+
+ WHEN admin makes request GET /1.0/list-queue
+ THEN response has queue set to []
+
+Make WEBAPP read an empty configuration. Or rather, a configuration
+that does not match any existing `.lorry` files.
+
+ GIVEN an empty lorry-controller.conf in CONFGIT
+ WHEN admin makes request POST /1.0/read-configuration
+ AND admin makes request GET /1.0/list-queue
+ THEN response has queue set to []
+
+Add a `.lorry` file, with one Lorry spec, and make sure reading the
+configuration makes `/list-queue` report it.
+
+ GIVEN Lorry file CONFGIT/foo.lorry with {"foo":{"type":"git","url":"git://foo"}}
+ AND lorry-controller.conf in CONFGIT adds lorries *.lorry using prefix upstream
+ WHEN admin makes request POST /1.0/read-configuration
+ AND admin makes request GET /1.0/list-queue
+ THEN response has queue set to ["upstream/foo"]
+
+If the `.lorry` file is removed, the queue should again become empty.
+
+ GIVEN file CONFGIT/foo.lorry is removed
+ WHEN admin makes request POST /1.0/read-configuration
+ AND admin makes request GET /1.0/list-queue
+ THEN response has queue set to []
+
+Add two Lorries, then make sure they can be reordered at will.
+
+ GIVEN Lorry file CONFGIT/foo.lorry with {"foo":{"type":"git","url":"git://foo"}}
+ AND Lorry file CONFGIT/bar.lorry with {"bar":{"type":"git","url":"git://bar"}}
+ WHEN admin makes request POST /1.0/read-configuration
+ AND admin makes request GET /1.0/list-queue
+ THEN response has queue set to ["upstream/bar", "upstream/foo"]
+
+ WHEN admin makes request POST /1.0/move-to-top with path=upstream/foo
+ AND admin makes request GET /1.0/list-queue
+ THEN response has queue set to ["upstream/foo", "upstream/bar"]
+
+ WHEN admin makes request POST /1.0/move-to-bottom with path=upstream/foo
+ AND admin makes request GET /1.0/list-queue
+ THEN response has queue set to ["upstream/bar", "upstream/foo"]
+
+Finally, clean up.
+
+ FINALLY WEBAPP terminates
diff --git a/yarns.webapp/040-running-jobs.yarn b/yarns.webapp/040-running-jobs.yarn
new file mode 100644
index 0000000..11ec557
--- /dev/null
+++ b/yarns.webapp/040-running-jobs.yarn
@@ -0,0 +1,260 @@
+Running jobs
+============
+
+This chapter contains tests that verify that WEBAPP schedules jobs,
+accepts job output, and lets the admin kill running jobs.
+
+Run a job successfully
+----------------------
+
+To start with, with an empty run-queue, nothing should be scheduled.
+
+ SCENARIO run a job
+ GIVEN a new git repository in CONFGIT
+ AND an empty lorry-controller.conf in CONFGIT
+ AND lorry-controller.conf in CONFGIT adds lorries *.lorry using prefix upstream
+ AND WEBAPP uses CONFGIT as its configuration directory
+ AND a running WEBAPP
+
+We stop the queue first.
+
+ WHEN admin makes request POST /1.0/stop-queue
+
+Then make sure we don't get a job when we request one.
+
+ WHEN admin makes request POST /1.0/give-me-job with host=testhost&pid=123
+ THEN response has job_id set to null
+
+ WHEN admin makes request GET /1.0/list-running-jobs
+ THEN response has running_jobs set to []
+
+Add a Lorry spec to the run-queue, and request a job. We still
+shouldn't get a job, since the queue isn't set to run yet.
+
+ GIVEN Lorry file CONFGIT/foo.lorry with {"foo":{"type":"git","url":"git://foo"}}
+
+ WHEN admin makes request POST /1.0/read-configuration
+ AND admin makes request POST /1.0/give-me-job with host=testhost&pid=123
+ THEN response has job_id set to null
+
+Enable the queue, and off we go.
+
+ WHEN admin makes request POST /1.0/start-queue
+ AND admin makes request POST /1.0/give-me-job with host=testhost&pid=123
+ THEN response has job_id set to 1
+ AND response has path set to "upstream/foo"
+
+ WHEN admin makes request GET /1.0/lorry/upstream/foo
+ THEN response has running_job set to 1
+
+ WHEN admin makes request GET /1.0/list-running-jobs
+ THEN response has running_jobs set to [1]
+
+Requesting another job should now again return null.
+
+ WHEN admin makes request POST /1.0/give-me-job with host=testhost&pid=123
+ THEN response has job_id set to null
+
+Inform WEBAPP the job is finished.
+
+ WHEN MINION makes request POST /1.0/job-update with job_id=1&exit=0
+ THEN response has kill_job set to false
+ WHEN admin makes request GET /1.0/lorry/upstream/foo
+ THEN response has running_job set to null
+ WHEN admin makes request GET /1.0/list-running-jobs
+ THEN response has running_jobs set to []
+
+Cleanup.
+
+ FINALLY WEBAPP terminates
+
+
+Limit number of jobs running at the same time
+---------------------------------------------
+
+WEBAPP can be told to limit the number of jobs running at the same
+time.
+
+Set things up. Note that we have two local Lorry files, so that we
+could, in principle, run two jobs at the same time.
+
+ SCENARIO limit concurrent jobs
+ GIVEN a new git repository in CONFGIT
+ AND an empty lorry-controller.conf in CONFGIT
+ AND lorry-controller.conf in CONFGIT adds lorries *.lorry using prefix upstream
+ AND Lorry file CONFGIT/foo.lorry with {"foo":{"type":"git","url":"git://foo"}}
+ AND Lorry file CONFGIT/bar.lorry with {"bar":{"type":"git","url":"git://bar"}}
+ AND WEBAPP uses CONFGIT as its configuration directory
+ AND a running WEBAPP
+ WHEN admin makes request POST /1.0/read-configuration
+
+Check the current value of the `max_jobs` setting.
+
+ WHEN admin makes request GET /1.0/get-max-jobs
+ THEN response has max_jobs set to null
+
+Set the limit to 1.
+
+ WHEN admin makes request POST /1.0/set-max-jobs with max_jobs=1
+ THEN response has max_jobs set to 1
+ WHEN admin makes request GET /1.0/get-max-jobs
+ THEN response has max_jobs set to 1
+
+Get a job. This should succeed.
+
+ WHEN MINION makes request POST /1.0/give-me-job with host=testhost&pid=1
+ THEN response has job_id set to 1
+
+Get a second job. This should not succeed.
+
+ WHEN MINION makes request POST /1.0/give-me-job with host=testhost&pid=2
+ THEN response has job_id set to null
+
+Finish the first job. Then get a new job. This should succeed.
+
+ WHEN MINION makes request POST /1.0/job-update with job_id=1&exit=0
+ AND MINION makes request POST /1.0/give-me-job with host=testhost&pid=2
+ THEN response has job_id set to 2
+
+Cleanup. The scenario started a WEBAPP, so it must also terminate it.
+
+ FINALLY WEBAPP terminates
+
+
+Stop job in the middle
+----------------------
+
+We need to be able to stop jobs while they're running as well. We
+start by setting up everything so that a job is running, the same way
+we did for the successful job scenario.
+
+ SCENARIO stop a job while it's running
+ GIVEN a new git repository in CONFGIT
+ AND an empty lorry-controller.conf in CONFGIT
+ AND lorry-controller.conf in CONFGIT adds lorries *.lorry using prefix upstream
+ AND WEBAPP uses CONFGIT as its configuration directory
+ AND a running WEBAPP
+ AND Lorry file CONFGIT/foo.lorry with {"foo":{"type":"git","url":"git://foo"}}
+ WHEN admin makes request POST /1.0/read-configuration
+ AND admin makes request POST /1.0/start-queue
+ AND admin makes request POST /1.0/give-me-job with host=testhost&pid=123
+ THEN response has job_id set to 1
+ AND response has path set to "upstream/foo"
+
+Admin will now ask WEBAPP to kill the job. This change only sets a
+field in the STATEDB.
+
+ WHEN admin makes request POST /1.0/stop-job with job_id=1
+ AND admin makes request GET /1.0/lorry/upstream/foo
+ THEN response has kill_job set to true
+
+Now, when MINION updates the job, WEBAPP will tell it to kill it.
+MINION will do so, and then update the job again.
+
+ WHEN MINION makes request POST /1.0/job-update with job_id=1&exit=no
+ THEN response has kill_job set to true
+ WHEN MINION makes request POST /1.0/job-update with job_id=1&exit=1
+
+Admin will now see that the job has, indeed, been killed.
+
+ WHEN admin makes request GET /1.0/lorry/upstream/foo
+ THEN response has running_job set to null
+
+ WHEN admin makes request GET /1.0/list-running-jobs
+ THEN response has running_jobs set to []
+
+Cleanup.
+
+ FINALLY WEBAPP terminates
+
+Stop a job that runs too long
+-----------------------------
+
+Sometimes a job gets "stuck" and should be killed. The
+`lorry-controller.conf` has an optional `lorry-timeout` field for
+this, to set the timeout, and WEBAPP will tell MINION to kill a job
+when it has been running too long.
+
+Some setup. Set the `lorry-timeout` to a known value. It doesn't
+matter what it is since we'll be telling WEBAPP to fake its sense of
+time, so that the test suite is not timing sensitive. We wouldn't want
+to have the test suite fail when running on slow devices.
+
+ SCENARIO stop stuck job
+ GIVEN a new git repository in CONFGIT
+ AND an empty lorry-controller.conf in CONFGIT
+ AND lorry-controller.conf in CONFGIT adds lorries *.lorry using prefix upstream
+ AND lorry-controller.conf in CONFGIT has lorry-timeout set to 1 for everything
+ AND Lorry file CONFGIT/foo.lorry with {"foo":{"type":"git","url":"git://foo"}}
+ AND WEBAPP uses CONFGIT as its configuration directory
+ AND a running WEBAPP
+ WHEN admin makes request POST /1.0/read-configuration
+
+Pretend it is the start of time.
+
+ WHEN admin makes request POST /1.0/pretend-time with now=0
+ AND admin makes request GET /1.0/status
+ THEN response has timestamp set to "1970-01-01 00:00:00 UTC"
+
+Start the job.
+
+ WHEN admin makes request POST /1.0/give-me-job with host=testhost&pid=123
+ THEN response has job_id set to 1
+
+Check that the job info contains a start time.
+
+ WHEN admin makes request GET /1.0/job/1
+ THEN response has job_started set
+
+Pretend it is now much later, or at least later than the timeout specified.
+
+ WHEN admin makes request POST /1.0/pretend-time with now=2
+
+Pretend to be a MINION that reports an update on the job. WEBAPP
+should now be telling us to kill the job.
+
+ WHEN MINION makes request POST /1.0/job-update with job_id=1&exit=no
+ THEN response has kill_job set to true
+
+Cleanup.
+
+ FINALLY WEBAPP terminates
+
+Remove a terminated job
+-----------------------
+
+WEBAPP doesn't remove jobs automatically, it needs to be told to
+remove jobs.
+
+ SCENARIO remove job
+
+Setup.
+
+ GIVEN a new git repository in CONFGIT
+ AND an empty lorry-controller.conf in CONFGIT
+ AND lorry-controller.conf in CONFGIT adds lorries *.lorry using prefix upstream
+ AND WEBAPP uses CONFGIT as its configuration directory
+ AND a running WEBAPP
+ GIVEN Lorry file CONFGIT/foo.lorry with {"foo":{"type":"git","url":"git://foo"}}
+ WHEN admin makes request POST /1.0/read-configuration
+
+Start job 1.
+
+ WHEN admin makes request POST /1.0/give-me-job with host=testhost&pid=123
+ THEN response has job_id set to 1
+
+Try to remove job 1 while it is running. This should fail.
+
+ WHEN admin makes request POST /1.0/remove-job with job_id=1
+ THEN response has reason set to "still running"
+
+Finish the job.
+
+ WHEN MINION makes request POST /1.0/job-update with job_id=1&exit=0
+ WHEN admin makes request GET /1.0/list-jobs
+ THEN response has job_ids set to [1]
+
+Remove it.
+
+ WHEN admin makes request POST /1.0/remove-job with job_id=1
+ AND admin makes request GET /1.0/list-jobs
+ THEN response has job_ids set to []
+
+Cleanup.
+
+ FINALLY WEBAPP terminates
diff --git a/yarns.webapp/050-troves.yarn b/yarns.webapp/050-troves.yarn
new file mode 100644
index 0000000..503ac09
--- /dev/null
+++ b/yarns.webapp/050-troves.yarn
@@ -0,0 +1,76 @@
+Handling of remote Troves
+=========================
+
+This chapter has tests for WEBAPP's handling of remote Troves: getting
+the listing of repositories to mirror from the Trove, and creating
+entries in the run-queue for them.
+
+
+Reading a remote Trove specification from CONFGIT
+-------------------------------------------------
+
+When there's a `troves` section in the Lorry Controller configuration
+file, the WEBAPP should include that in the list of Troves when
+reported.
+
+ SCENARIO a Trove is listed in CONFGIT
+ GIVEN a new git repository in CONFGIT
+ AND an empty lorry-controller.conf in CONFGIT
+ AND WEBAPP uses CONFGIT as its configuration directory
+
+Note that we need to fake a remote Trove, using static files, to keep
+test setup simpler.
+
+ AND WEBAPP fakes Trove example-trove
+ AND a running WEBAPP
+
+Initially WEBAPP should report no known Troves, and have an empty
+run-queue.
+
+ WHEN admin makes request GET /1.0/status
+ THEN response has run_queue set to []
+ AND response has troves set to []
+
+Let's add a `troves` section to the configuration file. After WEBAPP
+reads that, it should list the added Trove in status.
+
+ GIVEN lorry-controller.conf in CONFGIT adds trove example-trove
+ AND lorry-controller.conf in CONFGIT has prefixmap example:example for example-trove
+ WHEN admin makes request POST /1.0/read-configuration
+ AND admin makes request GET /1.0/status
+ THEN response has troves item 0 field trovehost set to "example-trove"
+
+However, this should not have made WEBAPP fetch a new list of
+repositories from the remote Trove.
+
+ THEN response has run_queue set to []
+
+If we tell WEBAPP to fetch the list, we should see repositories.
+
+ GIVEN remote Trove example-trove has repository example/foo
+ WHEN admin makes request POST /1.0/ls-troves
+ AND admin makes request GET /1.0/list-queue
+ THEN response has queue set to ["example/foo"]
+
+If we read the configuration again, without any changes to it or to
+the fake Trove's repository list, the same Troves and Lorry specs
+should remain in STATEDB. (It wasn't always thus, due to a bug.)
+
+ WHEN admin makes request POST /1.0/read-configuration
+ AND admin makes request GET /1.0/status
+ THEN response has troves item 0 field trovehost set to "example-trove"
+ WHEN admin makes request GET /1.0/list-queue
+ THEN response has queue set to ["example/foo"]
+
+If the Trove deletes a repository, we should still keep it locally, to
+avoid disasters. However, it will be removed from the Trove's STATEDB,
+and it won't be lorried anymore.
+
+ GIVEN remote Trove example-trove doesn't have repository example/foo
+ WHEN admin makes request POST /1.0/ls-troves
+ AND admin makes request GET /1.0/list-queue
+ THEN response has queue set to []
+
+Cleanup.
+
+ FINALLY WEBAPP terminates
diff --git a/yarns.webapp/060-validation.yarn b/yarns.webapp/060-validation.yarn
new file mode 100644
index 0000000..040f6cd
--- /dev/null
+++ b/yarns.webapp/060-validation.yarn
@@ -0,0 +1,237 @@
+Validation of CONFGIT
+=====================
+
+The CONFGIT repository contains two types of files we should validate:
+the `lorry-controller.conf` file, and the local Lorry files (specified
+by the former file in `lorries` sections).
+
+Validate `lorry-controller.conf`
+--------------------------------
+
+We'll start by validating the `lorry-controller.conf` file. There are
+several aspects here that need to be tested:
+
+* JSON syntax correctness: if the file doesn't parse as JSON, the
+ WEBAPP should cope and shouldn't change STATEDB in any way.
+* Semantic correctness: the file should contain a list of dicts, and
+ each dict should have the right fields with the right kind of
+ values. See the `README` for details. Other fields are also allowed,
+ though ignored. Again, if there's an error, WEBAPP should cope, and
+ probably shouldn't update STATEDB if there are any problems.
+
+The approach for testing this is to set up an empty STATEDB, then get
+WEBAPP to read a `lorry-controller.conf` with various kinds of
+brokenness, and after each read verify that STATEDB is still empty.
+This doesn't test that if the STATEDB wasn't empty it doesn't change
+existing data, but it seems like a reasonable assumption that an
+update happens regardless of previous contents of STATEDB, given how
+SQL transactions work.
+
+In summary:
+
+* Start WEBAPP without a STATEDB, and have it read its config. Verify
+ STATEDB is empty.
+* Add a `lorry-controller.conf` that is broken in some specific way.
+* Tell WEBAPP to re-read its config.
+* Verify that WEBAPP gives an error message.
+* Verify that STATEDB is still empty.
+
+Repeat this for each type of brokenness we want to ensure WEBAPP
+validates for.
+
+ SCENARIO validate lorry-controller.conf
+ GIVEN a new git repository in CONFGIT
+ AND WEBAPP uses CONFGIT as its configuration directory
+ AND a running WEBAPP
+
+First of all, have WEBAPP read CONFGIT. This should succeed even if
+the `lorry-controller.conf` file doesn't actually exist.
+
+ WHEN admin makes request POST /1.0/read-configuration
+ THEN response matches "Configuration has been updated"
+ AND STATEDB is empty
+
+Add an empty configuration file. This is different from a file
+containing an empty JSON list. It should be treated as an error.
+
+ GIVEN a lorry-controller.conf in CONFGIT containing ""
+ WHEN admin makes request POST /1.0/read-configuration
+ THEN response matches "ERROR"
+ AND STATEDB is empty
+
+Add a syntactically invalid JSON file.
+
+ GIVEN a lorry-controller.conf in CONFGIT containing "blah blah blah"
+ WHEN admin makes request POST /1.0/read-configuration
+ THEN response matches "ERROR"
+ AND STATEDB is empty
+
+Replace the bad JSON file with one that has an unknown section (no
+`type` field). Please excuse the non-escaping of double quotes: it's
+an artifact of how yarn steps are implemented and is OK.
+
+ GIVEN a lorry-controller.conf in CONFGIT containing "[{"foo": "bar"}]"
+ WHEN admin makes request POST /1.0/read-configuration
+ THEN response matches "ERROR"
+ AND STATEDB is empty
+
+What about a section that has a `type` field, but it's set to a
+nonsensical value?
+
+ GIVEN a lorry-controller.conf in CONFGIT containing "[{"type": "BACKUPS!"}]"
+ WHEN admin makes request POST /1.0/read-configuration
+ THEN response matches "ERROR"
+ AND STATEDB is empty
+
+Now we're getting to real sections. A `troves` section must have
+`trovehost`, `interval`, `ls-interval`, and `prefixmap` set, and may
+optionally have `ignore` set. The `trovehost` field can't really be
+checked, and `interval` and `ls-interval` don't need much checking: if
+they don't parse as sensible intervals, Lorry Controller will just use
+a default value.
+
+`prefixmap`, however, can have a reasonable check: it shouldn't map
+something to be under the Trove ID of the local Trove, otherwise Lorry
+won't be able to push the repositories. However, at this time, we do
+not have a reasonable way to get the Trove ID of the local Trove, so
+we're skipping implementing that test for now. (FIXME: fix this lack
+of testing.)
+
+Clean up at the end.
+
+ FINALLY WEBAPP terminates
+
+
+Validate local Lorry files
+--------------------------
+
+Lorry files (`.lorry`) are consumed by the Lorry program itself, but
+also by Lorry Controller. In fact, the ones that are in CONFGIT are
+only consumed by Lorry Controller: it reads them in, parses them,
+extracts the relevant information, puts that into STATEDB, and then
+generates a whole new (temporary) file for each Lorry run.
+
+Lorry Controller doesn't validate the Lorry files much, only
+enough that it can extract each separate Lorry specification and feed
+them to Lorry one by one. In other words:
+
+* The `.lorry` file must be valid JSON.
+* It must be a dict.
+* Each key must map to another dict.
+* Each inner dict must have a key `type`, which maps to a string.
+
+Everything else is left for Lorry itself. Lorry Controller only needs
+to handle Lorry not working, and it already does that.
+
+Firstly, some setup.
+
+ SCENARIO validate .lorry files
+ GIVEN a new git repository in CONFGIT
+ AND an empty lorry-controller.conf in CONFGIT
+ AND lorry-controller.conf in CONFGIT adds lorries *.lorry using prefix upstream
+ AND WEBAPP uses CONFGIT as its configuration directory
+ AND a running WEBAPP
+
+Make sure WEBAPP handles there not being any `.lorry` files.
+
+ WHEN admin makes request POST /1.0/read-configuration
+ THEN response matches "has been updated"
+ AND STATEDB is empty
+ WHEN admin makes request GET /1.0/list-queue
+ THEN response has queue set to []
+
+Add a `.lorry` file that contains broken JSON.
+
+ GIVEN Lorry file CONFGIT/notjson.lorry with THIS IS NOT JSON
+ WHEN admin makes request POST /1.0/read-configuration
+ THEN response matches "has been updated"
+ AND STATEDB is empty
+ WHEN admin makes request GET /1.0/list-queue
+ THEN response has queue set to []
+
+Add a `.lorry` file that is valid JSON, but is not a dict.
+
+ GIVEN Lorry file CONFGIT/notadict.lorry with [1,2,3]
+ WHEN admin makes request POST /1.0/read-configuration
+ THEN response matches "has been updated"
+ AND STATEDB is empty
+ WHEN admin makes request GET /1.0/list-queue
+ THEN response has queue set to []
+
+Add a `.lorry` that is a dict, but doesn't map keys to dicts.
+
+ GIVEN Lorry file CONFGIT/notadictofdicts.lorry with { "foo": 1 }
+ WHEN admin makes request POST /1.0/read-configuration
+ THEN response matches "has been updated"
+ AND STATEDB is empty
+ WHEN admin makes request GET /1.0/list-queue
+ THEN response has queue set to []
+
+Add a `.lorry` whose inner dict does not have a `type` field.
+
+ GIVEN Lorry file CONFGIT/notype.lorry with { "foo": { "bar": "yo" }}
+ WHEN admin makes request POST /1.0/read-configuration
+ THEN response matches "has been updated"
+ AND STATEDB is empty
+ WHEN admin makes request GET /1.0/list-queue
+ THEN response has queue set to []
+
+Add a `.lorry` that is A-OK. This should work even when there are some
+broken ones too.
+
+ GIVEN Lorry file CONFGIT/a-ok.lorry with { "foo": { "type": "git", "url": "git://example.com/foo" }}
+ WHEN admin makes request POST /1.0/read-configuration
+ THEN response matches "has been updated"
+ WHEN admin makes request GET /1.0/list-queue
+ THEN response has queue set to ["upstream/foo"]
+
+Clean up at the end.
+
+ FINALLY WEBAPP terminates
+
+
+Load broken Lorry files
+-----------------------
+
+This scenario checks that if STATEDB already contains a valid Lorry
+file, and we're trying to load an invalid one, then the valid one
+doesn't get overwritten or discarded.
+
+First, some setup.
+
+ SCENARIO load broken .lorry files
+ GIVEN a new git repository in CONFGIT
+ AND an empty lorry-controller.conf in CONFGIT
+ AND lorry-controller.conf in CONFGIT adds lorries *.lorry using prefix upstream
+ AND WEBAPP uses CONFGIT as its configuration directory
+ AND a running WEBAPP
+
+Add a `.lorry` file that contains valid JSON.
+
+ GIVEN Lorry file CONFGIT/a-ok.lorry with { "foo": { "type": "git", "url": "git://example.com/foo" }}
+ WHEN admin makes request POST /1.0/read-configuration
+ THEN response matches "has been updated"
+
+Make sure it's the one we loaded.
+
+ WHEN admin makes request GET /1.0/list-queue
+ THEN response has queue set to ["upstream/foo"]
+ WHEN admin makes request GET /1.0/lorry/upstream/foo
+ THEN response matches "git://example.com/foo"
+
+Now, add a broken file and try to load that.
+
+ GIVEN Lorry file CONFGIT/notjson.lorry with THIS IS NOT JSON
+ WHEN admin makes request POST /1.0/read-configuration
+ THEN response matches "has been updated"
+
+Now, make sure we still have what we originally had.
+
+ WHEN admin makes request GET /1.0/list-queue
+ THEN response has queue set to ["upstream/foo"]
+ WHEN admin makes request GET /1.0/lorry/upstream/foo
+ THEN response matches "git://example.com/foo"
+
+Clean up at the end.
+
+ FINALLY WEBAPP terminates
diff --git a/yarns.webapp/900-implementations.yarn b/yarns.webapp/900-implementations.yarn
new file mode 100644
index 0000000..54a82a4
--- /dev/null
+++ b/yarns.webapp/900-implementations.yarn
@@ -0,0 +1,442 @@
+Implementations
+===============
+
+This chapter includes IMPLEMENTS sections for the various steps used
+in scenarios.
+
+Managing a WEBAPP instance
+--------------------------
+
+We're testing a web application (conveniently named WEBAPP, though
+the executable is `lorry-controller-webapp`), so we need to be able to
+start it and stop it in scenarios. We start it as a background
+process, and keep its PID in `$DATADIR/webapp.pid`. When it's time to
+kill it, we kill the process with the PID in that file. This is not
+perfect, though it's good enough for our purposes. It doesn't handle
+running multiple instances at the same time, which we don't need, and
+doesn't handle the case of the process dying and the kernel re-using
+the PID for something else, which is quite unlikely.
+
+Start an instance of the WEBAPP, using a random port. Record the PID
+and the port. Listen only on localhost. We use `start-stop-daemon` to
+start the process, so that it can keep running in the background,
+but the shell doesn't wait for it to terminate. This way, WEBAPP will
+be running until it crashes or is explicitly killed.
+
+ IMPLEMENTS GIVEN a running WEBAPP
+ start_webapp
+
+ IMPLEMENTS WHEN WEBAPP is started
+ start_webapp
+
+Kill the running WEBAPP, using the recorded PID. We need to do this
+both as a WHEN and a FINALLY step.
+
+ IMPLEMENTS WHEN WEBAPP is terminated
+ kill_daemon_using_pid_file "$DATADIR/webapp.pid"
+
+ IMPLEMENTS FINALLY WEBAPP terminates
+ kill_daemon_using_pid_file "$DATADIR/webapp.pid"
+
+Also test that WEBAPP isn't running.
+
+ IMPLEMENTS THEN WEBAPP isn't running
+ pid=$(head -n1 "$DATADIR/webapp.pid")
+ if kill -0 "$pid"
+ then
+ echo "process $pid is still running, but shouldn't be" 1>&2
+ exit 1
+ fi
+
+Managing Lorry Controller configuration
+---------------------------------------
+
+We need to be able to create, and change, the `lorry-controller.conf`
+file, and other files, in CONFGIT. First of all, we need to create
+CONFGIT.
+
+ IMPLEMENTS GIVEN a new git repository in (\S+)
+ git init "$DATADIR/$MATCH_1"
+
+Then we need to create an empty `lorry-controller.conf` file there.
+This is not just an empty file, it must be a JSON file that contains
+an empty list object.
+
+ IMPLEMENTS GIVEN an empty lorry-controller.conf in (\S+)
+ printf '[]\n' > "$DATADIR/$MATCH_1/lorry-controller.conf"
+
+Set the contents of `lorry-controller.conf` from a textual form.
+
+ IMPLEMENTS GIVEN a lorry-controller.conf in (\S+) containing "(.*)"$
+ printf '%s\n' "$MATCH_2" > "$DATADIR/$MATCH_1/lorry-controller.conf"
+
+Add a `.lorry` file to be used by a `lorry-controller.conf`.
+
+ IMPLEMENTS GIVEN Lorry file (\S+) with (.*)
+ printf '%s\n' "$MATCH_2" > "$DATADIR/$MATCH_1"
+
+Remove a file. This is actually quite generic, but it's relevant to us
+for `.lorry` files only (when this is being written).
+
+ IMPLEMENTS GIVEN file (\S+) is removed
+ rm "$DATADIR/$MATCH_1"
+
+Add a `lorries` section to a `lorry-controller.conf`. This hardcodes
+most of the configuration.
+
+ IMPLEMENTS GIVEN (\S+) in (\S+) adds lorries (\S+) using prefix (\S+)
+ python -c '
+ import os
+ import json
+
+ DATADIR = os.environ["DATADIR"]
+ MATCH_1 = os.environ["MATCH_1"]
+ MATCH_2 = os.environ["MATCH_2"]
+ MATCH_3 = os.environ["MATCH_3"]
+ MATCH_4 = os.environ["MATCH_4"]
+
+ new = {
+ "type": "lorries",
+ "interval": "0s",
+ "prefix": MATCH_4,
+ "globs": [
+ MATCH_3,
+ ],
+ }
+
+ filename = os.path.join(DATADIR, MATCH_2, MATCH_1)
+ with open(filename, "r") as f:
+ obj = json.load(f)
+ obj.append(new)
+ with open(filename, "w") as f:
+ json.dump(obj, f)
+ '
+
+Add a `troves` section to `lorry-controller.conf`. Again, we hardcode
+most of the configuration.
+
+ IMPLEMENTS GIVEN (\S+) in (\S+) adds trove (\S+)
+ python -c '
+ import os
+ import json
+
+ DATADIR = os.environ["DATADIR"]
+ MATCH_1 = os.environ["MATCH_1"]
+ MATCH_2 = os.environ["MATCH_2"]
+ MATCH_3 = os.environ["MATCH_3"]
+
+ new = {
+ "type": "troves",
+ "trovehost": MATCH_3,
+ "protocol": "ssh",
+ "interval": "0s",
+ "ls-interval": "0s",
+ "prefixmap": {},
+ "ignore": [],
+ }
+
+ filename = os.path.join(DATADIR, MATCH_2, MATCH_1)
+ with open(filename, "r") as f:
+ obj = json.load(f)
+ obj.append(new)
+ with open(filename, "w") as f:
+ json.dump(obj, f, indent=4)
+ '
+
+Set a specific field for all sections in a `lorry-controller.conf`
+file.
+
+ IMPLEMENTS GIVEN (\S+) in (\S+) has (\S+) set to (.+) for everything
+ python -c '
+ import os
+ import json
+
+ DATADIR = os.environ["DATADIR"]
+ MATCH_1 = os.environ["MATCH_1"]
+ MATCH_2 = os.environ["MATCH_2"]
+ MATCH_3 = os.environ["MATCH_3"]
+ MATCH_4 = os.environ["MATCH_4"]
+
+ filename = os.path.join(DATADIR, MATCH_2, MATCH_1)
+
+ with open(filename, "r") as f:
+ obj = json.load(f)
+
+ for section in obj:
+ section[MATCH_3] = json.loads(MATCH_4)
+
+ with open(filename, "w") as f:
+ json.dump(obj, f, indent=4)
+ '
+
+Set a specific field for a `troves` section.
+
+    IMPLEMENTS GIVEN (\S+) in (\S+) sets (\S+) to (\S+) for trove (\S+)
+    python -c '
+    import os
+    import json
+
+    DATADIR = os.environ["DATADIR"]
+    MATCH_1 = os.environ["MATCH_1"]  # configuration file name
+    MATCH_2 = os.environ["MATCH_2"]  # directory under DATADIR
+    MATCH_3 = os.environ["MATCH_3"]  # name of the field to set
+    MATCH_4 = os.environ["MATCH_4"]  # new value, JSON-encoded
+    MATCH_5 = os.environ["MATCH_5"]  # trovehost selecting the section
+
+    filename = os.path.join(DATADIR, MATCH_2, MATCH_1)
+
+    with open(filename, "r") as f:
+        obj = json.load(f)
+
+    for section in obj:
+        if section["type"] in ["trove", "troves"]:
+            if section["trovehost"] == MATCH_5:
+                section[MATCH_3] = json.loads(MATCH_4)
+
+    with open(filename, "w") as f:
+        json.dump(obj, f, indent=4)
+    '
+
+Set the prefixmap for a Trove in a Lorry Controller configuration
+file. Note that the Trove must already be in the configuration file.
+
+ IMPLEMENTS GIVEN (\S+) in (\S+) has prefixmap (\S+):(\S+) for (\S+)
+ python -c '
+ import os
+ import json
+
+ DATADIR = os.environ["DATADIR"]
+ MATCH_1 = os.environ["MATCH_1"]
+ MATCH_2 = os.environ["MATCH_2"]
+ MATCH_3 = os.environ["MATCH_3"]
+ MATCH_4 = os.environ["MATCH_4"]
+ MATCH_5 = os.environ["MATCH_5"]
+
+ filename = os.path.join(DATADIR, MATCH_2, MATCH_1)
+ with open(filename, "r") as f:
+ objs = json.load(f)
+
+ for obj in objs:
+ if obj["type"] == "troves" and obj["trovehost"] == MATCH_5:
+ obj["prefixmap"][MATCH_3] = MATCH_4
+
+ with open(filename, "w") as f:
+ json.dump(objs, f, indent=4)
+ '
+
+We need to be able to tell WEBAPP, when it runs, where the
+configuration directory is.
+
+ IMPLEMENTS GIVEN WEBAPP uses (\S+) as its configuration directory
+ add_to_config_file "$DATADIR/webapp.conf" \
+ configuration-directory "$DATADIR/$MATCH_1"
+
+Make WEBAPP fake access to a Trove using a static file.
+
+ IMPLEMENTS GIVEN WEBAPP fakes Trove (\S+)
+ add_to_config_file "$DATADIR/webapp.conf" \
+ debug-fake-trove "$MATCH_1=$DATADIR/$MATCH_1.trove"
+
+Control the ls listing of a remote Trove.
+
+ IMPLEMENTS GIVEN remote Trove (\S+) has repository (\S+)
+ filename="$DATADIR/$MATCH_1.trove"
+ if [ ! -e "$filename" ]
+ then
+ echo "{}" > "$filename"
+ fi
+ cat "$filename"
+ python -c '
+ import json, os, sys
+ MATCH_2 = os.environ["MATCH_2"]
+ filename = sys.argv[1]
+ with open(filename) as f:
+ data = json.load(f)
+ data["ls-output"] = data.get("ls-output", []) + [MATCH_2]
+ with open(filename, "w") as f:
+ json.dump(data, f)
+ ' "$filename"
+
+Remove a repository from the fake remote Trove.
+
+ IMPLEMENTS GIVEN remote Trove (\S+) doesn't have repository (\S+)
+ filename="$DATADIR/$MATCH_1.trove"
+ if [ ! -e "$filename" ]
+ then
+ echo "{}" > "$filename"
+ fi
+ cat "$filename"
+ python -c '
+ import json, os, sys
+ MATCH_2 = os.environ["MATCH_2"]
+ filename = sys.argv[1]
+ with open(filename) as f:
+ data = json.load(f)
+ paths = data.get("ls-output", [])
+ if MATCH_2 in paths:
+ paths.remove(MATCH_2)
+ data["ls-output"] = paths
+ with open(filename, "w") as f:
+ json.dump(data, f)
+ ' "$filename"
+
+Making and analysing HTTP requests
+---------------------------------
+
+Simple HTTP GET and POST requests are simple. We make the request,
+sending a body if given, and capture the response: HTTP status code,
+response headers, response body.
+
+We make the request using the `curl` command line program, which makes
+capturing the response quite convenient.
+
+HTTP requests can be made by various entities. This does not affect
+test code, but allows for nicer scenario steps.
+
+We check that the HTTP status indicates success, so that every
+scenario doesn't need to check that separately.
+
+A GET request:
+
+ IMPLEMENTS WHEN admin makes request GET (\S+)
+ > "$DATADIR/response.headers"
+ > "$DATADIR/response.body"
+ port=$(cat "$DATADIR/webapp.port")
+
+ # The timestamp is needed by "THEN static status page got updated"
+ touch "$DATADIR/request.timestamp"
+
+ curl \
+ -D "$DATADIR/response.headers" \
+ -o "$DATADIR/response.body" \
+ --silent --show-error \
+ "http://127.0.0.1:$port$MATCH_1"
+ cat "$DATADIR/response.headers"
+ cat "$DATADIR/response.body"
+ head -n1 "$DATADIR/response.headers" | grep '^HTTP/1\.[01] 200 '
+
+A POST request always has a body. The body consists of `foo=bar`
+pairs, separated by `&` signs.
+
+ IMPLEMENTS WHEN (\S+) makes request POST (\S+) with (.*)
+ post_request "$MATCH_2" "$MATCH_3"
+
+Except, sometimes we don't have a useful body to give. So we don't.
+
+ IMPLEMENTS WHEN (\S+) makes request POST (\S+)
+ post_request "$MATCH_2" dummy=value
+
+Check the Content-Type of the response has the desired type.
+
+ IMPLEMENTS THEN response is (\S+)
+ cat "$DATADIR/response.headers"
+ grep -i "^Content-Type: $MATCH_1" "$DATADIR/response.headers"
+
+A JSON response can then be queried further. The JSON is expected to
+be a dict, so that values are accessed by name from the dict. The
+value is expressed as a JSON value in the step.
+
+ IMPLEMENTS THEN response has (\S+) set to (.+)
+ cat "$DATADIR/response.body"
+ python -c '
+ import json, os, sys
+ data = json.load(sys.stdin)
+ key = os.environ["MATCH_1"]
+ expected = json.loads(os.environ["MATCH_2"])
+ value = data[key]
+ if value != expected:
+ sys.stderr.write(
+ "Key {key} has value {value}, but "
+ "{expected} was expected".format(
+ key=key, value=value, expected=expected))
+ sys.exit(1)
+ ' < "$DATADIR/response.body"
+
+A JSON response may need to be analysed in more depth. Specifically,
+we may need to look at a list of dicts, as below.
+
+    IMPLEMENTS THEN response has (\S+) item (\d+) field (\S+) set to (\S+)
+    cat "$DATADIR/response.body"
+    python -c '
+    import json, os, sys
+    data = json.load(sys.stdin)
+    print "data:", repr(data)
+    items = os.environ["MATCH_1"]  # key of a list in the response
+    print "items:", repr(items)
+    item = int(os.environ["MATCH_2"])  # index into that list
+    print "item:", repr(item)
+    field = os.environ["MATCH_3"]  # key in the selected dict
+    print "field:", repr(field)
+    print "match4:", repr(os.environ["MATCH_4"])
+    expected = json.loads(os.environ["MATCH_4"])  # step value is JSON
+    print "expected:", repr(expected)
+    print "data[items]:", repr(data[items])
+    print "data[items][item]:", repr(data[items][item])
+    print "data[items][item][field]:", repr(data[items][item][field])
+    value = data[items][item][field]
+    if value != expected:
+        sys.stderr.write(
+            "Item {item} in {items} has field {field} with "
+            "value {value}, but {expected} was expected".format (
+                item=item, items=items, field=field, value=value,
+                expected=expected))
+        sys.exit(1)
+    ' < "$DATADIR/response.body"
+
+In some cases, such as free disk space, we don't care about the actual
+value, but we do care that it is there.
+
+ IMPLEMENTS THEN response has (\S+) set
+ cat "$DATADIR/response.body"
+ python -c '
+ import json, os, sys
+ data = json.load(sys.stdin)
+ key = os.environ["MATCH_1"]
+ if key not in data:
+ sys.stderr.write(
+ "Key {key} is not set, but was expected to be set".format (
+ key=key))
+ sys.exit(1)
+ ' < "$DATADIR/response.body"
+
+Some responses are just plain text, so we match them with a regexp.
+
+ IMPLEMENTS THEN response matches "(.*)"$
+ cat "$DATADIR/response.body"
+ grep "$MATCH_1" "$DATADIR/response.body"
+
+
+Status web page
+---------------
+
+WEBAPP is expected to update a static HTML page whenever the
+`/1.0/status` request is made. We configure WEBAPP to write it to
+`$DATADIR/lc-status.html`. We don't test the contents of the page, but
+we do test that it gets updated. We test for the updates by comparing
+the modification time of the file with the time of the request. We
+know the time of the request thanks to the "WHEN admin makes a
+request" step updating the modification time of a file for this
+purpose.
+
+ IMPLEMENTS THEN static status page got updated
+ # test -nt isn't useful: the timestamps might be identical, and
+ # that's OK on filesystems that only store full-second timestamps.
+ # We generate timestamps in (roughly) ISO 8601 format, with stat,
+ # and those can be compared using simple string comparison.
+
+ status=$(stat -c %y "$DATADIR/lc-status.html")
+ request=$(stat -c %y "$DATADIR/request.timestamp")
+ test "$request" = "$status" || test "$request" '<' "$status"
+
+
+STATEDB
+-------
+
+Check that the STATEDB is empty. This means it should exist, and
+should be initialised, but none of the important tables should have
+any rows in them.
+
+ IMPLEMENTS THEN STATEDB is empty
+ test -s "$DATADIR/webapp.db"
+ sqlite3 "$DATADIR/webapp.db" 'SELECT * FROM troves;' | stdin_is_empty
+ sqlite3 "$DATADIR/webapp.db" 'SELECT * FROM lorries;' | stdin_is_empty
diff --git a/yarns.webapp/yarn.sh b/yarns.webapp/yarn.sh
new file mode 100644
index 0000000..2a9081d
--- /dev/null
+++ b/yarns.webapp/yarn.sh
@@ -0,0 +1,122 @@
+# Copyright (C) 2014 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# =*= License: GPL-2 =*=
+
+# This file is a yarn shell library for testing Lorry Controller.
+
+
+# Kill a daemon given its pid file. Report whether it got killed or not.
+
+kill_daemon_using_pid_file()
+{
+    local daemon_pid
+    daemon_pid=$(head -n1 "$1") || :  # best effort: pid stays empty
+    if ! kill -9 "$daemon_pid"; then
+        echo "Error killing daemon running as pid $daemon_pid"
+    else
+        echo "Killed daemon running as $daemon_pid"
+    fi
+}
+
+
+# Add a configuration item to a cliapp-style configuration file.
+
+add_to_config_file()
+{
+    # $1 is the file, $2 the setting name, $3 its value. A file that
+    # does not exist yet is first given its [config] section header;
+    # the setting is always appended, never replaced.
+    [ -e "$1" ] || printf '[config]\n' > "$1"
+    printf '%s = %s\n' "$2" "$3" >> "$1"
+}
+
+
+# Ensure the standard input is empty. If not, exit with an error.
+
+stdin_is_empty()
+{
+    # grep exits 0 only when some input line has at least one
+    # character, so a failing grep means there was nothing to see.
+    grep . > /dev/null || return 0
+    echo "ERROR: stdin was NOT empty" 1>&2
+    exit 1
+}
+
+
+# Configure (unless already configured) and start a WEBAPP.
+
+start_webapp()
+{
+    local conf="$DATADIR/webapp.conf"
+    local port_file="$DATADIR/webapp.port"
+
+    # The port file starts out as a FIFO; it is handed to WEBAPP as
+    # debug-port-file and read back once WEBAPP has been started.
+    rm -f "$DATADIR/webapp.pid"
+    rm -f "$port_file"
+    mkfifo "$port_file"
+
+    # Every call appends, so settings put in the file by earlier
+    # steps are kept.
+    add_to_config_file "$conf" statedb "$DATADIR/webapp.db"
+    add_to_config_file "$conf" status-html "$DATADIR/lc-status.html"
+    add_to_config_file "$conf" log "$DATADIR/webapp.log"
+    add_to_config_file "$conf" log-level debug
+    add_to_config_file "$conf" debug-host 127.0.0.1
+    add_to_config_file "$conf" debug-port-file "$port_file"
+    add_to_config_file "$conf" static-files "$SRCDIR/static"
+    add_to_config_file "$conf" templates "$SRCDIR/templates"
+    add_to_config_file "$conf" debug-real-confgit no
+
+    # -b backgrounds the daemon; -m with -p records its PID.
+    start-stop-daemon -S -x "$SRCDIR/lorry-controller-webapp" \
+        -b -p "$DATADIR/webapp.pid" -m --verbose \
+        -- --config "$conf"
+
+    # Replace the FIFO with a regular file holding the port number,
+    # so that later steps can read it any number of times.
+    port=$(cat "$port_file")
+    rm -f "$port_file"
+    echo "$port" >"$port_file"
+
+    # Wait for the WEBAPP to actually be ready, i.e., that it's
+    # listening on its assigned port.
+    "$SRCDIR/test-wait-for-port" 127.0.0.1 "$port"
+}
+
+
+# Make a POST request.
+
+post_request()
+{
+    local headers="$DATADIR/response.headers"
+    local body="$DATADIR/response.body"
+
+    # Start from empty files so that output of an earlier request
+    # can't be mistaken for the response to this one.
+    : > "$headers"
+    : > "$body"
+    port=$(cat "$DATADIR/webapp.port")
+
+    # The timestamp is needed by "THEN static status page got updated"
+    touch "$DATADIR/request.timestamp"
+    curl --silent --show-error --request POST --data "$2" \
+        -D "$headers" -o "$body" "http://127.0.0.1:$port$1"
+    # Show the response, then require an HTTP 200 status line.
+    cat "$headers"
+    cat "$body"
+    head -n1 "$headers" | grep '^HTTP/1\.[01] 200 '
+}