From decb0ea74d30461bf79ecddb6e23231cb696a76c Mon Sep 17 00:00:00 2001 From: Sam Thursfield Date: Tue, 3 Feb 2015 12:57:20 +0000 Subject: distbuild: Write name of build worker to build-step-xx.log files Knowing which worker built something is useful for debugging, and right now that information is only present on the initiator's console. It's good to have it in the build-step-xx.log file too so the information doesn't get lost. --- distbuild/initiator.py | 48 +++++++++++++++++++++++++++++++++--------------- 1 file changed, 33 insertions(+), 15 deletions(-) diff --git a/distbuild/initiator.py b/distbuild/initiator.py index aaae7d62..54331072 100644 --- a/distbuild/initiator.py +++ b/distbuild/initiator.py @@ -20,7 +20,7 @@ import cliapp import logging import os import random -import sys +import time import distbuild @@ -137,23 +137,35 @@ class Initiator(distbuild.StateMachine): self._step_outputs[msg['step_name']].close() del self._step_outputs[msg['step_name']] + def _get_output(self, msg): + return self._step_outputs[msg['step_name']] + def _handle_step_already_started_message(self, msg): - self._app.status( - msg='%s is already building on %s' % (msg['step_name'], - msg['worker_name'])) + status = '%s is already building on %s' % ( + msg['step_name'], msg['worker_name']) + self._app.status(msg=status) + self._open_output(msg) + f = self._get_output(msg) + f.write(time.strftime('%Y-%m-%d %H:%M:%S ') + status + '\n') + f.flush() + def _handle_step_started_message(self, msg): - self._app.status( - msg='Started building %(step_name)s on %(worker_name)s', - step_name=msg['step_name'], - worker_name=msg['worker_name']) + status = 'Started building %s on %s' % ( + msg['step_name'], msg['worker_name']) + self._app.status(msg=status) + self._open_output(msg) + f = self._get_output(msg) + f.write(time.strftime('%Y-%m-%d %H:%M:%S ') + status + '\n') + f.flush() + def _handle_step_output_message(self, msg): step_name = msg['step_name'] if step_name in self._step_outputs: - f = self._step_outputs[step_name] + f = self._get_output(msg) f.write(msg['stdout']) f.write(msg['stderr']) f.flush() @@ -164,9 +176,12 @@ class Initiator(distbuild.StateMachine): def _handle_step_finished_message(self, msg): step_name = msg['step_name'] if step_name in self._step_outputs: - self._app.status( - msg='Finished building %(step_name)s', - step_name=step_name) + status = 'Finished building %s' % step_name + self._app.status(msg=status) + + f = self._get_output(msg) + f.write(time.strftime('%Y-%m-%d %H:%M:%S ') + status + '\n') + self._close_output(msg) else: logging.warning( @@ -175,9 +190,12 @@ class Initiator(distbuild.StateMachine): def _handle_step_failed_message(self, msg): step_name = msg['step_name'] if step_name in self._step_outputs: - self._app.status( - msg='Build failed: %(step_name)s', - step_name=step_name) + status = 'Build of %s failed.' % step_name + self._app.status(msg=status) + + f = self._get_output(msg) + f.write(time.strftime('%Y-%m-%d %H:%M:%S ') + status + '\n') + self._close_output(msg) else: logging.warning( -- cgit v1.2.1 From fae23a5162e40a559f931279ae72b940aba8064d Mon Sep 17 00:00:00 2001 From: Sam Thursfield Date: Wed, 11 Feb 2015 11:40:17 +0000 Subject: distbuild: Rearrange code that sends exec-request message --- distbuild/build_controller.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/distbuild/build_controller.py b/distbuild/build_controller.py index 387b410f..1794586d 100644 --- a/distbuild/build_controller.py +++ b/distbuild/build_controller.py @@ -244,6 +244,15 @@ class BuildController(distbuild.StateMachine): self.mainloop.queue_event(self, _Start()) + def _request_command_execution(self, argv, request_id): + '''Tell the controller's distbuild-helper to run a command.''' + msg = distbuild.message('exec-request', + id=request_id, + argv=argv, + stdin_contents='') + req = distbuild.HelperRequest(msg) + self.mainloop.queue_event(distbuild.HelperRouter, req) + def _start_graphing(self, event_source, event): distbuild.crash_point() @@ -260,14 +269,10 @@ class BuildController(distbuild.StateMachine): ] if 'original_ref' in self._request: argv.append(self._request['original_ref']) - msg = distbuild.message('exec-request', - id=self._idgen.next(), - argv=argv, - stdin_contents='') - self._helper_id = msg['id'] - req = distbuild.HelperRequest(msg) - self.mainloop.queue_event(distbuild.HelperRouter, req) - + + self._helper_id = self._idgen.next() + self._request_command_execution(argv, self._helper_id) + progress = BuildProgress(self._request['id'], 'Computing build graph') self.mainloop.queue_event(BuildController, progress) -- cgit v1.2.1 From a006c418d3cc81d6d4a4353a43133b40486a6610 Mon Sep 17 00:00:00 2001 From: Sam Thursfield Date: Wed, 11 Feb 2015 11:58:42 +0000 Subject: distbuild: Simplify failure cases in BuildController There's no need to handle failure differently at each stage of the build. Simpler to use the BuildFailed message for all errors. This then allows us to have a single self.fail() function that can be used everywhere. --- distbuild/build_controller.py | 62 +++++++++++++------------------------------ 1 file changed, 19 insertions(+), 43 deletions(-) diff --git a/distbuild/build_controller.py b/distbuild/build_controller.py index 1794586d..cfa0e2e2 100644 --- a/distbuild/build_controller.py +++ b/distbuild/build_controller.py @@ -37,11 +37,6 @@ class _Start(object): pass class _Annotated(object): pass class _Built(object): pass -class _AnnotationFailed(object): - - def __init__(self, http_status_code, error_msg): - self.http_status_code = http_status_code - self.error_msg = error_msg class _GotGraph(object): @@ -49,11 +44,6 @@ class _GotGraph(object): self.artifact = artifact -class _GraphFailed(object): - - pass - - class BuildCancel(object): def __init__(self, id): @@ -192,14 +182,13 @@ class BuildController(distbuild.StateMachine): 'graphing', self._maybe_finish_graph), ('graphing', self, _GotGraph, 'annotating', self._start_annotating), - ('graphing', self, _GraphFailed, None, None), + ('graphing', self, BuildFailed, None, None), ('graphing', self._initiator_connection, distbuild.InitiatorDisconnect, None, None), ('annotating', distbuild.HelperRouter, distbuild.HelperResult, 'annotating', self._maybe_handle_cache_response), - ('annotating', self, _AnnotationFailed, None, - self._notify_annotation_failed), + ('annotating', self, BuildFailed, None, None), ('annotating', self, _Annotated, 'building', self._queue_worker_builds), ('annotating', self._initiator_connection, @@ -244,6 +233,17 @@ class BuildController(distbuild.StateMachine): self.mainloop.queue_event(self, _Start()) + def fail(self, reason): + logging.error(reason) + message = BuildFailed(self._request['id'], reason) + + # The message is sent twice so that it can be matched both by state + # transitions listening for this specific controller instance, and by + # state transitions listening for messages from the BuildController + # class that then filter the message based on the request ID field. + self.mainloop.queue_event(self, message) + self.mainloop.queue_event(BuildController, message) + def _request_command_execution(self, argv, request_id): '''Tell the controller's distbuild-helper to run a command.''' msg = distbuild.message('exec-request', @@ -286,16 +286,6 @@ class BuildController(distbuild.StateMachine): def _maybe_finish_graph(self, event_source, event): distbuild.crash_point() - def notify_failure(msg_text): - logging.error('Graph creation failed: %s' % msg_text) - - failed = BuildFailed( - self._request['id'], - 'Failed to compute build graph: %s' % msg_text) - self.mainloop.queue_event(BuildController, failed) - - self.mainloop.queue_event(self, _GraphFailed()) - def notify_success(artifact): logging.debug('Graph is finished') @@ -313,8 +303,8 @@ class BuildController(distbuild.StateMachine): error_text = self._artifact_error.peek() if event.msg['exit'] != 0 or error_text: - notify_failure('Problem with serialise-artifact: %s' - % error_text) + self.fail('Failed to compute build graph. Problem with ' + 'serialise-artifact: %s' % error_text) if event.msg['exit'] != 0: return @@ -324,7 +314,7 @@ class BuildController(distbuild.StateMachine): artifact = distbuild.deserialise_artifact(text) except ValueError, e: logging.error(traceback.format_exc()) - notify_failure(str(e)) + self.fail('Failed to compute build graph: %s' % e) return notify_success(artifact) @@ -367,13 +357,10 @@ class BuildController(distbuild.StateMachine): logging.debug('Got cache response: %s' % repr(event.msg)) http_status_code = event.msg['status'] - error_msg = event.msg['body'] if http_status_code != httplib.OK: - logging.debug('Cache request failed with status: %s' - % event.msg['status']) - self.mainloop.queue_event(self, - _AnnotationFailed(http_status_code, error_msg)) + self.fail('Failed to annotate build graph: http request got %d: %s' + % (http_status_code, event.msg['body'])) return cache_state = json.loads(event.msg['body']) @@ -586,14 +573,6 @@ class BuildController(distbuild.StateMachine): self._queue_worker_builds(None, event) - def _notify_annotation_failed(self, event_source, event): - errmsg = ('Failed to annotate build graph: http request got %d: %s' - % (event.http_status_code, event.error_msg)) - - logging.error(errmsg) - failed = BuildFailed(self._request['id'], errmsg) - self.mainloop.queue_event(BuildController, failed) - def _maybe_notify_build_failed(self, event_source, event): distbuild.crash_point() @@ -618,10 +597,7 @@ class BuildController(distbuild.StateMachine): self._request['id'], build_step_name(artifact)) self.mainloop.queue_event(BuildController, step_failed) - build_failed = BuildFailed( - self._request['id'], - 'Building failed for %s' % artifact.name) - self.mainloop.queue_event(BuildController, build_failed) + self.fail('Building failed for %s' % artifact.name) # Cancel any jobs waiting to be executed, since there is no point # running them if this build has failed, it would just waste -- cgit v1.2.1 From d0143c276f31bf9de26dfbe92d64693cdcbd2014 Mon Sep 17 00:00:00 2001 From: Sam Thursfield Date: Wed, 11 Feb 2015 12:13:41 +0000 Subject: distbuild: Fix case where 'computing build graph' would hang forever If there's no distbuild-helper process running on the controller then the controller would hang forever. This situation is unlikely, but it's important to give the user feedback instead of silently hanging forever. --- distbuild/build_controller.py | 3 +++ distbuild/mainloop.py | 5 ++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/distbuild/build_controller.py b/distbuild/build_controller.py index cfa0e2e2..8dbbaef9 100644 --- a/distbuild/build_controller.py +++ b/distbuild/build_controller.py @@ -246,6 +246,9 @@ class BuildController(distbuild.StateMachine): def _request_command_execution(self, argv, request_id): '''Tell the controller's distbuild-helper to run a command.''' + if self.mainloop.n_state_machines_of_type(distbuild.HelperRouter) == 0: + self.fail('No distbuild-helper process running on controller!') + msg = distbuild.message('exec-request', id=request_id, argv=argv, diff --git a/distbuild/mainloop.py b/distbuild/mainloop.py index f0e5eebc..8c46b2fb 100644 --- a/distbuild/mainloop.py +++ b/distbuild/mainloop.py @@ -56,7 +56,10 @@ class MainLoop(object): def remove_state_machine(self, machine): logging.debug('MainLoop.remove_state_machine: %s' % machine) self._machines.remove(machine) - + + def n_state_machines_of_type(self, machine_type): + return len([m for m in self._machines if isinstance(m, machine_type)]) + def add_event_source(self, event_source): logging.debug('MainLoop.add_event_source: %s' % event_source) self._sources.append(event_source) -- cgit v1.2.1 From 5bf5bb1d8f719ac6759bbe8affac7665e24acb1e Mon Sep 17 00:00:00 2001 From: Sam Thursfield Date: Wed, 11 Feb 2015 12:34:27 +0000 Subject: distbuild: Simplify error when computing build graph fails The previous error looked like this by the time it had reached the initiator's console: ERROR: Failed to build baserock:baserock/definitions c7292b7c81cdd7e5b9e85722406371748453c44f systems/base-system-x86_64-generic.morph.frodsham: Failed to compute build graph. Problem with serialise-artifact: ERROR: Couldn't find morphology: systems/base-system-x86_64-generic.morph.frodsham New message is at least a bit simpler: ERROR: Failed to build baserock:baserock/definitions c7292b7c81cdd7e5b9e85722406371748453c44f systems/base-system-x86_64-generic.morph.frodsham: ERROR: Couldn't find morphology: systems/base-system-x86_64-generic.morph.frodsham --- distbuild/build_controller.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/distbuild/build_controller.py b/distbuild/build_controller.py index 8dbbaef9..aeeda417 100644 --- a/distbuild/build_controller.py +++ b/distbuild/build_controller.py @@ -306,8 +306,7 @@ class BuildController(distbuild.StateMachine): error_text = self._artifact_error.peek() if event.msg['exit'] != 0 or error_text: - self.fail('Failed to compute build graph. Problem with ' - 'serialise-artifact: %s' % error_text) + self.fail(error_text) if event.msg['exit'] != 0: return -- cgit v1.2.1 From 1ce2492256af256e1076f0e88402e8e1d8a271f2 Mon Sep 17 00:00:00 2001 From: Sam Thursfield Date: Wed, 11 Feb 2015 12:41:27 +0000 Subject: distbuild: Give more detail when requests to cache-server fail Let the end-user see the URL that distbuild was attempting to talk to, so they can more easily spot configuration errors. It's kind of silly to say 'HTTP request failed' without saying where the request was going. --- distbuild/build_controller.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/distbuild/build_controller.py b/distbuild/build_controller.py index aeeda417..6752f400 100644 --- a/distbuild/build_controller.py +++ b/distbuild/build_controller.py @@ -361,8 +361,9 @@ class BuildController(distbuild.StateMachine): http_status_code = event.msg['status'] if http_status_code != httplib.OK: - self.fail('Failed to annotate build graph: http request got %d: %s' - % (http_status_code, event.msg['body'])) + self.fail('Failed to annotate build graph: HTTP request to %s got ' + '%d: %s' % (self._artifact_cache_server, + http_status_code, event.msg['body'])) return cache_state = json.loads(event.msg['body']) -- cgit v1.2.1 From af515e5ba55658175a9434d3154396d892b26591 Mon Sep 17 00:00:00 2001 From: Sam Thursfield Date: Wed, 11 Feb 2015 12:58:35 +0000 Subject: distbuild: Refuse to start controller if there are no workers listed This is another situation where builds could hang forever if the server is misconfigured. Longer term, workers should be able to come and go dynamically without needing to reconfigure and restart the controller process. --- morphlib/plugins/distbuild_plugin.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/morphlib/plugins/distbuild_plugin.py b/morphlib/plugins/distbuild_plugin.py index 66d86dcf..b7298bc0 100644 --- a/morphlib/plugins/distbuild_plugin.py +++ b/morphlib/plugins/distbuild_plugin.py @@ -238,6 +238,11 @@ class ControllerDaemon(cliapp.Plugin): distbuild.add_crash_conditions(self.app.settings['crash-condition']) + if not self.app.settings['worker']: + raise cliapp.AppException( + 'Distbuild controller has no workers configured. Refusing to ' + 'start.') + artifact_cache_server = ( self.app.settings['artifact-cache-server'] or self.app.settings['cache-server']) -- cgit v1.2.1 From 503517b75c8fc99306e4a852e876fdbca2b79d39 Mon Sep 17 00:00:00 2001 From: Sam Thursfield Date: Wed, 11 Feb 2015 12:43:26 +0000 Subject: Fix copyright years --- distbuild/build_controller.py | 2 +- distbuild/initiator.py | 2 +- distbuild/mainloop.py | 2 +- morphlib/plugins/distbuild_plugin.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/distbuild/build_controller.py b/distbuild/build_controller.py index 6752f400..aa11ae8f 100644 --- a/distbuild/build_controller.py +++ b/distbuild/build_controller.py @@ -1,6 +1,6 @@ # distbuild/build_controller.py -- control the steps for one build # -# Copyright (C) 2012, 2014 Codethink Limited +# Copyright (C) 2012, 2014-2015 Codethink Limited # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/distbuild/initiator.py b/distbuild/initiator.py index 54331072..ddea8cb3 100644 --- a/distbuild/initiator.py +++ b/distbuild/initiator.py @@ -1,6 +1,6 @@ # distbuild/initiator.py -- state machine for the initiator # -# Copyright (C) 2012, 2014 Codethink Limited +# Copyright (C) 2012, 2014-2015 Codethink Limited # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/distbuild/mainloop.py b/distbuild/mainloop.py index 8c46b2fb..97e439f3 100644 --- a/distbuild/mainloop.py +++ b/distbuild/mainloop.py @@ -1,6 +1,6 @@ # mainloop/mainloop.py -- select-based main loop # -# Copyright (C) 2012, 2014 Codethink Limited +# Copyright (C) 2012, 2014-2015 Codethink Limited # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/morphlib/plugins/distbuild_plugin.py b/morphlib/plugins/distbuild_plugin.py index b7298bc0..a7d69472 100644 --- a/morphlib/plugins/distbuild_plugin.py +++ b/morphlib/plugins/distbuild_plugin.py @@ -1,6 +1,6 @@ # distbuild_plugin.py -- Morph distributed build plugin # -# Copyright (C) 2014 Codethink Limited +# Copyright (C) 2014-2015 Codethink Limited # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by -- cgit v1.2.1