From 84096556ea54d4af236f1fe5f7ccf61c1343016f Mon Sep 17 00:00:00 2001 From: Lauren Perry Date: Thu, 2 Apr 2015 17:23:07 +0100 Subject: distbuild: Add distbuild start and cancel functionality Add command for distbuild-start to build_plugin in morphlib, and create a boolean parameter to inform the initiator whether to disconnect the controller and leave the build running remotely. Add distbuild-cancel command to parse currently-running distbuild build-request IDs and cancel the one matching the given argument Change-Id: I458a5767bb768ceb2b4d8876adf1c86075d452bd --- distbuild/__init__.py | 8 ++- distbuild/build_controller.py | 42 +++++++++++- distbuild/initiator.py | 120 +++++++++++++++++++++++++++++++++-- distbuild/initiator_connection.py | 48 ++++++++++++++ distbuild/protocol.py | 11 ++++ morphlib/buildcommand.py | 10 ++- morphlib/plugins/build_plugin.py | 31 ++++++++- morphlib/plugins/distbuild_plugin.py | 42 ++++++++++++ 8 files changed, 301 insertions(+), 11 deletions(-) diff --git a/distbuild/__init__.py b/distbuild/__init__.py index e6ceda1f..aaf9ae41 100644 --- a/distbuild/__init__.py +++ b/distbuild/__init__.py @@ -35,7 +35,8 @@ from proxy_event_source import ProxyEventSource from json_router import JsonRouter from helper_router import (HelperRouter, HelperRequest, HelperOutput, HelperResult) -from initiator_connection import (InitiatorConnection, InitiatorDisconnect) +from initiator_connection import (InitiatorConnection, InitiatorDisconnect, + CancelRequest) from connection_machine import (ConnectionMachine, InitiatorConnectionMachine, Reconnect, StopConnecting) from worker_build_scheduler import (WorkerBuildQueuer, @@ -52,9 +53,10 @@ from worker_build_scheduler import (WorkerBuildQueuer, from build_controller import (BuildController, BuildFailed, BuildProgress, BuildStepStarted, BuildStepAlreadyStarted, BuildOutput, BuildStepFinished, BuildStepFailed, - BuildFinished, BuildCancel, + BuildFinished, BuildCancel, BuildStarted, build_step_name, map_build_graph) -from initiator import (Initiator, InitiatorListJobs) +from initiator import (Initiator, InitiatorStart, InitiatorCancel, + InitiatorListJobs) from protocol import message from crashpoint import (crash_point, add_crash_condition, add_crash_conditions, diff --git a/distbuild/build_controller.py b/distbuild/build_controller.py index 35b231f9..d879ffea 100644 --- a/distbuild/build_controller.py +++ b/distbuild/build_controller.py @@ -43,6 +43,12 @@ class _GotGraph(object): self.artifact = artifact +class BuildStarted(object): + + def __init__(self, id): + self.id = id + + class BuildCancel(object): def __init__(self, id): @@ -77,12 +83,14 @@ class BuildStepStarted(object): self.step_name = step_name self.worker_name = worker_name + class BuildStepAlreadyStarted(BuildStepStarted): def __init__(self, request_id, step_name, worker_name): super(BuildStepAlreadyStarted, self).__init__( request_id, step_name, worker_name) + class BuildOutput(object): def __init__(self, request_id, step_name, stdout, stderr): @@ -174,6 +182,7 @@ class BuildController(distbuild.StateMachine): self._helper_id = None self.debug_transitions = False self.debug_graph_state = False + self.allow_detach = build_request_message['allow_detach'] def __repr__(self): return '' % (id(self), @@ -194,6 +203,9 @@ class BuildController(distbuild.StateMachine): ('init', distbuild.InitiatorConnection, distbuild.InitiatorDisconnect, 'init', self._maybe_notify_initiator_disconnected), + ('init', distbuild.InitiatorConnection, + distbuild.CancelRequest, 'init', + self._maybe_notify_build_cancelled), ('init', self, _Abort, None, None), ('graphing', distbuild.HelperRouter, distbuild.HelperOutput, @@ -203,6 +215,9 @@ class BuildController(distbuild.StateMachine): ('graphing', self, _GotGraph, 'annotating', self._start_annotating), ('graphing', self, BuildFailed, None, None), + ('graphing', distbuild.InitiatorConnection, + distbuild.CancelRequest, 'graphing', + self._maybe_notify_build_cancelled), ('graphing', distbuild.InitiatorConnection, distbuild.InitiatorDisconnect, 'graphing', self._maybe_notify_initiator_disconnected), @@ -213,6 +228,9 @@ class BuildController(distbuild.StateMachine): ('annotating', self, BuildFailed, None, None), ('annotating', self, _Annotated, 'building', self._queue_worker_builds), + ('annotating', distbuild.InitiatorConnection, + distbuild.CancelRequest, 'annotating', + self._maybe_notify_build_cancelled), ('annotating', distbuild.InitiatorConnection, distbuild.InitiatorDisconnect, 'annotating', self._maybe_notify_initiator_disconnected), @@ -249,6 +267,9 @@ class BuildController(distbuild.StateMachine): self._maybe_notify_build_failed), ('building', self, _Abort, None, None), ('building', self, _Built, None, self._notify_build_done), + ('building', distbuild.InitiatorConnection, + distbuild.CancelRequest, 'building', + self._maybe_notify_build_cancelled), ('building', distbuild.InitiatorConnection, distbuild.InitiatorDisconnect, 'building', self._maybe_notify_initiator_disconnected), @@ -448,6 +469,9 @@ class BuildController(distbuild.StateMachine): self.mainloop.queue_event(self, _Built()) return + self.mainloop.queue_event(BuildController, + BuildStarted(self._request['id'])) + logging.debug('Queuing more worker-builds to run') if self.debug_graph_state: logging.debug('Current state of build graph nodes:') @@ -496,8 +520,24 @@ class BuildController(distbuild.StateMachine): logging.debug("BuildController %r: initiator id %s disconnected", self, event.id) + if self.allow_detach: + logging.debug('Detaching from client; build continuing remotely.') + else: + self.mainloop.queue_event(BuildController, distbuild.CancelRequest) + + def _maybe_notify_build_cancelled(self, event_source, event): + if event.id != self._request['id']: + logging.debug('Heard initiator cancel request with event id %s ' + 'but our request id is %s', + event.id, self._request['id']) + return # not for us + + logging.debug("BuildController %r: initiator id %s cancelled", + self, event.id) + cancel_pending = distbuild.WorkerCancelPending(event.id) - self.mainloop.queue_event(distbuild.WorkerBuildQueuer, cancel_pending) + self.mainloop.queue_event(distbuild.WorkerBuildQueuer, + cancel_pending) cancel = BuildCancel(event.id) self.mainloop.queue_event(BuildController, cancel) diff --git a/distbuild/initiator.py b/distbuild/initiator.py index 332d54a1..40b56a9d 100644 --- a/distbuild/initiator.py +++ b/distbuild/initiator.py @@ -31,6 +31,11 @@ class _Finished(object): self.msg = msg +class _Cancelled(object): + + pass + + class _Failed(object): def __init__(self, msg): @@ -69,6 +74,7 @@ class Initiator(distbuild.StateMachine): self._partial = True self._step_outputs = {} self.debug_transitions = False + self.allow_detach = False # The build-log output dir is set up in _open_output() when we # receive the first log message. Thus if we never get that far, we @@ -88,6 +94,7 @@ class Initiator(distbuild.StateMachine): ('waiting', self._jm, distbuild.JsonNewMessage, 'waiting', self._handle_json_message), ('waiting', self, _Finished, None, self._succeed), + ('waiting', self, _Cancelled, None, self._cancel), ('waiting', self, _Failed, None, self._fail), ] self.add_transitions(spec) @@ -107,7 +114,8 @@ class Initiator(distbuild.StateMachine): original_ref=self._original_ref, component_names=self._component_names, partial=self._partial, - protocol_version=distbuild.protocol.VERSION + protocol_version=distbuild.protocol.VERSION, + allow_detach=self.allow_detach, ) self._jm.send(msg) logging.debug('Initiator: sent to controller: %s', repr(msg)) @@ -118,8 +126,10 @@ class Initiator(distbuild.StateMachine): logging.debug('Initiator: from controller: %s' % repr(event.msg)) handlers = { + 'build-started': lambda msg: None, 'build-finished': self._handle_build_finished_message, 'build-failed': self._handle_build_failed_message, + 'build-cancelled': self._handle_build_cancelled_message, 'build-progress': self._handle_build_progress_message, 'step-started': self._handle_step_started_message, 'step-already-started': self._handle_step_already_started_message, @@ -127,13 +137,17 @@ class Initiator(distbuild.StateMachine): 'step-finished': self._handle_step_finished_message, 'step-failed': self._handle_step_failed_message, } - + handler = handlers[event.msg['type']] handler(event.msg) def _handle_build_finished_message(self, msg): self.mainloop.queue_event(self, _Finished(msg)) + # TODO: def _handle_build_cancelled_message(self, who): + def _handle_build_cancelled_message(self, msg): + self.mainloop.queue_event(self, _Cancelled()) + def _handle_build_failed_message(self, msg): self.mainloop.queue_event(self, _Failed(msg)) @@ -224,7 +238,7 @@ class Initiator(distbuild.StateMachine): self.mainloop.queue_event(self._cm, distbuild.StopConnecting()) self._jm.close() logging.info('Build finished OK') - + urls = event.msg['urls'] if urls: for url in urls: @@ -233,6 +247,12 @@ class Initiator(distbuild.StateMachine): self._app.status( msg='Controller did not give us any artifact URLs.') + def _cancel(self, event_source, event): + self.mainloop.queue_event(self._cm, distbuild.StopConnecting()) + self._jm.close() + + self._app.status(msg='Build was cancelled') + def _fail(self, event_source, event): self.mainloop.queue_event(self._cm, distbuild.StopConnecting()) self._jm.close() @@ -257,6 +277,98 @@ class Initiator(distbuild.StateMachine): self._step_outputs = {} +class InitiatorStart(Initiator): + + def __init__(self, cm, conn, app, repo_name, ref, morphology, + original_ref, component_names): + super(InitiatorStart, self).__init__(cm, conn, app, repo_name, ref, + morphology, original_ref, + component_names) + self._step_outputs = {} + self.debug_transitions = False + self.allow_detach = True + + def _handle_json_message(self, event_source, event): + distbuild.crash_point() + + logging.debug('Initiator: from controller: %s' % repr(event.msg)) + + handlers = { + 'build-started': self._handle_build_started_message, + 'build-finished': self._handle_build_finished_message, + 'build-failed': self._handle_build_failed_message, + 'build-cancelled': self._handle_build_cancelled_message, + 'build-progress': self._handle_build_progress_message, + } + + msg_type = event.msg['type'] + + if msg_type in handlers: + handler = handlers[msg_type] + handler(event.msg) + + def _handle_build_started_message(self, msg): + self._app.status(msg='Detaching distbuild from controller (build' + ' will continue on the distbuild network): ' + 'build request ID: %s' % msg['id']) + + self.mainloop.queue_event(self._cm, distbuild.StopConnecting()) + self._jm.close() + +class InitiatorCancel(distbuild.StateMachine): + + def __init__(self, cm, conn, app, job_id): + distbuild.StateMachine.__init__(self, 'waiting') + self._cm = cm + self._conn = conn + self._app = app + self._job_id = job_id + + def setup(self): + distbuild.crash_point() + + self._jm = distbuild.JsonMachine(self._conn) + self.mainloop.add_state_machine(self._jm) + logging.debug('initiator: _jm=%s' % repr(self._jm)) + + spec = [ + # state, source, event_class, new_state, callback + ('waiting', self._jm, distbuild.JsonEof, None, self._terminate), + ('waiting', self._jm, distbuild.JsonNewMessage, None, + self._handle_json_message), + ] + self.add_transitions(spec) + + self._app.status(msg='Sending cancel request for distbuild job.') + msg = distbuild.message('build-cancel', + id=self._job_id, + protocol_version=distbuild.protocol.VERSION, + ) + self._jm.send(msg) + logging.debug('Initiator: sent to controller: %s', repr(msg)) + + def _handle_json_message(self, event_source, event): + distbuild.crash_point() + + logging.debug('Initiator: from controller: %s', str(event.msg)) + + handlers = { + 'request-output': self._handle_request_output, + } + + handler = handlers[event.msg['type']] + handler(event.msg) + + def _handle_request_output(self, msg): + self._app.status(msg=str(msg['message'])) + self.mainloop.queue_event(self._cm, distbuild.StopConnecting()) + self._jm.close() + + def _terminate(self, event_source, event): + self.mainloop.queue_event(self._cm, distbuild.StopConnecting()) + self._jm.close() + + class InitiatorListJobs(distbuild.StateMachine): def __init__(self, cm, conn, app): @@ -285,7 +397,7 @@ class InitiatorListJobs(distbuild.StateMachine): self._app.status(msg='Requesting currently running distbuilds.') msg = distbuild.message('list-requests', id=msg_uuid, - protocol_version=distbuild.protocol.VERSION + protocol_version=distbuild.protocol.VERSION, ) self._jm.send(msg) logging.debug('Initiator: sent to controller: %s', repr(msg)) diff --git a/distbuild/initiator_connection.py b/distbuild/initiator_connection.py index fdb1dab5..718686dc 100644 --- a/distbuild/initiator_connection.py +++ b/distbuild/initiator_connection.py @@ -26,6 +26,12 @@ class InitiatorDisconnect(object): self.id = id +class CancelRequest(object): + + def __init__(self, id): + self.id = id + + class _Close(object): def __init__(self, event_source): @@ -76,8 +82,12 @@ class InitiatorConnection(distbuild.StateMachine): 'idle', self._send_build_failed_message), ('idle', distbuild.BuildController, distbuild.BuildProgress, 'idle', self._send_build_progress_message), + ('idle', distbuild.BuildController, distbuild.BuildCancel, + 'idle', self._send_build_cancelled_message), ('idle', distbuild.BuildController, distbuild.BuildStepStarted, 'idle', self._send_build_step_started_message), + ('idle', distbuild.BuildController, distbuild.BuildStarted, + 'idle', self._send_build_started_message), ('idle', distbuild.BuildController, distbuild.BuildStepAlreadyStarted, 'idle', self._send_build_step_already_started_message), @@ -117,6 +127,8 @@ class InitiatorConnection(distbuild.StateMachine): self._handle_build_request(event) elif event.msg['type'] == 'list-requests': self._handle_list_requests(event) + elif event.msg['type'] == 'build-cancel': + self._handle_build_cancel(event) else: logging.error('Invalid message type: %s', event.msg) except (KeyError, ValueError) as ex: @@ -151,6 +163,24 @@ class InitiatorConnection(distbuild.StateMachine): message=('\n\n'.join(output_msg))) self.jm.send(msg) + def _handle_build_cancel(self, event): + requests = self.mainloop.state_machines_of_type( + distbuild.BuildController) + for build in requests: + if build.get_request()['id'] == event.msg['id']: + self.mainloop.queue_event(InitiatorConnection, + CancelRequest(event.msg['id'])) + msg = distbuild.message('request-output', message=( + 'Cancelling build request with ID %s' % + event.msg['id'])) + self.jm.send(msg) + break + else: + msg = distbuild.message('request-output', message=('Given ' + 'build-request ID does not match any ' + 'running build IDs.')) + self.jm.send(msg) + def _disconnect(self, event_source, event): for id in self.our_ids: logging.debug('InitiatorConnection: %s: InitiatorDisconnect(%s)', @@ -187,6 +217,16 @@ class InitiatorConnection(distbuild.StateMachine): self.jm.send(msg) self._log_send(msg) + def _send_build_cancelled_message(self, event_source, event): + if event.id in self.our_ids: + msg = distbuild.message('build-cancelled', + id=self._route_map.get_incoming_id(event.id)) + + self._route_map.remove(event.id) + self.our_ids.remove(event.id) + self.jm.send(msg) + self._log_send(msg) + def _send_build_failed_message(self, event_source, event): if event.id in self.our_ids: msg = distbuild.message('build-failed', @@ -205,6 +245,14 @@ class InitiatorConnection(distbuild.StateMachine): self.jm.send(msg) self._log_send(msg) + def _send_build_started_message(self, event_source, event): + logging.debug('InitiatorConnection: build_started: id=%s' % event.id) + + if event.id in self.our_ids: + msg = distbuild.message('build-started', id=event.id) + self.jm.send(msg) + self._log_send(msg) + def _send_build_step_started_message(self, event_source, event): logging.debug('InitiatorConnection: build_step_started: ' 'id=%s step_name=%s worker_name=%s' % diff --git a/distbuild/protocol.py b/distbuild/protocol.py index 2fbfa085..8f533e75 100644 --- a/distbuild/protocol.py +++ b/distbuild/protocol.py @@ -33,6 +33,7 @@ _required_fields = { 'morphology', 'partial', 'protocol_version', + 'allow_detach', ], 'build-progress': [ 'id', @@ -43,6 +44,9 @@ _required_fields = { 'step_name', 'worker_name', ], + 'build-started': [ + 'id', + ], 'step-already-started': [ 'id', 'step_name', @@ -70,6 +74,9 @@ _required_fields = { 'id', 'reason', ], + 'build-cancelled': [ + 'id', + ], 'exec-request': [ 'id', 'argv', @@ -92,6 +99,10 @@ _required_fields = { 'request-output': [ 'message', ], + 'build-cancel': [ + 'id', + 'protocol_version', + ], } diff --git a/morphlib/buildcommand.py b/morphlib/buildcommand.py index 37dccf82..efd10f26 100644 --- a/morphlib/buildcommand.py +++ b/morphlib/buildcommand.py @@ -525,10 +525,11 @@ class InitiatorBuildCommand(BuildCommand): RECONNECT_INTERVAL = 30 # seconds MAX_RETRIES = 1 - def __init__(self, app, addr, port): + def __init__(self, app, addr, port, allow_detach): self.app = app self.addr = addr self.port = port + self.allow_detach = allow_detach self.app.settings['push-build-branches'] = True super(InitiatorBuildCommand, self).__init__(app) @@ -546,10 +547,15 @@ class InitiatorBuildCommand(BuildCommand): loop = distbuild.MainLoop() args = [repo_name, ref, filename, original_ref or ref, component_names] + if self.allow_detach: + initiator_type = distbuild.InitiatorStart + else: + initiator_type = distbuild.Initiator + cm = distbuild.InitiatorConnectionMachine(self.app, self.addr, self.port, - distbuild.Initiator, + initiator_type, [self.app] + args, self.RECONNECT_INTERVAL, self.MAX_RETRIES) diff --git a/morphlib/plugins/build_plugin.py b/morphlib/plugins/build_plugin.py index 12d69545..8da66358 100644 --- a/morphlib/plugins/build_plugin.py +++ b/morphlib/plugins/build_plugin.py @@ -46,10 +46,14 @@ class BuildPlugin(cliapp.Plugin): '[COMPONENT...]') self.app.add_subcommand('distbuild', self.distbuild, arg_synopsis='SYSTEM [COMPONENT...]') + self.app.add_subcommand('distbuild-start', self.distbuild_start, + arg_synopsis='SYSTEM [COMPONENT...]') self.use_distbuild = False + self.allow_detach = False def disable(self): self.use_distbuild = False + self.allow_detach = False def distbuild_morphology(self, args): '''Distbuild a system, outside of a system branch. @@ -97,6 +101,12 @@ class BuildPlugin(cliapp.Plugin): your system, the system artifact will be copied from your trove and cached locally. + Log information can be found in the current working directory, in + directories called build-xx. + + If you do not have a persistent connection to the server on which + the distbuild runs, consider using `morph distbuild-start` instead. + Example: morph distbuild devel-system-x86_64-generic.morph @@ -106,6 +116,25 @@ class BuildPlugin(cliapp.Plugin): self.use_distbuild = True self.build(args) + def distbuild_start(self, args): + '''Distbuild a system image without a lasting client-server connection. + + This command launches a distributed build, and disconnects from the + distbuild cluster once the build starts, leaving the build running + remotely. + + The command will return a build-ID which can be used to cancel the + distbuild via `morph distbuild-cancel`. Builds started in this manner + can be found via `morph distbuild-list-jobs` + + See `morph help distbuild` for more information and example usage. + + ''' + + self.use_distbuild = True + self.allow_detach = True + self.build(args) + def build_morphology(self, args): '''Build a system, outside of a system branch. @@ -211,7 +240,7 @@ class BuildPlugin(cliapp.Plugin): port = self.app.settings['controller-initiator-port'] build_command = morphlib.buildcommand.InitiatorBuildCommand( - self.app, addr, port) + self.app, addr, port, self.allow_detach) else: build_command = morphlib.buildcommand.BuildCommand(self.app) diff --git a/morphlib/plugins/distbuild_plugin.py b/morphlib/plugins/distbuild_plugin.py index 09669988..68a80784 100644 --- a/morphlib/plugins/distbuild_plugin.py +++ b/morphlib/plugins/distbuild_plugin.py @@ -40,6 +40,48 @@ class DistbuildOptionsPlugin(cliapp.Plugin): pass +class DistbuildCancel(cliapp.Plugin): + + RECONNECT_INTERVAL = 30 # seconds + MAX_RETRIES = 1 + + def enable(self): + self.app.add_subcommand('distbuild-cancel', self.distbuild_cancel, + arg_synopsis='ID') + + def disable(self): + pass + + def distbuild_cancel(self, args): + '''Cancels a currently-running distbuild + + Command line arguments: + + `ID` of the running process that you wish to cancel + (this can be found via distbuild-list-jobs) + + Example: + + * morph distbuild-cancel InitiatorConnection-1 + + ''' + + if len(args) != 1: + raise cliapp.AppException( + 'usage: morph distbuild-cancel ') + + addr = self.app.settings['controller-initiator-address'] + port = self.app.settings['controller-initiator-port'] + icm = distbuild.InitiatorConnectionMachine(self.app, addr, port, + distbuild.InitiatorCancel, + [self.app] + args, + self.RECONNECT_INTERVAL, + self.MAX_RETRIES) + loop = distbuild.MainLoop() + loop.add_state_machine(icm) + loop.run() + + class DistbuildListJobsPlugin(cliapp.Plugin): RECONNECT_INTERVAL = 30 # seconds -- cgit v1.2.1