summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSam Thursfield <sam.thursfield@codethink.co.uk>2015-01-14 18:19:22 +0200
committerSam Thursfield <sam.thursfield@codethink.co.uk>2015-01-14 16:51:11 +0000
commit7db5c7b44b195a0967d534532c04478d8990bbe8 (patch)
tree9e36c8d522070b387e0f72977e778f73feb9903c
parentf33748d6e6795751e7ea628d5f4e8478353a88ee (diff)
downloadmorph-sam/distbuild-handle-socket-error.tar.gz
distbuild: Handle errors from socketsam/distbuild-handle-socket-error
We found a distbuild controller stuck in a busy loop, with the logs full of the same error message repeated: ... _flush(): Exception 'IOError: [Errno 32] Broken pipe' from sock.write() We suspect this came about because the initiator disconnected without sending an EOF. The initiator was in a VM on a laptop so it seems possible that the host OS turned off the wireless adaptor without giving the VM a chance to close its connections gracefully. The busy loop is because nothing in the SocketBuffer class handles the SocketError events queued by the _flush() method. Unhandled events are ignored. So the SocketBuffer stays in 'w' state without ever shifting any data and never returns. Adding transitions to handle the SocketError event will fix the problem. If a socket error happens now in the same scenario, it will be handled as if the initiator disconnected.
-rw-r--r--distbuild/initiator_connection.py4
-rw-r--r--distbuild/jm.py21
-rw-r--r--distbuild/sockbuf.py7
3 files changed, 27 insertions, 5 deletions
diff --git a/distbuild/initiator_connection.py b/distbuild/initiator_connection.py
index db982230..e038a1fd 100644
--- a/distbuild/initiator_connection.py
+++ b/distbuild/initiator_connection.py
@@ -1,6 +1,6 @@
# distbuild/initiator_connection.py -- communicate with initiator
#
-# Copyright (C) 2012, 2014 Codethink Limited
+# Copyright (C) 2012, 2014-2015 Codethink Limited
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -70,6 +70,8 @@ class InitiatorConnection(distbuild.StateMachine):
# state, source, event_class, new_state, callback
('idle', self.jm, distbuild.JsonNewMessage, 'idle',
self._handle_msg),
+ ('idle', self.jm, distbuild.JsonError, 'closing',
+ self._disconnect),
('idle', self.jm, distbuild.JsonEof, 'closing', self._disconnect),
('idle', distbuild.BuildController, distbuild.BuildFinished,
'idle', self._send_build_finished_message),
diff --git a/distbuild/jm.py b/distbuild/jm.py
index 615100e4..0d0bc09d 100644
--- a/distbuild/jm.py
+++ b/distbuild/jm.py
@@ -1,6 +1,6 @@
# mainloop/jm.py -- state machine for JSON communication between nodes
#
-# Copyright (C) 2012, 2014 Codethink Limited
+# Copyright (C) 2012, 2014-2015 Codethink Limited
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -39,8 +39,17 @@ class JsonNewMessage(object):
class JsonEof(object):
pass
-
-
+
+
+class JsonError(object):
+
+ '''An error has occured with a socket used for communication.'''
+
+ def __init__(self, sock, exception):
+ self.sock = sock
+ self.exception = exception
+
+
class _Close2(object):
pass
@@ -72,9 +81,12 @@ class JsonMachine(StateMachine):
# state, source, event_class, new_state, callback
('rw', sockbuf, SocketBufferNewData, 'rw', self._parse),
('rw', sockbuf, SocketBufferEof, 'w', self._send_eof),
+ ('rw', sockbuf, SocketError, 'error', self._send_error),
('rw', self, _Close2, None, self._really_close),
('w', self, _Close2, None, self._really_close),
+
+ ('error', self, _Close2, None, self._really_close)
]
self.add_transitions(spec)
@@ -115,6 +127,9 @@ class JsonMachine(StateMachine):
def _send_eof(self, event_source, event):
self.mainloop.queue_event(self, JsonEof())
+ def _send_error(self, event_source, event):
+ self.mainloop.queue_event(self, JsonError(event.sock, event.exception))
+
def _really_close(self, event_source, event):
self.sockbuf.close()
self._send_eof(event_source, event)
diff --git a/distbuild/sockbuf.py b/distbuild/sockbuf.py
index fc0315b0..3f0e36f2 100644
--- a/distbuild/sockbuf.py
+++ b/distbuild/sockbuf.py
@@ -1,6 +1,6 @@
# mainloop/sockbuf.py -- a buffering, non-blocking socket I/O state machine
#
-# Copyright (C) 2012, 2014 Codethink Limited
+# Copyright (C) 2012, 2014-2015 Codethink Limited
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -106,22 +106,27 @@ class SocketBuffer(StateMachine):
('reading', self, _WriteBufferNotEmpty, 'rw',
self._start_writing),
('reading', self, SocketBufferEof, 'idle', None),
+ ('reading', self, SocketError, None, self._really_close),
('reading', self, _Close, None, self._really_close),
('rw', src, SocketReadable, 'rw', self._fill),
('rw', src, SocketWriteable, 'rw', self._flush),
('rw', self, _WriteBufferIsEmpty, 'reading', self._stop_writing),
('rw', self, SocketBufferEof, 'w', None),
+ ('rw', self, SocketError, None, self._really_close),
('rw', self, _Close, 'wc', None),
('idle', self, _WriteBufferNotEmpty, 'w', self._start_writing),
+ ('idle', self, SocketError, None, self._really_close),
('idle', self, _Close, None, self._really_close),
('w', src, SocketWriteable, 'w', self._flush),
('w', self, _WriteBufferIsEmpty, 'idle', self._stop_writing),
+ ('w', self, SocketError, None, self._really_close),
('wc', src, SocketWriteable, 'wc', self._flush),
('wc', self, _WriteBufferIsEmpty, None, self._really_close),
+ ('wc', self, SocketError, None, self._really_close),
]
self.add_transitions(spec)