From 594f59a8146fe45eba9e30dac7961f8aaaaf1997 Mon Sep 17 00:00:00 2001 From: Sam Thursfield Date: Thu, 19 Jun 2014 15:33:11 +0100 Subject: distbuild: Log a message when a socket error is received I found an issue in distbuild where the controller was stuck in a busy loop where it was continually writing to a closed socket. With 'strace' I saw write(), SIGPIPE, write(), SIGPIPE, ad infinitum. I got this much of a Python backtrace using GDB: distbuild.socketsrc.SocketEventSource.write() distbuild.sockbuf.SocketBuffer._flush() distbuild.sm.StateMachine.handle_event() I didn't manage to get further. However, I suspect one of the state machine transitions may be creating an event loop instead of correctly handling the error. The log file was quiet at this point, the last entries were: 2014-06-19 08:57:36 INFO There seems to be nothing to build 2014-06-19 08:57:36 INFO Requested artifact is built 2014-06-19 08:57:36 DEBUG InitiatorConnection: sent to 10.24.1.215:53818: {'mess age': 'Need to build 0 artifacts', 'type': 'build-progress', 'id': 790629564} 2014-06-19 08:57:36 DEBUG Notifying initiator of successful build 2014-06-19 08:57:36 DEBUG MainLoop.remove_state_machine: 2014-06-19 08:57:36 DEBUG InitiatorConnection: sent to 10.24.1.215:53818: {'type ': 'build-finished', 'id': 790629564, 'urls': [u'http://hawkdevtrove:8080/1.0/ar tifacts?filename=861f640923494ca3626bbd65655b350ce1bebea4c0bf7a57693bc06ed122cef 4.system.devel-system-x86_32-chroot-rootfs']} 2014-06-19 08:57:36 DEBUG InitiatorConnection: 10.24.1.215:53818: closing: 10.24.1.215:53818, max_buffer 1 6384> 2014-06-19 08:57:36 DEBUG MainLoop.remove_state_machine: 2014-06-19 08:57:36 DEBUG MainLoop.remove_state_machine: 10.24.1.215:53818, max_buffer 16384> 2014-06-19 08:57:36 DEBUG MainLoop.remove_state_machine: This commit should improve matters a little: in future the log file will show the ID of the SocketEventSource object and error we hit when calling its write() function. --- distbuild/sockbuf.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/distbuild/sockbuf.py b/distbuild/sockbuf.py index 346706db..c2cc8e2a 100644 --- a/distbuild/sockbuf.py +++ b/distbuild/sockbuf.py @@ -133,6 +133,8 @@ class SocketBuffer(StateMachine): try: data = event.sock.read(self._max_buffer) except (IOError, OSError), e: + logging.debug( + '%s: _fill(): Exception %s from sock.read()', self, e) return [SocketError(event.sock, e)] if data: @@ -151,6 +153,8 @@ class SocketBuffer(StateMachine): try: n = event.sock.write(data) except (IOError, OSError), e: + logging.debug( + '%s: _flush(): Exception %s from sock.write()', self, e) return [SocketError(event.sock, e)] self._wbuf.remove(n) if len(self._wbuf) == 0: -- cgit v1.2.1