summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBen Gamari <bgamari.foss@gmail.com>2017-01-10 13:38:50 -0500
committerBen Gamari <ben@smart-cactus.org>2017-01-10 13:39:04 -0500
commit6de7613604216f65fae92d8066a078bf9cd3c088 (patch)
treee60374d193ecf0565294fbe903f47e68ac874db2
parentfe8bc14fdaf1596cef008148cc9ff40cbce5e994 (diff)
downloadhaskell-6de7613604216f65fae92d8066a078bf9cd3c088.tar.gz
event manager: Don't worry if attempt to wake dead manager fails
This fixes #12038, where the TimerManager would attempt to wake up a manager that was already dead, resulting in setnumcapabilities001 occassionally failing during shutdown with unexpected output on stderr. I'm frankly still not entirely confident in this solution but perhaps it will help to get a few more eyes on this. My hypothesis is that the TimerManager is racing: thread TimerManager worker ------- -------------------- requests that thread manager shuts down begins to clean up, closing eventfd calls wakeManager, which tries to write to closed eventfd To prevent this `wakeManager` will need to synchronize with the TimerManger worker to ensure that the worker doesn't clean up the `Control` while another thread is trying to send a wakeup. However, this would add a bit of overhead on every timer interaction, which feels rather costly for what is really a problem only at shutdown. Moreover, it seems that the event manager (e.g. `GHC.Event.Manager`) is also afflicted by a similar race. This patch instead simply tries to catch the write failure after it has happened and silence it in the case that the fd has vanished. It feels rather hacky but it seems to work. Test Plan: Run `setnumcapabilities001` repeatedly Reviewers: austin, hvr, simonmar Reviewed By: simonmar Subscribers: thomie Differential Revision: https://phabricator.haskell.org/D2926 GHC Trac Issues: #12038
-rw-r--r--libraries/base/GHC/Event/Control.hs28
1 files changed, 23 insertions, 5 deletions
diff --git a/libraries/base/GHC/Event/Control.hs b/libraries/base/GHC/Event/Control.hs
index 0b0f5587a7..83950c24a6 100644
--- a/libraries/base/GHC/Event/Control.hs
+++ b/libraries/base/GHC/Event/Control.hs
@@ -30,11 +30,12 @@ module GHC.Event.Control
import Foreign.ForeignPtr (ForeignPtr)
import GHC.Base
+import GHC.IORef
import GHC.Conc.Signal (Signal)
import GHC.Real (fromIntegral)
import GHC.Show (Show)
import GHC.Word (Word8)
-import Foreign.C.Error (throwErrnoIfMinus1_)
+import Foreign.C.Error (throwErrnoIfMinus1_, throwErrno, getErrno, eBADF)
import Foreign.C.Types (CInt(..), CSize(..))
import Foreign.ForeignPtr (mallocForeignPtrBytes, withForeignPtr)
import Foreign.Marshal (alloca, allocaBytes)
@@ -69,7 +70,9 @@ data Control = W {
, wakeupWriteFd :: {-# UNPACK #-} !Fd
#endif
, didRegisterWakeupFd :: !Bool
- } deriving (Show)
+ -- | Have this Control's fds been cleaned up?
+ , controlIsDead :: !(IORef Bool)
+ }
#if defined(HAVE_EVENTFD)
wakeupReadFd :: Control -> Fd
@@ -101,6 +104,7 @@ newControl shouldRegister = allocaArray 2 $ \fds -> do
(wake_rd, wake_wr) <- createPipe
when shouldRegister $ c_setIOManagerWakeupFd wake_wr
#endif
+ isDead <- newIORef False
return W { controlReadFd = fromIntegral ctrl_rd
, controlWriteFd = fromIntegral ctrl_wr
#if defined(HAVE_EVENTFD)
@@ -110,6 +114,7 @@ newControl shouldRegister = allocaArray 2 $ \fds -> do
, wakeupWriteFd = fromIntegral wake_wr
#endif
, didRegisterWakeupFd = shouldRegister
+ , controlIsDead = isDead
}
-- | Close the control structure used by the IO manager thread.
@@ -119,6 +124,7 @@ newControl shouldRegister = allocaArray 2 $ \fds -> do
-- file after it has been closed.
closeControl :: Control -> IO ()
closeControl w = do
+ atomicModifyIORef (controlIsDead w) (\_ -> (True, ()))
_ <- c_close . fromIntegral . controlReadFd $ w
_ <- c_close . fromIntegral . controlWriteFd $ w
when (didRegisterWakeupFd w) $ c_setIOManagerWakeupFd (-1)
@@ -172,9 +178,21 @@ readControlMessage ctrl fd
sendWakeup :: Control -> IO ()
#if defined(HAVE_EVENTFD)
-sendWakeup c =
- throwErrnoIfMinus1_ "sendWakeup" $
- c_eventfd_write (fromIntegral (controlEventFd c)) 1
+sendWakeup c = do
+ n <- c_eventfd_write (fromIntegral (controlEventFd c)) 1
+ case n of
+ 0 -> return ()
+ _ -> do errno <- getErrno
+ -- Check that Control is still alive if we failed, since it's
+ -- possible that someone cleaned up the fds behind our backs and
+ -- consequently eventfd_write failed with EBADF. If it is dead
+ -- then just swallow the error since we are shutting down
+ -- anyways. Otherwise we will see failures during shutdown from
+ -- setnumcapabilities001 (#12038)
+ isDead <- readIORef (controlIsDead c)
+ if isDead && errno == eBADF
+ then return ()
+ else throwErrno "sendWakeup"
#else
sendWakeup c = do
n <- sendMessage (wakeupWriteFd c) CMsgWakeup