34 files changed, 1290 insertions, 124 deletions
diff --git a/compiler/GHC/StgToCmm/Bind.hs b/compiler/GHC/StgToCmm/Bind.hs index 44316cacb0..9e192a0ac8 100644 --- a/compiler/GHC/StgToCmm/Bind.hs +++ b/compiler/GHC/StgToCmm/Bind.hs @@ -631,6 +631,7 @@ emitBlackHoleCode node = do -- work with profiling. when eager_blackholing $ do + whenUpdRemSetEnabled dflags $ emitUpdRemSetPushThunk node emitStore (cmmOffsetW dflags node (fixedHdrSizeW dflags)) currentTSOExpr -- See Note [Heap memory barriers] in SMP.h. emitPrimCall [] MO_WriteBarrier [] diff --git a/compiler/GHC/StgToCmm/Prim.hs b/compiler/GHC/StgToCmm/Prim.hs index cdbc8d9fd9..155cdcbf80 100644 --- a/compiler/GHC/StgToCmm/Prim.hs +++ b/compiler/GHC/StgToCmm/Prim.hs @@ -42,6 +42,7 @@ import BlockId import MkGraph import StgSyn import Cmm +import Module ( rtsUnitId ) import Type ( Type, tyConAppTyCon ) import TyCon import CLabel @@ -339,14 +340,20 @@ dispatchPrimop dflags = \case emitAssign (CmmLocal res) (cmmLoadIndexW dflags mutv (fixedHdrSizeW dflags) (gcWord dflags)) WriteMutVarOp -> \[mutv, var] -> OpDest_AllDone $ \res@[] -> do + old_val <- CmmLocal <$> newTemp (cmmExprType dflags var) + emitAssign old_val (cmmLoadIndexW dflags mutv (fixedHdrSizeW dflags) (gcWord dflags)) + -- Without this write barrier, other CPUs may see this pointer before -- the writes for the closure it points to have occurred. + -- Note that this also must come after we read the old value to ensure + -- that the read of old_val comes before another core's write to the + -- MutVar's value. emitPrimCall res MO_WriteBarrier [] emitStore (cmmOffsetW dflags mutv (fixedHdrSizeW dflags)) var emitCCall [{-no results-}] (CmmLit (CmmLabel mkDirty_MUT_VAR_Label)) - [(baseExpr, AddrHint), (mutv,AddrHint)] + [(baseExpr, AddrHint), (mutv, AddrHint), (CmmReg old_val, AddrHint)] -- #define sizzeofByteArrayzh(r,a) \ -- r = ((StgArrBytes *)(a))->bytes @@ -1983,17 +1990,21 @@ doWritePtrArrayOp :: CmmExpr doWritePtrArrayOp addr idx val = do dflags <- getDynFlags let ty = cmmExprType dflags val + hdr_size = arrPtrsHdrSize dflags + -- Update remembered set for non-moving collector + whenUpdRemSetEnabled dflags + $ emitUpdRemSetPush (cmmLoadIndexOffExpr dflags hdr_size ty addr ty idx) -- This write barrier is to ensure that the heap writes to the object -- referred to by val have happened before we write val into the array. -- See #12469 for details. emitPrimCall [] MO_WriteBarrier [] - mkBasicIndexedWrite (arrPtrsHdrSize dflags) Nothing addr ty idx val + mkBasicIndexedWrite hdr_size Nothing addr ty idx val emit (setInfo addr (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel))) - -- the write barrier. We must write a byte into the mark table: - -- bits8[a + header_size + StgMutArrPtrs_size(a) + x >> N] + -- the write barrier. We must write a byte into the mark table: + -- bits8[a + header_size + StgMutArrPtrs_size(a) + x >> N] emit $ mkStore ( cmmOffsetExpr dflags - (cmmOffsetExprW dflags (cmmOffsetB dflags addr (arrPtrsHdrSize dflags)) + (cmmOffsetExprW dflags (cmmOffsetB dflags addr hdr_size) (loadArrPtrsSize dflags addr)) (CmmMachOp (mo_wordUShr dflags) [idx, mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags)]) @@ -2584,6 +2595,9 @@ emitCopyArray copy src0 src_off dst0 dst_off0 n = dst <- assignTempE dst0 dst_off <- assignTempE dst_off0 + -- Nonmoving collector write barrier + emitCopyUpdRemSetPush dflags (arrPtrsHdrSizeW dflags) dst dst_off n + -- Set the dirty bit in the header. 
emit (setInfo dst (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel))) @@ -2646,6 +2660,9 @@ emitCopySmallArray copy src0 src_off dst0 dst_off n = src <- assignTempE src0 dst <- assignTempE dst0 + -- Nonmoving collector write barrier + emitCopyUpdRemSetPush dflags (smallArrPtrsHdrSizeW dflags) dst dst_off n + -- Set the dirty bit in the header. emit (setInfo dst (CmmLit (CmmLabel mkSMAP_DIRTY_infoLabel))) @@ -2774,6 +2791,12 @@ doWriteSmallPtrArrayOp :: CmmExpr doWriteSmallPtrArrayOp addr idx val = do dflags <- getDynFlags let ty = cmmExprType dflags val + + -- Update remembered set for non-moving collector + tmp <- newTemp ty + mkBasicIndexedRead (smallArrPtrsHdrSize dflags) Nothing ty tmp addr ty idx + whenUpdRemSetEnabled dflags $ emitUpdRemSetPush (CmmReg (CmmLocal tmp)) + emitPrimCall [] MO_WriteBarrier [] -- #12469 mkBasicIndexedWrite (smallArrPtrsHdrSize dflags) Nothing addr ty idx val emit (setInfo addr (CmmLit (CmmLabel mkSMAP_DIRTY_infoLabel))) @@ -2953,3 +2976,31 @@ emitCtzCall res x width = do [ res ] (MO_Ctz width) [ x ] + +--------------------------------------------------------------------------- +-- Pushing to the update remembered set +--------------------------------------------------------------------------- + +-- | Push a range of pointer-array elements that are about to be copied over to +-- the update remembered set. +emitCopyUpdRemSetPush :: DynFlags + -> WordOff -- ^ array header size + -> CmmExpr -- ^ destination array + -> CmmExpr -- ^ offset in destination array (in words) + -> Int -- ^ number of elements to copy + -> FCode () +emitCopyUpdRemSetPush _dflags _hdr_size _dst _dst_off 0 = return () +emitCopyUpdRemSetPush dflags hdr_size dst dst_off n = + whenUpdRemSetEnabled dflags $ do + updfr_off <- getUpdFrameOff + graph <- mkCall lbl (NativeNodeCall,NativeReturn) [] args updfr_off [] + emit graph + where + lbl = mkLblExpr $ mkPrimCallLabel + $ PrimCall (fsLit "stg_copyArray_barrier") rtsUnitId + args = + [ mkIntExpr dflags hdr_size + , dst + , dst_off + , mkIntExpr dflags n + ] diff --git a/compiler/GHC/StgToCmm/Utils.hs b/compiler/GHC/StgToCmm/Utils.hs index 30e37bb930..0b3a8d8b08 100644 --- a/compiler/GHC/StgToCmm/Utils.hs +++ b/compiler/GHC/StgToCmm/Utils.hs @@ -39,6 +39,11 @@ module GHC.StgToCmm.Utils ( mkWordCLit, newStringCLit, newByteStringCLit, blankWord, + + -- * Update remembered set operations + whenUpdRemSetEnabled, + emitUpdRemSetPush, + emitUpdRemSetPushThunk, ) where #include "HsVersions.h" @@ -576,3 +581,40 @@ assignTemp' e let reg = CmmLocal lreg emitAssign reg e return (CmmReg reg) + + +--------------------------------------------------------------------------- +-- Pushing to the update remembered set +--------------------------------------------------------------------------- + +whenUpdRemSetEnabled :: DynFlags -> FCode a -> FCode () +whenUpdRemSetEnabled dflags code = do + do_it <- getCode code + the_if <- mkCmmIfThenElse' is_enabled do_it mkNop (Just False) + emit the_if + where + enabled = CmmLoad (CmmLit $ CmmLabel mkNonmovingWriteBarrierEnabledLabel) (bWord dflags) + zero = zeroExpr dflags + is_enabled = cmmNeWord dflags enabled zero + +-- | Emit code to add an entry to a now-overwritten pointer to the update +-- remembered set. 
+emitUpdRemSetPush :: CmmExpr -- ^ value of pointer which was overwritten + -> FCode () +emitUpdRemSetPush ptr = do + emitRtsCall + rtsUnitId + (fsLit "updateRemembSetPushClosure_") + [(CmmReg (CmmGlobal BaseReg), AddrHint), + (ptr, AddrHint)] + False + +emitUpdRemSetPushThunk :: CmmExpr -- ^ the thunk + -> FCode () +emitUpdRemSetPushThunk ptr = do + emitRtsCall + rtsUnitId + (fsLit "updateRemembSetPushThunk_") + [(CmmReg (CmmGlobal BaseReg), AddrHint), + (ptr, AddrHint)] + False diff --git a/compiler/cmm/CLabel.hs b/compiler/cmm/CLabel.hs index 0c3dae8001..66e39f0d69 100644 --- a/compiler/cmm/CLabel.hs +++ b/compiler/cmm/CLabel.hs @@ -40,6 +40,7 @@ module CLabel ( mkAsmTempDieLabel, mkDirty_MUT_VAR_Label, + mkNonmovingWriteBarrierEnabledLabel, mkUpdInfoLabel, mkBHUpdInfoLabel, mkIndStaticInfoLabel, @@ -484,7 +485,9 @@ mkBlockInfoTableLabel name c = IdLabel name c BlockInfoTable -- See Note [Proc-point local block entry-point]. -- Constructing Cmm Labels -mkDirty_MUT_VAR_Label, mkUpdInfoLabel, +mkDirty_MUT_VAR_Label, + mkNonmovingWriteBarrierEnabledLabel, + mkUpdInfoLabel, mkBHUpdInfoLabel, mkIndStaticInfoLabel, mkMainCapabilityLabel, mkMAP_FROZEN_CLEAN_infoLabel, mkMAP_FROZEN_DIRTY_infoLabel, mkMAP_DIRTY_infoLabel, @@ -494,6 +497,8 @@ mkDirty_MUT_VAR_Label, mkUpdInfoLabel, mkSMAP_FROZEN_CLEAN_infoLabel, mkSMAP_FROZEN_DIRTY_infoLabel, mkSMAP_DIRTY_infoLabel, mkBadAlignmentLabel :: CLabel mkDirty_MUT_VAR_Label = mkForeignLabel (fsLit "dirty_MUT_VAR") Nothing ForeignLabelInExternalPackage IsFunction +mkNonmovingWriteBarrierEnabledLabel + = CmmLabel rtsUnitId (fsLit "nonmoving_write_barrier_enabled") CmmData mkUpdInfoLabel = CmmLabel rtsUnitId (fsLit "stg_upd_frame") CmmInfo mkBHUpdInfoLabel = CmmLabel rtsUnitId (fsLit "stg_bh_upd_frame" ) CmmInfo mkIndStaticInfoLabel = CmmLabel rtsUnitId (fsLit "stg_IND_STATIC") CmmInfo diff --git a/includes/Cmm.h b/includes/Cmm.h index 21d5da310c..42aa0bf550 100644 --- a/includes/Cmm.h +++ b/includes/Cmm.h @@ -843,6 +843,10 @@ if (__gen > 0) { recordMutableCap(__p, __gen); } /* ----------------------------------------------------------------------------- + Update remembered set write barrier + -------------------------------------------------------------------------- */ + +/* ----------------------------------------------------------------------------- Arrays -------------------------------------------------------------------------- */ @@ -944,3 +948,21 @@ prim %memcpy(dst_p, src_p, n * SIZEOF_W, SIZEOF_W); \ \ return (dst); + + +#if defined(THREADED_RTS) +#define IF_WRITE_BARRIER_ENABLED \ + if (W_[nonmoving_write_barrier_enabled] != 0) (likely: False) +#else +// A similar measure is also taken in rts/NonMoving.h, but that isn't visible from C-- +#define IF_WRITE_BARRIER_ENABLED \ + if (0) +#define nonmoving_write_barrier_enabled 0 +#endif + +// A useful helper for pushing a pointer to the update remembered set. +// See Note [Update remembered set] in NonMovingMark.c. 
+#define updateRemembSetPushPtr(p) \ + IF_WRITE_BARRIER_ENABLED { \ + ccall updateRemembSetPushClosure_(BaseReg "ptr", p "ptr"); \ + } diff --git a/includes/Rts.h b/includes/Rts.h index 56642e14c5..def06de90d 100644 --- a/includes/Rts.h +++ b/includes/Rts.h @@ -197,6 +197,7 @@ void _assertFail(const char *filename, unsigned int linenum) #include "rts/storage/ClosureMacros.h" #include "rts/storage/MBlock.h" #include "rts/storage/GC.h" +#include "rts/NonMoving.h" /* Other RTS external APIs */ #include "rts/Parallel.h" diff --git a/includes/rts/NonMoving.h b/includes/rts/NonMoving.h new file mode 100644 index 0000000000..6a6d96b2c8 --- /dev/null +++ b/includes/rts/NonMoving.h @@ -0,0 +1,24 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 2018-2019 + * + * Non-moving garbage collector + * + * Do not #include this file directly: #include "Rts.h" instead. + * + * To understand the structure of the RTS headers, see the wiki: + * http://ghc.haskell.org/trac/ghc/wiki/Commentary/SourceTree/Includes + * + * -------------------------------------------------------------------------- */ + +#pragma once + +/* This is called by the code generator */ +extern DLL_IMPORT_RTS +void updateRemembSetPushClosure_(StgRegTable *reg, StgClosure *p); + +void updateRemembSetPushClosure(Capability *cap, StgClosure *p); + +void updateRemembSetPushThunk_(StgRegTable *reg, StgThunk *p); + +extern StgWord DLL_IMPORT_DATA_VAR(nonmoving_write_barrier_enabled); diff --git a/includes/rts/storage/ClosureMacros.h b/includes/rts/storage/ClosureMacros.h index a3873cc49d..2af50863d0 100644 --- a/includes/rts/storage/ClosureMacros.h +++ b/includes/rts/storage/ClosureMacros.h @@ -107,6 +107,20 @@ INLINE_HEADER const StgConInfoTable *get_con_itbl(const StgClosure *c) return CON_INFO_PTR_TO_STRUCT((c)->header.info); } +/* Used when we expect another thread to be mutating the info table pointer of + * a closure (e.g. when busy-waiting on a WHITEHOLE). + */ +INLINE_HEADER const StgInfoTable *get_volatile_itbl(StgClosure *c) { + // The volatile here is important to ensure that the compiler does not + // optimise away multiple loads, e.g. in a busy-wait loop. Note that + // we can't use VOLATILE_LOAD here as the casts result in strict aliasing + // rule violations and this header may be compiled outside of the RTS + // (where we use -fno-strict-aliasing). + StgInfoTable * *volatile p = (StgInfoTable * *volatile) &c->header.info; + return INFO_PTR_TO_STRUCT(*p); +} + + INLINE_HEADER StgHalfWord GET_TAG(const StgClosure *con) { return get_itbl(con)->srt; diff --git a/includes/rts/storage/GC.h b/includes/rts/storage/GC.h index 77dbe60297..7931433019 100644 --- a/includes/rts/storage/GC.h +++ b/includes/rts/storage/GC.h @@ -234,7 +234,7 @@ void setKeepCAFs (void); and is put on the mutable list. -------------------------------------------------------------------------- */ -void dirty_MUT_VAR(StgRegTable *reg, StgClosure *p); +void dirty_MUT_VAR(StgRegTable *reg, StgMutVar *mv, StgClosure *old); /* set to disable CAF garbage collection in GHCi. */ /* (needed when dynamic libraries are used). 
*/ diff --git a/includes/rts/storage/TSO.h b/includes/rts/storage/TSO.h index 63d2a11e8e..d56ae8ad27 100644 --- a/includes/rts/storage/TSO.h +++ b/includes/rts/storage/TSO.h @@ -185,6 +185,53 @@ typedef struct StgTSO_ { } *StgTSOPtr; // StgTSO defined in rts/Types.h +/* Note [StgStack dirtiness flags and concurrent marking] + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * Without concurrent collection by the nonmoving collector the stack dirtiness story + * is quite simple: The stack is either STACK_DIRTY (meaning it has been added to mut_list) + * or not. + * + * However, things are considerably more complicated with concurrent collection + * (namely, when nonmoving_write_barrier_enabled is set): In addition to adding + * the stack to mut_list and flagging it as STACK_DIRTY, we also must ensure + * that stacks are marked in accordance with the nonmoving collector's snapshot + * invariant. This is: every stack alive at the time the snapshot is taken must + * be marked at some point after the moment the snapshot is taken and before it + * is mutated or the commencement of the sweep phase. + * + * This marking may be done by the concurrent mark phase (in the case of a + * thread that never runs during the concurrent mark) or by the mutator when + * dirtying the stack. However, it is unsafe for the concurrent collector to + * traverse the stack while it is under mutation. Consequently, the following + * handshake is obeyed by the mutator's write barrier and the concurrent mark to + * ensure this doesn't happen: + * + * 1. The entity seeking to mark first checks that the stack lives in the nonmoving + * generation; if not then the stack was not alive at the time the snapshot + * was taken and therefore we need not mark it. + * + * 2. The entity seeking to mark checks the stack's mark bit. If it is set then + * no mark is necessary. + * + * 3. The entity seeking to mark tries to lock the stack for marking by + * atomically setting its `marking` field to the current non-moving mark + * epoch: + * + * a. If the mutator finds the concurrent collector has already locked the + * stack then it waits until it is finished (indicated by the mark bit + * being set) before proceeding with execution. + * + * b. If the concurrent collector finds that the mutator has locked the stack + * then it moves on, leaving the mutator to mark it. There is no need to wait; + * the mark is guaranteed to finish before sweep due to the post-mark + * synchronization with mutators. + * + * c. Whoever succeeds in locking the stack is responsible for marking it and + * setting the stack's mark bit (either the BF_MARKED bit for large objects + * or otherwise its bit in its segment's mark bitmap). 
+ * + */ #define STACK_DIRTY 1 // used by sanity checker to verify that all dirty stacks are on the mutable list @@ -193,7 +240,8 @@ typedef struct StgTSO_ { typedef struct StgStack_ { StgHeader header; StgWord32 stack_size; // stack size in *words* - StgWord32 dirty; // non-zero => dirty + StgWord dirty; // non-zero => dirty + StgWord marking; // non-zero => someone is currently marking the stack StgPtr sp; // current stack pointer StgWord stack[]; } StgStack; diff --git a/includes/stg/MiscClosures.h b/includes/stg/MiscClosures.h index 217b1bc89d..f4ae2245d2 100644 --- a/includes/stg/MiscClosures.h +++ b/includes/stg/MiscClosures.h @@ -542,5 +542,6 @@ void * pushCostCentre (void *ccs, void *cc); // Capability.c extern unsigned int n_capabilities; +extern void updateRemembSetPushThunk_(void *reg, void *p1); #endif diff --git a/rts/Apply.cmm b/rts/Apply.cmm index 8d7fc3c012..eeb760c5ed 100644 --- a/rts/Apply.cmm +++ b/rts/Apply.cmm @@ -654,6 +654,8 @@ INFO_TABLE(stg_AP_STACK,/*special layout*/0,0,AP_STACK,"AP_STACK","AP_STACK") /* someone else beat us to it */ jump ENTRY_LBL(stg_WHITEHOLE) (ap); } + // Can't add StgInd_indirectee(ap) to UpdRemSet here because the old value is + // not reachable. StgInd_indirectee(ap) = CurrentTSO; prim_write_barrier; SET_INFO(ap, __stg_EAGER_BLACKHOLE_info); diff --git a/rts/Capability.c b/rts/Capability.c index 23e581359e..0baa4ef205 100644 --- a/rts/Capability.c +++ b/rts/Capability.c @@ -292,6 +292,11 @@ initCapability (Capability *cap, uint32_t i) RtsFlags.GcFlags.generations, "initCapability"); + + // At this point storage manager is not initialized yet, so this will be + // initialized in initStorage(). + cap->upd_rem_set.queue.blocks = NULL; + for (g = 0; g < RtsFlags.GcFlags.generations; g++) { cap->mut_lists[g] = NULL; } @@ -861,16 +866,27 @@ yieldCapability (Capability** pCap, Task *task, bool gcAllowed) { PendingSync *sync = pending_sync; - if (sync && sync->type == SYNC_GC_PAR) { - if (! sync->idle[cap->no]) { - traceEventGcStart(cap); - gcWorkerThread(cap); - traceEventGcEnd(cap); - traceSparkCounters(cap); - // See Note [migrated bound threads 2] - if (task->cap == cap) { - return true; + if (sync) { + switch (sync->type) { + case SYNC_GC_PAR: + if (! 
sync->idle[cap->no]) { + traceEventGcStart(cap); + gcWorkerThread(cap); + traceEventGcEnd(cap); + traceSparkCounters(cap); + // See Note [migrated bound threads 2] + if (task->cap == cap) { + return true; + } } + break; + + case SYNC_FLUSH_UPD_REM_SET: + debugTrace(DEBUG_nonmoving_gc, "Flushing update remembered set blocks..."); + break; + + default: + break; } } } diff --git a/rts/Capability.h b/rts/Capability.h index 0c41456261..e51769f887 100644 --- a/rts/Capability.h +++ b/rts/Capability.h @@ -85,6 +85,9 @@ struct Capability_ { bdescr **mut_lists; bdescr **saved_mut_lists; // tmp use during GC + // The update remembered set for the non-moving collector + UpdRemSet upd_rem_set; + // block for allocating pinned objects into bdescr *pinned_object_block; // full pinned object blocks allocated since the last GC @@ -257,7 +260,8 @@ extern Capability **capabilities; typedef enum { SYNC_OTHER, SYNC_GC_SEQ, - SYNC_GC_PAR + SYNC_GC_PAR, + SYNC_FLUSH_UPD_REM_SET } SyncType; // diff --git a/rts/Exception.cmm b/rts/Exception.cmm index 8ea94b19f2..334d0ef823 100644 --- a/rts/Exception.cmm +++ b/rts/Exception.cmm @@ -318,6 +318,7 @@ stg_killThreadzh (P_ target, P_ exception) return (); } else { StgTSO_why_blocked(CurrentTSO) = BlockedOnMsgThrowTo; + updateRemembSetPushPtr(StgTSO_block_info(CurrentTSO)); StgTSO_block_info(CurrentTSO) = msg; // we must block, and unlock the message before returning jump stg_block_throwto (target, exception); @@ -489,6 +490,8 @@ retry_pop_stack: ccall stmAbortTransaction(MyCapability() "ptr", trec "ptr"); ccall stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr"); + // No need to push `trec` to update remembered set; it will be no longer + // reachable after we overwrite StgTSO.trec. StgTSO_trec(CurrentTSO) = NO_TREC; if (r != 0) { // Transaction was valid: continue searching for a catch frame @@ -607,6 +610,8 @@ retry_pop_stack: outer = StgTRecHeader_enclosing_trec(trec); ccall stmAbortTransaction(MyCapability() "ptr", trec "ptr"); ccall stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr"); + // No need to push `trec` to update remembered set since we just freed + // it; it is no longer reachable. StgTSO_trec(CurrentTSO) = outer; Sp = Sp + SIZEOF_StgCatchSTMFrame; } diff --git a/rts/Messages.c b/rts/Messages.c index d878db5eda..b16ada53e1 100644 --- a/rts/Messages.c +++ b/rts/Messages.c @@ -244,8 +244,8 @@ loop: // a barrier is necessary to ensure that all writes are visible. // See Note [Heap memory barriers] in SMP.h. write_barrier(); + dirty_TSO(cap, owner); // we will modify owner->bq owner->bq = bq; - dirty_TSO(cap, owner); // we modified owner->bq // If the owner of the blackhole is currently runnable, then // bump it to the front of the run queue. This gives the @@ -262,6 +262,9 @@ loop: // point to the BLOCKING_QUEUE from the BLACKHOLE write_barrier(); // make the BQ visible, see Note [Heap memory barriers]. 
+ if (RTS_UNLIKELY(nonmoving_write_barrier_enabled)) { + updateRemembSetPushClosure(cap, (StgClosure*)p); + } ((StgInd*)bh)->indirectee = (StgClosure *)bq; recordClosureMutated(cap,bh); // bh was mutated @@ -290,6 +293,11 @@ loop: } #endif + if (RTS_UNLIKELY(nonmoving_write_barrier_enabled)) { + // We are about to overwrite bq->queue; make sure its current value + // makes it into the update remembered set + updateRemembSetPushClosure(cap, (StgClosure*)bq->queue); + } msg->link = bq->queue; bq->queue = msg; // No barrier is necessary here: we are only exposing the diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm index a2ab3de586..6d3df0700c 100644 --- a/rts/PrimOps.cmm +++ b/rts/PrimOps.cmm @@ -349,8 +349,13 @@ stg_casArrayzh ( gcptr arr, W_ ind, gcptr old, gcptr new ) // Compare and Swap Succeeded: SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, CCCS); len = StgMutArrPtrs_ptrs(arr); + // The write barrier. We must write a byte into the mark table: I8[arr + SIZEOF_StgMutArrPtrs + WDS(len) + (ind >> MUT_ARR_PTRS_CARD_BITS )] = 1; + + // Concurrent GC write barrier + updateRemembSetPushPtr(old); + return (0,new); } } @@ -462,16 +467,45 @@ stg_thawSmallArrayzh ( gcptr src, W_ offset, W_ n ) cloneSmallArray(stg_SMALL_MUT_ARR_PTRS_DIRTY_info, src, offset, n) } +// Concurrent GC write barrier for pointer array copies +// +// hdr_size in bytes. dst_off in words, n in words. +stg_copyArray_barrier ( W_ hdr_size, gcptr dst, W_ dst_off, W_ n) +{ + W_ end, p; + ASSERT(n > 0); // Assumes n==0 is handled by caller + p = dst + hdr_size + WDS(dst_off); + end = p + WDS(n); + +again: + IF_WRITE_BARRIER_ENABLED { + ccall updateRemembSetPushClosure_(BaseReg "ptr", W_[p] "ptr"); + } + p = p + WDS(1); + if (p < end) { + goto again; + } + + return (); +} + stg_copySmallArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n) { W_ dst_p, src_p, bytes; - SET_INFO(dst, stg_SMALL_MUT_ARR_PTRS_DIRTY_info); + if (n > 0) { + IF_WRITE_BARRIER_ENABLED { + call stg_copyArray_barrier(SIZEOF_StgSmallMutArrPtrs, + dst, dst_off, n); + } - dst_p = dst + SIZEOF_StgSmallMutArrPtrs + WDS(dst_off); - src_p = src + SIZEOF_StgSmallMutArrPtrs + WDS(src_off); - bytes = WDS(n); - prim %memcpy(dst_p, src_p, bytes, SIZEOF_W); + SET_INFO(dst, stg_SMALL_MUT_ARR_PTRS_DIRTY_info); + + dst_p = dst + SIZEOF_StgSmallMutArrPtrs + WDS(dst_off); + src_p = src + SIZEOF_StgSmallMutArrPtrs + WDS(src_off); + bytes = WDS(n); + prim %memcpy(dst_p, src_p, bytes, SIZEOF_W); + } return (); } @@ -480,15 +514,22 @@ stg_copySmallMutableArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n { W_ dst_p, src_p, bytes; - SET_INFO(dst, stg_SMALL_MUT_ARR_PTRS_DIRTY_info); + if (n > 0) { + IF_WRITE_BARRIER_ENABLED { + call stg_copyArray_barrier(SIZEOF_StgSmallMutArrPtrs, + dst, dst_off, n); + } - dst_p = dst + SIZEOF_StgSmallMutArrPtrs + WDS(dst_off); - src_p = src + SIZEOF_StgSmallMutArrPtrs + WDS(src_off); - bytes = WDS(n); - if (src == dst) { - prim %memmove(dst_p, src_p, bytes, SIZEOF_W); - } else { - prim %memcpy(dst_p, src_p, bytes, SIZEOF_W); + SET_INFO(dst, stg_SMALL_MUT_ARR_PTRS_DIRTY_info); + + dst_p = dst + SIZEOF_StgSmallMutArrPtrs + WDS(dst_off); + src_p = src + SIZEOF_StgSmallMutArrPtrs + WDS(src_off); + bytes = WDS(n); + if (src == dst) { + prim %memmove(dst_p, src_p, bytes, SIZEOF_W); + } else { + prim %memcpy(dst_p, src_p, bytes, SIZEOF_W); + } } return (); @@ -510,6 +551,10 @@ stg_casSmallArrayzh ( gcptr arr, W_ ind, gcptr old, gcptr new ) } else { // Compare and Swap Succeeded: SET_HDR(arr, stg_SMALL_MUT_ARR_PTRS_DIRTY_info, CCCS); + + // 
Concurrent GC write barrier + updateRemembSetPushPtr(old); + return (0,new); } } @@ -549,7 +594,7 @@ stg_casMutVarzh ( gcptr mv, gcptr old, gcptr new ) return (1,h); } else { if (GET_INFO(mv) == stg_MUT_VAR_CLEAN_info) { - ccall dirty_MUT_VAR(BaseReg "ptr", mv "ptr"); + ccall dirty_MUT_VAR(BaseReg "ptr", mv "ptr", old); } return (0,new); } @@ -562,7 +607,7 @@ stg_casMutVarzh ( gcptr mv, gcptr old, gcptr new ) } else { StgMutVar_var(mv) = new; if (GET_INFO(mv) == stg_MUT_VAR_CLEAN_info) { - ccall dirty_MUT_VAR(BaseReg "ptr", mv "ptr"); + ccall dirty_MUT_VAR(BaseReg "ptr", mv "ptr", old); } return (0,new); } @@ -629,11 +674,12 @@ stg_atomicModifyMutVar2zh ( gcptr mv, gcptr f ) (h) = prim %cmpxchgW(mv + SIZEOF_StgHeader + OFFSET_StgMutVar_var, x, y); if (h != x) { goto retry; } #else + h = StgMutVar_var(mv); StgMutVar_var(mv) = y; #endif if (GET_INFO(mv) == stg_MUT_VAR_CLEAN_info) { - ccall dirty_MUT_VAR(BaseReg "ptr", mv "ptr"); + ccall dirty_MUT_VAR(BaseReg "ptr", mv "ptr", h); } return (x,z); @@ -755,6 +801,9 @@ stg_addCFinalizzerToWeakzh ( W_ fptr, // finalizer return (0); } + // Write barrier for concurrent non-moving collector + updateRemembSetPushPtr(StgWeak_cfinalizers(w)) + StgCFinalizerList_link(c) = StgWeak_cfinalizers(w); StgWeak_cfinalizers(w) = c; @@ -835,6 +884,8 @@ stg_deRefWeakzh ( gcptr w ) if (info == stg_WEAK_info) { code = 1; val = StgWeak_value(w); + // See Note [Concurrent read barrier on deRefWeak#] in NonMovingMark.c + updateRemembSetPushPtr(val); } else { code = 0; val = w; @@ -1501,7 +1552,7 @@ stg_takeMVarzh ( P_ mvar /* :: MVar a */ ) */ if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) { if (info == stg_MVAR_CLEAN_info) { - ccall dirty_MVAR(BaseReg "ptr", mvar "ptr"); + ccall dirty_MVAR(BaseReg "ptr", mvar "ptr", StgMVar_value(mvar) "ptr"); } // We want to put the heap check down here in the slow path, @@ -1547,6 +1598,9 @@ loop: // If the MVar is not already dirty, then we don't need to make // it dirty, as it is empty with nothing blocking on it. unlockClosure(mvar, info); + // However, we do need to ensure that the nonmoving collector + // knows about the reference to the value that we just removed... + updateRemembSetPushPtr(val); return (val); } qinfo = StgHeader_info(q); @@ -1560,7 +1614,7 @@ loop: // There are putMVar(s) waiting... wake up the first thread on the queue if (info == stg_MVAR_CLEAN_info) { - ccall dirty_MVAR(BaseReg "ptr", mvar "ptr"); + ccall dirty_MVAR(BaseReg "ptr", mvar "ptr", val "ptr"); } tso = StgMVarTSOQueue_tso(q); @@ -1629,7 +1683,7 @@ loop: // There are putMVar(s) waiting... wake up the first thread on the queue if (info == stg_MVAR_CLEAN_info) { - ccall dirty_MVAR(BaseReg "ptr", mvar "ptr"); + ccall dirty_MVAR(BaseReg "ptr", mvar "ptr", val "ptr"); } tso = StgMVarTSOQueue_tso(q); @@ -1667,7 +1721,7 @@ stg_putMVarzh ( P_ mvar, /* :: MVar a */ if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) { if (info == stg_MVAR_CLEAN_info) { - ccall dirty_MVAR(BaseReg "ptr", mvar "ptr"); + ccall dirty_MVAR(BaseReg "ptr", mvar "ptr", StgMVar_value(mvar) "ptr"); } // We want to put the heap check down here in the slow path, @@ -1701,14 +1755,20 @@ stg_putMVarzh ( P_ mvar, /* :: MVar a */ jump stg_block_putmvar(mvar,val); } + // We are going to mutate the closure, make sure its current pointers + // are marked. + if (info == stg_MVAR_CLEAN_info) { + ccall update_MVAR(BaseReg "ptr", mvar "ptr", StgMVar_value(mvar) "ptr"); + } + q = StgMVar_head(mvar); loop: if (q == stg_END_TSO_QUEUE_closure) { /* No further takes, the MVar is now full. 
*/ + StgMVar_value(mvar) = val; if (info == stg_MVAR_CLEAN_info) { - ccall dirty_MVAR(BaseReg "ptr", mvar "ptr"); + ccall dirty_MVAR(BaseReg "ptr", mvar "ptr", StgMVar_value(mvar) "ptr"); } - StgMVar_value(mvar) = val; unlockClosure(mvar, stg_MVAR_DIRTY_info); return (); } @@ -1790,7 +1850,7 @@ loop: if (q == stg_END_TSO_QUEUE_closure) { /* No further takes, the MVar is now full. */ if (info == stg_MVAR_CLEAN_info) { - ccall dirty_MVAR(BaseReg "ptr", mvar "ptr"); + ccall dirty_MVAR(BaseReg "ptr", mvar "ptr", StgMVar_value(mvar) "ptr"); } StgMVar_value(mvar) = val; @@ -1861,7 +1921,7 @@ stg_readMVarzh ( P_ mvar, /* :: MVar a */ ) if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) { if (info == stg_MVAR_CLEAN_info) { - ccall dirty_MVAR(BaseReg "ptr", mvar "ptr"); + ccall dirty_MVAR(BaseReg "ptr", mvar "ptr", StgMVar_value(mvar)); } ALLOC_PRIM_WITH_CUSTOM_FAILURE diff --git a/rts/RaiseAsync.c b/rts/RaiseAsync.c index 807c3e3d30..50cddff051 100644 --- a/rts/RaiseAsync.c +++ b/rts/RaiseAsync.c @@ -515,9 +515,9 @@ blockedThrowTo (Capability *cap, StgTSO *target, MessageThrowTo *msg) ASSERT(target->cap == cap); + dirty_TSO(cap,target); // we will modify the blocked_exceptions queue msg->link = target->blocked_exceptions; target->blocked_exceptions = msg; - dirty_TSO(cap,target); // we modified the blocked_exceptions queue } /* ----------------------------------------------------------------------------- diff --git a/rts/RtsStartup.c b/rts/RtsStartup.c index ce0fa2d519..d0d08a2495 100644 --- a/rts/RtsStartup.c +++ b/rts/RtsStartup.c @@ -392,7 +392,8 @@ hs_exit_(bool wait_foreign) ioManagerDie(); #endif - /* stop all running tasks */ + /* stop all running tasks. This is also where we stop concurrent non-moving + * collection if it's running */ exitScheduler(wait_foreign); /* run C finalizers for all active weak pointers */ @@ -436,9 +437,6 @@ hs_exit_(bool wait_foreign) /* shutdown the hpc support (if needed) */ exitHpc(); - /* wait for any on-going concurrent GC to finish */ - nonmovingExit(); - // clean up things from the storage manager's point of view. // also outputs the stats (+RTS -s) info. 
exitStorage(); diff --git a/rts/RtsSymbols.c b/rts/RtsSymbols.c index e34fcf03f5..e64c78fbcc 100644 --- a/rts/RtsSymbols.c +++ b/rts/RtsSymbols.c @@ -14,6 +14,7 @@ #include "HsFFI.h" #include "sm/Storage.h" +#include "sm/NonMovingMark.h" #include <stdbool.h> #if !defined(mingw32_HOST_OS) @@ -682,6 +683,9 @@ SymI_HasProto(stg_shrinkMutableByteArrayzh) \ SymI_HasProto(stg_resizzeMutableByteArrayzh) \ SymI_HasProto(newSpark) \ + SymI_HasProto(updateRemembSetPushThunk) \ + SymI_HasProto(updateRemembSetPushThunk_) \ + SymI_HasProto(updateRemembSetPushClosure_) \ SymI_HasProto(performGC) \ SymI_HasProto(performMajorGC) \ SymI_HasProto(prog_argc) \ @@ -1037,6 +1041,7 @@ RtsSymbolVal rtsSyms[] = { RTS_OPENBSD_ONLY_SYMBOLS RTS_LIBGCC_SYMBOLS RTS_LIBFFI_SYMBOLS + SymI_HasDataProto(nonmoving_write_barrier_enabled) #if defined(darwin_HOST_OS) && defined(i386_HOST_ARCH) // dyld stub code contains references to this, // but it should never be called because we treat @@ -182,7 +182,8 @@ static void unlock_stm(StgTRecHeader *trec STG_UNUSED) { TRACE("%p : unlock_stm()", trec); } -static StgClosure *lock_tvar(StgTRecHeader *trec STG_UNUSED, +static StgClosure *lock_tvar(Capability *cap STG_UNUSED, + StgTRecHeader *trec STG_UNUSED, StgTVar *s STG_UNUSED) { StgClosure *result; TRACE("%p : lock_tvar(%p)", trec, s); @@ -197,12 +198,14 @@ static void unlock_tvar(Capability *cap, StgBool force_update) { TRACE("%p : unlock_tvar(%p)", trec, s); if (force_update) { + StgClosure *old_value = s -> current_value; s -> current_value = c; - dirty_TVAR(cap,s); + dirty_TVAR(cap, s, old_value); } } -static StgBool cond_lock_tvar(StgTRecHeader *trec STG_UNUSED, +static StgBool cond_lock_tvar(Capability *cap STG_UNUSED, + StgTRecHeader *trec STG_UNUSED, StgTVar *s STG_UNUSED, StgClosure *expected) { StgClosure *result; @@ -231,7 +234,8 @@ static void unlock_stm(StgTRecHeader *trec STG_UNUSED) { smp_locked = 0; } -static StgClosure *lock_tvar(StgTRecHeader *trec STG_UNUSED, +static StgClosure *lock_tvar(Capability *cap STG_UNUSED, + StgTRecHeader *trec STG_UNUSED, StgTVar *s STG_UNUSED) { StgClosure *result; TRACE("%p : lock_tvar(%p)", trec, s); @@ -248,12 +252,14 @@ static void *unlock_tvar(Capability *cap, TRACE("%p : unlock_tvar(%p, %p)", trec, s, c); ASSERT(smp_locked == trec); if (force_update) { + StgClosure *old_value = s -> current_value; s -> current_value = c; - dirty_TVAR(cap,s); + dirty_TVAR(cap, s, old_value); } } -static StgBool cond_lock_tvar(StgTRecHeader *trec STG_UNUSED, +static StgBool cond_lock_tvar(Capability *cap STG_UNUSED, + StgTRecHeader *trec STG_UNUSED, StgTVar *s STG_UNUSED, StgClosure *expected) { StgClosure *result; @@ -279,7 +285,8 @@ static void unlock_stm(StgTRecHeader *trec STG_UNUSED) { TRACE("%p : unlock_stm()", trec); } -static StgClosure *lock_tvar(StgTRecHeader *trec, +static StgClosure *lock_tvar(Capability *cap, + StgTRecHeader *trec, StgTVar *s STG_UNUSED) { StgClosure *result; TRACE("%p : lock_tvar(%p)", trec, s); @@ -289,6 +296,10 @@ static StgClosure *lock_tvar(StgTRecHeader *trec, } while (GET_INFO(UNTAG_CLOSURE(result)) == &stg_TREC_HEADER_info); } while (cas((void *)&(s -> current_value), (StgWord)result, (StgWord)trec) != (StgWord)result); + + if (RTS_UNLIKELY(nonmoving_write_barrier_enabled && result)) { + updateRemembSetPushClosure(cap, result); + } return result; } @@ -300,10 +311,11 @@ static void unlock_tvar(Capability *cap, TRACE("%p : unlock_tvar(%p, %p)", trec, s, c); ASSERT(s -> current_value == (StgClosure *)trec); s -> current_value = c; - dirty_TVAR(cap,s); + 
dirty_TVAR(cap, s, (StgClosure *) trec); } -static StgBool cond_lock_tvar(StgTRecHeader *trec, +static StgBool cond_lock_tvar(Capability *cap, + StgTRecHeader *trec, StgTVar *s, StgClosure *expected) { StgClosure *result; @@ -311,6 +323,9 @@ static StgBool cond_lock_tvar(StgTRecHeader *trec, TRACE("%p : cond_lock_tvar(%p, %p)", trec, s, expected); w = cas((void *)&(s -> current_value), (StgWord)expected, (StgWord)trec); result = (StgClosure *)w; + if (RTS_UNLIKELY(nonmoving_write_barrier_enabled && result)) { + updateRemembSetPushClosure(cap, expected); + } TRACE("%p : %s", trec, result ? "success" : "failure"); return (result == expected); } @@ -525,7 +540,7 @@ static void build_watch_queue_entries_for_trec(Capability *cap, } s -> first_watch_queue_entry = q; e -> new_value = (StgClosure *) q; - dirty_TVAR(cap,s); // we modified first_watch_queue_entry + dirty_TVAR(cap, s, (StgClosure *) fq); // we modified first_watch_queue_entry }); } @@ -545,7 +560,7 @@ static void remove_watch_queue_entries_for_trec(Capability *cap, StgTVarWatchQueue *q; StgClosure *saw; s = e -> tvar; - saw = lock_tvar(trec, s); + saw = lock_tvar(cap, trec, s); q = (StgTVarWatchQueue *) (e -> new_value); TRACE("%p : removing tso=%p from watch queue for tvar=%p", trec, @@ -562,7 +577,7 @@ static void remove_watch_queue_entries_for_trec(Capability *cap, } else { ASSERT(s -> first_watch_queue_entry == q); s -> first_watch_queue_entry = nq; - dirty_TVAR(cap,s); // we modified first_watch_queue_entry + dirty_TVAR(cap, s, (StgClosure *) q); // we modified first_watch_queue_entry } free_stg_tvar_watch_queue(cap, q); unlock_tvar(cap, trec, s, saw, false); @@ -773,7 +788,7 @@ static StgBool validate_and_acquire_ownership (Capability *cap, s = e -> tvar; if (acquire_all || entry_is_update(e)) { TRACE("%p : trying to acquire %p", trec, s); - if (!cond_lock_tvar(trec, s, e -> expected_value)) { + if (!cond_lock_tvar(cap, trec, s, e -> expected_value)) { TRACE("%p : failed to acquire %p", trec, s); result = false; BREAK_FOR_EACH; diff --git a/rts/Schedule.c b/rts/Schedule.c index 8d7acc963e..8d82daf381 100644 --- a/rts/Schedule.c +++ b/rts/Schedule.c @@ -44,6 +44,8 @@ #include "StablePtr.h" #include "StableName.h" #include "TopHandler.h" +#include "sm/NonMoving.h" +#include "sm/NonMovingMark.h" #if defined(HAVE_SYS_TYPES_H) #include <sys/types.h> @@ -2497,7 +2499,11 @@ resumeThread (void *task_) tso = incall->suspended_tso; incall->suspended_tso = NULL; incall->suspended_cap = NULL; - tso->_link = END_TSO_QUEUE; // no write barrier reqd + // we will modify tso->_link + if (RTS_UNLIKELY(nonmoving_write_barrier_enabled)) { + updateRemembSetPushClosure(cap, (StgClosure *)tso->_link); + } + tso->_link = END_TSO_QUEUE; traceEventRunThread(cap, tso); @@ -2671,6 +2677,8 @@ initScheduler(void) /* Initialise the mutex and condition variables used by * the scheduler. */ initMutex(&sched_mutex); + initMutex(&sync_finished_mutex); + initCondition(&sync_finished_cond); #endif ACQUIRE_LOCK(&sched_mutex); @@ -2706,6 +2714,7 @@ exitScheduler (bool wait_foreign USED_IF_THREADS) // If we haven't killed all the threads yet, do it now. 
if (sched_state < SCHED_SHUTTING_DOWN) { sched_state = SCHED_INTERRUPTING; + nonmovingExit(); Capability *cap = task->cap; waitForCapability(&cap,task); scheduleDoGC(&cap,task,true); diff --git a/rts/StableName.c b/rts/StableName.c index 383d87e3db..4b26fee396 100644 --- a/rts/StableName.c +++ b/rts/StableName.c @@ -263,6 +263,9 @@ threadStableNameTable( evac_fn evac, void *user ) void gcStableNameTable( void ) { + // We must take the stable name lock lest we race with the nonmoving + // collector (namely nonmovingSweepStableNameTable). + stableNameLock(); FOR_EACH_STABLE_NAME( p, { // FOR_EACH_STABLE_NAME traverses free entries too, so @@ -286,6 +289,7 @@ gcStableNameTable( void ) } } }); + stableNameUnlock(); } /* ----------------------------------------------------------------------------- diff --git a/rts/ThreadPaused.c b/rts/ThreadPaused.c index cccc7ad0b0..5cbb3f5595 100644 --- a/rts/ThreadPaused.c +++ b/rts/ThreadPaused.c @@ -334,6 +334,17 @@ threadPaused(Capability *cap, StgTSO *tso) } #endif + if (RTS_UNLIKELY(nonmoving_write_barrier_enabled + && ip_THUNK(INFO_PTR_TO_STRUCT(bh_info)))) { + // We are about to replace a thunk with a blackhole. + // Add the free variables of the closure we are about to + // overwrite to the update remembered set. + // N.B. We caught the WHITEHOLE case above. + updateRemembSetPushThunkEager(cap, + THUNK_INFO_PTR_TO_STRUCT(bh_info), + (StgThunk *) bh); + } + // The payload of the BLACKHOLE points to the TSO ((StgInd *)bh)->indirectee = (StgClosure *)tso; write_barrier(); diff --git a/rts/Threads.c b/rts/Threads.c index 3d5b463051..2b11a1eb90 100644 --- a/rts/Threads.c +++ b/rts/Threads.c @@ -86,6 +86,7 @@ createThread(Capability *cap, W_ size) stack->stack_size = stack_size - sizeofW(StgStack); stack->sp = stack->stack + stack->stack_size; stack->dirty = STACK_DIRTY; + stack->marking = 0; tso = (StgTSO *)allocate(cap, sizeofW(StgTSO)); TICK_ALLOC_TSO(); @@ -611,6 +612,7 @@ threadStackOverflow (Capability *cap, StgTSO *tso) TICK_ALLOC_STACK(chunk_size); new_stack->dirty = 0; // begin clean, we'll mark it dirty below + new_stack->marking = 0; new_stack->stack_size = chunk_size - sizeofW(StgStack); new_stack->sp = new_stack->stack + new_stack->stack_size; @@ -721,9 +723,17 @@ threadStackUnderflow (Capability *cap, StgTSO *tso) barf("threadStackUnderflow: not enough space for return values"); } - new_stack->sp -= retvals; + if (RTS_UNLIKELY(nonmoving_write_barrier_enabled)) { + // ensure that values that we copy into the new stack are marked + // for the nonmoving collector. Note that these values won't + // necessarily form a full closure so we need to handle them + // specially. + for (unsigned int i = 0; i < retvals; i++) { + updateRemembSetPushClosure(cap, (StgClosure *) old_stack->sp[i]); + } + } - memcpy(/* dest */ new_stack->sp, + memcpy(/* dest */ new_stack->sp - retvals, /* src */ old_stack->sp, /* size */ retvals * sizeof(W_)); } @@ -735,8 +745,12 @@ threadStackUnderflow (Capability *cap, StgTSO *tso) // restore the stack parameters, and update tot_stack_size tso->tot_stack_size -= old_stack->stack_size; - // we're about to run it, better mark it dirty + // we're about to run it, better mark it dirty. + // + // N.B. the nonmoving collector may mark the stack, meaning that sp must + // point at a valid stack frame. dirty_STACK(cap, new_stack); + new_stack->sp -= retvals; return retvals; } @@ -768,7 +782,7 @@ loop: if (q == (StgMVarTSOQueue*)&stg_END_TSO_QUEUE_closure) { /* No further takes, the MVar is now full. 
*/ if (info == &stg_MVAR_CLEAN_info) { - dirty_MVAR(&cap->r, (StgClosure*)mvar); + dirty_MVAR(&cap->r, (StgClosure*)mvar, mvar->value); } mvar->value = value; diff --git a/rts/Updates.h b/rts/Updates.h index 1bd3e065af..84d9162868 100644 --- a/rts/Updates.h +++ b/rts/Updates.h @@ -50,6 +50,9 @@ \ prim_write_barrier; \ OVERWRITING_CLOSURE(p1); \ + IF_WRITE_BARRIER_ENABLED { \ + ccall updateRemembSetPushThunk_(BaseReg, p1 "ptr"); \ + } \ StgInd_indirectee(p1) = p2; \ prim_write_barrier; \ SET_INFO(p1, stg_BLACKHOLE_info); \ @@ -62,7 +65,7 @@ } else { \ TICK_UPD_NEW_IND(); \ and_then; \ - } + } #else /* !CMINUSMINUS */ @@ -78,6 +81,9 @@ INLINE_HEADER void updateWithIndirection (Capability *cap, /* See Note [Heap memory barriers] in SMP.h */ write_barrier(); OVERWRITING_CLOSURE(p1); + if (RTS_UNLIKELY(nonmoving_write_barrier_enabled)) { + updateRemembSetPushThunk(cap, (StgThunk*)p1); + } ((StgInd *)p1)->indirectee = p2; write_barrier(); SET_INFO(p1, &stg_BLACKHOLE_info); diff --git a/rts/rts.cabal.in b/rts/rts.cabal.in index 7aad5e4385..2c28426d75 100644 --- a/rts/rts.cabal.in +++ b/rts/rts.cabal.in @@ -139,6 +139,7 @@ library rts/Linker.h rts/Main.h rts/Messages.h + rts/NonMoving.h rts/OSThreads.h rts/Parallel.h rts/PrimFloat.h diff --git a/rts/sm/NonMoving.c b/rts/sm/NonMoving.c index f383949ebf..6bccf7f100 100644 --- a/rts/sm/NonMoving.c +++ b/rts/sm/NonMoving.c @@ -33,6 +33,18 @@ static void nonmovingBumpEpoch(void) { nonmovingMarkEpoch = nonmovingMarkEpoch == 1 ? 2 : 1; } +#if defined(THREADED_RTS) +/* + * This mutex ensures that only one non-moving collection is active at a time. + */ +Mutex nonmoving_collection_mutex; + +OSThreadId mark_thread; +bool concurrent_coll_running = false; +Condition concurrent_coll_finished; +Mutex concurrent_coll_finished_lock; +#endif + /* * Note [Non-moving garbage collector] * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -76,13 +88,12 @@ static void nonmovingBumpEpoch(void) { memcount nonmoving_live_words = 0; +#if defined(THREADED_RTS) +static void* nonmovingConcurrentMark(void *mark_queue); +#endif static void nonmovingClearBitmap(struct NonmovingSegment *seg); static void nonmovingMark_(MarkQueue *mark_queue, StgWeak **dead_weaks, StgTSO **resurrected_threads); -/* Signals to mutators that they should stop to synchronize with the nonmoving - * collector so it can proceed to sweep phase. */ -bool nonmoving_syncing = false; - static void nonmovingInitSegment(struct NonmovingSegment *seg, uint8_t block_size) { seg->link = NULL; @@ -283,29 +294,39 @@ static struct NonmovingAllocator *alloc_nonmoving_allocator(uint32_t n_caps) void nonmovingInit(void) { if (! RtsFlags.GcFlags.useNonmoving) return; +#if defined(THREADED_RTS) + initMutex(&nonmoving_collection_mutex); + initCondition(&concurrent_coll_finished); + initMutex(&concurrent_coll_finished_lock); +#endif for (unsigned int i = 0; i < NONMOVING_ALLOCA_CNT; i++) { nonmovingHeap.allocators[i] = alloc_nonmoving_allocator(n_capabilities); } + nonmovingMarkInitUpdRemSet(); } void nonmovingExit(void) { if (! 
RtsFlags.GcFlags.useNonmoving) return; +#if defined(THREADED_RTS) + if (mark_thread) { + debugTrace(DEBUG_nonmoving_gc, + "waiting for nonmoving collector thread to terminate"); + ACQUIRE_LOCK(&concurrent_coll_finished_lock); + waitCondition(&concurrent_coll_finished, &concurrent_coll_finished_lock); + } + + closeMutex(&concurrent_coll_finished_lock); + closeCondition(&concurrent_coll_finished); + closeMutex(&nonmoving_collection_mutex); +#endif + for (unsigned int i = 0; i < NONMOVING_ALLOCA_CNT; i++) { stgFree(nonmovingHeap.allocators[i]); } } /* - * Wait for any concurrent collections to finish. Called during shutdown to - * ensure we don't steal capabilities that the nonmoving collector still has yet - * to synchronize with. - */ -void nonmovingWaitUntilFinished(void) -{ -} - -/* * Assumes that no garbage collector or mutator threads are running to safely * resize the nonmoving_allocators. * @@ -443,6 +464,14 @@ static void nonmovingMarkWeakPtrList(MarkQueue *mark_queue, StgWeak *dead_weak_p void nonmovingCollect(StgWeak **dead_weaks, StgTSO **resurrected_threads) { +#if defined(THREADED_RTS) + // We can't start a new collection until the old one has finished + // We also don't run in the final GC + if (concurrent_coll_running || sched_state > SCHED_RUNNING) { + return; + } +#endif + resizeGenerations(); nonmovingPrepareMark(); @@ -501,9 +530,26 @@ void nonmovingCollect(StgWeak **dead_weaks, StgTSO **resurrected_threads) // those lists to mark function in sequential case. In concurrent case we // allocate fresh lists. +#if defined(THREADED_RTS) + // If we're interrupting or shutting down, do not let this capability go and + // run a STW collection. Reason: we won't be able to acquire this capability + // again for the sync if we let it go, because it'll immediately start doing + // a major GC, because that's what we do when exiting scheduler (see + // exitScheduler()). + if (sched_state == SCHED_RUNNING) { + concurrent_coll_running = true; + nonmoving_write_barrier_enabled = true; + debugTrace(DEBUG_nonmoving_gc, "Starting concurrent mark thread"); + createOSThread(&mark_thread, "non-moving mark thread", + nonmovingConcurrentMark, mark_queue); + } else { + nonmovingConcurrentMark(mark_queue); + } +#else // Use the weak and thread lists from the preparation for any new weaks and // threads found to be dead in mark. nonmovingMark_(mark_queue, dead_weaks, resurrected_threads); +#endif } /* Mark mark queue, threads, and weak pointers until no more weaks have been @@ -523,13 +569,70 @@ static void nonmovingMarkThreadsWeaks(MarkQueue *mark_queue) } } +#if defined(THREADED_RTS) +static void* nonmovingConcurrentMark(void *data) +{ + MarkQueue *mark_queue = (MarkQueue*)data; + StgWeak *dead_weaks = NULL; + StgTSO *resurrected_threads = (StgTSO*)&stg_END_TSO_QUEUE_closure; + nonmovingMark_(mark_queue, &dead_weaks, &resurrected_threads); + return NULL; +} + +// TODO: Not sure where to put this function. +// Append w2 to the end of w1. +static void appendWeakList( StgWeak **w1, StgWeak *w2 ) +{ + while (*w1) { + w1 = &(*w1)->link; + } + *w1 = w2; +} +#endif + static void nonmovingMark_(MarkQueue *mark_queue, StgWeak **dead_weaks, StgTSO **resurrected_threads) { + ACQUIRE_LOCK(&nonmoving_collection_mutex); debugTrace(DEBUG_nonmoving_gc, "Starting mark..."); // Do concurrent marking; most of the heap will get marked here. 
nonmovingMarkThreadsWeaks(mark_queue); +#if defined(THREADED_RTS) + Task *task = newBoundTask(); + + // If at this point we've decided to exit then just return + if (sched_state > SCHED_RUNNING) { + // Note that we break our invariants here and leave segments in + // nonmovingHeap.sweep_list, don't free nonmoving_large_objects etc. + // However, because we won't be running mark-sweep in the final GC, this + // is OK. + + // This is an RTS shutdown so we need to move our copy (snapshot) of + // weaks (nonmoving_old_weak_ptr_list and nonmoving_weak_ptr_list) to + // oldest_gen->threads to be able to run C finalizers in hs_exit_. Note + // that there may be more weaks added to oldest_gen->threads since we + // started mark, so we need to append our list to the tail of + // oldest_gen->threads. + appendWeakList(&nonmoving_old_weak_ptr_list, nonmoving_weak_ptr_list); + appendWeakList(&oldest_gen->weak_ptr_list, nonmoving_old_weak_ptr_list); + // These lists won't be used again so this is not necessary, but still + nonmoving_old_weak_ptr_list = NULL; + nonmoving_weak_ptr_list = NULL; + + goto finish; + } + + // We're still running, request a sync + nonmovingBeginFlush(task); + + bool all_caps_syncd; + do { + all_caps_syncd = nonmovingWaitForFlush(); + nonmovingMarkThreadsWeaks(mark_queue); + } while (!all_caps_syncd); +#endif + nonmovingResurrectThreads(mark_queue, resurrected_threads); // No more resurrecting threads after this point @@ -555,6 +658,18 @@ static void nonmovingMark_(MarkQueue *mark_queue, StgWeak **dead_weaks, StgTSO * debugTrace(DEBUG_nonmoving_gc, "Done marking, resurrecting threads before releasing capabilities"); + + // Schedule finalizers and resurrect threads +#if defined(THREADED_RTS) + // Just pick a random capability. Not sure if this is a good idea -- we use + // only one capability for all finalizers. + scheduleFinalizers(capabilities[0], *dead_weaks); + // Note that this mutates the heap and causes write barriers to run. + // See Note [Unintentional marking in resurrectThreads] in NonMovingMark.c + // for how we deal with this. + resurrectThreads(*resurrected_threads); +#endif + #if defined(DEBUG) // Zap CAFs that we will sweep nonmovingGcCafs(mark_queue); @@ -586,6 +701,12 @@ static void nonmovingMark_(MarkQueue *mark_queue, StgWeak **dead_weaks, StgTSO * nonmoving_old_weak_ptr_list = NULL; } + // Everything has been marked; allow the mutators to proceed +#if defined(THREADED_RTS) + nonmoving_write_barrier_enabled = false; + nonmovingFinishFlush(task); +#endif + current_mark_queue = NULL; freeMarkQueue(mark_queue); stgFree(mark_queue); @@ -609,6 +730,20 @@ static void nonmovingMark_(MarkQueue *mark_queue, StgWeak **dead_weaks, StgTSO * debugTrace(DEBUG_nonmoving_gc, "Finished sweeping."); // TODO: Remainder of things done by GarbageCollect (update stats) + +#if defined(THREADED_RTS) +finish: + boundTaskExiting(task); + + // We are done... 
+ mark_thread = 0; + + // Signal that the concurrent collection is finished, allowing the next + // non-moving collection to proceed + concurrent_coll_running = false; + signalCondition(&concurrent_coll_finished); + RELEASE_LOCK(&nonmoving_collection_mutex); +#endif } #if defined(DEBUG) @@ -817,6 +952,31 @@ void locate_object(P_ obj) return; } } + + // Search workspaces FIXME only works in non-threaded runtime +#if !defined(THREADED_RTS) + for (uint32_t g = 0; g < RtsFlags.GcFlags.generations - 1; ++ g) { + gen_workspace *ws = &gct->gens[g]; + for (bdescr *blk = ws->todo_bd; blk; blk = blk->link) { + if (obj >= blk->start && obj < blk->free) { + debugBelch("%p is in generation %" FMT_Word32 " todo bds\n", obj, g); + return; + } + } + for (bdescr *blk = ws->scavd_list; blk; blk = blk->link) { + if (obj >= blk->start && obj < blk->free) { + debugBelch("%p is in generation %" FMT_Word32 " scavd bds\n", obj, g); + return; + } + } + for (bdescr *blk = ws->todo_large_objects; blk; blk = blk->link) { + if (obj >= blk->start && obj < blk->free) { + debugBelch("%p is in generation %" FMT_Word32 " todo large bds\n", obj, g); + return; + } + } + } +#endif } void nonmovingPrintSweepList() diff --git a/rts/sm/NonMoving.h b/rts/sm/NonMoving.h index a031f3d223..21c69b1ca1 100644 --- a/rts/sm/NonMoving.h +++ b/rts/sm/NonMoving.h @@ -95,7 +95,6 @@ extern memcount nonmoving_live_words; void nonmovingInit(void); void nonmovingExit(void); -void nonmovingWaitUntilFinished(void); // dead_weaks and resurrected_threads lists are used for two things: diff --git a/rts/sm/NonMovingMark.c b/rts/sm/NonMovingMark.c index cf1950471e..b273b09b05 100644 --- a/rts/sm/NonMovingMark.c +++ b/rts/sm/NonMovingMark.c @@ -67,6 +67,14 @@ bdescr *nonmoving_large_objects = NULL; bdescr *nonmoving_marked_large_objects = NULL; memcount n_nonmoving_large_blocks = 0; memcount n_nonmoving_marked_large_blocks = 0; +#if defined(THREADED_RTS) +/* Protects everything above. Furthermore, we only set the BF_MARKED bit of + * large object blocks when this is held. This ensures that the write barrier + * (e.g. finish_upd_rem_set_mark) and the collector (mark_closure) don't try to + * move the same large object to nonmoving_marked_large_objects more than once. + */ +static Mutex nonmoving_large_objects_mutex; +#endif /* * Where we keep our threads during collection since we must have a snapshot of @@ -87,11 +95,257 @@ StgWeak *nonmoving_weak_ptr_list = NULL; StgIndStatic *debug_caf_list_snapshot = (StgIndStatic*)END_OF_CAF_LIST; #endif +/* Note [Update remembered set] + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * The concurrent non-moving collector uses a remembered set to ensure + * that its marking is consistent with the snapshot invariant defined in + * the design. This remembered set, known as the update remembered set, + * records all pointers that have been overwritten since the beginning + * of the concurrent mark. This ensures that concurrent mutation cannot hide + * pointers to live objects from the nonmoving garbage collector. + * + * The update remembered set is maintained via a write barrier that + * is enabled whenever a concurrent mark is active. This write barrier + * can be found in a number of places: + * + * - In rts/Primops.cmm in primops responsible for modifying mutable closures + * (e.g. MVARs, MUT_VARs, etc.) + * + * - In rts/STM.c, where + * + * - In the dirty_* functions found in rts/Storage.c where we dirty MVARs, + * MUT_VARs, TSOs and STACKs. 
STACK is a somewhat special case, as described + * in Note [StgStack dirtiness flags and concurrent marking] in TSO.h. + * + * - In the code generated by the STG code generator for pointer array writes + * + * There is also a read barrier to handle weak references, as described in + * Note [Concurrent read barrier on deRefWeak#]. + * + * The representation of the update remembered set is the same as that of + * the mark queue. For efficiency, each capability maintains its own local + * accumulator of remembered set entries. When a capability fills its + * accumulator it is linked in to the global remembered set + * (upd_rem_set_block_list), where it is consumed by the mark phase. + * + * The mark phase is responsible for freeing update remembered set block + * allocations. + * + * + * Note [Concurrent read barrier on deRefWeak#] + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * In general the non-moving GC assumes that all pointers reachable from a + * marked object are themselves marked (or in the mark queue). However, + * weak pointers are an obvious exception to this rule. In particular, + * deRefWeakPtr# allows the mutator to turn a weak reference into a strong + * reference. This interacts badly with concurrent collection. For + * instance, consider this program: + * + * f :: a -> b -> IO b + * f k v = do + * -- assume that k and v are the only references to the + * -- closures to which they refer. + * weak <- mkWeakPtr k v Nothing + * + * -- N.B. k is now technically dead since the only reference to it is + * -- weak, but we've not yet had a chance to tombstone the WeakPtr + * -- (which will happen in the course of major GC). + * performMajorGC + * -- Now we are running concurrently with the mark... + + * Just x <- deRefWeak weak + * -- We have now introduced a reference to `v`, which will + * -- not be marked as the only reference to `v` when the snapshot was + * -- taken is via a WeakPtr. + * return x + * + */ +static Mutex upd_rem_set_lock; +bdescr *upd_rem_set_block_list = NULL; + +#if defined(THREADED_RTS) +/* Used during the mark/sweep phase transition to track how many capabilities + * have pushed their update remembered sets. Protected by upd_rem_set_lock. + */ +static volatile StgWord upd_rem_set_flush_count = 0; +#endif + + +/* Signaled by each capability when it has flushed its update remembered set */ +static Condition upd_rem_set_flushed_cond; + +/* Indicates to mutators that the write barrier must be respected. Set while + * concurrent mark is running. + */ +StgWord nonmoving_write_barrier_enabled = false; + /* Used to provide the current mark queue to the young generation * collector for scavenging. */ MarkQueue *current_mark_queue = NULL; +/* Initialise update remembered set data structures */ +void nonmovingMarkInitUpdRemSet() { + initMutex(&upd_rem_set_lock); + initCondition(&upd_rem_set_flushed_cond); +#if defined(THREADED_RTS) + initMutex(&nonmoving_large_objects_mutex); +#endif +} + +#if defined(THREADED_RTS) && defined(DEBUG) +static uint32_t markQueueLength(MarkQueue *q); +#endif +static void init_mark_queue_(MarkQueue *queue); + +/* Transfers the given capability's update-remembered set to the global + * remembered set. + * + * Really the argument type should be UpdRemSet* but this would be rather + * inconvenient without polymorphism. 
+ */ +static void nonmovingAddUpdRemSetBlocks(MarkQueue *rset) +{ + if (markQueueIsEmpty(rset)) return; + + // find the tail of the queue + bdescr *start = rset->blocks; + bdescr *end = start; + while (end->link != NULL) + end = end->link; + + // add the blocks to the global remembered set + ACQUIRE_LOCK(&upd_rem_set_lock); + end->link = upd_rem_set_block_list; + upd_rem_set_block_list = start; + RELEASE_LOCK(&upd_rem_set_lock); + + // Reset remembered set + ACQUIRE_SM_LOCK; + init_mark_queue_(rset); + rset->is_upd_rem_set = true; + RELEASE_SM_LOCK; +} + +#if defined(THREADED_RTS) +/* Called by capabilities to flush their update remembered sets when + * synchronising with the non-moving collector as it transitions from mark to + * sweep phase. + */ +void nonmovingFlushCapUpdRemSetBlocks(Capability *cap) +{ + debugTrace(DEBUG_nonmoving_gc, + "Capability %d flushing update remembered set: %d", + cap->no, markQueueLength(&cap->upd_rem_set.queue)); + nonmovingAddUpdRemSetBlocks(&cap->upd_rem_set.queue); + atomic_inc(&upd_rem_set_flush_count, 1); + signalCondition(&upd_rem_set_flushed_cond); + // After this mutation will remain suspended until nonmovingFinishFlush + // releases its capabilities. +} + +/* Request that all capabilities flush their update remembered sets and suspend + * execution until the further notice. + */ +void nonmovingBeginFlush(Task *task) +{ + debugTrace(DEBUG_nonmoving_gc, "Starting update remembered set flush..."); + upd_rem_set_flush_count = 0; + stopAllCapabilitiesWith(NULL, task, SYNC_FLUSH_UPD_REM_SET); + + // XXX: We may have been given a capability via releaseCapability (i.e. a + // task suspended due to a foreign call) in which case our requestSync + // logic won't have been hit. Make sure that everyone so far has flushed. + // Ideally we want to mark asynchronously with syncing. + for (uint32_t i = 0; i < n_capabilities; i++) { + nonmovingFlushCapUpdRemSetBlocks(capabilities[i]); + } +} + +/* Wait until a capability has flushed its update remembered set. Returns true + * if all capabilities have flushed. + */ +bool nonmovingWaitForFlush() +{ + ACQUIRE_LOCK(&upd_rem_set_lock); + debugTrace(DEBUG_nonmoving_gc, "Flush count %d", upd_rem_set_flush_count); + bool finished = upd_rem_set_flush_count == n_capabilities; + if (!finished) { + waitCondition(&upd_rem_set_flushed_cond, &upd_rem_set_lock); + } + RELEASE_LOCK(&upd_rem_set_lock); + return finished; +} + +/* Note [Unintentional marking in resurrectThreads] + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * In both moving and non-moving collectors threads found to be unreachable are + * evacuated/marked and then resurrected with resurrectThreads. resurrectThreads + * raises an exception in the unreachable thread via raiseAsync, which does + * mutations on the heap. These mutations cause adding stuff to UpdRemSet of the + * thread's capability. 
Here's an example backtrace where this happens: + * + * #0 updateRemembSetPushClosure + * #1 0x000000000072b363 in dirty_TVAR + * #2 0x00000000007162e5 in remove_watch_queue_entries_for_trec + * #3 0x0000000000717098 in stmAbortTransaction + * #4 0x000000000070c6eb in raiseAsync + * #5 0x000000000070b473 in throwToSingleThreaded__ + * #6 0x000000000070b4ab in throwToSingleThreaded + * #7 0x00000000006fce82 in resurrectThreads + * #8 0x00000000007215db in nonmovingMark_ + * #9 0x0000000000721438 in nonmovingConcurrentMark + * #10 0x00007f1ee81cd6db in start_thread + * #11 0x00007f1ee850688f in clone + * + * However, we don't really want to run write barriers when calling + * resurrectThreads here, because we're in a GC pause, and overwritten values + * are definitely gone forever (as opposed to being inserted in a marked object + * or kept in registers and used later). + * + * When this happens, if we don't reset the UpdRemSets, then in the + * next mark we see the objects that were added to UpdRemSets by the previous + * mark's resurrectThreads, and mark them. This keeps unreachable objects + * alive and affects weak finalization and thread resurrection + * (which rely on things becoming unreachable). As an example, stm048 fails when + * we get this wrong, because when we do raiseAsync on a thread that was blocked + * on an STM transaction we mutate a TVAR_WATCH_QUEUE, which has a reference to + * the TSO that was running the STM transaction. If the TSO becomes unreachable + * again in the next GC we don't realize this, because it was added to an + * UpdRemSet in the previous GC's mark phase, because of raiseAsync. + * + * To fix this we clear all UpdRemSets in nonmovingFinishFlush, right before + * releasing capabilities. This is somewhat inefficient (we allow adding objects + * to UpdRemSets, only to later reset them), but the only case where we add to + * UpdRemSets during mark is resurrectThreads, and I don't think we do so many + * resurrections in a thread that we fill UpdRemSets and allocate new blocks. So + * pushing an UpdRemSet in this case is really fast, and resetting is even + * faster (we just update a pointer). + * + * TODO (osa): What if we actually marked UpdRemSets in this case, in the mark + * loop? Would that work? Or what would break? + */ + +/* Notify capabilities that the synchronisation is finished; they may resume + * execution. + */ +void nonmovingFinishFlush(Task *task) +{ + // See Note [Unintentional marking in resurrectThreads] + for (uint32_t i = 0; i < n_capabilities; i++) { + reset_upd_rem_set(&capabilities[i]->upd_rem_set); + } + // Also reset upd_rem_set_block_list in case some of the UpdRemSets were + // filled and we flushed them. + freeChain_lock(upd_rem_set_block_list); + upd_rem_set_block_list = NULL; + + debugTrace(DEBUG_nonmoving_gc, "Finished update remembered set flush..."); + releaseAllCapabilities(n_capabilities, NULL, task); +} +#endif + /********************************************************* * Pushing to either the mark queue or remembered set *********************************************************/ @@ -102,14 +356,18 @@ push (MarkQueue *q, const MarkQueueEnt *ent) // Are we at the end of the block? if (q->top->head == MARK_QUEUE_BLOCK_ENTRIES) { // Yes, this block is full. - // allocate a fresh block.
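Before the push() hunk continues: nonmovingBeginFlush, nonmovingWaitForFlush and nonmovingFinishFlush above form a small handshake API for the mark/sweep transition. The sketch below shows roughly how the concurrent mark thread is assumed to drive it; the real driver is nonmovingMark_ elsewhere in this patch, and the name exampleFlushCycle is illustrative.

    /* Sketch only: stop the mutators, let every capability flush its update
     * remembered set, mark what has arrived, then resume the mutators. */
    static void exampleFlushCycle(Task *task, MarkQueue *mark_queue)
    {
        nonmovingBeginFlush(task);            // stop all caps, request flushes

        bool all_flushed;
        do {
            all_flushed = nonmovingWaitForFlush();
            nonmovingMark(mark_queue);        // drain whatever has been flushed
        } while (!all_flushed);

        nonmovingFinishFlush(task);           // reset UpdRemSets, release caps
    }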
- ACQUIRE_SM_LOCK; - bdescr *bd = allocGroup(1); - bd->link = q->blocks; - q->blocks = bd; - q->top = (MarkQueueBlock *) bd->start; - q->top->head = 0; - RELEASE_SM_LOCK; + if (q->is_upd_rem_set) { + nonmovingAddUpdRemSetBlocks(q); + } else { + // allocate a fresh block. + ACQUIRE_SM_LOCK; + bdescr *bd = allocGroup(1); + bd->link = q->blocks; + q->blocks = bd; + q->top = (MarkQueueBlock *) bd->start; + q->top->head = 0; + RELEASE_SM_LOCK; + } } q->top->entries[q->top->head] = *ent; @@ -183,6 +441,183 @@ void push_fun_srt (MarkQueue *q, const StgInfoTable *info) } /********************************************************* + * Pushing to the update remembered set + * + * upd_rem_set_push_* functions are directly called by + * mutators and need to check whether the value is in the + * non-moving heap. + *********************************************************/ + +// Check if the object is traced by the non-moving collector. This holds in the +// following cases: +// +// - Object is in non-moving heap +// - Object is large (BF_LARGE) and marked as BF_NONMOVING +// - Object is static (HEAP_ALLOCED_GC(obj) == false) +// +static +bool check_in_nonmoving_heap(StgClosure *p) { + if (HEAP_ALLOCED_GC(p)) { + // This works for both large and small objects: + return Bdescr((P_)p)->flags & BF_NONMOVING; + } else { + return true; // a static object + } +} + +/* Push the free variables of a (now-evaluated) thunk to the + * update remembered set. + */ +inline void updateRemembSetPushThunk(Capability *cap, StgThunk *thunk) +{ + const StgInfoTable *info; + do { + info = get_volatile_itbl((StgClosure *) thunk); + } while (info->type == WHITEHOLE); + updateRemembSetPushThunkEager(cap, (StgThunkInfoTable *) info, thunk); +} + +void updateRemembSetPushThunkEager(Capability *cap, + const StgThunkInfoTable *info, + StgThunk *thunk) +{ + /* N.B. info->i.type mustn't be WHITEHOLE */ + switch (info->i.type) { + case THUNK: + case THUNK_1_0: + case THUNK_0_1: + case THUNK_2_0: + case THUNK_1_1: + case THUNK_0_2: + { + MarkQueue *queue = &cap->upd_rem_set.queue; + push_thunk_srt(queue, &info->i); + + // Don't record the origin of objects living outside of the nonmoving + // heap; we can't perform the selector optimisation on them anyway. + bool record_origin = check_in_nonmoving_heap((StgClosure*)thunk); + + for (StgWord i = 0; i < info->i.layout.payload.ptrs; i++) { + if (check_in_nonmoving_heap(thunk->payload[i])) { + push_closure(queue, + thunk->payload[i], + record_origin ? &thunk->payload[i] : NULL); + } + } + break; + } + case AP: + { + MarkQueue *queue = &cap->upd_rem_set.queue; + StgAP *ap = (StgAP *) thunk; + push_closure(queue, ap->fun, &ap->fun); + mark_PAP_payload(queue, ap->fun, ap->payload, ap->n_args); + break; + } + case THUNK_SELECTOR: + case BLACKHOLE: + // TODO: This is right, right?
+ break; + default: + barf("updateRemembSetPushThunk: invalid thunk pushed: p=%p, type=%d", + thunk, info->i.type); + } +} + +void updateRemembSetPushThunk_(StgRegTable *reg, StgThunk *p) +{ + updateRemembSetPushThunk(regTableToCapability(reg), p); +} + +inline void updateRemembSetPushClosure(Capability *cap, StgClosure *p) +{ + if (!check_in_nonmoving_heap(p)) return; + MarkQueue *queue = &cap->upd_rem_set.queue; + push_closure(queue, p, NULL); +} + +void updateRemembSetPushClosure_(StgRegTable *reg, StgClosure *p) +{ + updateRemembSetPushClosure(regTableToCapability(reg), p); +} + +STATIC_INLINE bool needs_upd_rem_set_mark(StgClosure *p) +{ + // TODO: Deduplicate with mark_closure + bdescr *bd = Bdescr((StgPtr) p); + if (bd->gen != oldest_gen) { + return false; + } else if (bd->flags & BF_LARGE) { + if (! (bd->flags & BF_NONMOVING_SWEEPING)) { + return false; + } else { + return ! (bd->flags & BF_MARKED); + } + } else { + struct NonmovingSegment *seg = nonmovingGetSegment((StgPtr) p); + nonmoving_block_idx block_idx = nonmovingGetBlockIdx((StgPtr) p); + return nonmovingGetMark(seg, block_idx) != nonmovingMarkEpoch; + } +} + +/* Set the mark bit; only to be called *after* we have fully marked the closure */ +STATIC_INLINE void finish_upd_rem_set_mark(StgClosure *p) +{ + bdescr *bd = Bdescr((StgPtr) p); + if (bd->flags & BF_LARGE) { + // Someone else may have already marked it. + ACQUIRE_LOCK(&nonmoving_large_objects_mutex); + if (! (bd->flags & BF_MARKED)) { + bd->flags |= BF_MARKED; + dbl_link_remove(bd, &nonmoving_large_objects); + dbl_link_onto(bd, &nonmoving_marked_large_objects); + n_nonmoving_large_blocks -= bd->blocks; + n_nonmoving_marked_large_blocks += bd->blocks; + } + RELEASE_LOCK(&nonmoving_large_objects_mutex); + } else { + struct NonmovingSegment *seg = nonmovingGetSegment((StgPtr) p); + nonmoving_block_idx block_idx = nonmovingGetBlockIdx((StgPtr) p); + nonmovingSetMark(seg, block_idx); + } +} + +void updateRemembSetPushTSO(Capability *cap, StgTSO *tso) +{ + if (needs_upd_rem_set_mark((StgClosure *) tso)) { + debugTrace(DEBUG_nonmoving_gc, "upd_rem_set: TSO %p", tso); + mark_tso(&cap->upd_rem_set.queue, tso); + finish_upd_rem_set_mark((StgClosure *) tso); + } +} + +void updateRemembSetPushStack(Capability *cap, StgStack *stack) +{ + // N.B. caller responsible for checking nonmoving_write_barrier_enabled + if (needs_upd_rem_set_mark((StgClosure *) stack)) { + StgWord marking = stack->marking; + // See Note [StgStack dirtiness flags and concurrent marking] + if (cas(&stack->marking, marking, nonmovingMarkEpoch) + != nonmovingMarkEpoch) { + // We have claimed the right to mark the stack. + debugTrace(DEBUG_nonmoving_gc, "upd_rem_set: STACK %p", stack->sp); + mark_stack(&cap->upd_rem_set.queue, stack); + finish_upd_rem_set_mark((StgClosure *) stack); + return; + } else { + // The concurrent GC has claimed the right to mark the stack. + // Wait until it finishes marking before proceeding with + // mutation. 
+ while (needs_upd_rem_set_mark((StgClosure *) stack)); +#if defined(PARALLEL_GC) + busy_wait_nop(); // TODO: Spinning here is unfortunate +#endif + return; + } + } +} + +/********************************************************* * Pushing to the mark queue *********************************************************/ @@ -192,8 +627,8 @@ void markQueuePush (MarkQueue *q, const MarkQueueEnt *ent) } void markQueuePushClosure (MarkQueue *q, - StgClosure *p, - StgClosure **origin) + StgClosure *p, + StgClosure **origin) { push_closure(q, p, origin); } @@ -264,7 +699,7 @@ again: } /********************************************************* - * Creating and destroying MarkQueues + * Creating and destroying MarkQueues and UpdRemSets *********************************************************/ /* Must hold sm_mutex. */ @@ -281,22 +716,45 @@ void initMarkQueue (MarkQueue *queue) { init_mark_queue_(queue); queue->marked_objects = allocHashTable(); + queue->is_upd_rem_set = false; +} + +/* Must hold sm_mutex. */ +void init_upd_rem_set (UpdRemSet *rset) +{ + init_mark_queue_(&rset->queue); + // Update remembered sets don't have to worry about static objects + rset->queue.marked_objects = NULL; + rset->queue.is_upd_rem_set = true; +} + +void reset_upd_rem_set (UpdRemSet *rset) +{ + // UpdRemSets always have one block for the mark queue. This assertion is to + // update this code if we change that. + ASSERT(rset->queue.blocks->link == NULL); + rset->queue.top->head = 0; } void freeMarkQueue (MarkQueue *queue) { - bdescr* b = queue->blocks; - ACQUIRE_SM_LOCK; - while (b) - { - bdescr* b_ = b->link; - freeGroup(b); - b = b_; - } - RELEASE_SM_LOCK; + freeChain_lock(queue->blocks); freeHashTable(queue->marked_objects, NULL); } +#if defined(THREADED_RTS) && defined(DEBUG) +static uint32_t +markQueueLength (MarkQueue *q) +{ + uint32_t n = 0; + for (bdescr *block = q->blocks; block; block = block->link) { + MarkQueueBlock *queue = (MarkQueueBlock*)block->start; + n += queue->head; + } + return n; +} +#endif + /********************************************************* * Marking @@ -307,7 +765,8 @@ void freeMarkQueue (MarkQueue *queue) * barrier. Consequently it's quite important that we deeply mark * any outstanding transactions. */ -static void mark_trec_header (MarkQueue *queue, StgTRecHeader *trec) +static void +mark_trec_header (MarkQueue *queue, StgTRecHeader *trec) { while (trec != NO_TREC) { StgTRecChunk *chunk = trec->current_chunk; @@ -326,7 +785,8 @@ static void mark_trec_header (MarkQueue *queue, StgTRecHeader *trec) } } -static void mark_tso (MarkQueue *queue, StgTSO *tso) +static void +mark_tso (MarkQueue *queue, StgTSO *tso) { // TODO: Clear dirty if contains only old gen objects @@ -535,7 +995,7 @@ mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin) p = UNTAG_CLOSURE(p); # define PUSH_FIELD(obj, field) \ - markQueuePushClosure(queue, \ + markQueuePushClosure(queue, \ (StgClosure *) (obj)->field, \ (StgClosure **) &(obj)->field) @@ -592,7 +1052,7 @@ mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin) return; case WHITEHOLE: - while (get_itbl(p)->type == WHITEHOLE); + while (get_volatile_itbl(p)->type == WHITEHOLE); // busy_wait_nop(); // FIXME goto try_again; @@ -608,9 +1068,12 @@ mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin) // we moved everything to the non-moving heap before starting the major // collection, we know that we don't need to trace it: it was allocated // after we took our snapshot. 
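An aside on the WHITEHOLE spins in this file, which now go through get_volatile_itbl rather than get_itbl: a plain info-table load may legally be cached in a register by the C compiler, turning the busy-wait into an infinite loop. get_volatile_itbl is defined in the RTS closure macros rather than in this patch; the sketch below (under the assumed name get_volatile_itbl_sketch) only shows the shape it is presumed to have.

    /* Assumed shape of get_volatile_itbl (illustrative): re-load the info
     * pointer through a volatile lvalue on every call so that each iteration
     * of a WHITEHOLE busy-wait observes a fresh value. */
    STATIC_INLINE const StgInfoTable *get_volatile_itbl_sketch(StgClosure *c)
    {
        const StgInfoTable * volatile *infop =
            (const StgInfoTable * volatile *) &c->header.info;
        return INFO_PTR_TO_STRUCT(*infop);
    }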
- +#if !defined(THREADED_RTS) // This should never happen in the non-concurrent case barf("Closure outside of non-moving heap: %p", p); +#else + return; +#endif } ASSERTM(LOOKS_LIKE_CLOSURE_PTR(p), "invalid closure, info=%p", p->header.info); @@ -878,7 +1341,22 @@ mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin) case STACK: { // See Note [StgStack dirtiness flags and concurrent marking] StgStack *stack = (StgStack *) p; - mark_stack(queue, stack); + StgWord marking = stack->marking; + + // N.B. stack->marking must be != nonmovingMarkEpoch unless + // someone has already marked it. + if (cas(&stack->marking, marking, nonmovingMarkEpoch) + != nonmovingMarkEpoch) { + // We have claimed the right to mark the stack. + mark_stack(queue, stack); + } else { + // A mutator has already started marking the stack; we just let it + // do its thing and move on. There's no reason to wait; we know that + // the stack will be fully marked before we sweep due to the final + // post-mark synchronization. Most importantly, we do not set its + // mark bit, the mutator is responsible for this. + return; + } break; } @@ -905,8 +1383,7 @@ mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin) } case WHITEHOLE: - while (get_itbl(p)->type == WHITEHOLE); - // busy_wait_nop(); // FIXME + while (get_volatile_itbl(p)->type == WHITEHOLE); goto try_again; default: @@ -921,6 +1398,12 @@ mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin) * mutator waiting for us to finish so it can start execution. */ if (bd->flags & BF_LARGE) { + /* Marking a large object isn't idempotent since we move it to + * nonmoving_marked_large_objects; to ensure that we don't repeatedly + * mark a large object, we only set BF_MARKED on large objects in the + * nonmoving heap while holding nonmoving_large_objects_mutex + */ + ACQUIRE_LOCK(&nonmoving_large_objects_mutex); if (! (bd->flags & BF_MARKED)) { // Remove the object from nonmoving_large_objects and link it to // nonmoving_marked_large_objects @@ -930,6 +1413,7 @@ mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin) n_nonmoving_marked_large_blocks += bd->blocks; bd->flags |= BF_MARKED; } + RELEASE_LOCK(&nonmoving_large_objects_mutex); } else { // TODO: Kill repetition struct NonmovingSegment *seg = nonmovingGetSegment((StgPtr) p); @@ -947,7 +1431,8 @@ mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin) * c. the mark queue has been seeded with a set of roots. * */ -GNUC_ATTR_HOT void nonmovingMark (MarkQueue *queue) +GNUC_ATTR_HOT void +nonmovingMark (MarkQueue *queue) { debugTrace(DEBUG_nonmoving_gc, "Starting mark pass"); unsigned int count = 0; @@ -974,9 +1459,23 @@ GNUC_ATTR_HOT void nonmovingMark (MarkQueue *queue) break; } case NULL_ENTRY: - // Nothing more to do - debugTrace(DEBUG_nonmoving_gc, "Finished mark pass: %d", count); - return; + // Perhaps the update remembered set has more to mark... 
+ if (upd_rem_set_block_list) { + ACQUIRE_LOCK(&upd_rem_set_lock); + bdescr *old = queue->blocks; + queue->blocks = upd_rem_set_block_list; + queue->top = (MarkQueueBlock *) queue->blocks->start; + upd_rem_set_block_list = NULL; + RELEASE_LOCK(&upd_rem_set_lock); + + ACQUIRE_SM_LOCK; + freeGroup(old); + RELEASE_SM_LOCK; + } else { + // Nothing more to do + debugTrace(DEBUG_nonmoving_gc, "Finished mark pass: %d", count); + return; + } } } } diff --git a/rts/sm/NonMovingMark.h b/rts/sm/NonMovingMark.h index 636f41890c..d7066e56d6 100644 --- a/rts/sm/NonMovingMark.h +++ b/rts/sm/NonMovingMark.h @@ -80,11 +80,23 @@ typedef struct MarkQueue_ { // Cached value of blocks->start. MarkQueueBlock *top; + // Is this a mark queue or a capability-local update remembered set? + bool is_upd_rem_set; + // Marked objects outside of nonmoving heap, namely large and static // objects. HashTable *marked_objects; } MarkQueue; +/* While it shares its representation with MarkQueue, UpdRemSet differs in + * behavior when pushing; namely full chunks are immediately pushed to the + * global update remembered set, not accumulated into a chain. We make this + * distinction apparent in the types. + */ +typedef struct { + MarkQueue queue; +} UpdRemSet; + // The length of MarkQueueBlock.entries #define MARK_QUEUE_BLOCK_ENTRIES ((BLOCK_SIZE - sizeof(MarkQueueBlock)) / sizeof(MarkQueueEnt)) @@ -101,6 +113,22 @@ extern StgIndStatic *debug_caf_list_snapshot; #endif extern MarkQueue *current_mark_queue; +extern bdescr *upd_rem_set_block_list; + +void nonmovingMarkInitUpdRemSet(void); + +void init_upd_rem_set(UpdRemSet *rset); +void reset_upd_rem_set(UpdRemSet *rset); +void updateRemembSetPushThunk(Capability *cap, StgThunk *p); +void updateRemembSetPushTSO(Capability *cap, StgTSO *tso); +void updateRemembSetPushStack(Capability *cap, StgStack *stack); + +#if defined(THREADED_RTS) +void nonmovingFlushCapUpdRemSetBlocks(Capability *cap); +void nonmovingBeginFlush(Task *task); +bool nonmovingWaitForFlush(void); +void nonmovingFinishFlush(Task *task); +#endif void markQueueAddRoot(MarkQueue* q, StgClosure** root); @@ -124,6 +152,9 @@ void markQueuePushClosure_(MarkQueue *q, StgClosure *p); void markQueuePushThunkSrt(MarkQueue *q, const StgInfoTable *info); void markQueuePushFunSrt(MarkQueue *q, const StgInfoTable *info); void markQueuePushArray(MarkQueue *q, const StgMutArrPtrs *array, StgWord start_index); +void updateRemembSetPushThunkEager(Capability *cap, + const StgThunkInfoTable *orig_info, + StgThunk *thunk); INLINE_HEADER bool markQueueIsEmpty(MarkQueue *q) { diff --git a/rts/sm/Sanity.c b/rts/sm/Sanity.c index 3e1748d5b6..0724a0059a 100644 --- a/rts/sm/Sanity.c +++ b/rts/sm/Sanity.c @@ -912,9 +912,11 @@ findMemoryLeak (void) for (i = 0; i < n_capabilities; i++) { markBlocks(gc_threads[i]->free_blocks); markBlocks(capabilities[i]->pinned_object_block); + markBlocks(capabilities[i]->upd_rem_set.queue.blocks); } if (RtsFlags.GcFlags.useNonmoving) { + markBlocks(upd_rem_set_block_list); markBlocks(nonmoving_large_objects); markBlocks(nonmoving_marked_large_objects); for (i = 0; i < NONMOVING_ALLOCA_CNT; i++) { @@ -1054,7 +1056,8 @@ memInventory (bool show) uint32_t g, i; W_ gen_blocks[RtsFlags.GcFlags.generations]; W_ nursery_blocks = 0, retainer_blocks = 0, - arena_blocks = 0, exec_blocks = 0, gc_free_blocks = 0; + arena_blocks = 0, exec_blocks = 0, gc_free_blocks = 0, + upd_rem_set_blocks = 0; W_ live_blocks = 0, free_blocks = 0; bool leak; @@ -1099,12 +1102,19 @@ memInventory (bool show) /* count the blocks on the free 
list */ free_blocks = countFreeList(); + // count UpdRemSet blocks + for (i = 0; i < n_capabilities; ++i) { + upd_rem_set_blocks += countBlocks(capabilities[i]->upd_rem_set.queue.blocks); + } + upd_rem_set_blocks += countBlocks(upd_rem_set_block_list); + live_blocks = 0; for (g = 0; g < RtsFlags.GcFlags.generations; g++) { live_blocks += gen_blocks[g]; } live_blocks += nursery_blocks + - + retainer_blocks + arena_blocks + exec_blocks + gc_free_blocks; + + retainer_blocks + arena_blocks + exec_blocks + gc_free_blocks + + upd_rem_set_blocks; #define MB(n) (((double)(n) * BLOCK_SIZE_W) / ((1024*1024)/sizeof(W_))) @@ -1133,6 +1143,8 @@ memInventory (bool show) gc_free_blocks, MB(gc_free_blocks)); debugBelch(" free : %5" FMT_Word " blocks (%6.1lf MB)\n", free_blocks, MB(free_blocks)); + debugBelch(" UpdRemSet : %5" FMT_Word " blocks (%6.1lf MB)\n", + upd_rem_set_blocks, MB(upd_rem_set_blocks)); debugBelch(" total : %5" FMT_Word " blocks (%6.1lf MB)\n", live_blocks + free_blocks, MB(live_blocks+free_blocks)); if (leak) { diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c index 9fe68e98b7..0199bc58f3 100644 --- a/rts/sm/Storage.c +++ b/rts/sm/Storage.c @@ -281,6 +281,14 @@ void storageAddCapabilities (uint32_t from, uint32_t to) } } + // Initialize NonmovingAllocators and UpdRemSets + if (RtsFlags.GcFlags.useNonmoving) { + nonmovingAddCapabilities(to); + for (i = 0; i < to; ++i) { + init_upd_rem_set(&capabilities[i]->upd_rem_set); + } + } + #if defined(THREADED_RTS) && defined(CC_LLVM_BACKEND) && (CC_SUPPORTS_TLS == 0) newThreadLocalKey(&gctKey); #endif @@ -412,6 +420,22 @@ lockCAF (StgRegTable *reg, StgIndStatic *caf) // successfully claimed by us; overwrite with IND_STATIC #endif + // Push stuff that will become unreachable after updating to UpdRemSet to + // maintain snapshot invariant + const StgInfoTable *orig_info_tbl = INFO_PTR_TO_STRUCT(orig_info); + // OSA: Assertions to make sure my understanding of static thunks is correct + ASSERT(orig_info_tbl->type == THUNK_STATIC); + // Secondly I think static thunks can't have payload: anything that they + // reference should be in SRTs + ASSERT(orig_info_tbl->layout.payload.ptrs == 0); + // Because the payload is empty we just push the SRT + if (RTS_UNLIKELY(nonmoving_write_barrier_enabled)) { + StgThunkInfoTable *thunk_info = itbl_to_thunk_itbl(orig_info_tbl); + if (thunk_info->i.srt) { + updateRemembSetPushClosure(cap, GET_SRT(thunk_info)); + } + } + // For the benefit of revertCAFs(), save the original info pointer caf->saved_info = orig_info; @@ -1083,6 +1107,27 @@ allocatePinned (Capability *cap, W_ n) Write Barriers -------------------------------------------------------------------------- */ +/* These write barriers on heavily mutated objects serve two purposes: + * + * - Efficient maintenance of the generational invariant: Record whether or not + * we have added a particular mutable object to mut_list, as it may contain + * references to younger generations. + * + * - Maintenance of the nonmoving collector's snapshot invariant: Record objects + * which are about to become unreachable due to mutation. + * + * In each case we record whether the object has been added to the mutable list + * by way of either the info pointer or a dedicated "dirty" flag. The GC will + * clear this flag and remove the object from mut_list (or rather, not re-add it) + * if it finds the object contains no references into any younger generation. + * + * Note that all dirty objects will be marked as clean during preparation for a + * concurrent collection.
Consequently, we can use the dirtiness flag to determine + * whether or not we need to add overwritten pointers to the update remembered + * set (since we need only write the value prior to the first update to maintain + * the snapshot invariant). + */ + /* This is the write barrier for MUT_VARs, a.k.a. IORefs. A MUT_VAR_CLEAN object is not on the mutable list; a MUT_VAR_DIRTY @@ -1090,25 +1135,39 @@ allocatePinned (Capability *cap, W_ n) and is put on the mutable list. */ void -dirty_MUT_VAR(StgRegTable *reg, StgClosure *p) +dirty_MUT_VAR(StgRegTable *reg, StgMutVar *mvar, StgClosure *old) { Capability *cap = regTableToCapability(reg); // No barrier required here as no other heap object fields are read. See // note [Heap memory barriers] in SMP.h. - if (p->header.info == &stg_MUT_VAR_CLEAN_info) { - p->header.info = &stg_MUT_VAR_DIRTY_info; - recordClosureMutated(cap,p); + if (mvar->header.info == &stg_MUT_VAR_CLEAN_info) { + mvar->header.info = &stg_MUT_VAR_DIRTY_info; + recordClosureMutated(cap, (StgClosure *) mvar); + if (RTS_UNLIKELY(nonmoving_write_barrier_enabled != 0)) { + updateRemembSetPushClosure_(reg, old); + } } } +/* + * This is the write barrier for TVARs. + * old is the pointer that we overwrote, which is required by the concurrent + * garbage collector. Note that, while StgTVars contain multiple pointers, we + * only overwrite one per dirty_TVAR call, so we only need to take one old + * pointer argument. + */ void -dirty_TVAR(Capability *cap, StgTVar *p) +dirty_TVAR(Capability *cap, StgTVar *p, + StgClosure *old) { // No barrier required here as no other heap object fields are read. See // note [Heap memory barriers] in SMP.h. if (p->header.info == &stg_TVAR_CLEAN_info) { p->header.info = &stg_TVAR_DIRTY_info; recordClosureMutated(cap,(StgClosure*)p); + if (RTS_UNLIKELY(nonmoving_write_barrier_enabled != 0)) { + updateRemembSetPushClosure(cap, old); + } } } @@ -1123,6 +1182,8 @@ setTSOLink (Capability *cap, StgTSO *tso, StgTSO *target) if (tso->dirty == 0) { tso->dirty = 1; recordClosureMutated(cap,(StgClosure*)tso); + if (RTS_UNLIKELY(nonmoving_write_barrier_enabled)) + updateRemembSetPushClosure(cap, (StgClosure *) tso->_link); } tso->_link = target; } @@ -1133,6 +1194,8 @@ setTSOPrev (Capability *cap, StgTSO *tso, StgTSO *target) if (tso->dirty == 0) { tso->dirty = 1; recordClosureMutated(cap,(StgClosure*)tso); + if (RTS_UNLIKELY(nonmoving_write_barrier_enabled)) + updateRemembSetPushClosure(cap, (StgClosure *) tso->block_info.prev); } tso->block_info.prev = target; } @@ -1144,15 +1207,47 @@ dirty_TSO (Capability *cap, StgTSO *tso) tso->dirty = 1; recordClosureMutated(cap,(StgClosure*)tso); } + + if (RTS_UNLIKELY(nonmoving_write_barrier_enabled)) + updateRemembSetPushTSO(cap, tso); } void dirty_STACK (Capability *cap, StgStack *stack) { + // First push to upd_rem_set before we set stack->dirty since the + // nonmoving collector may already be marking the stack. + if (RTS_UNLIKELY(nonmoving_write_barrier_enabled)) + updateRemembSetPushStack(cap, stack); + if (! (stack->dirty & STACK_DIRTY)) { stack->dirty = STACK_DIRTY; recordClosureMutated(cap,(StgClosure*)stack); } + +} + +/* + * This is the concurrent collector's write barrier for MVARs. In the other + * write barriers above this is folded into the dirty_* functions. However, in + * the case of MVars we need to separate the acts of adding the MVar to the + * mutable list and adding its fields to the update remembered set.
+ * + * Specifically, the wakeup loop in stg_putMVarzh wants to freely mutate the + * pointers of the MVar but needs to keep its lock, meaning we can't yet add it + * to the mutable list lest the assertion checking for clean MVars on the + * mutable list would fail. + */ +void +update_MVAR(StgRegTable *reg, StgClosure *p, StgClosure *old_val) +{ + Capability *cap = regTableToCapability(reg); + if (RTS_UNLIKELY(nonmoving_write_barrier_enabled)) { + StgMVar *mvar = (StgMVar *) p; + updateRemembSetPushClosure(cap, old_val); + updateRemembSetPushClosure(cap, (StgClosure *) mvar->head); + updateRemembSetPushClosure(cap, (StgClosure *) mvar->tail); + } } /* @@ -1164,9 +1259,11 @@ dirty_STACK (Capability *cap, StgStack *stack) such as Chaneneos and cheap-concurrency. */ void -dirty_MVAR(StgRegTable *reg, StgClosure *p) +dirty_MVAR(StgRegTable *reg, StgClosure *p, StgClosure *old_val) { - recordClosureMutated(regTableToCapability(reg),p); + Capability *cap = regTableToCapability(reg); + update_MVAR(reg, p, old_val); + recordClosureMutated(cap, p); } /* ----------------------------------------------------------------------------- diff --git a/rts/sm/Storage.h b/rts/sm/Storage.h index 08bdb37ba3..cdb9720650 100644 --- a/rts/sm/Storage.h +++ b/rts/sm/Storage.h @@ -47,8 +47,9 @@ extern Mutex sm_mutex; The write barrier for MVARs and TVARs -------------------------------------------------------------------------- */ -void dirty_MVAR(StgRegTable *reg, StgClosure *p); -void dirty_TVAR(Capability *cap, StgTVar *p); +void update_MVAR(StgRegTable *reg, StgClosure *p, StgClosure *old_val); +void dirty_MVAR(StgRegTable *reg, StgClosure *p, StgClosure *old); +void dirty_TVAR(Capability *cap, StgTVar *p, StgClosure *old); /* ----------------------------------------------------------------------------- Nursery manipulation |
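A usage sketch for the update_MVAR/dirty_MVAR split above. The real caller is the Cmm code of stg_putMVarzh, which is not part of this hunk; example_mvar_update and its locals are illustrative only. Note that dirty_MVAR itself calls update_MVAR, so pushing the same old value twice is redundant work but not a correctness problem.

    /* Sketch only: while the MVar is locked its fields may be rewritten
     * repeatedly; each overwrite needs just the snapshot barrier.  Only once
     * the MVar may safely appear on the mutable list do we use the full
     * write barrier, which also records the closure as mutated. */
    static void example_mvar_update(StgRegTable *reg, StgMVar *mvar,
                                    StgClosure *new_value)
    {
        StgClosure *old_value = mvar->value;     // read what we overwrite first

        update_MVAR(reg, (StgClosure *) mvar, old_value);  // snapshot barrier only
        mvar->value = new_value;                 // mutate while still "clean"

        dirty_MVAR(reg, (StgClosure *) mvar, old_value);   // barrier + mut_list
    }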