 compiler/GHC/StgToCmm/Bind.hs         |   1
 compiler/GHC/StgToCmm/Prim.hs         |  61
 compiler/GHC/StgToCmm/Utils.hs        |  42
 compiler/cmm/CLabel.hs                |   7
 includes/Cmm.h                        |  22
 includes/Rts.h                        |   1
 includes/rts/NonMoving.h              |  24
 includes/rts/storage/ClosureMacros.h  |  14
 includes/rts/storage/GC.h             |   2
 includes/rts/storage/TSO.h            |  50
 includes/stg/MiscClosures.h           |   1
 rts/Apply.cmm                         |   2
 rts/Capability.c                      |  34
 rts/Capability.h                      |   6
 rts/Exception.cmm                     |   5
 rts/Messages.c                        |  10
 rts/PrimOps.cmm                       | 108
 rts/RaiseAsync.c                      |   2
 rts/RtsStartup.c                      |   6
 rts/RtsSymbols.c                      |   5
 rts/STM.c                             |  41
 rts/Schedule.c                        |  11
 rts/StableName.c                      |   4
 rts/ThreadPaused.c                    |  11
 rts/Threads.c                         |  22
 rts/Updates.h                         |   8
 rts/rts.cabal.in                      |   1
 rts/sm/NonMoving.c                    | 186
 rts/sm/NonMoving.h                    |   1
 rts/sm/NonMovingMark.c                | 563
 rts/sm/NonMovingMark.h                |  31
 rts/sm/Sanity.c                       |  16
 rts/sm/Storage.c                      | 111
 rts/sm/Storage.h                      |   5
 34 files changed, 1290 insertions(+), 124 deletions(-)
diff --git a/compiler/GHC/StgToCmm/Bind.hs b/compiler/GHC/StgToCmm/Bind.hs
index 44316cacb0..9e192a0ac8 100644
--- a/compiler/GHC/StgToCmm/Bind.hs
+++ b/compiler/GHC/StgToCmm/Bind.hs
@@ -631,6 +631,7 @@ emitBlackHoleCode node = do
-- work with profiling.
when eager_blackholing $ do
+ whenUpdRemSetEnabled dflags $ emitUpdRemSetPushThunk node
emitStore (cmmOffsetW dflags node (fixedHdrSizeW dflags)) currentTSOExpr
-- See Note [Heap memory barriers] in SMP.h.
emitPrimCall [] MO_WriteBarrier []
diff --git a/compiler/GHC/StgToCmm/Prim.hs b/compiler/GHC/StgToCmm/Prim.hs
index cdbc8d9fd9..155cdcbf80 100644
--- a/compiler/GHC/StgToCmm/Prim.hs
+++ b/compiler/GHC/StgToCmm/Prim.hs
@@ -42,6 +42,7 @@ import BlockId
import MkGraph
import StgSyn
import Cmm
+import Module ( rtsUnitId )
import Type ( Type, tyConAppTyCon )
import TyCon
import CLabel
@@ -339,14 +340,20 @@ dispatchPrimop dflags = \case
emitAssign (CmmLocal res) (cmmLoadIndexW dflags mutv (fixedHdrSizeW dflags) (gcWord dflags))
WriteMutVarOp -> \[mutv, var] -> OpDest_AllDone $ \res@[] -> do
+ old_val <- CmmLocal <$> newTemp (cmmExprType dflags var)
+ emitAssign old_val (cmmLoadIndexW dflags mutv (fixedHdrSizeW dflags) (gcWord dflags))
+
-- Without this write barrier, other CPUs may see this pointer before
-- the writes for the closure it points to have occurred.
+ -- Note that this also must come after we read the old value to ensure
+ -- that the read of old_val comes before another core's write to the
+ -- MutVar's value.
emitPrimCall res MO_WriteBarrier []
emitStore (cmmOffsetW dflags mutv (fixedHdrSizeW dflags)) var
emitCCall
[{-no results-}]
(CmmLit (CmmLabel mkDirty_MUT_VAR_Label))
- [(baseExpr, AddrHint), (mutv,AddrHint)]
+ [(baseExpr, AddrHint), (mutv, AddrHint), (CmmReg old_val, AddrHint)]
-- #define sizzeofByteArrayzh(r,a) \
-- r = ((StgArrBytes *)(a))->bytes
@@ -1983,17 +1990,21 @@ doWritePtrArrayOp :: CmmExpr
doWritePtrArrayOp addr idx val
= do dflags <- getDynFlags
let ty = cmmExprType dflags val
+ hdr_size = arrPtrsHdrSize dflags
+ -- Update remembered set for non-moving collector
+ whenUpdRemSetEnabled dflags
+ $ emitUpdRemSetPush (cmmLoadIndexOffExpr dflags hdr_size ty addr ty idx)
-- This write barrier is to ensure that the heap writes to the object
-- referred to by val have happened before we write val into the array.
-- See #12469 for details.
emitPrimCall [] MO_WriteBarrier []
- mkBasicIndexedWrite (arrPtrsHdrSize dflags) Nothing addr ty idx val
+ mkBasicIndexedWrite hdr_size Nothing addr ty idx val
emit (setInfo addr (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
- -- the write barrier. We must write a byte into the mark table:
- -- bits8[a + header_size + StgMutArrPtrs_size(a) + x >> N]
+ -- the write barrier. We must write a byte into the mark table:
+ -- bits8[a + header_size + StgMutArrPtrs_size(a) + x >> N]
emit $ mkStore (
cmmOffsetExpr dflags
- (cmmOffsetExprW dflags (cmmOffsetB dflags addr (arrPtrsHdrSize dflags))
+ (cmmOffsetExprW dflags (cmmOffsetB dflags addr hdr_size)
(loadArrPtrsSize dflags addr))
(CmmMachOp (mo_wordUShr dflags) [idx,
mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags)])
@@ -2584,6 +2595,9 @@ emitCopyArray copy src0 src_off dst0 dst_off0 n =
dst <- assignTempE dst0
dst_off <- assignTempE dst_off0
+ -- Nonmoving collector write barrier
+ emitCopyUpdRemSetPush dflags (arrPtrsHdrSizeW dflags) dst dst_off n
+
-- Set the dirty bit in the header.
emit (setInfo dst (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
@@ -2646,6 +2660,9 @@ emitCopySmallArray copy src0 src_off dst0 dst_off n =
src <- assignTempE src0
dst <- assignTempE dst0
+ -- Nonmoving collector write barrier
+ emitCopyUpdRemSetPush dflags (smallArrPtrsHdrSizeW dflags) dst dst_off n
+
-- Set the dirty bit in the header.
emit (setInfo dst (CmmLit (CmmLabel mkSMAP_DIRTY_infoLabel)))
@@ -2774,6 +2791,12 @@ doWriteSmallPtrArrayOp :: CmmExpr
doWriteSmallPtrArrayOp addr idx val = do
dflags <- getDynFlags
let ty = cmmExprType dflags val
+
+ -- Update remembered set for non-moving collector
+ tmp <- newTemp ty
+ mkBasicIndexedRead (smallArrPtrsHdrSize dflags) Nothing ty tmp addr ty idx
+ whenUpdRemSetEnabled dflags $ emitUpdRemSetPush (CmmReg (CmmLocal tmp))
+
emitPrimCall [] MO_WriteBarrier [] -- #12469
mkBasicIndexedWrite (smallArrPtrsHdrSize dflags) Nothing addr ty idx val
emit (setInfo addr (CmmLit (CmmLabel mkSMAP_DIRTY_infoLabel)))
@@ -2953,3 +2976,31 @@ emitCtzCall res x width = do
[ res ]
(MO_Ctz width)
[ x ]
+
+---------------------------------------------------------------------------
+-- Pushing to the update remembered set
+---------------------------------------------------------------------------
+
+-- | Push a range of pointer-array elements that are about to be copied over to
+-- the update remembered set.
+emitCopyUpdRemSetPush :: DynFlags
+ -> WordOff -- ^ array header size
+ -> CmmExpr -- ^ destination array
+ -> CmmExpr -- ^ offset in destination array (in words)
+ -> Int -- ^ number of elements to copy
+ -> FCode ()
+emitCopyUpdRemSetPush _dflags _hdr_size _dst _dst_off 0 = return ()
+emitCopyUpdRemSetPush dflags hdr_size dst dst_off n =
+ whenUpdRemSetEnabled dflags $ do
+ updfr_off <- getUpdFrameOff
+ graph <- mkCall lbl (NativeNodeCall,NativeReturn) [] args updfr_off []
+ emit graph
+ where
+ lbl = mkLblExpr $ mkPrimCallLabel
+ $ PrimCall (fsLit "stg_copyArray_barrier") rtsUnitId
+ args =
+ [ mkIntExpr dflags hdr_size
+ , dst
+ , dst_off
+ , mkIntExpr dflags n
+ ]
diff --git a/compiler/GHC/StgToCmm/Utils.hs b/compiler/GHC/StgToCmm/Utils.hs
index 30e37bb930..0b3a8d8b08 100644
--- a/compiler/GHC/StgToCmm/Utils.hs
+++ b/compiler/GHC/StgToCmm/Utils.hs
@@ -39,6 +39,11 @@ module GHC.StgToCmm.Utils (
mkWordCLit,
newStringCLit, newByteStringCLit,
blankWord,
+
+ -- * Update remembered set operations
+ whenUpdRemSetEnabled,
+ emitUpdRemSetPush,
+ emitUpdRemSetPushThunk,
) where
#include "HsVersions.h"
@@ -576,3 +581,40 @@ assignTemp' e
let reg = CmmLocal lreg
emitAssign reg e
return (CmmReg reg)
+
+
+---------------------------------------------------------------------------
+-- Pushing to the update remembered set
+---------------------------------------------------------------------------
+
+whenUpdRemSetEnabled :: DynFlags -> FCode a -> FCode ()
+whenUpdRemSetEnabled dflags code = do
+ do_it <- getCode code
+ the_if <- mkCmmIfThenElse' is_enabled do_it mkNop (Just False)
+ emit the_if
+ where
+ enabled = CmmLoad (CmmLit $ CmmLabel mkNonmovingWriteBarrierEnabledLabel) (bWord dflags)
+ zero = zeroExpr dflags
+ is_enabled = cmmNeWord dflags enabled zero
+
+-- | Emit code to add an entry to a now-overwritten pointer to the update
+-- remembered set.
+emitUpdRemSetPush :: CmmExpr -- ^ value of pointer which was overwritten
+ -> FCode ()
+emitUpdRemSetPush ptr = do
+ emitRtsCall
+ rtsUnitId
+ (fsLit "updateRemembSetPushClosure_")
+ [(CmmReg (CmmGlobal BaseReg), AddrHint),
+ (ptr, AddrHint)]
+ False
+
+emitUpdRemSetPushThunk :: CmmExpr -- ^ the thunk
+ -> FCode ()
+emitUpdRemSetPushThunk ptr = do
+ emitRtsCall
+ rtsUnitId
+ (fsLit "updateRemembSetPushThunk_")
+ [(CmmReg (CmmGlobal BaseReg), AddrHint),
+ (ptr, AddrHint)]
+ False
diff --git a/compiler/cmm/CLabel.hs b/compiler/cmm/CLabel.hs
index 0c3dae8001..66e39f0d69 100644
--- a/compiler/cmm/CLabel.hs
+++ b/compiler/cmm/CLabel.hs
@@ -40,6 +40,7 @@ module CLabel (
mkAsmTempDieLabel,
mkDirty_MUT_VAR_Label,
+ mkNonmovingWriteBarrierEnabledLabel,
mkUpdInfoLabel,
mkBHUpdInfoLabel,
mkIndStaticInfoLabel,
@@ -484,7 +485,9 @@ mkBlockInfoTableLabel name c = IdLabel name c BlockInfoTable
-- See Note [Proc-point local block entry-point].
-- Constructing Cmm Labels
-mkDirty_MUT_VAR_Label, mkUpdInfoLabel,
+mkDirty_MUT_VAR_Label,
+ mkNonmovingWriteBarrierEnabledLabel,
+ mkUpdInfoLabel,
mkBHUpdInfoLabel, mkIndStaticInfoLabel, mkMainCapabilityLabel,
mkMAP_FROZEN_CLEAN_infoLabel, mkMAP_FROZEN_DIRTY_infoLabel,
mkMAP_DIRTY_infoLabel,
@@ -494,6 +497,8 @@ mkDirty_MUT_VAR_Label, mkUpdInfoLabel,
mkSMAP_FROZEN_CLEAN_infoLabel, mkSMAP_FROZEN_DIRTY_infoLabel,
mkSMAP_DIRTY_infoLabel, mkBadAlignmentLabel :: CLabel
mkDirty_MUT_VAR_Label = mkForeignLabel (fsLit "dirty_MUT_VAR") Nothing ForeignLabelInExternalPackage IsFunction
+mkNonmovingWriteBarrierEnabledLabel
+ = CmmLabel rtsUnitId (fsLit "nonmoving_write_barrier_enabled") CmmData
mkUpdInfoLabel = CmmLabel rtsUnitId (fsLit "stg_upd_frame") CmmInfo
mkBHUpdInfoLabel = CmmLabel rtsUnitId (fsLit "stg_bh_upd_frame" ) CmmInfo
mkIndStaticInfoLabel = CmmLabel rtsUnitId (fsLit "stg_IND_STATIC") CmmInfo
diff --git a/includes/Cmm.h b/includes/Cmm.h
index 21d5da310c..42aa0bf550 100644
--- a/includes/Cmm.h
+++ b/includes/Cmm.h
@@ -843,6 +843,10 @@
if (__gen > 0) { recordMutableCap(__p, __gen); }
/* -----------------------------------------------------------------------------
+ Update remembered set write barrier
+ -------------------------------------------------------------------------- */
+
+/* -----------------------------------------------------------------------------
Arrays
-------------------------------------------------------------------------- */
@@ -944,3 +948,21 @@
prim %memcpy(dst_p, src_p, n * SIZEOF_W, SIZEOF_W); \
\
return (dst);
+
+
+#if defined(THREADED_RTS)
+#define IF_WRITE_BARRIER_ENABLED \
+ if (W_[nonmoving_write_barrier_enabled] != 0) (likely: False)
+#else
+// A similar measure is also taken in rts/NonMoving.h, but that isn't visible from C--
+#define IF_WRITE_BARRIER_ENABLED \
+ if (0)
+#define nonmoving_write_barrier_enabled 0
+#endif
+
+// A useful helper for pushing a pointer to the update remembered set.
+// See Note [Update remembered set] in NonMovingMark.c.
+#define updateRemembSetPushPtr(p) \
+ IF_WRITE_BARRIER_ENABLED { \
+ ccall updateRemembSetPushClosure_(BaseReg "ptr", p "ptr"); \
+ }
diff --git a/includes/Rts.h b/includes/Rts.h
index 56642e14c5..def06de90d 100644
--- a/includes/Rts.h
+++ b/includes/Rts.h
@@ -197,6 +197,7 @@ void _assertFail(const char *filename, unsigned int linenum)
#include "rts/storage/ClosureMacros.h"
#include "rts/storage/MBlock.h"
#include "rts/storage/GC.h"
+#include "rts/NonMoving.h"
/* Other RTS external APIs */
#include "rts/Parallel.h"
diff --git a/includes/rts/NonMoving.h b/includes/rts/NonMoving.h
new file mode 100644
index 0000000000..6a6d96b2c8
--- /dev/null
+++ b/includes/rts/NonMoving.h
@@ -0,0 +1,24 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2018-2019
+ *
+ * Non-moving garbage collector
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ * http://ghc.haskell.org/trac/ghc/wiki/Commentary/SourceTree/Includes
+ *
+ * -------------------------------------------------------------------------- */
+
+#pragma once
+
+/* This is called by the code generator */
+extern DLL_IMPORT_RTS
+void updateRemembSetPushClosure_(StgRegTable *reg, StgClosure *p);
+
+void updateRemembSetPushClosure(Capability *cap, StgClosure *p);
+
+void updateRemembSetPushThunk_(StgRegTable *reg, StgThunk *p);
+
+extern StgWord DLL_IMPORT_DATA_VAR(nonmoving_write_barrier_enabled);
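
The C side of this interface is used throughout the RTS in the shape sketched below. This is only an illustrative sketch of the guard-then-push pattern; the concrete call sites appear in the rts/Messages.c, rts/Schedule.c and rts/Threads.c hunks further down, and cap and p here stand for whichever capability and about-to-be-overwritten pointer a call site has in hand.

    // Illustrative sketch only: when no concurrent mark is running the
    // barrier is a single, predictable branch; when one is running, the old
    // pointer is pushed to the capability's update remembered set.
    if (RTS_UNLIKELY(nonmoving_write_barrier_enabled)) {
        updateRemembSetPushClosure(cap, (StgClosure *) p);  // p: old value
    }
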
diff --git a/includes/rts/storage/ClosureMacros.h b/includes/rts/storage/ClosureMacros.h
index a3873cc49d..2af50863d0 100644
--- a/includes/rts/storage/ClosureMacros.h
+++ b/includes/rts/storage/ClosureMacros.h
@@ -107,6 +107,20 @@ INLINE_HEADER const StgConInfoTable *get_con_itbl(const StgClosure *c)
return CON_INFO_PTR_TO_STRUCT((c)->header.info);
}
+/* Used when we expect another thread to be mutating the info table pointer of
+ * a closure (e.g. when busy-waiting on a WHITEHOLE).
+ */
+INLINE_HEADER const StgInfoTable *get_volatile_itbl(StgClosure *c) {
+ // The volatile here is important to ensure that the compiler does not
+ // optimise away multiple loads, e.g. in a busy-wait loop. Note that
+ // we can't use VOLATILE_LOAD here as the casts result in strict aliasing
+ // rule violations and this header may be compiled outside of the RTS
+ // (where we use -fno-strict-aliasing).
+ StgInfoTable * *volatile p = (StgInfoTable * *volatile) &c->header.info;
+ return INFO_PTR_TO_STRUCT(*p);
+}
+
+
INLINE_HEADER StgHalfWord GET_TAG(const StgClosure *con)
{
return get_itbl(con)->srt;
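
A typical use of get_volatile_itbl is sketched below, assuming a caller spinning on a closure whose info pointer another thread may overwrite (the WHITEHOLE case mentioned in the comment); busy_wait_nop is the usual spin hint from SMP.h.

    // Sketch: re-read the info pointer on every iteration; the volatile load
    // inside get_volatile_itbl() stops the compiler hoisting it out of the loop.
    while (get_volatile_itbl(p)->type == WHITEHOLE) {
        busy_wait_nop();
    }
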
diff --git a/includes/rts/storage/GC.h b/includes/rts/storage/GC.h
index 77dbe60297..7931433019 100644
--- a/includes/rts/storage/GC.h
+++ b/includes/rts/storage/GC.h
@@ -234,7 +234,7 @@ void setKeepCAFs (void);
and is put on the mutable list.
-------------------------------------------------------------------------- */
-void dirty_MUT_VAR(StgRegTable *reg, StgClosure *p);
+void dirty_MUT_VAR(StgRegTable *reg, StgMutVar *mv, StgClosure *old);
/* set to disable CAF garbage collection in GHCi. */
/* (needed when dynamic libraries are used). */
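
The rts/sm/Storage.c hunk implementing the new signature is not shown in this excerpt, but based on the existing dirty_MUT_VAR and the barrier pattern used elsewhere in this diff it is expected to look roughly like the hedged sketch below (the _sketch suffix marks it as illustrative rather than the actual implementation).

    void dirty_MUT_VAR_sketch(StgRegTable *reg, StgMutVar *mv, StgClosure *old)
    {
        Capability *cap = regTableToCapability(reg);
        // Generational write barrier: dirty the MUT_VAR and record it on the
        // mutable list, exactly as before.
        if (mv->header.info == &stg_MUT_VAR_CLEAN_info) {
            mv->header.info = &stg_MUT_VAR_DIRTY_info;
            recordClosureMutated(cap, (StgClosure *) mv);
        }
        // New: push the overwritten value so the concurrent mark still sees
        // everything that was reachable when the snapshot was taken.
        if (RTS_UNLIKELY(nonmoving_write_barrier_enabled)) {
            updateRemembSetPushClosure(cap, old);
        }
    }
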
diff --git a/includes/rts/storage/TSO.h b/includes/rts/storage/TSO.h
index 63d2a11e8e..d56ae8ad27 100644
--- a/includes/rts/storage/TSO.h
+++ b/includes/rts/storage/TSO.h
@@ -185,6 +185,53 @@ typedef struct StgTSO_ {
} *StgTSOPtr; // StgTSO defined in rts/Types.h
+/* Note [StgStack dirtiness flags and concurrent marking]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * Without concurrent collection by the nonmoving collector the stack dirtiness story
+ * is quite simple: The stack is either STACK_DIRTY (meaning it has been added to mut_list)
+ * or not.
+ *
+ * However, things are considerably more complicated with concurrent collection
+ * (namely, when nonmoving_write_barrier_enabled is set): In addition to adding
+ * the stack to mut_list and flagging it as STACK_DIRTY, we also must ensure
+ * that stacks are marked in accordance with the nonmoving collector's snapshot
+ * invariant. This is: every stack alive at the time the snapshot is taken must
+ * be marked at some point after the moment the snapshot is taken and before it
+ * is mutated or the commencement of the sweep phase.
+ *
+ * This marking may be done by the concurrent mark phase (in the case of a
+ * thread that never runs during the concurrent mark) or by the mutator when
+ * dirtying the stack. However, it is unsafe for the concurrent collector to
+ * traverse the stack while it is under mutation. Consequently, the following
+ * handshake is obeyed by the mutator's write barrier and the concurrent mark to
+ * ensure this doesn't happen:
+ *
+ * 1. The entity seeking to mark first checks that the stack lives in the nonmoving
+ * generation; if not then the stack was not alive at the time the snapshot
+ * was taken and therefore we need not mark it.
+ *
+ * 2. The entity seeking to mark checks the stack's mark bit. If it is set then
+ * no mark is necessary.
+ *
+ * 3. The entity seeking to mark tries to lock the stack for marking by
+ * atomically setting its `marking` field to the current non-moving mark
+ * epoch:
+ *
+ * a. If the mutator finds the concurrent collector has already locked the
+ * stack then it waits until it is finished (indicated by the mark bit
+ * being set) before proceeding with execution.
+ *
+ * b. If the concurrent collector finds that the mutator has locked the stack
+ * then it moves on, leaving the mutator to mark it. There is no need to wait;
+ * the mark is guaranteed to finish before sweep due to the post-mark
+ * synchronization with mutators.
+ *
+ * c. Whoever succeeds in locking the stack is responsible for marking it and
+ * setting the stack's mark bit (either the BF_MARKED bit for large objects
+ * or otherwise its bit in its segment's mark bitmap).
+ *
+ */
#define STACK_DIRTY 1
// used by sanity checker to verify that all dirty stacks are on the mutable list
@@ -193,7 +240,8 @@ typedef struct StgTSO_ {
typedef struct StgStack_ {
StgHeader header;
StgWord32 stack_size; // stack size in *words*
- StgWord32 dirty; // non-zero => dirty
+ StgWord dirty; // non-zero => dirty
+ StgWord marking; // non-zero => someone is currently marking the stack
StgPtr sp; // current stack pointer
StgWord stack[];
} StgStack;
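
The handshake described in the Note can be condensed into the hedged C sketch below. Every helper used here (stack_in_nonmoving_heap, stack_is_marked, mark_stack_contents, set_stack_mark_bit) is a hypothetical stand-in for the real RTS internals; the point is only the shape of the atomic claim on the new marking field.

    // Hedged sketch of the mutator-side protocol from the Note above.
    static void lock_stack_for_marking_sketch(StgStack *stack)
    {
        // Steps 1 and 2: only unmarked stacks in the nonmoving heap matter.
        if (!stack_in_nonmoving_heap(stack) || stack_is_marked(stack))
            return;

        // Step 3: try to claim the stack by CASing the current mark epoch
        // into its `marking` field.
        if (cas((StgVolatilePtr) &stack->marking, 0, nonmovingMarkEpoch) == 0) {
            // Step 3c: we won the race, so we mark the stack and set its
            // mark bit ourselves.
            mark_stack_contents(stack);
            set_stack_mark_bit(stack);
        } else {
            // Step 3a: the concurrent collector got there first; wait for
            // the mark bit before mutating the stack.
            while (!stack_is_marked(stack)) busy_wait_nop();
        }
    }
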
diff --git a/includes/stg/MiscClosures.h b/includes/stg/MiscClosures.h
index 217b1bc89d..f4ae2245d2 100644
--- a/includes/stg/MiscClosures.h
+++ b/includes/stg/MiscClosures.h
@@ -542,5 +542,6 @@ void * pushCostCentre (void *ccs, void *cc);
// Capability.c
extern unsigned int n_capabilities;
+extern void updateRemembSetPushThunk_(void *reg, void *p1);
#endif
diff --git a/rts/Apply.cmm b/rts/Apply.cmm
index 8d7fc3c012..eeb760c5ed 100644
--- a/rts/Apply.cmm
+++ b/rts/Apply.cmm
@@ -654,6 +654,8 @@ INFO_TABLE(stg_AP_STACK,/*special layout*/0,0,AP_STACK,"AP_STACK","AP_STACK")
/* someone else beat us to it */
jump ENTRY_LBL(stg_WHITEHOLE) (ap);
}
+ // Can't add StgInd_indirectee(ap) to UpdRemSet here because the old value is
+ // not reachable.
StgInd_indirectee(ap) = CurrentTSO;
prim_write_barrier;
SET_INFO(ap, __stg_EAGER_BLACKHOLE_info);
diff --git a/rts/Capability.c b/rts/Capability.c
index 23e581359e..0baa4ef205 100644
--- a/rts/Capability.c
+++ b/rts/Capability.c
@@ -292,6 +292,11 @@ initCapability (Capability *cap, uint32_t i)
RtsFlags.GcFlags.generations,
"initCapability");
+
+ // At this point the storage manager is not initialized yet, so this will
+ // be initialized in initStorage().
+ cap->upd_rem_set.queue.blocks = NULL;
+
for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
cap->mut_lists[g] = NULL;
}
@@ -861,16 +866,27 @@ yieldCapability (Capability** pCap, Task *task, bool gcAllowed)
{
PendingSync *sync = pending_sync;
- if (sync && sync->type == SYNC_GC_PAR) {
- if (! sync->idle[cap->no]) {
- traceEventGcStart(cap);
- gcWorkerThread(cap);
- traceEventGcEnd(cap);
- traceSparkCounters(cap);
- // See Note [migrated bound threads 2]
- if (task->cap == cap) {
- return true;
+ if (sync) {
+ switch (sync->type) {
+ case SYNC_GC_PAR:
+ if (! sync->idle[cap->no]) {
+ traceEventGcStart(cap);
+ gcWorkerThread(cap);
+ traceEventGcEnd(cap);
+ traceSparkCounters(cap);
+ // See Note [migrated bound threads 2]
+ if (task->cap == cap) {
+ return true;
+ }
}
+ break;
+
+ case SYNC_FLUSH_UPD_REM_SET:
+ debugTrace(DEBUG_nonmoving_gc, "Flushing update remembered set blocks...");
+ break;
+
+ default:
+ break;
}
}
}
diff --git a/rts/Capability.h b/rts/Capability.h
index 0c41456261..e51769f887 100644
--- a/rts/Capability.h
+++ b/rts/Capability.h
@@ -85,6 +85,9 @@ struct Capability_ {
bdescr **mut_lists;
bdescr **saved_mut_lists; // tmp use during GC
+ // The update remembered set for the non-moving collector
+ UpdRemSet upd_rem_set;
+
// block for allocating pinned objects into
bdescr *pinned_object_block;
// full pinned object blocks allocated since the last GC
@@ -257,7 +260,8 @@ extern Capability **capabilities;
typedef enum {
SYNC_OTHER,
SYNC_GC_SEQ,
- SYNC_GC_PAR
+ SYNC_GC_PAR,
+ SYNC_FLUSH_UPD_REM_SET
} SyncType;
//
diff --git a/rts/Exception.cmm b/rts/Exception.cmm
index 8ea94b19f2..334d0ef823 100644
--- a/rts/Exception.cmm
+++ b/rts/Exception.cmm
@@ -318,6 +318,7 @@ stg_killThreadzh (P_ target, P_ exception)
return ();
} else {
StgTSO_why_blocked(CurrentTSO) = BlockedOnMsgThrowTo;
+ updateRemembSetPushPtr(StgTSO_block_info(CurrentTSO));
StgTSO_block_info(CurrentTSO) = msg;
// we must block, and unlock the message before returning
jump stg_block_throwto (target, exception);
@@ -489,6 +490,8 @@ retry_pop_stack:
ccall stmAbortTransaction(MyCapability() "ptr", trec "ptr");
ccall stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr");
+ // No need to push `trec` to the update remembered set; it will no longer
+ // be reachable after we overwrite StgTSO.trec.
StgTSO_trec(CurrentTSO) = NO_TREC;
if (r != 0) {
// Transaction was valid: continue searching for a catch frame
@@ -607,6 +610,8 @@ retry_pop_stack:
outer = StgTRecHeader_enclosing_trec(trec);
ccall stmAbortTransaction(MyCapability() "ptr", trec "ptr");
ccall stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr");
+ // No need to push `trec` to update remembered set since we just freed
+ // it; it is no longer reachable.
StgTSO_trec(CurrentTSO) = outer;
Sp = Sp + SIZEOF_StgCatchSTMFrame;
}
diff --git a/rts/Messages.c b/rts/Messages.c
index d878db5eda..b16ada53e1 100644
--- a/rts/Messages.c
+++ b/rts/Messages.c
@@ -244,8 +244,8 @@ loop:
// a barrier is necessary to ensure that all writes are visible.
// See Note [Heap memory barriers] in SMP.h.
write_barrier();
+ dirty_TSO(cap, owner); // we will modify owner->bq
owner->bq = bq;
- dirty_TSO(cap, owner); // we modified owner->bq
// If the owner of the blackhole is currently runnable, then
// bump it to the front of the run queue. This gives the
@@ -262,6 +262,9 @@ loop:
// point to the BLOCKING_QUEUE from the BLACKHOLE
write_barrier(); // make the BQ visible, see Note [Heap memory barriers].
+ if (RTS_UNLIKELY(nonmoving_write_barrier_enabled)) {
+ updateRemembSetPushClosure(cap, (StgClosure*)p);
+ }
((StgInd*)bh)->indirectee = (StgClosure *)bq;
recordClosureMutated(cap,bh); // bh was mutated
@@ -290,6 +293,11 @@ loop:
}
#endif
+ if (RTS_UNLIKELY(nonmoving_write_barrier_enabled)) {
+ // We are about to overwrite bq->queue; make sure its current value
+ // makes it into the update remembered set
+ updateRemembSetPushClosure(cap, (StgClosure*)bq->queue);
+ }
msg->link = bq->queue;
bq->queue = msg;
// No barrier is necessary here: we are only exposing the
diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm
index a2ab3de586..6d3df0700c 100644
--- a/rts/PrimOps.cmm
+++ b/rts/PrimOps.cmm
@@ -349,8 +349,13 @@ stg_casArrayzh ( gcptr arr, W_ ind, gcptr old, gcptr new )
// Compare and Swap Succeeded:
SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, CCCS);
len = StgMutArrPtrs_ptrs(arr);
+
// The write barrier. We must write a byte into the mark table:
I8[arr + SIZEOF_StgMutArrPtrs + WDS(len) + (ind >> MUT_ARR_PTRS_CARD_BITS )] = 1;
+
+ // Concurrent GC write barrier
+ updateRemembSetPushPtr(old);
+
return (0,new);
}
}
@@ -462,16 +467,45 @@ stg_thawSmallArrayzh ( gcptr src, W_ offset, W_ n )
cloneSmallArray(stg_SMALL_MUT_ARR_PTRS_DIRTY_info, src, offset, n)
}
+// Concurrent GC write barrier for pointer array copies
+//
+// hdr_size in bytes. dst_off in words, n in words.
+stg_copyArray_barrier ( W_ hdr_size, gcptr dst, W_ dst_off, W_ n)
+{
+ W_ end, p;
+ ASSERT(n > 0); // Assumes n==0 is handled by caller
+ p = dst + hdr_size + WDS(dst_off);
+ end = p + WDS(n);
+
+again:
+ IF_WRITE_BARRIER_ENABLED {
+ ccall updateRemembSetPushClosure_(BaseReg "ptr", W_[p] "ptr");
+ }
+ p = p + WDS(1);
+ if (p < end) {
+ goto again;
+ }
+
+ return ();
+}
+
stg_copySmallArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n)
{
W_ dst_p, src_p, bytes;
- SET_INFO(dst, stg_SMALL_MUT_ARR_PTRS_DIRTY_info);
+ if (n > 0) {
+ IF_WRITE_BARRIER_ENABLED {
+ call stg_copyArray_barrier(SIZEOF_StgSmallMutArrPtrs,
+ dst, dst_off, n);
+ }
- dst_p = dst + SIZEOF_StgSmallMutArrPtrs + WDS(dst_off);
- src_p = src + SIZEOF_StgSmallMutArrPtrs + WDS(src_off);
- bytes = WDS(n);
- prim %memcpy(dst_p, src_p, bytes, SIZEOF_W);
+ SET_INFO(dst, stg_SMALL_MUT_ARR_PTRS_DIRTY_info);
+
+ dst_p = dst + SIZEOF_StgSmallMutArrPtrs + WDS(dst_off);
+ src_p = src + SIZEOF_StgSmallMutArrPtrs + WDS(src_off);
+ bytes = WDS(n);
+ prim %memcpy(dst_p, src_p, bytes, SIZEOF_W);
+ }
return ();
}
@@ -480,15 +514,22 @@ stg_copySmallMutableArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n
{
W_ dst_p, src_p, bytes;
- SET_INFO(dst, stg_SMALL_MUT_ARR_PTRS_DIRTY_info);
+ if (n > 0) {
+ IF_WRITE_BARRIER_ENABLED {
+ call stg_copyArray_barrier(SIZEOF_StgSmallMutArrPtrs,
+ dst, dst_off, n);
+ }
- dst_p = dst + SIZEOF_StgSmallMutArrPtrs + WDS(dst_off);
- src_p = src + SIZEOF_StgSmallMutArrPtrs + WDS(src_off);
- bytes = WDS(n);
- if (src == dst) {
- prim %memmove(dst_p, src_p, bytes, SIZEOF_W);
- } else {
- prim %memcpy(dst_p, src_p, bytes, SIZEOF_W);
+ SET_INFO(dst, stg_SMALL_MUT_ARR_PTRS_DIRTY_info);
+
+ dst_p = dst + SIZEOF_StgSmallMutArrPtrs + WDS(dst_off);
+ src_p = src + SIZEOF_StgSmallMutArrPtrs + WDS(src_off);
+ bytes = WDS(n);
+ if (src == dst) {
+ prim %memmove(dst_p, src_p, bytes, SIZEOF_W);
+ } else {
+ prim %memcpy(dst_p, src_p, bytes, SIZEOF_W);
+ }
}
return ();
@@ -510,6 +551,10 @@ stg_casSmallArrayzh ( gcptr arr, W_ ind, gcptr old, gcptr new )
} else {
// Compare and Swap Succeeded:
SET_HDR(arr, stg_SMALL_MUT_ARR_PTRS_DIRTY_info, CCCS);
+
+ // Concurrent GC write barrier
+ updateRemembSetPushPtr(old);
+
return (0,new);
}
}
@@ -549,7 +594,7 @@ stg_casMutVarzh ( gcptr mv, gcptr old, gcptr new )
return (1,h);
} else {
if (GET_INFO(mv) == stg_MUT_VAR_CLEAN_info) {
- ccall dirty_MUT_VAR(BaseReg "ptr", mv "ptr");
+ ccall dirty_MUT_VAR(BaseReg "ptr", mv "ptr", old);
}
return (0,new);
}
@@ -562,7 +607,7 @@ stg_casMutVarzh ( gcptr mv, gcptr old, gcptr new )
} else {
StgMutVar_var(mv) = new;
if (GET_INFO(mv) == stg_MUT_VAR_CLEAN_info) {
- ccall dirty_MUT_VAR(BaseReg "ptr", mv "ptr");
+ ccall dirty_MUT_VAR(BaseReg "ptr", mv "ptr", old);
}
return (0,new);
}
@@ -629,11 +674,12 @@ stg_atomicModifyMutVar2zh ( gcptr mv, gcptr f )
(h) = prim %cmpxchgW(mv + SIZEOF_StgHeader + OFFSET_StgMutVar_var, x, y);
if (h != x) { goto retry; }
#else
+ h = StgMutVar_var(mv);
StgMutVar_var(mv) = y;
#endif
if (GET_INFO(mv) == stg_MUT_VAR_CLEAN_info) {
- ccall dirty_MUT_VAR(BaseReg "ptr", mv "ptr");
+ ccall dirty_MUT_VAR(BaseReg "ptr", mv "ptr", h);
}
return (x,z);
@@ -755,6 +801,9 @@ stg_addCFinalizzerToWeakzh ( W_ fptr, // finalizer
return (0);
}
+ // Write barrier for concurrent non-moving collector
+ updateRemembSetPushPtr(StgWeak_cfinalizers(w))
+
StgCFinalizerList_link(c) = StgWeak_cfinalizers(w);
StgWeak_cfinalizers(w) = c;
@@ -835,6 +884,8 @@ stg_deRefWeakzh ( gcptr w )
if (info == stg_WEAK_info) {
code = 1;
val = StgWeak_value(w);
+ // See Note [Concurrent read barrier on deRefWeak#] in NonMovingMark.c
+ updateRemembSetPushPtr(val);
} else {
code = 0;
val = w;
@@ -1501,7 +1552,7 @@ stg_takeMVarzh ( P_ mvar /* :: MVar a */ )
*/
if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) {
if (info == stg_MVAR_CLEAN_info) {
- ccall dirty_MVAR(BaseReg "ptr", mvar "ptr");
+ ccall dirty_MVAR(BaseReg "ptr", mvar "ptr", StgMVar_value(mvar) "ptr");
}
// We want to put the heap check down here in the slow path,
@@ -1547,6 +1598,9 @@ loop:
// If the MVar is not already dirty, then we don't need to make
// it dirty, as it is empty with nothing blocking on it.
unlockClosure(mvar, info);
+ // However, we do need to ensure that the nonmoving collector
+ // knows about the reference to the value that we just removed...
+ updateRemembSetPushPtr(val);
return (val);
}
qinfo = StgHeader_info(q);
@@ -1560,7 +1614,7 @@ loop:
// There are putMVar(s) waiting... wake up the first thread on the queue
if (info == stg_MVAR_CLEAN_info) {
- ccall dirty_MVAR(BaseReg "ptr", mvar "ptr");
+ ccall dirty_MVAR(BaseReg "ptr", mvar "ptr", val "ptr");
}
tso = StgMVarTSOQueue_tso(q);
@@ -1629,7 +1683,7 @@ loop:
// There are putMVar(s) waiting... wake up the first thread on the queue
if (info == stg_MVAR_CLEAN_info) {
- ccall dirty_MVAR(BaseReg "ptr", mvar "ptr");
+ ccall dirty_MVAR(BaseReg "ptr", mvar "ptr", val "ptr");
}
tso = StgMVarTSOQueue_tso(q);
@@ -1667,7 +1721,7 @@ stg_putMVarzh ( P_ mvar, /* :: MVar a */
if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) {
if (info == stg_MVAR_CLEAN_info) {
- ccall dirty_MVAR(BaseReg "ptr", mvar "ptr");
+ ccall dirty_MVAR(BaseReg "ptr", mvar "ptr", StgMVar_value(mvar) "ptr");
}
// We want to put the heap check down here in the slow path,
@@ -1701,14 +1755,20 @@ stg_putMVarzh ( P_ mvar, /* :: MVar a */
jump stg_block_putmvar(mvar,val);
}
+ // We are going to mutate the closure, make sure its current pointers
+ // are marked.
+ if (info == stg_MVAR_CLEAN_info) {
+ ccall update_MVAR(BaseReg "ptr", mvar "ptr", StgMVar_value(mvar) "ptr");
+ }
+
q = StgMVar_head(mvar);
loop:
if (q == stg_END_TSO_QUEUE_closure) {
/* No further takes, the MVar is now full. */
+ StgMVar_value(mvar) = val;
if (info == stg_MVAR_CLEAN_info) {
- ccall dirty_MVAR(BaseReg "ptr", mvar "ptr");
+ ccall dirty_MVAR(BaseReg "ptr", mvar "ptr", StgMVar_value(mvar) "ptr");
}
- StgMVar_value(mvar) = val;
unlockClosure(mvar, stg_MVAR_DIRTY_info);
return ();
}
@@ -1790,7 +1850,7 @@ loop:
if (q == stg_END_TSO_QUEUE_closure) {
/* No further takes, the MVar is now full. */
if (info == stg_MVAR_CLEAN_info) {
- ccall dirty_MVAR(BaseReg "ptr", mvar "ptr");
+ ccall dirty_MVAR(BaseReg "ptr", mvar "ptr", StgMVar_value(mvar) "ptr");
}
StgMVar_value(mvar) = val;
@@ -1861,7 +1921,7 @@ stg_readMVarzh ( P_ mvar, /* :: MVar a */ )
if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) {
if (info == stg_MVAR_CLEAN_info) {
- ccall dirty_MVAR(BaseReg "ptr", mvar "ptr");
+ ccall dirty_MVAR(BaseReg "ptr", mvar "ptr", StgMVar_value(mvar));
}
ALLOC_PRIM_WITH_CUSTOM_FAILURE
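
The reason stg_copyArray_barrier walks the destination range before the copy happens is the snapshot invariant: the pointers about to be overwritten may be the only references to their targets that existed when the snapshot was taken, so they must reach the update remembered set first. In C the same loop would look roughly like the sketch below (illustrative only; the real barrier is the C-- function in the hunk above).

    // Illustrative C rendering of stg_copyArray_barrier: push every element of
    // the destination range before memcpy overwrites it. dst_payload points at
    // the array payload; dst_off and n are in words.
    static void copy_array_barrier_sketch(Capability *cap,
                                          StgClosure **dst_payload,
                                          StgWord dst_off, StgWord n)
    {
        for (StgWord i = 0; i < n; i++) {
            updateRemembSetPushClosure(cap, dst_payload[dst_off + i]);
        }
    }
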
diff --git a/rts/RaiseAsync.c b/rts/RaiseAsync.c
index 807c3e3d30..50cddff051 100644
--- a/rts/RaiseAsync.c
+++ b/rts/RaiseAsync.c
@@ -515,9 +515,9 @@ blockedThrowTo (Capability *cap, StgTSO *target, MessageThrowTo *msg)
ASSERT(target->cap == cap);
+ dirty_TSO(cap,target); // we will modify the blocked_exceptions queue
msg->link = target->blocked_exceptions;
target->blocked_exceptions = msg;
- dirty_TSO(cap,target); // we modified the blocked_exceptions queue
}
/* -----------------------------------------------------------------------------
diff --git a/rts/RtsStartup.c b/rts/RtsStartup.c
index ce0fa2d519..d0d08a2495 100644
--- a/rts/RtsStartup.c
+++ b/rts/RtsStartup.c
@@ -392,7 +392,8 @@ hs_exit_(bool wait_foreign)
ioManagerDie();
#endif
- /* stop all running tasks */
+ /* stop all running tasks. This is also where we stop concurrent non-moving
+ * collection if it's running */
exitScheduler(wait_foreign);
/* run C finalizers for all active weak pointers */
@@ -436,9 +437,6 @@ hs_exit_(bool wait_foreign)
/* shutdown the hpc support (if needed) */
exitHpc();
- /* wait for any on-going concurrent GC to finish */
- nonmovingExit();
-
// clean up things from the storage manager's point of view.
// also outputs the stats (+RTS -s) info.
exitStorage();
diff --git a/rts/RtsSymbols.c b/rts/RtsSymbols.c
index e34fcf03f5..e64c78fbcc 100644
--- a/rts/RtsSymbols.c
+++ b/rts/RtsSymbols.c
@@ -14,6 +14,7 @@
#include "HsFFI.h"
#include "sm/Storage.h"
+#include "sm/NonMovingMark.h"
#include <stdbool.h>
#if !defined(mingw32_HOST_OS)
@@ -682,6 +683,9 @@
SymI_HasProto(stg_shrinkMutableByteArrayzh) \
SymI_HasProto(stg_resizzeMutableByteArrayzh) \
SymI_HasProto(newSpark) \
+ SymI_HasProto(updateRemembSetPushThunk) \
+ SymI_HasProto(updateRemembSetPushThunk_) \
+ SymI_HasProto(updateRemembSetPushClosure_) \
SymI_HasProto(performGC) \
SymI_HasProto(performMajorGC) \
SymI_HasProto(prog_argc) \
@@ -1037,6 +1041,7 @@ RtsSymbolVal rtsSyms[] = {
RTS_OPENBSD_ONLY_SYMBOLS
RTS_LIBGCC_SYMBOLS
RTS_LIBFFI_SYMBOLS
+ SymI_HasDataProto(nonmoving_write_barrier_enabled)
#if defined(darwin_HOST_OS) && defined(i386_HOST_ARCH)
// dyld stub code contains references to this,
// but it should never be called because we treat
diff --git a/rts/STM.c b/rts/STM.c
index dc0b0ebb78..c17f33aaa0 100644
--- a/rts/STM.c
+++ b/rts/STM.c
@@ -182,7 +182,8 @@ static void unlock_stm(StgTRecHeader *trec STG_UNUSED) {
TRACE("%p : unlock_stm()", trec);
}
-static StgClosure *lock_tvar(StgTRecHeader *trec STG_UNUSED,
+static StgClosure *lock_tvar(Capability *cap STG_UNUSED,
+ StgTRecHeader *trec STG_UNUSED,
StgTVar *s STG_UNUSED) {
StgClosure *result;
TRACE("%p : lock_tvar(%p)", trec, s);
@@ -197,12 +198,14 @@ static void unlock_tvar(Capability *cap,
StgBool force_update) {
TRACE("%p : unlock_tvar(%p)", trec, s);
if (force_update) {
+ StgClosure *old_value = s -> current_value;
s -> current_value = c;
- dirty_TVAR(cap,s);
+ dirty_TVAR(cap, s, old_value);
}
}
-static StgBool cond_lock_tvar(StgTRecHeader *trec STG_UNUSED,
+static StgBool cond_lock_tvar(Capability *cap STG_UNUSED,
+ StgTRecHeader *trec STG_UNUSED,
StgTVar *s STG_UNUSED,
StgClosure *expected) {
StgClosure *result;
@@ -231,7 +234,8 @@ static void unlock_stm(StgTRecHeader *trec STG_UNUSED) {
smp_locked = 0;
}
-static StgClosure *lock_tvar(StgTRecHeader *trec STG_UNUSED,
+static StgClosure *lock_tvar(Capability *cap STG_UNUSED,
+ StgTRecHeader *trec STG_UNUSED,
StgTVar *s STG_UNUSED) {
StgClosure *result;
TRACE("%p : lock_tvar(%p)", trec, s);
@@ -248,12 +252,14 @@ static void *unlock_tvar(Capability *cap,
TRACE("%p : unlock_tvar(%p, %p)", trec, s, c);
ASSERT(smp_locked == trec);
if (force_update) {
+ StgClosure *old_value = s -> current_value;
s -> current_value = c;
- dirty_TVAR(cap,s);
+ dirty_TVAR(cap, s, old_value);
}
}
-static StgBool cond_lock_tvar(StgTRecHeader *trec STG_UNUSED,
+static StgBool cond_lock_tvar(Capability *cap STG_UNUSED,
+ StgTRecHeader *trec STG_UNUSED,
StgTVar *s STG_UNUSED,
StgClosure *expected) {
StgClosure *result;
@@ -279,7 +285,8 @@ static void unlock_stm(StgTRecHeader *trec STG_UNUSED) {
TRACE("%p : unlock_stm()", trec);
}
-static StgClosure *lock_tvar(StgTRecHeader *trec,
+static StgClosure *lock_tvar(Capability *cap,
+ StgTRecHeader *trec,
StgTVar *s STG_UNUSED) {
StgClosure *result;
TRACE("%p : lock_tvar(%p)", trec, s);
@@ -289,6 +296,10 @@ static StgClosure *lock_tvar(StgTRecHeader *trec,
} while (GET_INFO(UNTAG_CLOSURE(result)) == &stg_TREC_HEADER_info);
} while (cas((void *)&(s -> current_value),
(StgWord)result, (StgWord)trec) != (StgWord)result);
+
+ if (RTS_UNLIKELY(nonmoving_write_barrier_enabled && result)) {
+ updateRemembSetPushClosure(cap, result);
+ }
return result;
}
@@ -300,10 +311,11 @@ static void unlock_tvar(Capability *cap,
TRACE("%p : unlock_tvar(%p, %p)", trec, s, c);
ASSERT(s -> current_value == (StgClosure *)trec);
s -> current_value = c;
- dirty_TVAR(cap,s);
+ dirty_TVAR(cap, s, (StgClosure *) trec);
}
-static StgBool cond_lock_tvar(StgTRecHeader *trec,
+static StgBool cond_lock_tvar(Capability *cap,
+ StgTRecHeader *trec,
StgTVar *s,
StgClosure *expected) {
StgClosure *result;
@@ -311,6 +323,9 @@ static StgBool cond_lock_tvar(StgTRecHeader *trec,
TRACE("%p : cond_lock_tvar(%p, %p)", trec, s, expected);
w = cas((void *)&(s -> current_value), (StgWord)expected, (StgWord)trec);
result = (StgClosure *)w;
+ if (RTS_UNLIKELY(nonmoving_write_barrier_enabled && result)) {
+ updateRemembSetPushClosure(cap, expected);
+ }
TRACE("%p : %s", trec, result ? "success" : "failure");
return (result == expected);
}
@@ -525,7 +540,7 @@ static void build_watch_queue_entries_for_trec(Capability *cap,
}
s -> first_watch_queue_entry = q;
e -> new_value = (StgClosure *) q;
- dirty_TVAR(cap,s); // we modified first_watch_queue_entry
+ dirty_TVAR(cap, s, (StgClosure *) fq); // we modified first_watch_queue_entry
});
}
@@ -545,7 +560,7 @@ static void remove_watch_queue_entries_for_trec(Capability *cap,
StgTVarWatchQueue *q;
StgClosure *saw;
s = e -> tvar;
- saw = lock_tvar(trec, s);
+ saw = lock_tvar(cap, trec, s);
q = (StgTVarWatchQueue *) (e -> new_value);
TRACE("%p : removing tso=%p from watch queue for tvar=%p",
trec,
@@ -562,7 +577,7 @@ static void remove_watch_queue_entries_for_trec(Capability *cap,
} else {
ASSERT(s -> first_watch_queue_entry == q);
s -> first_watch_queue_entry = nq;
- dirty_TVAR(cap,s); // we modified first_watch_queue_entry
+ dirty_TVAR(cap, s, (StgClosure *) q); // we modified first_watch_queue_entry
}
free_stg_tvar_watch_queue(cap, q);
unlock_tvar(cap, trec, s, saw, false);
@@ -773,7 +788,7 @@ static StgBool validate_and_acquire_ownership (Capability *cap,
s = e -> tvar;
if (acquire_all || entry_is_update(e)) {
TRACE("%p : trying to acquire %p", trec, s);
- if (!cond_lock_tvar(trec, s, e -> expected_value)) {
+ if (!cond_lock_tvar(cap, trec, s, e -> expected_value)) {
TRACE("%p : failed to acquire %p", trec, s);
result = false;
BREAK_FOR_EACH;
diff --git a/rts/Schedule.c b/rts/Schedule.c
index 8d7acc963e..8d82daf381 100644
--- a/rts/Schedule.c
+++ b/rts/Schedule.c
@@ -44,6 +44,8 @@
#include "StablePtr.h"
#include "StableName.h"
#include "TopHandler.h"
+#include "sm/NonMoving.h"
+#include "sm/NonMovingMark.h"
#if defined(HAVE_SYS_TYPES_H)
#include <sys/types.h>
@@ -2497,7 +2499,11 @@ resumeThread (void *task_)
tso = incall->suspended_tso;
incall->suspended_tso = NULL;
incall->suspended_cap = NULL;
- tso->_link = END_TSO_QUEUE; // no write barrier reqd
+ // we will modify tso->_link
+ if (RTS_UNLIKELY(nonmoving_write_barrier_enabled)) {
+ updateRemembSetPushClosure(cap, (StgClosure *)tso->_link);
+ }
+ tso->_link = END_TSO_QUEUE;
traceEventRunThread(cap, tso);
@@ -2671,6 +2677,8 @@ initScheduler(void)
/* Initialise the mutex and condition variables used by
* the scheduler. */
initMutex(&sched_mutex);
+ initMutex(&sync_finished_mutex);
+ initCondition(&sync_finished_cond);
#endif
ACQUIRE_LOCK(&sched_mutex);
@@ -2706,6 +2714,7 @@ exitScheduler (bool wait_foreign USED_IF_THREADS)
// If we haven't killed all the threads yet, do it now.
if (sched_state < SCHED_SHUTTING_DOWN) {
sched_state = SCHED_INTERRUPTING;
+ nonmovingExit();
Capability *cap = task->cap;
waitForCapability(&cap,task);
scheduleDoGC(&cap,task,true);
diff --git a/rts/StableName.c b/rts/StableName.c
index 383d87e3db..4b26fee396 100644
--- a/rts/StableName.c
+++ b/rts/StableName.c
@@ -263,6 +263,9 @@ threadStableNameTable( evac_fn evac, void *user )
void
gcStableNameTable( void )
{
+ // We must take the stable name lock lest we race with the nonmoving
+ // collector (namely nonmovingSweepStableNameTable).
+ stableNameLock();
FOR_EACH_STABLE_NAME(
p, {
// FOR_EACH_STABLE_NAME traverses free entries too, so
@@ -286,6 +289,7 @@ gcStableNameTable( void )
}
}
});
+ stableNameUnlock();
}
/* -----------------------------------------------------------------------------
diff --git a/rts/ThreadPaused.c b/rts/ThreadPaused.c
index cccc7ad0b0..5cbb3f5595 100644
--- a/rts/ThreadPaused.c
+++ b/rts/ThreadPaused.c
@@ -334,6 +334,17 @@ threadPaused(Capability *cap, StgTSO *tso)
}
#endif
+ if (RTS_UNLIKELY(nonmoving_write_barrier_enabled
+ && ip_THUNK(INFO_PTR_TO_STRUCT(bh_info)))) {
+ // We are about to replace a thunk with a blackhole.
+ // Add the free variables of the closure we are about to
+ // overwrite to the update remembered set.
+ // N.B. We caught the WHITEHOLE case above.
+ updateRemembSetPushThunkEager(cap,
+ THUNK_INFO_PTR_TO_STRUCT(bh_info),
+ (StgThunk *) bh);
+ }
+
// The payload of the BLACKHOLE points to the TSO
((StgInd *)bh)->indirectee = (StgClosure *)tso;
write_barrier();
diff --git a/rts/Threads.c b/rts/Threads.c
index 3d5b463051..2b11a1eb90 100644
--- a/rts/Threads.c
+++ b/rts/Threads.c
@@ -86,6 +86,7 @@ createThread(Capability *cap, W_ size)
stack->stack_size = stack_size - sizeofW(StgStack);
stack->sp = stack->stack + stack->stack_size;
stack->dirty = STACK_DIRTY;
+ stack->marking = 0;
tso = (StgTSO *)allocate(cap, sizeofW(StgTSO));
TICK_ALLOC_TSO();
@@ -611,6 +612,7 @@ threadStackOverflow (Capability *cap, StgTSO *tso)
TICK_ALLOC_STACK(chunk_size);
new_stack->dirty = 0; // begin clean, we'll mark it dirty below
+ new_stack->marking = 0;
new_stack->stack_size = chunk_size - sizeofW(StgStack);
new_stack->sp = new_stack->stack + new_stack->stack_size;
@@ -721,9 +723,17 @@ threadStackUnderflow (Capability *cap, StgTSO *tso)
barf("threadStackUnderflow: not enough space for return values");
}
- new_stack->sp -= retvals;
+ if (RTS_UNLIKELY(nonmoving_write_barrier_enabled)) {
+ // ensure that values that we copy into the new stack are marked
+ // for the nonmoving collector. Note that these values won't
+ // necessarily form a full closure so we need to handle them
+ // specially.
+ for (unsigned int i = 0; i < retvals; i++) {
+ updateRemembSetPushClosure(cap, (StgClosure *) old_stack->sp[i]);
+ }
+ }
- memcpy(/* dest */ new_stack->sp,
+ memcpy(/* dest */ new_stack->sp - retvals,
/* src */ old_stack->sp,
/* size */ retvals * sizeof(W_));
}
@@ -735,8 +745,12 @@ threadStackUnderflow (Capability *cap, StgTSO *tso)
// restore the stack parameters, and update tot_stack_size
tso->tot_stack_size -= old_stack->stack_size;
- // we're about to run it, better mark it dirty
+ // we're about to run it, better mark it dirty.
+ //
+ // N.B. the nonmoving collector may mark the stack, meaning that sp must
+ // point at a valid stack frame.
dirty_STACK(cap, new_stack);
+ new_stack->sp -= retvals;
return retvals;
}
@@ -768,7 +782,7 @@ loop:
if (q == (StgMVarTSOQueue*)&stg_END_TSO_QUEUE_closure) {
/* No further takes, the MVar is now full. */
if (info == &stg_MVAR_CLEAN_info) {
- dirty_MVAR(&cap->r, (StgClosure*)mvar);
+ dirty_MVAR(&cap->r, (StgClosure*)mvar, mvar->value);
}
mvar->value = value;
diff --git a/rts/Updates.h b/rts/Updates.h
index 1bd3e065af..84d9162868 100644
--- a/rts/Updates.h
+++ b/rts/Updates.h
@@ -50,6 +50,9 @@
\
prim_write_barrier; \
OVERWRITING_CLOSURE(p1); \
+ IF_WRITE_BARRIER_ENABLED { \
+ ccall updateRemembSetPushThunk_(BaseReg, p1 "ptr"); \
+ } \
StgInd_indirectee(p1) = p2; \
prim_write_barrier; \
SET_INFO(p1, stg_BLACKHOLE_info); \
@@ -62,7 +65,7 @@
} else { \
TICK_UPD_NEW_IND(); \
and_then; \
- }
+ }
#else /* !CMINUSMINUS */
@@ -78,6 +81,9 @@ INLINE_HEADER void updateWithIndirection (Capability *cap,
/* See Note [Heap memory barriers] in SMP.h */
write_barrier();
OVERWRITING_CLOSURE(p1);
+ if (RTS_UNLIKELY(nonmoving_write_barrier_enabled)) {
+ updateRemembSetPushThunk(cap, (StgThunk*)p1);
+ }
((StgInd *)p1)->indirectee = p2;
write_barrier();
SET_INFO(p1, &stg_BLACKHOLE_info);
diff --git a/rts/rts.cabal.in b/rts/rts.cabal.in
index 7aad5e4385..2c28426d75 100644
--- a/rts/rts.cabal.in
+++ b/rts/rts.cabal.in
@@ -139,6 +139,7 @@ library
rts/Linker.h
rts/Main.h
rts/Messages.h
+ rts/NonMoving.h
rts/OSThreads.h
rts/Parallel.h
rts/PrimFloat.h
diff --git a/rts/sm/NonMoving.c b/rts/sm/NonMoving.c
index f383949ebf..6bccf7f100 100644
--- a/rts/sm/NonMoving.c
+++ b/rts/sm/NonMoving.c
@@ -33,6 +33,18 @@ static void nonmovingBumpEpoch(void) {
nonmovingMarkEpoch = nonmovingMarkEpoch == 1 ? 2 : 1;
}
+#if defined(THREADED_RTS)
+/*
+ * This mutex ensures that only one non-moving collection is active at a time.
+ */
+Mutex nonmoving_collection_mutex;
+
+OSThreadId mark_thread;
+bool concurrent_coll_running = false;
+Condition concurrent_coll_finished;
+Mutex concurrent_coll_finished_lock;
+#endif
+
/*
* Note [Non-moving garbage collector]
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -76,13 +88,12 @@ static void nonmovingBumpEpoch(void) {
memcount nonmoving_live_words = 0;
+#if defined(THREADED_RTS)
+static void* nonmovingConcurrentMark(void *mark_queue);
+#endif
static void nonmovingClearBitmap(struct NonmovingSegment *seg);
static void nonmovingMark_(MarkQueue *mark_queue, StgWeak **dead_weaks, StgTSO **resurrected_threads);
-/* Signals to mutators that they should stop to synchronize with the nonmoving
- * collector so it can proceed to sweep phase. */
-bool nonmoving_syncing = false;
-
static void nonmovingInitSegment(struct NonmovingSegment *seg, uint8_t block_size)
{
seg->link = NULL;
@@ -283,29 +294,39 @@ static struct NonmovingAllocator *alloc_nonmoving_allocator(uint32_t n_caps)
void nonmovingInit(void)
{
if (! RtsFlags.GcFlags.useNonmoving) return;
+#if defined(THREADED_RTS)
+ initMutex(&nonmoving_collection_mutex);
+ initCondition(&concurrent_coll_finished);
+ initMutex(&concurrent_coll_finished_lock);
+#endif
for (unsigned int i = 0; i < NONMOVING_ALLOCA_CNT; i++) {
nonmovingHeap.allocators[i] = alloc_nonmoving_allocator(n_capabilities);
}
+ nonmovingMarkInitUpdRemSet();
}
void nonmovingExit(void)
{
if (! RtsFlags.GcFlags.useNonmoving) return;
+#if defined(THREADED_RTS)
+ if (mark_thread) {
+ debugTrace(DEBUG_nonmoving_gc,
+ "waiting for nonmoving collector thread to terminate");
+ ACQUIRE_LOCK(&concurrent_coll_finished_lock);
+ waitCondition(&concurrent_coll_finished, &concurrent_coll_finished_lock);
+ }
+
+ closeMutex(&concurrent_coll_finished_lock);
+ closeCondition(&concurrent_coll_finished);
+ closeMutex(&nonmoving_collection_mutex);
+#endif
+
for (unsigned int i = 0; i < NONMOVING_ALLOCA_CNT; i++) {
stgFree(nonmovingHeap.allocators[i]);
}
}
/*
- * Wait for any concurrent collections to finish. Called during shutdown to
- * ensure we don't steal capabilities that the nonmoving collector still has yet
- * to synchronize with.
- */
-void nonmovingWaitUntilFinished(void)
-{
-}
-
-/*
* Assumes that no garbage collector or mutator threads are running to safely
* resize the nonmoving_allocators.
*
@@ -443,6 +464,14 @@ static void nonmovingMarkWeakPtrList(MarkQueue *mark_queue, StgWeak *dead_weak_p
void nonmovingCollect(StgWeak **dead_weaks, StgTSO **resurrected_threads)
{
+#if defined(THREADED_RTS)
+ // We can't start a new collection until the old one has finished
+ // We also don't run in final GC
+ if (concurrent_coll_running || sched_state > SCHED_RUNNING) {
+ return;
+ }
+#endif
+
resizeGenerations();
nonmovingPrepareMark();
@@ -501,9 +530,26 @@ void nonmovingCollect(StgWeak **dead_weaks, StgTSO **resurrected_threads)
// those lists to mark function in sequential case. In concurrent case we
// allocate fresh lists.
+#if defined(THREADED_RTS)
+ // If we're interrupting or shutting down, do not let this capability go and
+ // run a STW collection. Reason: we won't be able to acquire this capability
+ // again for the sync if we let it go, because it'll immediately start doing
+ // a major GC, because that's what we do when exiting the scheduler (see
+ // exitScheduler()).
+ if (sched_state == SCHED_RUNNING) {
+ concurrent_coll_running = true;
+ nonmoving_write_barrier_enabled = true;
+ debugTrace(DEBUG_nonmoving_gc, "Starting concurrent mark thread");
+ createOSThread(&mark_thread, "non-moving mark thread",
+ nonmovingConcurrentMark, mark_queue);
+ } else {
+ nonmovingConcurrentMark(mark_queue);
+ }
+#else
// Use the weak and thread lists from the preparation for any new weaks and
// threads found to be dead in mark.
nonmovingMark_(mark_queue, dead_weaks, resurrected_threads);
+#endif
}
/* Mark mark queue, threads, and weak pointers until no more weaks have been
@@ -523,13 +569,70 @@ static void nonmovingMarkThreadsWeaks(MarkQueue *mark_queue)
}
}
+#if defined(THREADED_RTS)
+static void* nonmovingConcurrentMark(void *data)
+{
+ MarkQueue *mark_queue = (MarkQueue*)data;
+ StgWeak *dead_weaks = NULL;
+ StgTSO *resurrected_threads = (StgTSO*)&stg_END_TSO_QUEUE_closure;
+ nonmovingMark_(mark_queue, &dead_weaks, &resurrected_threads);
+ return NULL;
+}
+
+// TODO: Not sure where to put this function.
+// Append w2 to the end of w1.
+static void appendWeakList( StgWeak **w1, StgWeak *w2 )
+{
+ while (*w1) {
+ w1 = &(*w1)->link;
+ }
+ *w1 = w2;
+}
+#endif
+
static void nonmovingMark_(MarkQueue *mark_queue, StgWeak **dead_weaks, StgTSO **resurrected_threads)
{
+ ACQUIRE_LOCK(&nonmoving_collection_mutex);
debugTrace(DEBUG_nonmoving_gc, "Starting mark...");
// Do concurrent marking; most of the heap will get marked here.
nonmovingMarkThreadsWeaks(mark_queue);
+#if defined(THREADED_RTS)
+ Task *task = newBoundTask();
+
+ // If at this point we've decided to exit then just return
+ if (sched_state > SCHED_RUNNING) {
+ // Note that we break our invariants here and leave segments in
+ // nonmovingHeap.sweep_list, don't free nonmoving_large_objects etc.
+ // However, because we won't be running mark-sweep in the final GC this
+ // is OK.
+
+ // This is an RTS shutdown so we need to move our copy (snapshot) of
+ // weaks (nonmoving_old_weak_ptr_list and nonmoving_weak_ptr_list) to
+ // oldest_gen->weak_ptr_list to be able to run C finalizers in hs_exit_.
+ // Note that more weaks may have been added to oldest_gen->weak_ptr_list
+ // since we started the mark, so we need to append our list to its tail.
+ appendWeakList(&nonmoving_old_weak_ptr_list, nonmoving_weak_ptr_list);
+ appendWeakList(&oldest_gen->weak_ptr_list, nonmoving_old_weak_ptr_list);
+ // These lists won't be used again so this is not necessary, but still
+ nonmoving_old_weak_ptr_list = NULL;
+ nonmoving_weak_ptr_list = NULL;
+
+ goto finish;
+ }
+
+ // We're still running, request a sync
+ nonmovingBeginFlush(task);
+
+ bool all_caps_syncd;
+ do {
+ all_caps_syncd = nonmovingWaitForFlush();
+ nonmovingMarkThreadsWeaks(mark_queue);
+ } while (!all_caps_syncd);
+#endif
+
nonmovingResurrectThreads(mark_queue, resurrected_threads);
// No more resurrecting threads after this point
@@ -555,6 +658,18 @@ static void nonmovingMark_(MarkQueue *mark_queue, StgWeak **dead_weaks, StgTSO *
debugTrace(DEBUG_nonmoving_gc,
"Done marking, resurrecting threads before releasing capabilities");
+
+ // Schedule finalizers and resurrect threads
+#if defined(THREADED_RTS)
+ // Just pick a random capability. Not sure if this is a good idea -- we use
+ // only one capability for all finalizers.
+ scheduleFinalizers(capabilities[0], *dead_weaks);
+ // Note that this mutates heap and causes running write barriers.
+ // See Note [Unintentional marking in resurrectThreads] in NonMovingMark.c
+ // for how we deal with this.
+ resurrectThreads(*resurrected_threads);
+#endif
+
#if defined(DEBUG)
// Zap CAFs that we will sweep
nonmovingGcCafs(mark_queue);
@@ -586,6 +701,12 @@ static void nonmovingMark_(MarkQueue *mark_queue, StgWeak **dead_weaks, StgTSO *
nonmoving_old_weak_ptr_list = NULL;
}
+ // Everything has been marked; allow the mutators to proceed
+#if defined(THREADED_RTS)
+ nonmoving_write_barrier_enabled = false;
+ nonmovingFinishFlush(task);
+#endif
+
current_mark_queue = NULL;
freeMarkQueue(mark_queue);
stgFree(mark_queue);
@@ -609,6 +730,20 @@ static void nonmovingMark_(MarkQueue *mark_queue, StgWeak **dead_weaks, StgTSO *
debugTrace(DEBUG_nonmoving_gc, "Finished sweeping.");
// TODO: Remainder of things done by GarbageCollect (update stats)
+
+#if defined(THREADED_RTS)
+finish:
+ boundTaskExiting(task);
+
+ // We are done...
+ mark_thread = 0;
+
+ // Signal that the concurrent collection is finished, allowing the next
+ // non-moving collection to proceed
+ concurrent_coll_running = false;
+ signalCondition(&concurrent_coll_finished);
+ RELEASE_LOCK(&nonmoving_collection_mutex);
+#endif
}
#if defined(DEBUG)
@@ -817,6 +952,31 @@ void locate_object(P_ obj)
return;
}
}
+
+ // Search workspaces FIXME only works in non-threaded runtime
+#if !defined(THREADED_RTS)
+ for (uint32_t g = 0; g < RtsFlags.GcFlags.generations - 1; ++ g) {
+ gen_workspace *ws = &gct->gens[g];
+ for (bdescr *blk = ws->todo_bd; blk; blk = blk->link) {
+ if (obj >= blk->start && obj < blk->free) {
+ debugBelch("%p is in generation %" FMT_Word32 " todo bds\n", obj, g);
+ return;
+ }
+ }
+ for (bdescr *blk = ws->scavd_list; blk; blk = blk->link) {
+ if (obj >= blk->start && obj < blk->free) {
+ debugBelch("%p is in generation %" FMT_Word32 " scavd bds\n", obj, g);
+ return;
+ }
+ }
+ for (bdescr *blk = ws->todo_large_objects; blk; blk = blk->link) {
+ if (obj >= blk->start && obj < blk->free) {
+ debugBelch("%p is in generation %" FMT_Word32 " todo large bds\n", obj, g);
+ return;
+ }
+ }
+ }
+#endif
}
void nonmovingPrintSweepList()
diff --git a/rts/sm/NonMoving.h b/rts/sm/NonMoving.h
index a031f3d223..21c69b1ca1 100644
--- a/rts/sm/NonMoving.h
+++ b/rts/sm/NonMoving.h
@@ -95,7 +95,6 @@ extern memcount nonmoving_live_words;
void nonmovingInit(void);
void nonmovingExit(void);
-void nonmovingWaitUntilFinished(void);
// dead_weaks and resurrected_threads lists are used for two things:
diff --git a/rts/sm/NonMovingMark.c b/rts/sm/NonMovingMark.c
index cf1950471e..b273b09b05 100644
--- a/rts/sm/NonMovingMark.c
+++ b/rts/sm/NonMovingMark.c
@@ -67,6 +67,14 @@ bdescr *nonmoving_large_objects = NULL;
bdescr *nonmoving_marked_large_objects = NULL;
memcount n_nonmoving_large_blocks = 0;
memcount n_nonmoving_marked_large_blocks = 0;
+#if defined(THREADED_RTS)
+/* Protects everything above. Furthermore, we only set the BF_MARKED bit of
+ * large object blocks when this is held. This ensures that the write barrier
+ * (e.g. finish_upd_rem_set_mark) and the collector (mark_closure) don't try to
+ * move the same large object to nonmoving_marked_large_objects more than once.
+ */
+static Mutex nonmoving_large_objects_mutex;
+#endif
/*
* Where we keep our threads during collection since we must have a snapshot of
@@ -87,11 +95,257 @@ StgWeak *nonmoving_weak_ptr_list = NULL;
StgIndStatic *debug_caf_list_snapshot = (StgIndStatic*)END_OF_CAF_LIST;
#endif
+/* Note [Update remembered set]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * The concurrent non-moving collector uses a remembered set to ensure
+ * that its marking is consistent with the snapshot invariant defined in
+ * the design. This remembered set, known as the update remembered set,
+ * records all pointers that have been overwritten since the beginning
+ * of the concurrent mark. This ensures that concurrent mutation cannot hide
+ * pointers to live objects from the nonmoving garbage collector.
+ *
+ * The update remembered set is maintained via a write barrier that
+ * is enabled whenever a concurrent mark is active. This write barrier
+ * can be found in a number of places:
+ *
+ * - In rts/Primops.cmm in primops responsible for modifying mutable closures
+ * (e.g. MVARs, MUT_VARs, etc.)
+ *
+ * - In rts/STM.c, where TVars are locked and updated (lock_tvar and
+ * cond_lock_tvar)
+ *
+ * - In the dirty_* functions found in rts/Storage.c where we dirty MVARs,
+ * MUT_VARs, TSOs and STACKs. STACK is a somewhat special case, as described
+ * in Note [StgStack dirtiness flags and concurrent marking] in TSO.h.
+ *
+ * - In the code generated by the STG code generator for pointer array writes
+ *
+ * There is also a read barrier to handle weak references, as described in
+ * Note [Concurrent read barrier on deRefWeak#].
+ *
+ * The representation of the update remembered set is the same as that of
+ * the mark queue. For efficiency, each capability maintains its own local
+ * accumulator of remembered set entries. When a capability fills its
+ * accumulator it is linked in to the global remembered set
+ * (upd_rem_set_block_list), where it is consumed by the mark phase.
+ *
+ * The mark phase is responsible for freeing update remembered set block
+ * allocations.
+ *
+ *
+ * Note [Concurrent read barrier on deRefWeak#]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * In general the non-moving GC assumes that all pointers reachable from a
+ * marked object are themselves marked (or in the mark queue). However,
+ * weak pointers are an obvious exception to this rule. In particular,
+ * deRefWeakPtr# allows the mutator to turn a weak reference into a strong
+ * reference. This interacts badly with concurrent collection. For
+ * instance, consider this program:
+ *
+ * f :: a -> b -> IO b
+ * f k v = do
+ * -- assume that k and v are the only references to the
+ * -- closures to which they refer.
+ * weak <- mkWeakPtr k v Nothing
+ *
+ * -- N.B. k is now technically dead since the only reference to it is
+ * -- weak, but we've not yet had a chance to tombstone the WeakPtr
+ * -- (which will happen in the course of major GC).
+ * performMajorGC
+ * -- Now we are running concurrently with the mark...
+ *
+ * Just x <- deRefWeak weak
+ * -- We have now introduced a reference to `v`, which will
+ * -- not be marked by the collector, since the only reference to `v`
+ * -- when the snapshot was taken was via the WeakPtr.
+ * return x
+ *
+ */
+static Mutex upd_rem_set_lock;
+bdescr *upd_rem_set_block_list = NULL;
+
+#if defined(THREADED_RTS)
+/* Used during the mark/sweep phase transition to track how many capabilities
+ * have pushed their update remembered sets. Protected by upd_rem_set_lock.
+ */
+static volatile StgWord upd_rem_set_flush_count = 0;
+#endif
+
+
+/* Signaled by each capability when it has flushed its update remembered set */
+static Condition upd_rem_set_flushed_cond;
+
+/* Indicates to mutators that the write barrier must be respected. Set while
+ * concurrent mark is running.
+ */
+StgWord nonmoving_write_barrier_enabled = false;
+
/* Used to provide the current mark queue to the young generation
* collector for scavenging.
*/
MarkQueue *current_mark_queue = NULL;
+/* Initialise update remembered set data structures */
+void nonmovingMarkInitUpdRemSet() {
+ initMutex(&upd_rem_set_lock);
+ initCondition(&upd_rem_set_flushed_cond);
+#if defined(THREADED_RTS)
+ initMutex(&nonmoving_large_objects_mutex);
+#endif
+}
+
+#if defined(THREADED_RTS) && defined(DEBUG)
+static uint32_t markQueueLength(MarkQueue *q);
+#endif
+static void init_mark_queue_(MarkQueue *queue);
+
+/* Transfers the given capability's update-remembered set to the global
+ * remembered set.
+ *
+ * Really the argument type should be UpdRemSet* but this would be rather
+ * inconvenient without polymorphism.
+ */
+static void nonmovingAddUpdRemSetBlocks(MarkQueue *rset)
+{
+ if (markQueueIsEmpty(rset)) return;
+
+ // find the tail of the queue
+ bdescr *start = rset->blocks;
+ bdescr *end = start;
+ while (end->link != NULL)
+ end = end->link;
+
+ // add the blocks to the global remembered set
+ ACQUIRE_LOCK(&upd_rem_set_lock);
+ end->link = upd_rem_set_block_list;
+ upd_rem_set_block_list = start;
+ RELEASE_LOCK(&upd_rem_set_lock);
+
+ // Reset remembered set
+ ACQUIRE_SM_LOCK;
+ init_mark_queue_(rset);
+ rset->is_upd_rem_set = true;
+ RELEASE_SM_LOCK;
+}
+
+#if defined(THREADED_RTS)
+/* Called by capabilities to flush their update remembered sets when
+ * synchronising with the non-moving collector as it transitions from mark to
+ * sweep phase.
+ */
+void nonmovingFlushCapUpdRemSetBlocks(Capability *cap)
+{
+ debugTrace(DEBUG_nonmoving_gc,
+ "Capability %d flushing update remembered set: %d",
+ cap->no, markQueueLength(&cap->upd_rem_set.queue));
+ nonmovingAddUpdRemSetBlocks(&cap->upd_rem_set.queue);
+ atomic_inc(&upd_rem_set_flush_count, 1);
+ signalCondition(&upd_rem_set_flushed_cond);
+ // After this the mutator will remain suspended until nonmovingFinishFlush
+ // releases the capabilities.
+}
+
+/* Request that all capabilities flush their update remembered sets and suspend
+ * execution until further notice.
+ */
+void nonmovingBeginFlush(Task *task)
+{
+ debugTrace(DEBUG_nonmoving_gc, "Starting update remembered set flush...");
+ upd_rem_set_flush_count = 0;
+ stopAllCapabilitiesWith(NULL, task, SYNC_FLUSH_UPD_REM_SET);
+
+ // XXX: We may have been given a capability via releaseCapability (i.e. a
+ // task suspended due to a foreign call) in which case our requestSync
+ // logic won't have been hit. Make sure that everyone so far has flushed.
+ // Ideally we want to mark asynchronously with syncing.
+ for (uint32_t i = 0; i < n_capabilities; i++) {
+ nonmovingFlushCapUpdRemSetBlocks(capabilities[i]);
+ }
+}
+
+/* Wait until a capability has flushed its update remembered set. Returns true
+ * if all capabilities have flushed.
+ */
+bool nonmovingWaitForFlush()
+{
+ ACQUIRE_LOCK(&upd_rem_set_lock);
+ debugTrace(DEBUG_nonmoving_gc, "Flush count %d", upd_rem_set_flush_count);
+ bool finished = upd_rem_set_flush_count == n_capabilities;
+ if (!finished) {
+ waitCondition(&upd_rem_set_flushed_cond, &upd_rem_set_lock);
+ }
+ RELEASE_LOCK(&upd_rem_set_lock);
+ return finished;
+}
+
+/* Note [Unintentional marking in resurrectThreads]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * In both the moving and non-moving collectors, threads found to be unreachable
+ * are evacuated/marked and then resurrected with resurrectThreads.
+ * resurrectThreads raises an exception in the unreachable thread via raiseAsync,
+ * which mutates the heap. These mutations add entries to the UpdRemSet of the
+ * thread's capability. Here's an example backtrace where this happens:
+ *
+ * #0 updateRemembSetPushClosure
+ * #1 0x000000000072b363 in dirty_TVAR
+ * #2 0x00000000007162e5 in remove_watch_queue_entries_for_trec
+ * #3 0x0000000000717098 in stmAbortTransaction
+ * #4 0x000000000070c6eb in raiseAsync
+ * #5 0x000000000070b473 in throwToSingleThreaded__
+ * #6 0x000000000070b4ab in throwToSingleThreaded
+ * #7 0x00000000006fce82 in resurrectThreads
+ * #8 0x00000000007215db in nonmovingMark_
+ * #9 0x0000000000721438 in nonmovingConcurrentMark
+ * #10 0x00007f1ee81cd6db in start_thread
+ * #11 0x00007f1ee850688f in clone
+ *
+ * However we don't really want to run write barriers when calling
+ * resurrectThreads here, because we're in a GC pause, and overwritten values
+ * are definitely gone forever (as opposed to being inserted in a marked object
+ * or kept in registers and used later).
+ *
+ * If we don't reset the UpdRemSets when this happens, then in the next mark we
+ * will find the objects added by the previous mark's resurrectThreads in the
+ * UpdRemSets and mark them. This keeps unreachable objects alive and breaks
+ * weak pointer finalization and thread resurrection (both of which rely on
+ * objects becoming unreachable). As an example, stm048 fails when
+ * we get this wrong, because when we do raiseAsync on a thread that was blocked
+ * on an STM transaction we mutate a TVAR_WATCH_QUEUE, which has a reference to
+ * the TSO that was running the STM transaction. If the TSO becomes unreachable
+ * again in the next GC we don't realize this, because raiseAsync added it to an
+ * UpdRemSet during the previous GC's mark phase.
+ *
+ * To fix this we clear all UpdRemSets in nonmovingFinishFlush, right before
+ * releasing capabilities. This is somewhat inefficient (we allow objects to be
+ * added to UpdRemSets only to reset them later), but the only case where we add
+ * to UpdRemSets during mark is resurrectThreads, and we are unlikely to resurrect
+ * so many threads that we fill the UpdRemSets and allocate new blocks. So
+ * pushing to an UpdRemSet in this case is really fast, and resetting is even
+ * faster (we just update a pointer).
+ *
+ * TODO (osa): What if we actually marked UpdRemSets in this case, in the mark
+ * loop? Would that work? Or what would break?
+ */
+
+/* Notify capabilities that the synchronisation is finished; they may resume
+ * execution.
+ */
+void nonmovingFinishFlush(Task *task)
+{
+ // See Note [Unintentional marking in resurrectThreads]
+ for (uint32_t i = 0; i < n_capabilities; i++) {
+ reset_upd_rem_set(&capabilities[i]->upd_rem_set);
+ }
+ // Also reset upd_rem_set_block_list in case some of the UpdRemSets were
+ // filled and we flushed them.
+ freeChain_lock(upd_rem_set_block_list);
+ upd_rem_set_block_list = NULL;
+
+ debugTrace(DEBUG_nonmoving_gc, "Finished update remembered set flush...");
+ releaseAllCapabilities(n_capabilities, NULL, task);
+}
+#endif
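[Editor's note: for orientation, here is a sketch of how the concurrent mark thread is expected to drive these three entry points. The actual driver lives in rts/sm/NonMoving.c; the function name below is illustrative.]

    /* Sketch of the mark-to-sweep synchronisation, assuming the driver interleaves
     * marking with the per-capability flush waits. */
    static void flush_sync_sketch(Task *task, MarkQueue *mark_queue)
    {
        nonmovingBeginFlush(task);         // stop capabilities and request flushes
        bool all_flushed;
        do {
            all_flushed = nonmovingWaitForFlush();
            nonmovingMark(mark_queue);     // consume whatever has been flushed so far
        } while (!all_flushed);
        nonmovingFinishFlush(task);        // reset UpdRemSets, resume the mutators
    }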
+
/*********************************************************
* Pushing to either the mark queue or remembered set
*********************************************************/
@@ -102,14 +356,18 @@ push (MarkQueue *q, const MarkQueueEnt *ent)
// Are we at the end of the block?
if (q->top->head == MARK_QUEUE_BLOCK_ENTRIES) {
// Yes, this block is full.
- // allocate a fresh block.
- ACQUIRE_SM_LOCK;
- bdescr *bd = allocGroup(1);
- bd->link = q->blocks;
- q->blocks = bd;
- q->top = (MarkQueueBlock *) bd->start;
- q->top->head = 0;
- RELEASE_SM_LOCK;
+ if (q->is_upd_rem_set) {
+ nonmovingAddUpdRemSetBlocks(q);
+ } else {
+ // allocate a fresh block.
+ ACQUIRE_SM_LOCK;
+ bdescr *bd = allocGroup(1);
+ bd->link = q->blocks;
+ q->blocks = bd;
+ q->top = (MarkQueueBlock *) bd->start;
+ q->top->head = 0;
+ RELEASE_SM_LOCK;
+ }
}
q->top->entries[q->top->head] = *ent;
@@ -183,6 +441,183 @@ void push_fun_srt (MarkQueue *q, const StgInfoTable *info)
}
/*********************************************************
+ * Pushing to the update remembered set
+ *
+ * upd_rem_set_push_* functions are directly called by
+ * mutators and need to check whether the value is in
+ * non-moving heap.
+ *********************************************************/
+
+// Check if the object is traced by the non-moving collector. This holds in
+// three cases:
+//
+// - Object is in the non-moving heap
+// - Object is large (BF_LARGE) and marked as BF_NONMOVING
+// - Object is static (HEAP_ALLOCED_GC(obj) == false)
+//
+static
+bool check_in_nonmoving_heap(StgClosure *p) {
+ if (HEAP_ALLOCED_GC(p)) {
+ // This works for both large and small objects:
+ return Bdescr((P_)p)->flags & BF_NONMOVING;
+ } else {
+ return true; // a static object
+ }
+}
+
+/* Push the free variables of a (now-evaluated) thunk to the
+ * update remembered set.
+ */
+inline void updateRemembSetPushThunk(Capability *cap, StgThunk *thunk)
+{
+ const StgInfoTable *info;
+ do {
+ info = get_volatile_itbl((StgClosure *) thunk);
+ } while (info->type == WHITEHOLE);
+ updateRemembSetPushThunkEager(cap, (StgThunkInfoTable *) info, thunk);
+}
+
+void updateRemembSetPushThunkEager(Capability *cap,
+ const StgThunkInfoTable *info,
+ StgThunk *thunk)
+{
+ /* N.B. info->i.type mustn't be WHITEHOLE */
+ switch (info->i.type) {
+ case THUNK:
+ case THUNK_1_0:
+ case THUNK_0_1:
+ case THUNK_2_0:
+ case THUNK_1_1:
+ case THUNK_0_2:
+ {
+ MarkQueue *queue = &cap->upd_rem_set.queue;
+ push_thunk_srt(queue, &info->i);
+
+ // Don't record the origin of objects living outside of the nonmoving
+ // heap; we can't perform the selector optimisation on them anyway.
+ bool record_origin = check_in_nonmoving_heap((StgClosure*)thunk);
+
+ for (StgWord i = 0; i < info->i.layout.payload.ptrs; i++) {
+ if (check_in_nonmoving_heap(thunk->payload[i])) {
+ push_closure(queue,
+ thunk->payload[i],
+ record_origin ? &thunk->payload[i] : NULL);
+ }
+ }
+ break;
+ }
+ case AP:
+ {
+ MarkQueue *queue = &cap->upd_rem_set.queue;
+ StgAP *ap = (StgAP *) thunk;
+ push_closure(queue, ap->fun, &ap->fun);
+ mark_PAP_payload(queue, ap->fun, ap->payload, ap->n_args);
+ break;
+ }
+ case THUNK_SELECTOR:
+ case BLACKHOLE:
+ // TODO: This is right, right?
+ break;
+ default:
+ barf("updateRemembSetPushThunk: invalid thunk pushed: p=%p, type=%d",
+ thunk, info->i.type);
+ }
+}
+
+void updateRemembSetPushThunk_(StgRegTable *reg, StgThunk *p)
+{
+ updateRemembSetPushThunk(regTableToCapability(reg), p);
+}
+
+inline void updateRemembSetPushClosure(Capability *cap, StgClosure *p)
+{
+ if (!check_in_nonmoving_heap(p)) return;
+ MarkQueue *queue = &cap->upd_rem_set.queue;
+ push_closure(queue, p, NULL);
+}
+
+void updateRemembSetPushClosure_(StgRegTable *reg, StgClosure *p)
+{
+ updateRemembSetPushClosure(regTableToCapability(reg), p);
+}
+
+STATIC_INLINE bool needs_upd_rem_set_mark(StgClosure *p)
+{
+ // TODO: Deduplicate with mark_closure
+ bdescr *bd = Bdescr((StgPtr) p);
+ if (bd->gen != oldest_gen) {
+ return false;
+ } else if (bd->flags & BF_LARGE) {
+ if (! (bd->flags & BF_NONMOVING_SWEEPING)) {
+ return false;
+ } else {
+ return ! (bd->flags & BF_MARKED);
+ }
+ } else {
+ struct NonmovingSegment *seg = nonmovingGetSegment((StgPtr) p);
+ nonmoving_block_idx block_idx = nonmovingGetBlockIdx((StgPtr) p);
+ return nonmovingGetMark(seg, block_idx) != nonmovingMarkEpoch;
+ }
+}
+
+/* Set the mark bit; only to be called *after* we have fully marked the closure */
+STATIC_INLINE void finish_upd_rem_set_mark(StgClosure *p)
+{
+ bdescr *bd = Bdescr((StgPtr) p);
+ if (bd->flags & BF_LARGE) {
+ // Someone else may have already marked it.
+ ACQUIRE_LOCK(&nonmoving_large_objects_mutex);
+ if (! (bd->flags & BF_MARKED)) {
+ bd->flags |= BF_MARKED;
+ dbl_link_remove(bd, &nonmoving_large_objects);
+ dbl_link_onto(bd, &nonmoving_marked_large_objects);
+ n_nonmoving_large_blocks -= bd->blocks;
+ n_nonmoving_marked_large_blocks += bd->blocks;
+ }
+ RELEASE_LOCK(&nonmoving_large_objects_mutex);
+ } else {
+ struct NonmovingSegment *seg = nonmovingGetSegment((StgPtr) p);
+ nonmoving_block_idx block_idx = nonmovingGetBlockIdx((StgPtr) p);
+ nonmovingSetMark(seg, block_idx);
+ }
+}
+
+void updateRemembSetPushTSO(Capability *cap, StgTSO *tso)
+{
+ if (needs_upd_rem_set_mark((StgClosure *) tso)) {
+ debugTrace(DEBUG_nonmoving_gc, "upd_rem_set: TSO %p", tso);
+ mark_tso(&cap->upd_rem_set.queue, tso);
+ finish_upd_rem_set_mark((StgClosure *) tso);
+ }
+}
+
+void updateRemembSetPushStack(Capability *cap, StgStack *stack)
+{
+ // N.B. caller responsible for checking nonmoving_write_barrier_enabled
+ if (needs_upd_rem_set_mark((StgClosure *) stack)) {
+ StgWord marking = stack->marking;
+ // See Note [StgStack dirtiness flags and concurrent marking]
+ if (cas(&stack->marking, marking, nonmovingMarkEpoch)
+ != nonmovingMarkEpoch) {
+ // We have claimed the right to mark the stack.
+ debugTrace(DEBUG_nonmoving_gc, "upd_rem_set: STACK %p", stack->sp);
+ mark_stack(&cap->upd_rem_set.queue, stack);
+ finish_upd_rem_set_mark((StgClosure *) stack);
+ return;
+ } else {
+ // The concurrent GC has claimed the right to mark the stack.
+ // Wait until it finishes marking before proceeding with
+ // mutation.
+ while (needs_upd_rem_set_mark((StgClosure *) stack)) {
+#if defined(PARALLEL_GC)
+ busy_wait_nop(); // TODO: Spinning here is unfortunate
+#endif
+ }
+ return;
+ }
+ }
+}
+
+/*********************************************************
* Pushing to the mark queue
*********************************************************/
@@ -192,8 +627,8 @@ void markQueuePush (MarkQueue *q, const MarkQueueEnt *ent)
}
void markQueuePushClosure (MarkQueue *q,
- StgClosure *p,
- StgClosure **origin)
+ StgClosure *p,
+ StgClosure **origin)
{
push_closure(q, p, origin);
}
@@ -264,7 +699,7 @@ again:
}
/*********************************************************
- * Creating and destroying MarkQueues
+ * Creating and destroying MarkQueues and UpdRemSets
*********************************************************/
/* Must hold sm_mutex. */
@@ -281,22 +716,45 @@ void initMarkQueue (MarkQueue *queue)
{
init_mark_queue_(queue);
queue->marked_objects = allocHashTable();
+ queue->is_upd_rem_set = false;
+}
+
+/* Must hold sm_mutex. */
+void init_upd_rem_set (UpdRemSet *rset)
+{
+ init_mark_queue_(&rset->queue);
+ // Update remembered sets don't have to worry about static objects
+ rset->queue.marked_objects = NULL;
+ rset->queue.is_upd_rem_set = true;
+}
+
+void reset_upd_rem_set (UpdRemSet *rset)
+{
+ // UpdRemSets always have exactly one block in their queue. This assertion is
+ // here so that we remember to update this code if that ever changes.
+ ASSERT(rset->queue.blocks->link == NULL);
+ rset->queue.top->head = 0;
}
void freeMarkQueue (MarkQueue *queue)
{
- bdescr* b = queue->blocks;
- ACQUIRE_SM_LOCK;
- while (b)
- {
- bdescr* b_ = b->link;
- freeGroup(b);
- b = b_;
- }
- RELEASE_SM_LOCK;
+ freeChain_lock(queue->blocks);
freeHashTable(queue->marked_objects, NULL);
}
+#if defined(THREADED_RTS) && defined(DEBUG)
+static uint32_t
+markQueueLength (MarkQueue *q)
+{
+ uint32_t n = 0;
+ for (bdescr *block = q->blocks; block; block = block->link) {
+ MarkQueueBlock *queue = (MarkQueueBlock*)block->start;
+ n += queue->head;
+ }
+ return n;
+}
+#endif
+
/*********************************************************
* Marking
@@ -307,7 +765,8 @@ void freeMarkQueue (MarkQueue *queue)
* barrier. Consequently it's quite important that we deeply mark
* any outstanding transactions.
*/
-static void mark_trec_header (MarkQueue *queue, StgTRecHeader *trec)
+static void
+mark_trec_header (MarkQueue *queue, StgTRecHeader *trec)
{
while (trec != NO_TREC) {
StgTRecChunk *chunk = trec->current_chunk;
@@ -326,7 +785,8 @@ static void mark_trec_header (MarkQueue *queue, StgTRecHeader *trec)
}
}
-static void mark_tso (MarkQueue *queue, StgTSO *tso)
+static void
+mark_tso (MarkQueue *queue, StgTSO *tso)
{
// TODO: Clear dirty if contains only old gen objects
@@ -535,7 +995,7 @@ mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin)
p = UNTAG_CLOSURE(p);
# define PUSH_FIELD(obj, field) \
- markQueuePushClosure(queue, \
+ markQueuePushClosure(queue, \
(StgClosure *) (obj)->field, \
(StgClosure **) &(obj)->field)
@@ -592,7 +1052,7 @@ mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin)
return;
case WHITEHOLE:
- while (get_itbl(p)->type == WHITEHOLE);
+ while (get_volatile_itbl(p)->type == WHITEHOLE);
// busy_wait_nop(); // FIXME
goto try_again;
@@ -608,9 +1068,12 @@ mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin)
// we moved everything to the non-moving heap before starting the major
// collection, we know that we don't need to trace it: it was allocated
// after we took our snapshot.
-
+#if !defined(THREADED_RTS)
// This should never happen in the non-concurrent case
barf("Closure outside of non-moving heap: %p", p);
+#else
+ return;
+#endif
}
ASSERTM(LOOKS_LIKE_CLOSURE_PTR(p), "invalid closure, info=%p", p->header.info);
@@ -878,7 +1341,22 @@ mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin)
case STACK: {
// See Note [StgStack dirtiness flags and concurrent marking]
StgStack *stack = (StgStack *) p;
- mark_stack(queue, stack);
+ StgWord marking = stack->marking;
+
+ // N.B. stack->marking must be != nonmovingMarkEpoch unless
+ // someone has already marked it.
+ if (cas(&stack->marking, marking, nonmovingMarkEpoch)
+ != nonmovingMarkEpoch) {
+ // We have claimed the right to mark the stack.
+ mark_stack(queue, stack);
+ } else {
+ // A mutator has already started marking the stack; we just let it
+ // do its thing and move on. There's no reason to wait; we know that
+ // the stack will be fully marked before we sweep due to the final
+ // post-mark synchronization. Most importantly, we do not set its
+ // mark bit, the mutator is responsible for this.
+ return;
+ }
break;
}
@@ -905,8 +1383,7 @@ mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin)
}
case WHITEHOLE:
- while (get_itbl(p)->type == WHITEHOLE);
- // busy_wait_nop(); // FIXME
+ while (get_volatile_itbl(p)->type == WHITEHOLE);
goto try_again;
default:
@@ -921,6 +1398,12 @@ mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin)
* mutator waiting for us to finish so it can start execution.
*/
if (bd->flags & BF_LARGE) {
+ /* Marking a large object isn't idempotent since we move it to
+ * nonmoving_marked_large_objects; to ensure that we don't repeatedly
+ * mark a large object, we only set BF_MARKED on large objects in the
+ * nonmoving heap while holding nonmoving_large_objects_mutex
+ */
+ ACQUIRE_LOCK(&nonmoving_large_objects_mutex);
if (! (bd->flags & BF_MARKED)) {
// Remove the object from nonmoving_large_objects and link it to
// nonmoving_marked_large_objects
@@ -930,6 +1413,7 @@ mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin)
n_nonmoving_marked_large_blocks += bd->blocks;
bd->flags |= BF_MARKED;
}
+ RELEASE_LOCK(&nonmoving_large_objects_mutex);
} else {
// TODO: Kill repetition
struct NonmovingSegment *seg = nonmovingGetSegment((StgPtr) p);
@@ -947,7 +1431,8 @@ mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin)
* c. the mark queue has been seeded with a set of roots.
*
*/
-GNUC_ATTR_HOT void nonmovingMark (MarkQueue *queue)
+GNUC_ATTR_HOT void
+nonmovingMark (MarkQueue *queue)
{
debugTrace(DEBUG_nonmoving_gc, "Starting mark pass");
unsigned int count = 0;
@@ -974,9 +1459,23 @@ GNUC_ATTR_HOT void nonmovingMark (MarkQueue *queue)
break;
}
case NULL_ENTRY:
- // Nothing more to do
- debugTrace(DEBUG_nonmoving_gc, "Finished mark pass: %d", count);
- return;
+ // Perhaps the update remembered set has more to mark...
+ if (upd_rem_set_block_list) {
+ ACQUIRE_LOCK(&upd_rem_set_lock);
+ bdescr *old = queue->blocks;
+ queue->blocks = upd_rem_set_block_list;
+ queue->top = (MarkQueueBlock *) queue->blocks->start;
+ upd_rem_set_block_list = NULL;
+ RELEASE_LOCK(&upd_rem_set_lock);
+
+ ACQUIRE_SM_LOCK;
+ freeGroup(old);
+ RELEASE_SM_LOCK;
+ } else {
+ // Nothing more to do
+ debugTrace(DEBUG_nonmoving_gc, "Finished mark pass: %d", count);
+ return;
+ }
}
}
}
diff --git a/rts/sm/NonMovingMark.h b/rts/sm/NonMovingMark.h
index 636f41890c..d7066e56d6 100644
--- a/rts/sm/NonMovingMark.h
+++ b/rts/sm/NonMovingMark.h
@@ -80,11 +80,23 @@ typedef struct MarkQueue_ {
// Cached value of blocks->start.
MarkQueueBlock *top;
+ // Is this a mark queue or a capability-local update remembered set?
+ bool is_upd_rem_set;
+
// Marked objects outside of nonmoving heap, namely large and static
// objects.
HashTable *marked_objects;
} MarkQueue;
+/* While it shares its representation with MarkQueue, UpdRemSet differs in
+ * behavior when pushing; namely full chunks are immediately pushed to the
+ * global update remembered set, not accumulated into a chain. We make this
+ * distinction apparent in the types.
+ */
+typedef struct {
+ MarkQueue queue;
+} UpdRemSet;
+
// The length of MarkQueueBlock.entries
#define MARK_QUEUE_BLOCK_ENTRIES ((BLOCK_SIZE - sizeof(MarkQueueBlock)) / sizeof(MarkQueueEnt))
@@ -101,6 +113,22 @@ extern StgIndStatic *debug_caf_list_snapshot;
#endif
extern MarkQueue *current_mark_queue;
+extern bdescr *upd_rem_set_block_list;
+
+void nonmovingMarkInitUpdRemSet(void);
+
+void init_upd_rem_set(UpdRemSet *rset);
+void reset_upd_rem_set(UpdRemSet *rset);
+void updateRemembSetPushThunk(Capability *cap, StgThunk *p);
+void updateRemembSetPushTSO(Capability *cap, StgTSO *tso);
+void updateRemembSetPushStack(Capability *cap, StgStack *stack);
+
+#if defined(THREADED_RTS)
+void nonmovingFlushCapUpdRemSetBlocks(Capability *cap);
+void nonmovingBeginFlush(Task *task);
+bool nonmovingWaitForFlush(void);
+void nonmovingFinishFlush(Task *task);
+#endif
void markQueueAddRoot(MarkQueue* q, StgClosure** root);
@@ -124,6 +152,9 @@ void markQueuePushClosure_(MarkQueue *q, StgClosure *p);
void markQueuePushThunkSrt(MarkQueue *q, const StgInfoTable *info);
void markQueuePushFunSrt(MarkQueue *q, const StgInfoTable *info);
void markQueuePushArray(MarkQueue *q, const StgMutArrPtrs *array, StgWord start_index);
+void updateRemembSetPushThunkEager(Capability *cap,
+ const StgThunkInfoTable *orig_info,
+ StgThunk *thunk);
INLINE_HEADER bool markQueueIsEmpty(MarkQueue *q)
{
diff --git a/rts/sm/Sanity.c b/rts/sm/Sanity.c
index 3e1748d5b6..0724a0059a 100644
--- a/rts/sm/Sanity.c
+++ b/rts/sm/Sanity.c
@@ -912,9 +912,11 @@ findMemoryLeak (void)
for (i = 0; i < n_capabilities; i++) {
markBlocks(gc_threads[i]->free_blocks);
markBlocks(capabilities[i]->pinned_object_block);
+ markBlocks(capabilities[i]->upd_rem_set.queue.blocks);
}
if (RtsFlags.GcFlags.useNonmoving) {
+ markBlocks(upd_rem_set_block_list);
markBlocks(nonmoving_large_objects);
markBlocks(nonmoving_marked_large_objects);
for (i = 0; i < NONMOVING_ALLOCA_CNT; i++) {
@@ -1054,7 +1056,8 @@ memInventory (bool show)
uint32_t g, i;
W_ gen_blocks[RtsFlags.GcFlags.generations];
W_ nursery_blocks = 0, retainer_blocks = 0,
- arena_blocks = 0, exec_blocks = 0, gc_free_blocks = 0;
+ arena_blocks = 0, exec_blocks = 0, gc_free_blocks = 0,
+ upd_rem_set_blocks = 0;
W_ live_blocks = 0, free_blocks = 0;
bool leak;
@@ -1099,12 +1102,19 @@ memInventory (bool show)
/* count the blocks on the free list */
free_blocks = countFreeList();
+ // count UpdRemSet blocks
+ for (i = 0; i < n_capabilities; ++i) {
+ upd_rem_set_blocks += countBlocks(capabilities[i]->upd_rem_set.queue.blocks);
+ }
+ upd_rem_set_blocks += countBlocks(upd_rem_set_block_list);
+
live_blocks = 0;
for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
live_blocks += gen_blocks[g];
}
live_blocks += nursery_blocks +
- + retainer_blocks + arena_blocks + exec_blocks + gc_free_blocks;
+ + retainer_blocks + arena_blocks + exec_blocks + gc_free_blocks
+ + upd_rem_set_blocks;
#define MB(n) (((double)(n) * BLOCK_SIZE_W) / ((1024*1024)/sizeof(W_)))
@@ -1133,6 +1143,8 @@ memInventory (bool show)
gc_free_blocks, MB(gc_free_blocks));
debugBelch(" free : %5" FMT_Word " blocks (%6.1lf MB)\n",
free_blocks, MB(free_blocks));
+ debugBelch(" UpdRemSet : %5" FMT_Word " blocks (%6.1lf MB)\n",
+ upd_rem_set_blocks, MB(upd_rem_set_blocks));
debugBelch(" total : %5" FMT_Word " blocks (%6.1lf MB)\n",
live_blocks + free_blocks, MB(live_blocks+free_blocks));
if (leak) {
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index 9fe68e98b7..0199bc58f3 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -281,6 +281,14 @@ void storageAddCapabilities (uint32_t from, uint32_t to)
}
}
+ // Initialize NonmovingAllocators and UpdRemSets
+ if (RtsFlags.GcFlags.useNonmoving) {
+ nonmovingAddCapabilities(to);
+ for (i = 0; i < to; ++i) {
+ init_upd_rem_set(&capabilities[i]->upd_rem_set);
+ }
+ }
+
#if defined(THREADED_RTS) && defined(CC_LLVM_BACKEND) && (CC_SUPPORTS_TLS == 0)
newThreadLocalKey(&gctKey);
#endif
@@ -412,6 +420,22 @@ lockCAF (StgRegTable *reg, StgIndStatic *caf)
// successfully claimed by us; overwrite with IND_STATIC
#endif
+ // Push anything that will become unreachable after the update to the
+ // UpdRemSet in order to maintain the snapshot invariant.
+ const StgInfoTable *orig_info_tbl = INFO_PTR_TO_STRUCT(orig_info);
+ // Assertions documenting our assumptions about static thunks: they must
+ // have type THUNK_STATIC...
+ ASSERT(orig_info_tbl->type == THUNK_STATIC);
+ // ...and they cannot have a pointer payload: anything they reference
+ // should instead be reachable via their SRT.
+ ASSERT(orig_info_tbl->layout.payload.ptrs == 0);
+ // Because the payload is empty we need only push the SRT.
+ if (RTS_UNLIKELY(nonmoving_write_barrier_enabled)) {
+ StgThunkInfoTable *thunk_info = itbl_to_thunk_itbl(orig_info_tbl);
+ if (thunk_info->i.srt) {
+ updateRemembSetPushClosure(cap, GET_SRT(thunk_info));
+ }
+ }
+
// For the benefit of revertCAFs(), save the original info pointer
caf->saved_info = orig_info;
@@ -1083,6 +1107,27 @@ allocatePinned (Capability *cap, W_ n)
Write Barriers
-------------------------------------------------------------------------- */
+/* These write barriers on heavily mutated objects serve two purposes:
+ *
+ * - Efficient maintenance of the generational invariant: Record whether or not
+ * we have added a particular mutable object to mut_list, as it may contain
+ * references to younger generations.
+ *
+ * - Maintenance of the nonmoving collector's snapshot invariant: Record objects
+ * which are about to no longer be reachable due to mutation.
+ *
+ * In each case we record whether the object has been added to the mutable list
+ * by way of either the info pointer or a dedicated "dirty" flag. The GC will
+ * clear this flag and remove the object from mut_list (or rather, not re-add it)
+ * if it finds that the object contains no references into any younger generation.
+ *
+ * Note that all dirty objects will be marked as clean during preparation for a
+ * concurrent collection. Consequently, we can use the dirtiness flag to determine
+ * whether or not we need to add overwritten pointers to the update remembered
+ * set (since we need only write the value prior to the first update to maintain
+ * the snapshot invariant).
+ */
+
/*
This is the write barrier for MUT_VARs, a.k.a. IORefs. A
MUT_VAR_CLEAN object is not on the mutable list; a MUT_VAR_DIRTY
@@ -1090,25 +1135,39 @@ allocatePinned (Capability *cap, W_ n)
and is put on the mutable list.
*/
void
-dirty_MUT_VAR(StgRegTable *reg, StgClosure *p)
+dirty_MUT_VAR(StgRegTable *reg, StgMutVar *mvar, StgClosure *old)
{
Capability *cap = regTableToCapability(reg);
// No barrier required here as no other heap object fields are read. See
// note [Heap memory barriers] in SMP.h.
- if (p->header.info == &stg_MUT_VAR_CLEAN_info) {
- p->header.info = &stg_MUT_VAR_DIRTY_info;
- recordClosureMutated(cap,p);
+ if (mvar->header.info == &stg_MUT_VAR_CLEAN_info) {
+ mvar->header.info = &stg_MUT_VAR_DIRTY_info;
+ recordClosureMutated(cap, (StgClosure *) mvar);
+ if (RTS_UNLIKELY(nonmoving_write_barrier_enabled != 0)) {
+ updateRemembSetPushClosure_(reg, old);
+ }
}
}
+/*
+ * This is the write barrier for TVARs.
+ * old is the pointer that we overwrote, which is required by the concurrent
+ * garbage collector. Note that, while StgTVars contain multiple pointers, we
+ * only overwrite one per dirty_TVAR call, so we only need to take a single old
+ * pointer argument.
+ */
void
-dirty_TVAR(Capability *cap, StgTVar *p)
+dirty_TVAR(Capability *cap, StgTVar *p,
+ StgClosure *old)
{
// No barrier required here as no other heap object fields are read. See
// note [Heap memory barriers] in SMP.h.
if (p->header.info == &stg_TVAR_CLEAN_info) {
p->header.info = &stg_TVAR_DIRTY_info;
recordClosureMutated(cap,(StgClosure*)p);
+ if (RTS_UNLIKELY(nonmoving_write_barrier_enabled != 0)) {
+ updateRemembSetPushClosure(cap, old);
+ }
}
}
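[Editor's note: callers of dirty_TVAR are expected to capture the value they are about to overwrite and pass it as old. A sketch of a call site follows; the function and variable names are illustrative and not taken from rts/STM.c.]

    /* Sketch: record the TVar's previous value so the barrier can push it to the
     * update remembered set if a concurrent mark is running. */
    static void set_tvar_sketch(Capability *cap, StgTVar *s, StgClosure *new_value)
    {
        StgClosure *old_value = s->current_value;
        s->current_value = new_value;
        dirty_TVAR(cap, s, old_value);
    }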
@@ -1123,6 +1182,8 @@ setTSOLink (Capability *cap, StgTSO *tso, StgTSO *target)
if (tso->dirty == 0) {
tso->dirty = 1;
recordClosureMutated(cap,(StgClosure*)tso);
+ if (RTS_UNLIKELY(nonmoving_write_barrier_enabled))
+ updateRemembSetPushClosure(cap, (StgClosure *) tso->_link);
}
tso->_link = target;
}
@@ -1133,6 +1194,8 @@ setTSOPrev (Capability *cap, StgTSO *tso, StgTSO *target)
if (tso->dirty == 0) {
tso->dirty = 1;
recordClosureMutated(cap,(StgClosure*)tso);
+ if (RTS_UNLIKELY(nonmoving_write_barrier_enabled))
+ updateRemembSetPushClosure(cap, (StgClosure *) tso->block_info.prev);
}
tso->block_info.prev = target;
}
@@ -1144,15 +1207,47 @@ dirty_TSO (Capability *cap, StgTSO *tso)
tso->dirty = 1;
recordClosureMutated(cap,(StgClosure*)tso);
}
+
+ if (RTS_UNLIKELY(nonmoving_write_barrier_enabled))
+ updateRemembSetPushTSO(cap, tso);
}
void
dirty_STACK (Capability *cap, StgStack *stack)
{
+ // First push to the update remembered set before we set stack->dirty, since
+ // the nonmoving collector may already be marking the stack.
+ if (RTS_UNLIKELY(nonmoving_write_barrier_enabled))
+ updateRemembSetPushStack(cap, stack);
+
if (! (stack->dirty & STACK_DIRTY)) {
stack->dirty = STACK_DIRTY;
recordClosureMutated(cap,(StgClosure*)stack);
}
+}
+
+/*
+ * This is the concurrent collector's write barrier for MVARs. In the other
+ * write barriers above this is folded into the dirty_* functions. However, in
+ * the case of MVars we need to separate the acts of adding the MVar to the
+ * mutable list and adding its fields to the update remembered set.
+ *
+ * Specifically, the wakeup loop in stg_putMVarzh wants to freely mutate the
+ * pointers of the MVar but needs to keep its lock, meaning we can't yet add it
+ * to the mutable list lest the assertion checking for clean MVars on the
+ * mutable list would fail.
+ */
+void
+update_MVAR(StgRegTable *reg, StgClosure *p, StgClosure *old_val)
+{
+ Capability *cap = regTableToCapability(reg);
+ if (RTS_UNLIKELY(nonmoving_write_barrier_enabled)) {
+ StgMVar *mvar = (StgMVar *) p;
+ updateRemembSetPushClosure(cap, old_val);
+ updateRemembSetPushClosure(cap, (StgClosure *) mvar->head);
+ updateRemembSetPushClosure(cap, (StgClosure *) mvar->tail);
+ }
}
/*
@@ -1164,9 +1259,11 @@ dirty_STACK (Capability *cap, StgStack *stack)
such as Chaneneos and cheap-concurrency.
*/
void
-dirty_MVAR(StgRegTable *reg, StgClosure *p)
+dirty_MVAR(StgRegTable *reg, StgClosure *p, StgClosure *old_val)
{
- recordClosureMutated(regTableToCapability(reg),p);
+ Capability *cap = regTableToCapability(reg);
+ update_MVAR(reg, p, old_val);
+ recordClosureMutated(cap, p);
}
/* -----------------------------------------------------------------------------
diff --git a/rts/sm/Storage.h b/rts/sm/Storage.h
index 08bdb37ba3..cdb9720650 100644
--- a/rts/sm/Storage.h
+++ b/rts/sm/Storage.h
@@ -47,8 +47,9 @@ extern Mutex sm_mutex;
The write barrier for MVARs and TVARs
-------------------------------------------------------------------------- */
-void dirty_MVAR(StgRegTable *reg, StgClosure *p);
-void dirty_TVAR(Capability *cap, StgTVar *p);
+void update_MVAR(StgRegTable *reg, StgClosure *p, StgClosure *old_val);
+void dirty_MVAR(StgRegTable *reg, StgClosure *p, StgClosure *old);
+void dirty_TVAR(Capability *cap, StgTVar *p, StgClosure *old);
/* -----------------------------------------------------------------------------
Nursery manipulation