path: root/rts/sm/NonMovingMark.c
Diffstat (limited to 'rts/sm/NonMovingMark.c')
-rw-r--r--  rts/sm/NonMovingMark.c  563
1 file changed, 531 insertions(+), 32 deletions(-)
diff --git a/rts/sm/NonMovingMark.c b/rts/sm/NonMovingMark.c
index cf1950471e..b273b09b05 100644
--- a/rts/sm/NonMovingMark.c
+++ b/rts/sm/NonMovingMark.c
@@ -67,6 +67,14 @@ bdescr *nonmoving_large_objects = NULL;
bdescr *nonmoving_marked_large_objects = NULL;
memcount n_nonmoving_large_blocks = 0;
memcount n_nonmoving_marked_large_blocks = 0;
+#if defined(THREADED_RTS)
+/* Protects everything above. Furthermore, we only set the BF_MARKED bit of
+ * large object blocks when this is held. This ensures that the write barrier
+ * (e.g. finish_upd_rem_set_mark) and the collector (mark_closure) don't try to
+ * move the same large object to nonmoving_marked_large_objects more than once.
+ */
+static Mutex nonmoving_large_objects_mutex;
+#endif
/*
* Where we keep our threads during collection since we must have a snapshot of
@@ -87,11 +95,257 @@ StgWeak *nonmoving_weak_ptr_list = NULL;
StgIndStatic *debug_caf_list_snapshot = (StgIndStatic*)END_OF_CAF_LIST;
#endif
+/* Note [Update remembered set]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * The concurrent non-moving collector uses a remembered set to ensure
+ * that its marking is consistent with the snapshot invariant defined in
+ * the design. This remembered set, known as the update remembered set,
+ * records all pointers that have been overwritten since the beginning
+ * of the concurrent mark. This ensures that concurrent mutation cannot hide
+ * pointers to live objects from the nonmoving garbage collector.
+ *
+ * The update remembered set is maintained via a write barrier that
+ * is enabled whenever a concurrent mark is active. This write barrier
+ * can be found in a number of places:
+ *
+ * - In rts/PrimOps.cmm in primops responsible for modifying mutable closures
+ * (e.g. MVARs, MUT_VARs, etc.)
+ *
+ * - In rts/STM.c, where the STM machinery mutates TVars (e.g. when updating
+ * their watch queues)
+ *
+ * - In the dirty_* functions found in rts/Storage.c where we dirty MVARs,
+ * MUT_VARs, TSOs and STACKs. STACK is a somewhat special case, as described
+ * in Note [StgStack dirtiness flags and concurrent marking] in TSO.h.
+ *
+ * - In the code generated by the STG code generator for pointer array writes
+ *
+ * There is also a read barrier to handle weak references, as described in
+ * Note [Concurrent read barrier on deRefWeak#].
+ *
+ * The representation of the update remembered set is the same as that of
+ * the mark queue. For efficiency, each capability maintains its own local
+ * accumulator of remembered set entries. When a capability fills its
+ * accumulator it is linked in to the global remembered set
+ * (upd_rem_set_block_list), where it is consumed by the mark phase.
+ *
+ * The mark phase is responsible for freeing update remembered set block
+ * allocations.
+ *
+ *
+ * Note [Concurrent read barrier on deRefWeak#]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * In general the non-moving GC assumes that all pointers reachable from a
+ * marked object are themselves marked (or in the mark queue). However,
+ * weak pointers are an obvious exception to this rule. In particular,
+ * deRefWeakPtr# allows the mutator to turn a weak reference into a strong
+ * reference. This interacts badly with concurrent collection. For
+ * instance, consider this program:
+ *
+ * f :: a -> b -> IO b
+ * f k v = do
+ * -- assume that k and v are the only references to the
+ * -- closures to which they refer.
+ * weak <- mkWeakPtr k v Nothing
+ *
+ * -- N.B. k is now technically dead since the only reference to it is
+ * -- weak, but we've not yet had a chance to tombstone the WeakPtr
+ * -- (which will happen in the course of major GC).
+ * performMajorGC
+ * -- Now we are running concurrently with the mark...
+ *
+ * Just x <- deRefWeak weak
+ * -- We have now introduced a reference to `v`, which will not be
+ * -- marked, since the only reference to `v` when the snapshot was
+ * -- taken was via a WeakPtr.
+ * return x
+ *
+ */
+static Mutex upd_rem_set_lock;
+bdescr *upd_rem_set_block_list = NULL;
+
+#if defined(THREADED_RTS)
+/* Used during the mark/sweep phase transition to track how many capabilities
+ * have pushed their update remembered sets. Protected by upd_rem_set_lock.
+ */
+static volatile StgWord upd_rem_set_flush_count = 0;
+#endif
+
+
+/* Signaled by each capability when it has flushed its update remembered set */
+static Condition upd_rem_set_flushed_cond;
+
+/* Indicates to mutators that the write barrier must be respected. Set while
+ * concurrent mark is running.
+ */
+StgWord nonmoving_write_barrier_enabled = false;
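For illustration, here is a minimal mutator-side sketch (an assumption for exposition, not code from this patch; the real barriers live in the places listed in Note [Update remembered set]) of how the flag above gates pushing an about-to-be-overwritten pointer:

    /* Hypothetical helper: overwrite the pointer held by a MUT_VAR while
     * preserving the snapshot invariant. */
    static void example_overwrite_mut_var(Capability *cap, StgMutVar *mv,
                                           StgClosure *new_value)
    {
        if (nonmoving_write_barrier_enabled) {
            // Push the old pointer so the concurrent mark can still trace it.
            updateRemembSetPushClosure(cap, mv->var);
        }
        mv->var = new_value;
    }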
+
/* Used to provide the current mark queue to the young generation
* collector for scavenging.
*/
MarkQueue *current_mark_queue = NULL;
+/* Initialise update remembered set data structures */
+void nonmovingMarkInitUpdRemSet() {
+ initMutex(&upd_rem_set_lock);
+ initCondition(&upd_rem_set_flushed_cond);
+#if defined(THREADED_RTS)
+ initMutex(&nonmoving_large_objects_mutex);
+#endif
+}
+
+#if defined(THREADED_RTS) && defined(DEBUG)
+static uint32_t markQueueLength(MarkQueue *q);
+#endif
+static void init_mark_queue_(MarkQueue *queue);
+
+/* Transfers the given capability's update-remembered set to the global
+ * remembered set.
+ *
+ * Really the argument type should be UpdRemSet* but this would be rather
+ * inconvenient without polymorphism.
+ */
+static void nonmovingAddUpdRemSetBlocks(MarkQueue *rset)
+{
+ if (markQueueIsEmpty(rset)) return;
+
+ // find the tail of the queue
+ bdescr *start = rset->blocks;
+ bdescr *end = start;
+ while (end->link != NULL)
+ end = end->link;
+
+ // add the blocks to the global remembered set
+ ACQUIRE_LOCK(&upd_rem_set_lock);
+ end->link = upd_rem_set_block_list;
+ upd_rem_set_block_list = start;
+ RELEASE_LOCK(&upd_rem_set_lock);
+
+ // Reset remembered set
+ ACQUIRE_SM_LOCK;
+ init_mark_queue_(rset);
+ rset->is_upd_rem_set = true;
+ RELEASE_SM_LOCK;
+}
+
+#if defined(THREADED_RTS)
+/* Called by capabilities to flush their update remembered sets when
+ * synchronising with the non-moving collector as it transitions from mark to
+ * sweep phase.
+ */
+void nonmovingFlushCapUpdRemSetBlocks(Capability *cap)
+{
+ debugTrace(DEBUG_nonmoving_gc,
+ "Capability %d flushing update remembered set: %d",
+ cap->no, markQueueLength(&cap->upd_rem_set.queue));
+ nonmovingAddUpdRemSetBlocks(&cap->upd_rem_set.queue);
+ atomic_inc(&upd_rem_set_flush_count, 1);
+ signalCondition(&upd_rem_set_flushed_cond);
+ // After this, mutation will remain suspended until nonmovingFinishFlush
+ // releases its capabilities.
+}
+
+/* Request that all capabilities flush their update remembered sets and suspend
+ * execution until further notice.
+ */
+void nonmovingBeginFlush(Task *task)
+{
+ debugTrace(DEBUG_nonmoving_gc, "Starting update remembered set flush...");
+ upd_rem_set_flush_count = 0;
+ stopAllCapabilitiesWith(NULL, task, SYNC_FLUSH_UPD_REM_SET);
+
+ // XXX: We may have been given a capability via releaseCapability (i.e. a
+ // task suspended due to a foreign call) in which case our requestSync
+ // logic won't have been hit. Make sure that everyone so far has flushed.
+ // Ideally we want to mark asynchronously with syncing.
+ for (uint32_t i = 0; i < n_capabilities; i++) {
+ nonmovingFlushCapUpdRemSetBlocks(capabilities[i]);
+ }
+}
+
+/* Wait until a capability has flushed its update remembered set. Returns true
+ * if all capabilities have flushed.
+ */
+bool nonmovingWaitForFlush()
+{
+ ACQUIRE_LOCK(&upd_rem_set_lock);
+ debugTrace(DEBUG_nonmoving_gc, "Flush count %d", upd_rem_set_flush_count);
+ bool finished = upd_rem_set_flush_count == n_capabilities;
+ if (!finished) {
+ waitCondition(&upd_rem_set_flushed_cond, &upd_rem_set_lock);
+ }
+ RELEASE_LOCK(&upd_rem_set_lock);
+ return finished;
+}
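Taken together, nonmovingBeginFlush, nonmovingWaitForFlush and nonmovingFinishFlush form a small synchronisation protocol. A rough sketch of how a collector-side caller might drive it (illustrative only; the actual driver is the non-moving mark entry point, e.g. nonmovingMark_ in the backtrace in the Note below):

    static void example_flush_protocol(Task *task, MarkQueue *mark_queue)
    {
        // Stop the mutators and ask every capability to flush its accumulator.
        nonmovingBeginFlush(task);

        bool all_flushed;
        do {
            // Mark what has been flushed so far while waiting for the rest.
            all_flushed = nonmovingWaitForFlush();
            nonmovingMark(mark_queue);
        } while (!all_flushed);

        // ... finish marking, process weak pointers, resurrect threads ...

        // Reset the update remembered sets and let the mutators resume.
        nonmovingFinishFlush(task);
    }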
+
+/* Note [Unintentional marking in resurrectThreads]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * In both the moving and non-moving collectors, threads found to be unreachable
+ * are evacuated/marked and then resurrected with resurrectThreads.
+ * resurrectThreads raises an exception in the unreachable thread via raiseAsync,
+ * which mutates the heap. These mutations cause entries to be added to the
+ * UpdRemSet of the thread's capability. Here's an example backtrace where this
+ * happens:
+ *
+ * #0 updateRemembSetPushClosure
+ * #1 0x000000000072b363 in dirty_TVAR
+ * #2 0x00000000007162e5 in remove_watch_queue_entries_for_trec
+ * #3 0x0000000000717098 in stmAbortTransaction
+ * #4 0x000000000070c6eb in raiseAsync
+ * #5 0x000000000070b473 in throwToSingleThreaded__
+ * #6 0x000000000070b4ab in throwToSingleThreaded
+ * #7 0x00000000006fce82 in resurrectThreads
+ * #8 0x00000000007215db in nonmovingMark_
+ * #9 0x0000000000721438 in nonmovingConcurrentMark
+ * #10 0x00007f1ee81cd6db in start_thread
+ * #11 0x00007f1ee850688f in clone
+ *
+ * However, we don't really want to run write barriers when calling
+ * resurrectThreads here, because we're in a GC pause, and overwritten values
+ * are definitely gone forever (as opposed to being inserted in a marked object
+ * or kept in registers and used later).
+ *
+ * If we don't reset the UpdRemSets after this, then during the next mark we see
+ * the objects that the previous mark's resurrectThreads added to the UpdRemSets
+ * and mark them too. This keeps unreachable objects alive and breaks weak
+ * finalization and thread resurrection (both of which rely on objects becoming
+ * unreachable). As an example, stm048 fails when we get this wrong: when we
+ * raiseAsync on a thread that was blocked on an STM transaction we mutate a
+ * TVAR_WATCH_QUEUE, which has a reference to the TSO that was running the STM
+ * transaction. If that TSO becomes unreachable again in the next GC we don't
+ * notice, because raiseAsync added it to an UpdRemSet during the previous GC's
+ * mark phase.
+ *
+ * To fix this we clear all UpdRemSets in nonmovingFinishFlush, right before
+ * releasing capabilities. This is somewhat inefficient (we allow adding objects
+ * to UpdRemSets, only to later reset them), but the only case where we add to
+ * UpdRemSets during mark is resurrectThreads, and I don't think we resurrect so
+ * many threads that we fill the UpdRemSets and allocate new blocks. So pushing
+ * to an UpdRemSet in this case is really fast, and resetting it is even faster
+ * (we just update a pointer).
+ *
+ * TODO (osa): What if we actually marked UpdRemSets in this case, in the mark
+ * loop? Would that work? Or what would break?
+ */
+
+/* Notify capabilities that the synchronisation is finished; they may resume
+ * execution.
+ */
+void nonmovingFinishFlush(Task *task)
+{
+ // See Note [Unintentional marking in resurrectThreads]
+ for (uint32_t i = 0; i < n_capabilities; i++) {
+ reset_upd_rem_set(&capabilities[i]->upd_rem_set);
+ }
+ // Also reset upd_rem_set_block_list in case some of the UpdRemSets were
+ // filled and we flushed them.
+ freeChain_lock(upd_rem_set_block_list);
+ upd_rem_set_block_list = NULL;
+
+ debugTrace(DEBUG_nonmoving_gc, "Finished update remembered set flush...");
+ releaseAllCapabilities(n_capabilities, NULL, task);
+}
+#endif
+
/*********************************************************
* Pushing to either the mark queue or remembered set
*********************************************************/
@@ -102,14 +356,18 @@ push (MarkQueue *q, const MarkQueueEnt *ent)
// Are we at the end of the block?
if (q->top->head == MARK_QUEUE_BLOCK_ENTRIES) {
// Yes, this block is full.
- // allocate a fresh block.
- ACQUIRE_SM_LOCK;
- bdescr *bd = allocGroup(1);
- bd->link = q->blocks;
- q->blocks = bd;
- q->top = (MarkQueueBlock *) bd->start;
- q->top->head = 0;
- RELEASE_SM_LOCK;
+ if (q->is_upd_rem_set) {
+ nonmovingAddUpdRemSetBlocks(q);
+ } else {
+ // allocate a fresh block.
+ ACQUIRE_SM_LOCK;
+ bdescr *bd = allocGroup(1);
+ bd->link = q->blocks;
+ q->blocks = bd;
+ q->top = (MarkQueueBlock *) bd->start;
+ q->top->head = 0;
+ RELEASE_SM_LOCK;
+ }
}
q->top->entries[q->top->head] = *ent;
@@ -183,6 +441,183 @@ void push_fun_srt (MarkQueue *q, const StgInfoTable *info)
}
/*********************************************************
+ * Pushing to the update remembered set
+ *
+ * upd_rem_set_push_* functions are directly called by
+ * mutators and need to check whether the value is in
+ * non-moving heap.
+ *********************************************************/
+
+// Check if the object is traced by the non-moving collector. This holds in the
+// following cases:
+//
+// - Object is in the non-moving heap
+// - Object is large (BF_LARGE) and marked as BF_NONMOVING
+// - Object is static (HEAP_ALLOCED_GC(obj) == false)
+//
+static
+bool check_in_nonmoving_heap(StgClosure *p) {
+ if (HEAP_ALLOCED_GC(p)) {
+ // This works for both large and small objects:
+ return Bdescr((P_)p)->flags & BF_NONMOVING;
+ } else {
+ return true; // a static object
+ }
+}
+
+/* Push the free variables of a (now-evaluated) thunk to the
+ * update remembered set.
+ */
+inline void updateRemembSetPushThunk(Capability *cap, StgThunk *thunk)
+{
+ const StgInfoTable *info;
+ do {
+ info = get_volatile_itbl((StgClosure *) thunk);
+ } while (info->type == WHITEHOLE);
+ updateRemembSetPushThunkEager(cap, (StgThunkInfoTable *) info, thunk);
+}
+
+void updateRemembSetPushThunkEager(Capability *cap,
+ const StgThunkInfoTable *info,
+ StgThunk *thunk)
+{
+ /* N.B. info->i.type mustn't be WHITEHOLE */
+ switch (info->i.type) {
+ case THUNK:
+ case THUNK_1_0:
+ case THUNK_0_1:
+ case THUNK_2_0:
+ case THUNK_1_1:
+ case THUNK_0_2:
+ {
+ MarkQueue *queue = &cap->upd_rem_set.queue;
+ push_thunk_srt(queue, &info->i);
+
+ // Don't record the origin of objects living outside of the nonmoving
+ // heap; we can't perform the selector optimisation on them anyway.
+ bool record_origin = check_in_nonmoving_heap((StgClosure*)thunk);
+
+ for (StgWord i = 0; i < info->i.layout.payload.ptrs; i++) {
+ if (check_in_nonmoving_heap(thunk->payload[i])) {
+ push_closure(queue,
+ thunk->payload[i],
+ record_origin ? &thunk->payload[i] : NULL);
+ }
+ }
+ break;
+ }
+ case AP:
+ {
+ MarkQueue *queue = &cap->upd_rem_set.queue;
+ StgAP *ap = (StgAP *) thunk;
+ push_closure(queue, ap->fun, &ap->fun);
+ mark_PAP_payload(queue, ap->fun, ap->payload, ap->n_args);
+ break;
+ }
+ case THUNK_SELECTOR:
+ case BLACKHOLE:
+ // TODO: This is right, right?
+ break;
+ default:
+ barf("updateRemembSetPushThunk: invalid thunk pushed: p=%p, type=%d",
+ thunk, info->i.type);
+ }
+}
+
+void updateRemembSetPushThunk_(StgRegTable *reg, StgThunk *p)
+{
+ updateRemembSetPushThunk(regTableToCapability(reg), p);
+}
+
+inline void updateRemembSetPushClosure(Capability *cap, StgClosure *p)
+{
+ if (!check_in_nonmoving_heap(p)) return;
+ MarkQueue *queue = &cap->upd_rem_set.queue;
+ push_closure(queue, p, NULL);
+}
+
+void updateRemembSetPushClosure_(StgRegTable *reg, StgClosure *p)
+{
+ updateRemembSetPushClosure(regTableToCapability(reg), p);
+}
+
+STATIC_INLINE bool needs_upd_rem_set_mark(StgClosure *p)
+{
+ // TODO: Deduplicate with mark_closure
+ bdescr *bd = Bdescr((StgPtr) p);
+ if (bd->gen != oldest_gen) {
+ return false;
+ } else if (bd->flags & BF_LARGE) {
+ if (! (bd->flags & BF_NONMOVING_SWEEPING)) {
+ return false;
+ } else {
+ return ! (bd->flags & BF_MARKED);
+ }
+ } else {
+ struct NonmovingSegment *seg = nonmovingGetSegment((StgPtr) p);
+ nonmoving_block_idx block_idx = nonmovingGetBlockIdx((StgPtr) p);
+ return nonmovingGetMark(seg, block_idx) != nonmovingMarkEpoch;
+ }
+}
+
+/* Set the mark bit; only to be called *after* we have fully marked the closure */
+STATIC_INLINE void finish_upd_rem_set_mark(StgClosure *p)
+{
+ bdescr *bd = Bdescr((StgPtr) p);
+ if (bd->flags & BF_LARGE) {
+ // Someone else may have already marked it.
+ ACQUIRE_LOCK(&nonmoving_large_objects_mutex);
+ if (! (bd->flags & BF_MARKED)) {
+ bd->flags |= BF_MARKED;
+ dbl_link_remove(bd, &nonmoving_large_objects);
+ dbl_link_onto(bd, &nonmoving_marked_large_objects);
+ n_nonmoving_large_blocks -= bd->blocks;
+ n_nonmoving_marked_large_blocks += bd->blocks;
+ }
+ RELEASE_LOCK(&nonmoving_large_objects_mutex);
+ } else {
+ struct NonmovingSegment *seg = nonmovingGetSegment((StgPtr) p);
+ nonmoving_block_idx block_idx = nonmovingGetBlockIdx((StgPtr) p);
+ nonmovingSetMark(seg, block_idx);
+ }
+}
+
+void updateRemembSetPushTSO(Capability *cap, StgTSO *tso)
+{
+ if (needs_upd_rem_set_mark((StgClosure *) tso)) {
+ debugTrace(DEBUG_nonmoving_gc, "upd_rem_set: TSO %p", tso);
+ mark_tso(&cap->upd_rem_set.queue, tso);
+ finish_upd_rem_set_mark((StgClosure *) tso);
+ }
+}
+
+void updateRemembSetPushStack(Capability *cap, StgStack *stack)
+{
+ // N.B. caller responsible for checking nonmoving_write_barrier_enabled
+ if (needs_upd_rem_set_mark((StgClosure *) stack)) {
+ StgWord marking = stack->marking;
+ // See Note [StgStack dirtiness flags and concurrent marking]
+ if (cas(&stack->marking, marking, nonmovingMarkEpoch)
+ != nonmovingMarkEpoch) {
+ // We have claimed the right to mark the stack.
+ debugTrace(DEBUG_nonmoving_gc, "upd_rem_set: STACK %p", stack->sp);
+ mark_stack(&cap->upd_rem_set.queue, stack);
+ finish_upd_rem_set_mark((StgClosure *) stack);
+ return;
+ } else {
+ // The concurrent GC has claimed the right to mark the stack.
+ // Wait until it finishes marking before proceeding with
+ // mutation.
+ while (needs_upd_rem_set_mark((StgClosure *) stack)) {
+#if defined(PARALLEL_GC)
+ busy_wait_nop(); // TODO: Spinning here is unfortunate
+#endif
+ }
+ return;
+ }
+ }
+}
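As the N.B. above says, the write-barrier check happens in the caller. A hedged sketch (assumed shape, not the code from this patch) of how the dirty_* machinery mentioned in Note [Update remembered set] might invoke the stack barrier when dirtying a clean stack:

    /* Hypothetical caller: dirty a clean stack, informing the concurrent
     * collector first so that the snapshot still sees the old frames. */
    static void example_dirty_stack(Capability *cap, StgStack *stack)
    {
        if (nonmoving_write_barrier_enabled) {
            // Caller-side check required by updateRemembSetPushStack.
            updateRemembSetPushStack(cap, stack);
        }
        stack->dirty = 1;
    }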
+
+/*********************************************************
* Pushing to the mark queue
*********************************************************/
@@ -192,8 +627,8 @@ void markQueuePush (MarkQueue *q, const MarkQueueEnt *ent)
}
void markQueuePushClosure (MarkQueue *q,
- StgClosure *p,
- StgClosure **origin)
+ StgClosure *p,
+ StgClosure **origin)
{
push_closure(q, p, origin);
}
@@ -264,7 +699,7 @@ again:
}
/*********************************************************
- * Creating and destroying MarkQueues
+ * Creating and destroying MarkQueues and UpdRemSets
*********************************************************/
/* Must hold sm_mutex. */
@@ -281,22 +716,45 @@ void initMarkQueue (MarkQueue *queue)
{
init_mark_queue_(queue);
queue->marked_objects = allocHashTable();
+ queue->is_upd_rem_set = false;
+}
+
+/* Must hold sm_mutex. */
+void init_upd_rem_set (UpdRemSet *rset)
+{
+ init_mark_queue_(&rset->queue);
+ // Update remembered sets don't have to worry about static objects
+ rset->queue.marked_objects = NULL;
+ rset->queue.is_upd_rem_set = true;
+}
+
+void reset_upd_rem_set (UpdRemSet *rset)
+{
+ // UpdRemSets always have exactly one block for the mark queue. This assertion
+ // is here so that we remember to update this code if we change that.
+ ASSERT(rset->queue.blocks->link == NULL);
+ rset->queue.top->head = 0;
}
void freeMarkQueue (MarkQueue *queue)
{
- bdescr* b = queue->blocks;
- ACQUIRE_SM_LOCK;
- while (b)
- {
- bdescr* b_ = b->link;
- freeGroup(b);
- b = b_;
- }
- RELEASE_SM_LOCK;
+ freeChain_lock(queue->blocks);
freeHashTable(queue->marked_objects, NULL);
}
+#if defined(THREADED_RTS) && defined(DEBUG)
+static uint32_t
+markQueueLength (MarkQueue *q)
+{
+ uint32_t n = 0;
+ for (bdescr *block = q->blocks; block; block = block->link) {
+ MarkQueueBlock *queue = (MarkQueueBlock*)block->start;
+ n += queue->head;
+ }
+ return n;
+}
+#endif
+
/*********************************************************
* Marking
@@ -307,7 +765,8 @@ void freeMarkQueue (MarkQueue *queue)
* barrier. Consequently it's quite important that we deeply mark
* any outstanding transactions.
*/
-static void mark_trec_header (MarkQueue *queue, StgTRecHeader *trec)
+static void
+mark_trec_header (MarkQueue *queue, StgTRecHeader *trec)
{
while (trec != NO_TREC) {
StgTRecChunk *chunk = trec->current_chunk;
@@ -326,7 +785,8 @@ static void mark_trec_header (MarkQueue *queue, StgTRecHeader *trec)
}
}
-static void mark_tso (MarkQueue *queue, StgTSO *tso)
+static void
+mark_tso (MarkQueue *queue, StgTSO *tso)
{
// TODO: Clear dirty if contains only old gen objects
@@ -535,7 +995,7 @@ mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin)
p = UNTAG_CLOSURE(p);
# define PUSH_FIELD(obj, field) \
- markQueuePushClosure(queue, \
+ markQueuePushClosure(queue, \
(StgClosure *) (obj)->field, \
(StgClosure **) &(obj)->field)
@@ -592,7 +1052,7 @@ mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin)
return;
case WHITEHOLE:
- while (get_itbl(p)->type == WHITEHOLE);
+ while (get_volatile_itbl(p)->type == WHITEHOLE);
// busy_wait_nop(); // FIXME
goto try_again;
@@ -608,9 +1068,12 @@ mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin)
// we moved everything to the non-moving heap before starting the major
// collection, we know that we don't need to trace it: it was allocated
// after we took our snapshot.
-
+#if !defined(THREADED_RTS)
// This should never happen in the non-concurrent case
barf("Closure outside of non-moving heap: %p", p);
+#else
+ return;
+#endif
}
ASSERTM(LOOKS_LIKE_CLOSURE_PTR(p), "invalid closure, info=%p", p->header.info);
@@ -878,7 +1341,22 @@ mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin)
case STACK: {
// See Note [StgStack dirtiness flags and concurrent marking]
StgStack *stack = (StgStack *) p;
- mark_stack(queue, stack);
+ StgWord marking = stack->marking;
+
+ // N.B. stack->marking must be != nonmovingMarkEpoch unless
+ // someone has already marked it.
+ if (cas(&stack->marking, marking, nonmovingMarkEpoch)
+ != nonmovingMarkEpoch) {
+ // We have claimed the right to mark the stack.
+ mark_stack(queue, stack);
+ } else {
+ // A mutator has already started marking the stack; we just let it
+ // do its thing and move on. There's no reason to wait; we know that
+ // the stack will be fully marked before we sweep due to the final
+ // post-mark synchronization. Most importantly, we do not set its
+ // mark bit, the mutator is responsible for this.
+ return;
+ }
break;
}
@@ -905,8 +1383,7 @@ mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin)
}
case WHITEHOLE:
- while (get_itbl(p)->type == WHITEHOLE);
- // busy_wait_nop(); // FIXME
+ while (get_volatile_itbl(p)->type == WHITEHOLE);
goto try_again;
default:
@@ -921,6 +1398,12 @@ mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin)
* mutator waiting for us to finish so it can start execution.
*/
if (bd->flags & BF_LARGE) {
+ /* Marking a large object isn't idempotent since we move it to
+ * nonmoving_marked_large_objects; to ensure that we don't repeatedly
+ * mark a large object, we only set BF_MARKED on large objects in the
+ * nonmoving heap while holding nonmoving_large_objects_mutex
+ */
+ ACQUIRE_LOCK(&nonmoving_large_objects_mutex);
if (! (bd->flags & BF_MARKED)) {
// Remove the object from nonmoving_large_objects and link it to
// nonmoving_marked_large_objects
@@ -930,6 +1413,7 @@ mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin)
n_nonmoving_marked_large_blocks += bd->blocks;
bd->flags |= BF_MARKED;
}
+ RELEASE_LOCK(&nonmoving_large_objects_mutex);
} else {
// TODO: Kill repetition
struct NonmovingSegment *seg = nonmovingGetSegment((StgPtr) p);
@@ -947,7 +1431,8 @@ mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin)
* c. the mark queue has been seeded with a set of roots.
*
*/
-GNUC_ATTR_HOT void nonmovingMark (MarkQueue *queue)
+GNUC_ATTR_HOT void
+nonmovingMark (MarkQueue *queue)
{
debugTrace(DEBUG_nonmoving_gc, "Starting mark pass");
unsigned int count = 0;
@@ -974,9 +1459,23 @@ GNUC_ATTR_HOT void nonmovingMark (MarkQueue *queue)
break;
}
case NULL_ENTRY:
- // Nothing more to do
- debugTrace(DEBUG_nonmoving_gc, "Finished mark pass: %d", count);
- return;
+ // Perhaps the update remembered set has more to mark...
+ if (upd_rem_set_block_list) {
+ ACQUIRE_LOCK(&upd_rem_set_lock);
+ bdescr *old = queue->blocks;
+ queue->blocks = upd_rem_set_block_list;
+ queue->top = (MarkQueueBlock *) queue->blocks->start;
+ upd_rem_set_block_list = NULL;
+ RELEASE_LOCK(&upd_rem_set_lock);
+
+ ACQUIRE_SM_LOCK;
+ freeGroup(old);
+ RELEASE_SM_LOCK;
+ } else {
+ // Nothing more to do
+ debugTrace(DEBUG_nonmoving_gc, "Finished mark pass: %d", count);
+ return;
+ }
}
}
}