author     Ben Gamari <ben@well-typed.com>       2019-02-05 11:51:14 -0500
committer  Ben Gamari <ben@smart-cactus.org>     2019-10-20 21:15:52 -0400
commit     bd8e3ff43b64a72ed1c820e89691d0a83a1c6e96 (patch)
tree       8b07778e3c09460edce24750ae6da4d487eb5774 /rts/sm/NonMoving.c
parent     f8f77a070f4a9a93944dff0b7270162a40931c58 (diff)
download   haskell-bd8e3ff43b64a72ed1c820e89691d0a83a1c6e96.tar.gz
rts: Implement concurrent collection in the nonmoving collector
This extends the non-moving collector to allow concurrent collection.

The full design of the collector implemented here is described in detail
in a technical note:

    B. Gamari. "A Concurrent Garbage Collector For the Glasgow Haskell
    Compiler" (2018)

This extension involves the introduction of a capability-local
remembered set, known as the /update remembered set/, which tracks
objects which may no longer be visible to the collector due to mutation.
To maintain this remembered set we introduce a write barrier on
mutations, which is enabled while a concurrent mark is underway.

The update remembered set representation is similar to that of the
nonmoving mark queue, being a chunked array of `MarkEntry`s. Each
`Capability` maintains a single accumulator chunk, which it flushes
when (a) the chunk is filled, or (b) the nonmoving collector enters its
post-mark synchronization phase.

While the write barrier touches a significant amount of code, it is
conceptually straightforward: the mutator must ensure that the referent
of any pointer it overwrites is added to the update remembered set.
However, there are a few details:

 * In the case of objects with a dirty flag (e.g. `MVar`s) we can
   exploit the fact that only the *first* mutation requires a write
   barrier.

 * Weak references, as usual, complicate things. In particular, we must
   ensure that the referent of a weak object is marked if it is
   dereferenced by the mutator. For this we (unfortunately) must
   introduce a read barrier, as described in Note [Concurrent read
   barrier on deRefWeak#] (in `NonMovingMark.c`).

 * Stable names are also a bit tricky, as described in Note [Sweeping
   stable names in the concurrent collector] (`NonMovingSweep.c`).

We take quite some pains to ensure that the high thread count often seen
in parallel Haskell applications doesn't affect pause times. To this end
we allow thread stacks to be marked either by the thread itself (when it
is executed or when its stack underflows) or by the concurrent mark
thread (if the thread owning the stack is never scheduled). There is a
non-trivial handshake to ensure that this happens without racing, which
is described in Note [StgStack dirtiness flags and concurrent marking].

Co-Authored-by: Ömer Sinan Ağacan <omer@well-typed.com>
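As a concrete illustration of the machinery the message describes, below is a
minimal C sketch of a chunked, capability-local update remembered set together
with the dirty-flag refinement. Every name in it (MarkEntrySketch,
RemSetChunkSketch, updRemSetPush, flushChunkToCollector, writeMutableSlot) is a
hypothetical stand-in rather than GHC's actual API; the real definitions live
in NonMovingMark.c/h and differ in detail.

    /* Sketch only: hypothetical stand-ins, not GHC's actual definitions. */
    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>

    #define CHUNK_ENTRIES 128                /* illustrative chunk capacity */

    typedef struct {
        void *referent;                      /* object hidden by an overwrite */
    } MarkEntrySketch;

    typedef struct {
        uint32_t head;                       /* next free slot in entries[] */
        MarkEntrySketch entries[CHUNK_ENTRIES];
    } RemSetChunkSketch;

    /* Stand-in for the accumulator chunk owned by one Capability. */
    static RemSetChunkSketch cap_accum;

    /* Mirrors the RTS global of the same name; set during concurrent mark. */
    static bool nonmoving_write_barrier_enabled = false;

    /* Hypothetical hand-off of a filled chunk to the mark thread. */
    static void flushChunkToCollector(RemSetChunkSketch *c)
    {
        /* ... append the entries to the collector's mark queue ... */
        c->head = 0;
    }

    /* Write barrier: record the referent of a pointer we are about to
     * overwrite. Case (a): the chunk is flushed when it fills up. Case
     * (b), flushing at the post-mark synchronization, is not shown. */
    static void updRemSetPush(void *old_referent)
    {
        if (!nonmoving_write_barrier_enabled || old_referent == NULL)
            return;                          /* barrier is off outside mark */
        cap_accum.entries[cap_accum.head++].referent = old_referent;
        if (cap_accum.head == CHUNK_ENTRIES)
            flushChunkToCollector(&cap_accum);
    }

    /* Dirty-flag refinement (e.g. MVars): only the first mutation since
     * the object was last cleaned needs to run the barrier. */
    static void writeMutableSlot(bool *dirty, void **slot, void *new_val)
    {
        if (!*dirty) {
            updRemSetPush(*slot);            /* first mutation: run barrier */
            *dirty = true;
        }
        *slot = new_val;
    }

The dirty flag works because once an object's old contents are known to the
collector, further overwrites of the same object cannot hide anything that the
first barrier did not already record.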
Diffstat (limited to 'rts/sm/NonMoving.c')
-rw-r--r--  rts/sm/NonMoving.c  |  186
1 file changed, 173 insertions(+), 13 deletions(-)
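The read barrier on deRefWeak# mentioned in the commit message admits a
similarly small sketch, again with hypothetical names (the real mechanism is
the subject of Note [Concurrent read barrier on deRefWeak#] in
NonMovingMark.c): while a concurrent mark is underway, a mutator that
dereferences a weak pointer must mark the value it obtains, since the value
may be unreachable in the collector's snapshot and would otherwise be swept.

    /* Sketch only: hypothetical names, not GHC's actual deRefWeak# code. */
    #include <stdbool.h>
    #include <stddef.h>

    /* Mirrors the RTS global of the same name; set during concurrent mark. */
    static bool nonmoving_write_barrier_enabled = false;

    typedef struct {
        void *value;                     /* the weakly-referenced object */
    } WeakSketch;

    /* Hypothetical stand-in for pushing a closure onto the mark queue. */
    extern void pushToMarkQueueSketch(void *closure);

    static void *deRefWeakSketch(WeakSketch *w)
    {
        void *val = w->value;
        if (nonmoving_write_barrier_enabled && val != NULL) {
            /* Keep the value alive for the current mark cycle. */
            pushToMarkQueueSketch(val);
        }
        return val;
    }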
diff --git a/rts/sm/NonMoving.c b/rts/sm/NonMoving.c
index f383949ebf..6bccf7f100 100644
--- a/rts/sm/NonMoving.c
+++ b/rts/sm/NonMoving.c
@@ -33,6 +33,18 @@ static void nonmovingBumpEpoch(void) {
nonmovingMarkEpoch = nonmovingMarkEpoch == 1 ? 2 : 1;
}
+#if defined(THREADED_RTS)
+/*
+ * This mutex ensures that only one non-moving collection is active at a time.
+ */
+Mutex nonmoving_collection_mutex;
+
+OSThreadId mark_thread;
+bool concurrent_coll_running = false;
+Condition concurrent_coll_finished;
+Mutex concurrent_coll_finished_lock;
+#endif
+
/*
* Note [Non-moving garbage collector]
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -76,13 +88,12 @@ static void nonmovingBumpEpoch(void) {
memcount nonmoving_live_words = 0;
+#if defined(THREADED_RTS)
+static void* nonmovingConcurrentMark(void *mark_queue);
+#endif
static void nonmovingClearBitmap(struct NonmovingSegment *seg);
static void nonmovingMark_(MarkQueue *mark_queue, StgWeak **dead_weaks, StgTSO **resurrected_threads);
-/* Signals to mutators that they should stop to synchronize with the nonmoving
- * collector so it can proceed to sweep phase. */
-bool nonmoving_syncing = false;
-
static void nonmovingInitSegment(struct NonmovingSegment *seg, uint8_t block_size)
{
seg->link = NULL;
@@ -283,29 +294,39 @@ static struct NonmovingAllocator *alloc_nonmoving_allocator(uint32_t n_caps)
void nonmovingInit(void)
{
if (! RtsFlags.GcFlags.useNonmoving) return;
+#if defined(THREADED_RTS)
+ initMutex(&nonmoving_collection_mutex);
+ initCondition(&concurrent_coll_finished);
+ initMutex(&concurrent_coll_finished_lock);
+#endif
for (unsigned int i = 0; i < NONMOVING_ALLOCA_CNT; i++) {
nonmovingHeap.allocators[i] = alloc_nonmoving_allocator(n_capabilities);
}
+ nonmovingMarkInitUpdRemSet();
}
void nonmovingExit(void)
{
if (! RtsFlags.GcFlags.useNonmoving) return;
+#if defined(THREADED_RTS)
+ if (mark_thread) {
+ debugTrace(DEBUG_nonmoving_gc,
+ "waiting for nonmoving collector thread to terminate");
+ ACQUIRE_LOCK(&concurrent_coll_finished_lock);
+ waitCondition(&concurrent_coll_finished, &concurrent_coll_finished_lock);
+ }
+
+ closeMutex(&concurrent_coll_finished_lock);
+ closeCondition(&concurrent_coll_finished);
+ closeMutex(&nonmoving_collection_mutex);
+#endif
+
for (unsigned int i = 0; i < NONMOVING_ALLOCA_CNT; i++) {
stgFree(nonmovingHeap.allocators[i]);
}
}
/*
- * Wait for any concurrent collections to finish. Called during shutdown to
- * ensure we don't steal capabilities that the nonmoving collector still has yet
- * to synchronize with.
- */
-void nonmovingWaitUntilFinished(void)
-{
-}
-
-/*
* Assumes that no garbage collector or mutator threads are running to safely
* resize the nonmoving_allocators.
*
@@ -443,6 +464,14 @@ static void nonmovingMarkWeakPtrList(MarkQueue *mark_queue, StgWeak *dead_weak_p
void nonmovingCollect(StgWeak **dead_weaks, StgTSO **resurrected_threads)
{
+#if defined(THREADED_RTS)
+ // We can't start a new collection until the previous one has finished.
+ // We also don't run during the final GC.
+ if (concurrent_coll_running || sched_state > SCHED_RUNNING) {
+ return;
+ }
+#endif
+
resizeGenerations();
nonmovingPrepareMark();
@@ -501,9 +530,26 @@ void nonmovingCollect(StgWeak **dead_weaks, StgTSO **resurrected_threads)
// those lists to the mark function in the sequential case. In the concurrent
// case we allocate fresh lists.
+#if defined(THREADED_RTS)
+ // If we're interrupting or shutting down, do not let this capability go and
+ // run an STW collection. Reason: we won't be able to acquire this capability
+ // again for the sync if we let it go, because it'll immediately start doing
+ // a major GC, because that's what we do when exiting the scheduler (see
+ // exitScheduler()).
+ if (sched_state == SCHED_RUNNING) {
+ concurrent_coll_running = true;
+ nonmoving_write_barrier_enabled = true;
+ debugTrace(DEBUG_nonmoving_gc, "Starting concurrent mark thread");
+ createOSThread(&mark_thread, "non-moving mark thread",
+ nonmovingConcurrentMark, mark_queue);
+ } else {
+ nonmovingConcurrentMark(mark_queue);
+ }
+#else
// Use the weak and thread lists from the preparation for any new weaks and
// threads found to be dead in mark.
nonmovingMark_(mark_queue, dead_weaks, resurrected_threads);
+#endif
}
/* Mark mark queue, threads, and weak pointers until no more weaks have been
@@ -523,13 +569,70 @@ static void nonmovingMarkThreadsWeaks(MarkQueue *mark_queue)
}
}
+#if defined(THREADED_RTS)
+static void* nonmovingConcurrentMark(void *data)
+{
+ MarkQueue *mark_queue = (MarkQueue*)data;
+ StgWeak *dead_weaks = NULL;
+ StgTSO *resurrected_threads = (StgTSO*)&stg_END_TSO_QUEUE_closure;
+ nonmovingMark_(mark_queue, &dead_weaks, &resurrected_threads);
+ return NULL;
+}
+
+// TODO: Not sure where to put this function.
+// Append w2 to the end of w1.
+static void appendWeakList( StgWeak **w1, StgWeak *w2 )
+{
+ while (*w1) {
+ w1 = &(*w1)->link;
+ }
+ *w1 = w2;
+}
+#endif
+
static void nonmovingMark_(MarkQueue *mark_queue, StgWeak **dead_weaks, StgTSO **resurrected_threads)
{
+ ACQUIRE_LOCK(&nonmoving_collection_mutex);
debugTrace(DEBUG_nonmoving_gc, "Starting mark...");
// Do concurrent marking; most of the heap will get marked here.
nonmovingMarkThreadsWeaks(mark_queue);
+#if defined(THREADED_RTS)
+ Task *task = newBoundTask();
+
+ // If at this point we've decided to exit, just return.
+ if (sched_state > SCHED_RUNNING) {
+ // Note that we break our invariants here: we leave segments in
+ // nonmovingHeap.sweep_list and don't free nonmoving_large_objects, etc.
+ // However, because we won't be running mark-sweep in the final GC, this
+ // is OK.
+
+ // This is an RTS shutdown, so we need to move our copy (snapshot) of
+ // weaks (nonmoving_old_weak_ptr_list and nonmoving_weak_ptr_list) to
+ // oldest_gen->weak_ptr_list to be able to run C finalizers in hs_exit_.
+ // Note that more weaks may have been added to oldest_gen->weak_ptr_list
+ // since we started the mark, so we need to append our list to the tail
+ // of oldest_gen->weak_ptr_list.
+ appendWeakList(&nonmoving_old_weak_ptr_list, nonmoving_weak_ptr_list);
+ appendWeakList(&oldest_gen->weak_ptr_list, nonmoving_old_weak_ptr_list);
+ // These lists won't be used again, so clearing them is not strictly
+ // necessary; we do it anyway to keep the state tidy.
+ nonmoving_old_weak_ptr_list = NULL;
+ nonmoving_weak_ptr_list = NULL;
+
+ goto finish;
+ }
+
+ // We're still running, so request a sync.
+ nonmovingBeginFlush(task);
+
+ bool all_caps_syncd;
+ do {
+ all_caps_syncd = nonmovingWaitForFlush();
+ nonmovingMarkThreadsWeaks(mark_queue);
+ } while (!all_caps_syncd);
+#endif
+
nonmovingResurrectThreads(mark_queue, resurrected_threads);
// No more resurrecting threads after this point
@@ -555,6 +658,18 @@ static void nonmovingMark_(MarkQueue *mark_queue, StgWeak **dead_weaks, StgTSO *
debugTrace(DEBUG_nonmoving_gc,
"Done marking, resurrecting threads before releasing capabilities");
+
+ // Schedule finalizers and resurrect threads
+#if defined(THREADED_RTS)
+ // Just pick an arbitrary capability. It's not clear that this is a good
+ // idea -- we use only one capability for all finalizers.
+ scheduleFinalizers(capabilities[0], *dead_weaks);
+ // Note that this mutates the heap, causing write barriers to run.
+ // See Note [Unintentional marking in resurrectThreads] in NonMovingMark.c
+ // for how we deal with this.
+ resurrectThreads(*resurrected_threads);
+#endif
+
#if defined(DEBUG)
// Zap CAFs that we will sweep
nonmovingGcCafs(mark_queue);
@@ -586,6 +701,12 @@ static void nonmovingMark_(MarkQueue *mark_queue, StgWeak **dead_weaks, StgTSO *
nonmoving_old_weak_ptr_list = NULL;
}
+ // Everything has been marked; allow the mutators to proceed
+#if defined(THREADED_RTS)
+ nonmoving_write_barrier_enabled = false;
+ nonmovingFinishFlush(task);
+#endif
+
current_mark_queue = NULL;
freeMarkQueue(mark_queue);
stgFree(mark_queue);
@@ -609,6 +730,20 @@ static void nonmovingMark_(MarkQueue *mark_queue, StgWeak **dead_weaks, StgTSO *
debugTrace(DEBUG_nonmoving_gc, "Finished sweeping.");
// TODO: the remainder of the work done by GarbageCollect (e.g. updating stats)
+
+#if defined(THREADED_RTS)
+finish:
+ boundTaskExiting(task);
+
+ // We are done...
+ mark_thread = 0;
+
+ // Signal that the concurrent collection is finished, allowing the next
+ // non-moving collection to proceed
+ concurrent_coll_running = false;
+ signalCondition(&concurrent_coll_finished);
+ RELEASE_LOCK(&nonmoving_collection_mutex);
+#endif
}
#if defined(DEBUG)
@@ -817,6 +952,31 @@ void locate_object(P_ obj)
return;
}
}
+
+ // Search workspaces. FIXME: this only works in the non-threaded runtime.
+#if !defined(THREADED_RTS)
+ for (uint32_t g = 0; g < RtsFlags.GcFlags.generations - 1; ++ g) {
+ gen_workspace *ws = &gct->gens[g];
+ for (bdescr *blk = ws->todo_bd; blk; blk = blk->link) {
+ if (obj >= blk->start && obj < blk->free) {
+ debugBelch("%p is in generation %" FMT_Word32 " todo bds\n", obj, g);
+ return;
+ }
+ }
+ for (bdescr *blk = ws->scavd_list; blk; blk = blk->link) {
+ if (obj >= blk->start && obj < blk->free) {
+ debugBelch("%p is in generation %" FMT_Word32 " scavd bds\n", obj, g);
+ return;
+ }
+ }
+ for (bdescr *blk = ws->todo_large_objects; blk; blk = blk->link) {
+ if (obj >= blk->start && obj < blk->free) {
+ debugBelch("%p is in generation %" FMT_Word32 " todo large bds\n", obj, g);
+ return;
+ }
+ }
+ }
+#endif
}
void nonmovingPrintSweepList()