Diffstat (limited to 'rts/sm')
-rw-r--r--  rts/sm/BlockAlloc.c          144
-rw-r--r--  rts/sm/CNF.c                   5
-rw-r--r--  rts/sm/Evac.c                109
-rw-r--r--  rts/sm/GC.c                  370
-rw-r--r--  rts/sm/GC.h                   11
-rw-r--r--  rts/sm/GCAux.c                12
-rw-r--r--  rts/sm/GCThread.h              4
-rw-r--r--  rts/sm/NonMoving.c          1390
-rw-r--r--  rts/sm/NonMoving.h           335
-rw-r--r--  rts/sm/NonMovingCensus.c     129
-rw-r--r--  rts/sm/NonMovingCensus.h      28
-rw-r--r--  rts/sm/NonMovingMark.c      1958
-rw-r--r--  rts/sm/NonMovingMark.h       205
-rw-r--r--  rts/sm/NonMovingScav.c       389
-rw-r--r--  rts/sm/NonMovingScav.h        10
-rw-r--r--  rts/sm/NonMovingShortcut.c   326
-rw-r--r--  rts/sm/NonMovingShortcut.h    17
-rw-r--r--  rts/sm/NonMovingSweep.c      402
-rw-r--r--  rts/sm/NonMovingSweep.h       31
-rw-r--r--  rts/sm/Sanity.c              202
-rw-r--r--  rts/sm/Sanity.h                1
-rw-r--r--  rts/sm/Scav.c                 62
-rw-r--r--  rts/sm/Scav.h                 16
-rw-r--r--  rts/sm/Storage.c             220
-rw-r--r--  rts/sm/Storage.h               6
25 files changed, 6176 insertions, 206 deletions
diff --git a/rts/sm/BlockAlloc.c b/rts/sm/BlockAlloc.c
index f9e3d11407..b3e1e2ce75 100644
--- a/rts/sm/BlockAlloc.c
+++ b/rts/sm/BlockAlloc.c
@@ -310,7 +310,7 @@ setup_tail (bdescr *bd)
// Take a free block group bd, and split off a group of size n from
// it. Adjust the free list as necessary, and return the new group.
static bdescr *
-split_free_block (bdescr *bd, uint32_t node, W_ n, uint32_t ln)
+split_free_block (bdescr *bd, uint32_t node, W_ n, uint32_t ln /* log_2_ceil(n) */)
{
bdescr *fg; // free group
@@ -325,6 +325,46 @@ split_free_block (bdescr *bd, uint32_t node, W_ n, uint32_t ln)
return fg;
}
+// Take N blocks off the end, free the rest.
+static bdescr *
+split_block_high (bdescr *bd, W_ n)
+{
+ ASSERT(bd->blocks > n);
+
+ bdescr* ret = bd + bd->blocks - n; // take n blocks off the end
+ ret->blocks = n;
+ ret->start = ret->free = bd->start + (bd->blocks - n)*BLOCK_SIZE_W;
+ ret->link = NULL;
+
+ bd->blocks -= n;
+
+ setup_tail(ret);
+ setup_tail(bd);
+ freeGroup(bd);
+
+ return ret;
+}
+
+// Like `split_block_high`, but takes n blocks off the beginning rather
+// than the end.
+static bdescr *
+split_block_low (bdescr *bd, W_ n)
+{
+ ASSERT(bd->blocks > n);
+
+ bdescr* bd_ = bd + n;
+ bd_->blocks = bd->blocks - n;
+ bd_->start = bd_->free = bd->start + n*BLOCK_SIZE_W;
+
+ bd->blocks = n;
+
+ setup_tail(bd_);
+ setup_tail(bd);
+ freeGroup(bd_);
+
+ return bd;
+}
+
/* Only initializes the start pointers on the first megablock and the
* blocks field of the first bdescr; callers are responsible for calling
* initGroup afterwards.
@@ -461,6 +501,108 @@ finish:
return bd;
}
+// Allocate `n` blocks aligned to `n` blocks, e.g. when n = 8, the blocks will
+// be aligned at `8 * BLOCK_SIZE`. For a group of `n` blocks this makes it easy
+// to find the beginning of the group from any location p inside the group by
+// rounding down:
+//
+// p - (p % (BLOCK_SIZE*n))
+//
+// Used by the non-moving collector for allocating segments.
+//
+// Because the storage manager does not support aligned allocations, we have to
+// allocate `2*n - 1` blocks here to make sure we'll be able to find an aligned
+// region in the allocated blocks. After finding the aligned area we want to
+// free slop on the low and high sides, and the block allocator doesn't support
+// freeing only some portion of a megablock (we can only free whole megablocks).
+// So we disallow allocating megablocks here, and allow allocating at most
+// `BLOCKS_PER_MBLOCK / 2` blocks.
+bdescr *
+allocAlignedGroupOnNode (uint32_t node, W_ n)
+{
+ // allocate enough blocks to have enough space aligned at n-block boundary
+ // free any slops on the low and high side of this space
+
+ // number of blocks to allocate to make sure we have enough aligned space
+ W_ num_blocks = 2*n - 1;
+
+ if (num_blocks >= BLOCKS_PER_MBLOCK) {
+ barf("allocAlignedGroupOnNode: allocating megablocks is not supported\n"
+ " requested blocks: %" FMT_Word "\n"
+ " required for alignment: %" FMT_Word "\n"
+ " megablock size (in blocks): %" FMT_Word,
+ n, num_blocks, (W_) BLOCKS_PER_MBLOCK);
+ }
+
+ W_ group_size = n * BLOCK_SIZE;
+
+ // To reduce splitting and fragmentation we use allocLargeChunkOnNode here.
+ // Tweak the max allocation to avoid allocating megablocks. Splitting slop
+ // below doesn't work with megablocks (freeGroup can't free only a portion
+ // of a megablock so we can't allocate megablocks and free some parts of
+ // them).
+ W_ max_blocks = stg_min(num_blocks * 3, BLOCKS_PER_MBLOCK - 1);
+ bdescr *bd = allocLargeChunkOnNode(node, num_blocks, max_blocks);
+ // We may allocate more than num_blocks, so update it
+ num_blocks = bd->blocks;
+
+ // slop on the low side
+ W_ slop_low = 0;
+ if ((uintptr_t)bd->start % group_size != 0) {
+ slop_low = group_size - ((uintptr_t)bd->start % group_size);
+ }
+
+ W_ slop_high = (num_blocks * BLOCK_SIZE) - group_size - slop_low;
+
+ ASSERT((slop_low % BLOCK_SIZE) == 0);
+ ASSERT((slop_high % BLOCK_SIZE) == 0);
+
+ W_ slop_low_blocks = slop_low / BLOCK_SIZE;
+ W_ slop_high_blocks = slop_high / BLOCK_SIZE;
+
+ ASSERT(slop_low_blocks + slop_high_blocks + n == num_blocks);
+
+#if defined(DEBUG)
+ checkFreeListSanity();
+ W_ free_before = countFreeList();
+#endif
+
+ if (slop_low_blocks != 0) {
+ bd = split_block_high(bd, num_blocks - slop_low_blocks);
+ ASSERT(countBlocks(bd) == num_blocks - slop_low_blocks);
+ }
+
+#if defined(DEBUG)
+ ASSERT(countFreeList() == free_before + slop_low_blocks);
+ checkFreeListSanity();
+#endif
+
+ // At this point the bd should be aligned, but we may have slop on the high side
+ ASSERT((uintptr_t)bd->start % group_size == 0);
+
+#if defined(DEBUG)
+ free_before = countFreeList();
+#endif
+
+ if (slop_high_blocks != 0) {
+ bd = split_block_low(bd, n);
+ ASSERT(bd->blocks == n);
+ }
+
+#if defined(DEBUG)
+ ASSERT(countFreeList() == free_before + slop_high_blocks);
+ checkFreeListSanity();
+#endif
+
+ // Should still be aligned
+ ASSERT((uintptr_t)bd->start % group_size == 0);
+
+ // Just to make sure I get this right
+ ASSERT(Bdescr(bd->start) == bd);
+
+ return bd;
+}
+
STATIC_INLINE
uint32_t nodeWithLeastBlocks (void)
{
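
As an illustration of the slop arithmetic above (not part of the patch itself), the
following sketch reproduces the computation assuming GHC's default 4KB BLOCK_SIZE;
EX_BLOCK_SIZE and example_aligned_slop are hypothetical names used only here.

    #include <assert.h>
    #include <stdint.h>

    #define EX_BLOCK_SIZE 4096UL        /* stand-in for BLOCK_SIZE */

    /* Reproduces the slop_low/slop_high computation for an aligned group of
     * n blocks carved out of a num_blocks = 2*n - 1 allocation starting at
     * address `start`. */
    static void example_aligned_slop(uintptr_t start, unsigned long n,
                                     unsigned long num_blocks)
    {
        unsigned long group_size = n * EX_BLOCK_SIZE;
        unsigned long slop_low = 0;
        if (start % group_size != 0)
            slop_low = group_size - (start % group_size);
        unsigned long slop_high =
            num_blocks * EX_BLOCK_SIZE - group_size - slop_low;

        /* e.g. start = 0x5000, n = 8, num_blocks = 15:
         *   slop_low  = 12288 (3 blocks, freed by split_block_high)
         *   slop_high = 16384 (4 blocks, freed by split_block_low)
         * leaving an 8-block group starting at 0x8000, i.e. 32K-aligned. */
        assert(slop_low / EX_BLOCK_SIZE + slop_high / EX_BLOCK_SIZE + n == num_blocks);
    }
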
diff --git a/rts/sm/CNF.c b/rts/sm/CNF.c
index f85b390414..87d1d84f50 100644
--- a/rts/sm/CNF.c
+++ b/rts/sm/CNF.c
@@ -276,7 +276,10 @@ compactFree(StgCompactNFData *str)
for ( ; block; block = next) {
next = block->next;
bd = Bdescr((StgPtr)block);
- ASSERT((bd->flags & BF_EVACUATED) == 0);
+ ASSERT(RtsFlags.GcFlags.useNonmoving || ((bd->flags & BF_EVACUATED) == 0));
+ // When using the non-moving collector we leave compact objects
+ // evacuated to the oldest gen flagged as BF_EVACUATED, to avoid
+ // evacuating objects in the non-moving heap.
freeGroup(bd);
}
}
diff --git a/rts/sm/Evac.c b/rts/sm/Evac.c
index 53a473d26c..521fd4eef4 100644
--- a/rts/sm/Evac.c
+++ b/rts/sm/Evac.c
@@ -27,6 +27,7 @@
#include "LdvProfile.h"
#include "CNF.h"
#include "Scav.h"
+#include "NonMoving.h"
#if defined(THREADED_RTS) && !defined(PARALLEL_GC)
#define evacuate(p) evacuate1(p)
@@ -39,7 +40,19 @@
copy_tag(p, info, src, size, stp, tag)
#endif
-/* Used to avoid long recursion due to selector thunks
+/* Note [Selector optimisation depth limit]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * MAX_THUNK_SELECTOR_DEPTH is used to avoid long recursion of
+ * eval_thunk_selector due to nested selector thunks. Note that this *only*
+ * counts nested selector thunks, e.g. `fst (fst (... (fst x)))`. The collector
+ * will traverse interleaved selector-constructor pairs without limit, e.g.
+ *
+ * a = (fst b, _)
+ * b = (fst c, _)
+ * c = (fst d, _)
+ * d = (x, _)
+ *
*/
#define MAX_THUNK_SELECTOR_DEPTH 16
@@ -50,9 +63,12 @@ STATIC_INLINE void evacuate_large(StgPtr p);
Allocate some space in which to copy an object.
-------------------------------------------------------------------------- */
+/* size is in words */
STATIC_INLINE StgPtr
alloc_for_copy (uint32_t size, uint32_t gen_no)
{
+ ASSERT(gen_no < RtsFlags.GcFlags.generations);
+
StgPtr to;
gen_workspace *ws;
@@ -69,6 +85,36 @@ alloc_for_copy (uint32_t size, uint32_t gen_no)
}
}
+ if (RTS_UNLIKELY(RtsFlags.GcFlags.useNonmoving)) {
+ /* See Note [Deadlock detection under nonmoving collector]. */
+ if (deadlock_detect_gc)
+ gen_no = oldest_gen->no;
+
+ if (gen_no == oldest_gen->no) {
+ gct->copied += size;
+ to = nonmovingAllocate(gct->cap, size);
+
+ // Add segment to the todo list unless it's already there
+ // current->todo_link == NULL means not in todo list
+ struct NonmovingSegment *seg = nonmovingGetSegment(to);
+ if (!seg->todo_link) {
+ gen_workspace *ws = &gct->gens[oldest_gen->no];
+ seg->todo_link = ws->todo_seg;
+ ws->todo_seg = seg;
+ }
+
+ // The object which refers to this closure may have been aged (i.e.
+ // retained in a younger generation). Consequently, we must add the
+ // closure to the mark queue to ensure that it will be marked.
+ //
+ // However, if we are in a deadlock detection GC then we disable aging
+ // so there is no need.
+ if (major_gc && !deadlock_detect_gc)
+ markQueuePushClosureGC(&gct->cap->upd_rem_set.queue, (StgClosure *) to);
+ return to;
+ }
+ }
+
ws = &gct->gens[gen_no]; // zero memory references here
/* chain a new block onto the to-space for the destination gen if
@@ -88,6 +134,7 @@ alloc_for_copy (uint32_t size, uint32_t gen_no)
The evacuate() code
-------------------------------------------------------------------------- */
+/* size is in words */
STATIC_INLINE GNUC_ATTR_HOT void
copy_tag(StgClosure **p, const StgInfoTable *info,
StgClosure *src, uint32_t size, uint32_t gen_no, StgWord tag)
@@ -284,7 +331,10 @@ evacuate_large(StgPtr p)
*/
new_gen_no = bd->dest_no;
- if (new_gen_no < gct->evac_gen_no) {
+ if (RTS_UNLIKELY(deadlock_detect_gc)) {
+ /* See Note [Deadlock detection under nonmoving collector]. */
+ new_gen_no = oldest_gen->no;
+ } else if (new_gen_no < gct->evac_gen_no) {
if (gct->eager_promotion) {
new_gen_no = gct->evac_gen_no;
} else {
@@ -296,6 +346,9 @@ evacuate_large(StgPtr p)
new_gen = &generations[new_gen_no];
bd->flags |= BF_EVACUATED;
+ if (RTS_UNLIKELY(RtsFlags.GcFlags.useNonmoving && new_gen == oldest_gen)) {
+ bd->flags |= BF_NONMOVING;
+ }
initBdescr(bd, new_gen, new_gen->to);
// If this is a block of pinned or compact objects, we don't have to scan
@@ -330,6 +383,13 @@ evacuate_large(StgPtr p)
STATIC_INLINE void
evacuate_static_object (StgClosure **link_field, StgClosure *q)
{
+ if (RTS_UNLIKELY(RtsFlags.GcFlags.useNonmoving)) {
+ // See Note [Static objects under the nonmoving collector] in Storage.c.
+ if (major_gc && !deadlock_detect_gc)
+ markQueuePushClosureGC(&gct->cap->upd_rem_set.queue, q);
+ return;
+ }
+
StgWord link = (StgWord)*link_field;
// See Note [STATIC_LINK fields] for how the link field bits work
@@ -376,12 +436,22 @@ evacuate_compact (StgPtr p)
bd = Bdescr((StgPtr)str);
gen_no = bd->gen_no;
+ if (bd->flags & BF_NONMOVING) {
+ // We may have evacuated the block to the nonmoving generation. If so
+ // we need to make sure it is added to the mark queue since the only
+ // reference to it may be from the moving heap.
+ if (major_gc && !deadlock_detect_gc)
+ markQueuePushClosureGC(&gct->cap->upd_rem_set.queue, (StgClosure *) str);
+ return;
+ }
+
// already evacuated? (we're about to do the same check,
// but we avoid taking the spin-lock)
if (bd->flags & BF_EVACUATED) {
/* Don't forget to set the gct->failed_to_evac flag if we didn't get
* the desired destination (see comments in evacuate()).
*/
+ debugTrace(DEBUG_compact, "Compact %p already evacuated", str);
if (gen_no < gct->evac_gen_no) {
gct->failed_to_evac = true;
TICK_GC_FAILED_PROMOTION();
@@ -430,9 +500,15 @@ evacuate_compact (StgPtr p)
// for that - the only code touching the generation of the block is
// in the GC, and that should never see blocks other than the first)
bd->flags |= BF_EVACUATED;
+ if (RTS_UNLIKELY(RtsFlags.GcFlags.useNonmoving && new_gen == oldest_gen)) {
+ bd->flags |= BF_NONMOVING;
+ }
initBdescr(bd, new_gen, new_gen->to);
if (str->hash) {
+ // If there is a hash-table for sharing preservation then we need to add
+ // the compact to the scavenging work list to ensure that the hashtable
+ // is scavenged.
gen_workspace *ws = &gct->gens[new_gen_no];
bd->link = ws->todo_large_objects;
ws->todo_large_objects = bd;
@@ -563,7 +639,18 @@ loop:
bd = Bdescr((P_)q);
- if ((bd->flags & (BF_LARGE | BF_MARKED | BF_EVACUATED | BF_COMPACT)) != 0) {
+ if ((bd->flags & (BF_LARGE | BF_MARKED | BF_EVACUATED | BF_COMPACT | BF_NONMOVING)) != 0) {
+ // Pointer into the non-moving heap. The non-moving heap is collected using
+ // mark-sweep, so this object should be marked and then retained by sweep.
+ if (RTS_UNLIKELY(bd->flags & BF_NONMOVING)) {
+ // NOTE: large objects in nonmoving heap are also marked with
+ // BF_NONMOVING. Those are moved to scavenged_large_objects list in
+ // mark phase.
+ if (major_gc && !deadlock_detect_gc)
+ markQueuePushClosureGC(&gct->cap->upd_rem_set.queue, q);
+ return;
+ }
+
// pointer into to-space: just return it. It might be a pointer
// into a generation that we aren't collecting (> N), or it
// might just be a pointer into to-space. The latter doesn't
@@ -594,6 +681,13 @@ loop:
*/
if (bd->flags & BF_LARGE) {
evacuate_large((P_)q);
+
+ // We may have evacuated the block to the nonmoving generation. If so
+ // we need to make sure it is added to the mark queue since the only
+ // reference to it may be from the moving heap.
+ if (major_gc && bd->flags & BF_NONMOVING && !deadlock_detect_gc) {
+ markQueuePushClosureGC(&gct->cap->upd_rem_set.queue, q);
+ }
return;
}
@@ -894,6 +988,12 @@ evacuate_BLACKHOLE(StgClosure **p)
// blackholes can't be in a compact
ASSERT((bd->flags & BF_COMPACT) == 0);
+ if (RTS_UNLIKELY(bd->flags & BF_NONMOVING)) {
+ if (major_gc && !deadlock_detect_gc)
+ markQueuePushClosureGC(&gct->cap->upd_rem_set.queue, q);
+ return;
+ }
+
// blackholes *can* be in a large object: when raiseAsync() creates an
// AP_STACK the payload might be large enough to create a large object.
// See #14497.
@@ -1044,7 +1144,7 @@ selector_chain:
// save any space in any case, and updating with an indirection is
// trickier in a non-collected gen: we would have to update the
// mutable list.
- if (bd->flags & BF_EVACUATED) {
+ if (bd->flags & (BF_EVACUATED | BF_NONMOVING)) {
unchain_thunk_selectors(prev_thunk_selector, (StgClosure *)p);
*q = (StgClosure *)p;
// shortcut, behave as for: if (evac) evacuate(q);
@@ -1257,6 +1357,7 @@ selector_loop:
// recursively evaluate this selector. We don't want to
// recurse indefinitely, so we impose a depth bound.
+ // See Note [Selector optimisation depth limit].
if (gct->thunk_selector_depth >= MAX_THUNK_SELECTOR_DEPTH) {
goto bale_out;
}
diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index 76237f35c2..83e9c97bd9 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -51,6 +51,7 @@
#include "CheckUnload.h"
#include "CNF.h"
#include "RtsFlags.h"
+#include "NonMoving.h"
#include <string.h> // for memset()
#include <unistd.h>
@@ -103,6 +104,7 @@
*/
uint32_t N;
bool major_gc;
+bool deadlock_detect_gc;
/* Data used for allocation area sizing.
*/
@@ -159,7 +161,6 @@ static void mark_root (void *user, StgClosure **root);
static void prepare_collected_gen (generation *gen);
static void prepare_uncollected_gen (generation *gen);
static void init_gc_thread (gc_thread *t);
-static void resize_generations (void);
static void resize_nursery (void);
static void start_gc_threads (void);
static void scavenge_until_all_done (void);
@@ -193,7 +194,8 @@ StgPtr mark_sp; // pointer to the next unallocated mark stack entry
void
GarbageCollect (uint32_t collect_gen,
- bool do_heap_census,
+ const bool do_heap_census,
+ const bool deadlock_detect,
uint32_t gc_type USED_IF_THREADS,
Capability *cap,
bool idle_cap[])
@@ -263,7 +265,25 @@ GarbageCollect (uint32_t collect_gen,
N = collect_gen;
major_gc = (N == RtsFlags.GcFlags.generations-1);
- if (major_gc) {
+ /* See Note [Deadlock detection under nonmoving collector]. */
+ deadlock_detect_gc = deadlock_detect;
+
+#if defined(THREADED_RTS)
+ if (major_gc && RtsFlags.GcFlags.useNonmoving && concurrent_coll_running) {
+ /* If there is already a concurrent major collection running then
+ * there is no benefit to starting another.
+ * TODO: Catch heap-size runaway.
+ */
+ N--;
+ collect_gen--;
+ major_gc = false;
+ }
+#endif
+
+ /* N.B. The nonmoving collector works a bit differently. See
+ * Note [Static objects under the nonmoving collector].
+ */
+ if (major_gc && !RtsFlags.GcFlags.useNonmoving) {
prev_static_flag = static_flag;
static_flag =
static_flag == STATIC_FLAG_A ? STATIC_FLAG_B : STATIC_FLAG_A;
@@ -572,7 +592,7 @@ GarbageCollect (uint32_t collect_gen,
gen = &generations[g];
// for generations we collected...
- if (g <= N) {
+ if (g <= N && !(RtsFlags.GcFlags.useNonmoving && gen == oldest_gen)) {
/* free old memory and shift to-space into from-space for all
* the collected generations (except the allocation area). These
@@ -710,8 +730,55 @@ GarbageCollect (uint32_t collect_gen,
}
} // for all generations
- // update the max size of older generations after a major GC
- resize_generations();
+ // Flush the update remembered set. See Note [Eager update remembered set
+ // flushing] in NonMovingMark.c
+ if (RtsFlags.GcFlags.useNonmoving) {
+ RELEASE_SM_LOCK;
+ nonmovingAddUpdRemSetBlocks(&gct->cap->upd_rem_set.queue);
+ ACQUIRE_SM_LOCK;
+ }
+
+ // Mark and sweep the oldest generation.
+ // N.B. This can only happen after we've moved
+ // oldest_gen->scavenged_large_objects back to oldest_gen->large_objects.
+ ASSERT(oldest_gen->scavenged_large_objects == NULL);
+ if (RtsFlags.GcFlags.useNonmoving && major_gc) {
+ // All threads in the non-moving heap should be found to be alive, because
+ // threads in the non-moving generation's list should live in the
+ // non-moving heap, and we consider non-moving objects alive during
+ // preparation.
+ ASSERT(oldest_gen->old_threads == END_TSO_QUEUE);
+ // For weaks, remember that we evacuated all weaks to the non-moving heap
+ // in markWeakPtrList(), and then moved the weak_ptr_list to
+ // old_weak_ptr_list. We then moved weaks with live keys to the
+ // weak_ptr_list again. Then, in collectDeadWeakPtrs() we moved weaks in
+ // old_weak_ptr_list to dead_weak_ptr_list. So at this point
+ // old_weak_ptr_list should be empty.
+ ASSERT(oldest_gen->old_weak_ptr_list == NULL);
+
+ // we may need to take the lock to allocate mark queue blocks
+ RELEASE_SM_LOCK;
+ // dead_weak_ptr_list contains weak pointers with dead keys. Those need to
+ // be kept alive because we'll use them in scheduleFinalizers(). Similarly
+ // resurrected_threads are also going to be used in resurrectThreads()
+ // so we need to mark those too.
+ // Note that in the sequential case these lists will be appended with more
+ // weaks and threads found to be dead during marking.
+#if !defined(THREADED_RTS)
+ // In the non-threaded runtime this is the only time we push to the
+ // upd_rem_set
+ nonmovingAddUpdRemSetBlocks(&gct->cap->upd_rem_set.queue);
+#endif
+ nonmovingCollect(&dead_weak_ptr_list, &resurrected_threads);
+ ACQUIRE_SM_LOCK;
+ }
+
+ // Update the max size of older generations after a major GC:
+ // We can't resize here in the case of the concurrent collector since we
+ // don't yet know how much live data we have. This will instead be done
+ // once we finish marking.
+ if (major_gc && RtsFlags.GcFlags.generations > 1 && ! RtsFlags.GcFlags.useNonmoving)
+ resizeGenerations();
// Free the mark stack.
if (mark_stack_top_bd != NULL) {
@@ -735,7 +802,7 @@ GarbageCollect (uint32_t collect_gen,
// mark the garbage collected CAFs as dead
#if defined(DEBUG)
- if (major_gc) { gcCAFs(); }
+ if (major_gc && !RtsFlags.GcFlags.useNonmoving) { gcCAFs(); }
#endif
// Update the stable name hash table
@@ -768,8 +835,9 @@ GarbageCollect (uint32_t collect_gen,
// check sanity after GC
// before resurrectThreads(), because that might overwrite some
// closures, which will cause problems with THREADED where we don't
- // fill slop.
- IF_DEBUG(sanity, checkSanity(true /* after GC */, major_gc));
+ // fill slop. If we are using the nonmoving collector then we can't claim to
+ // be *after* the major GC; it's now running concurrently.
+ IF_DEBUG(sanity, checkSanity(true /* after GC */, major_gc && !RtsFlags.GcFlags.useNonmoving));
// If a heap census is due, we need to do it before
// resurrectThreads(), for the same reason as checkSanity above:
@@ -942,6 +1010,7 @@ new_gc_thread (uint32_t n, gc_thread *t)
ws->todo_overflow = NULL;
ws->n_todo_overflow = 0;
ws->todo_large_objects = NULL;
+ ws->todo_seg = END_NONMOVING_TODO_LIST;
ws->part_list = NULL;
ws->n_part_blocks = 0;
@@ -1321,6 +1390,18 @@ releaseGCThreads (Capability *cap USED_IF_THREADS, bool idle_cap[])
#endif
/* ----------------------------------------------------------------------------
+ Save the mutable lists in saved_mut_lists, where they will be scavenged
+ during GC
+ ------------------------------------------------------------------------- */
+
+static void
+stash_mut_list (Capability *cap, uint32_t gen_no)
+{
+ cap->saved_mut_lists[gen_no] = cap->mut_lists[gen_no];
+ cap->mut_lists[gen_no] = allocBlockOnNode_sync(cap->node);
+}
+
+/* ----------------------------------------------------------------------------
Initialise a generation that is to be collected
------------------------------------------------------------------------- */
@@ -1331,11 +1412,17 @@ prepare_collected_gen (generation *gen)
gen_workspace *ws;
bdescr *bd, *next;
- // Throw away the current mutable list. Invariant: the mutable
- // list always has at least one block; this means we can avoid a
- // check for NULL in recordMutable().
g = gen->no;
- if (g != 0) {
+
+ if (RtsFlags.GcFlags.useNonmoving && g == oldest_gen->no) {
+ // Nonmoving heap's mutable list is always a root.
+ for (i = 0; i < n_capabilities; i++) {
+ stash_mut_list(capabilities[i], g);
+ }
+ } else if (g != 0) {
+ // Otherwise throw away the current mutable list. Invariant: the
+ // mutable list always has at least one block; this means we can avoid
+ // a check for NULL in recordMutable().
for (i = 0; i < n_capabilities; i++) {
freeChain(capabilities[i]->mut_lists[g]);
capabilities[i]->mut_lists[g] =
@@ -1351,13 +1438,17 @@ prepare_collected_gen (generation *gen)
gen->old_threads = gen->threads;
gen->threads = END_TSO_QUEUE;
- // deprecate the existing blocks
- gen->old_blocks = gen->blocks;
- gen->n_old_blocks = gen->n_blocks;
- gen->blocks = NULL;
- gen->n_blocks = 0;
- gen->n_words = 0;
- gen->live_estimate = 0;
+ // deprecate the existing blocks (except in the case of the nonmoving
+ // collector since these will be preserved in nonmovingCollect for the
+ // concurrent GC).
+ if (!(RtsFlags.GcFlags.useNonmoving && g == oldest_gen->no)) {
+ gen->old_blocks = gen->blocks;
+ gen->n_old_blocks = gen->n_blocks;
+ gen->blocks = NULL;
+ gen->n_blocks = 0;
+ gen->n_words = 0;
+ gen->live_estimate = 0;
+ }
// initialise the large object queues.
ASSERT(gen->scavenged_large_objects == NULL);
@@ -1451,18 +1542,6 @@ prepare_collected_gen (generation *gen)
}
}
-
-/* ----------------------------------------------------------------------------
- Save the mutable lists in saved_mut_lists
- ------------------------------------------------------------------------- */
-
-static void
-stash_mut_list (Capability *cap, uint32_t gen_no)
-{
- cap->saved_mut_lists[gen_no] = cap->mut_lists[gen_no];
- cap->mut_lists[gen_no] = allocBlockOnNode_sync(cap->node);
-}
-
/* ----------------------------------------------------------------------------
Initialise a generation that is *not* to be collected
------------------------------------------------------------------------- */
@@ -1531,31 +1610,57 @@ collect_gct_blocks (void)
}
/* -----------------------------------------------------------------------------
- During mutation, any blocks that are filled by allocatePinned() are
- stashed on the local pinned_object_blocks list, to avoid needing to
- take a global lock. Here we collect those blocks from the
- cap->pinned_object_blocks lists and put them on the
- main g0->large_object list.
+ During mutation, any blocks that are filled by allocatePinned() are stashed
+ on the local pinned_object_blocks list, to avoid needing to take a global
+ lock. Here we collect those blocks from the cap->pinned_object_blocks lists
+ and put them on g0->large_objects or oldest_gen->large_objects.
+
+ How to decide which list to put them on?
+
+ - When the non-moving heap is enabled and this is a major GC, we put them on
+ oldest_gen. This is because after preparation we really want no
+ old-to-young references, and we want to be able to reset mut_lists. For
+ this we need to promote every potentially live object to the oldest gen.
+
+ - Otherwise we put them on g0.
-------------------------------------------------------------------------- */
static void
collect_pinned_object_blocks (void)
{
- uint32_t n;
- bdescr *bd, *prev;
+ generation *gen;
+ const bool use_nonmoving = RtsFlags.GcFlags.useNonmoving;
+ if (use_nonmoving && major_gc) {
+ gen = oldest_gen;
+ } else {
+ gen = g0;
+ }
- for (n = 0; n < n_capabilities; n++) {
- prev = NULL;
- for (bd = capabilities[n]->pinned_object_blocks; bd != NULL; bd = bd->link) {
- prev = bd;
+ for (uint32_t n = 0; n < n_capabilities; n++) {
+ bdescr *last = NULL;
+ if (use_nonmoving && gen == oldest_gen) {
+ // Mark objects as belonging to the nonmoving heap
+ for (bdescr *bd = capabilities[n]->pinned_object_blocks; bd != NULL; bd = bd->link) {
+ bd->flags |= BF_NONMOVING;
+ bd->gen = oldest_gen;
+ bd->gen_no = oldest_gen->no;
+ oldest_gen->n_large_words += bd->free - bd->start;
+ oldest_gen->n_large_blocks += bd->blocks;
+ last = bd;
+ }
+ } else {
+ for (bdescr *bd = capabilities[n]->pinned_object_blocks; bd != NULL; bd = bd->link) {
+ last = bd;
+ }
}
- if (prev != NULL) {
- prev->link = g0->large_objects;
- if (g0->large_objects != NULL) {
- g0->large_objects->u.back = prev;
+
+ if (last != NULL) {
+ last->link = gen->large_objects;
+ if (gen->large_objects != NULL) {
+ gen->large_objects->u.back = last;
}
- g0->large_objects = capabilities[n]->pinned_object_blocks;
- capabilities[n]->pinned_object_blocks = 0;
+ gen->large_objects = capabilities[n]->pinned_object_blocks;
+ capabilities[n]->pinned_object_blocks = NULL;
}
}
}
@@ -1614,98 +1719,100 @@ mark_root(void *user USED_IF_THREADS, StgClosure **root)
percentage of the maximum heap size available to allocate into.
------------------------------------------------------------------------- */
-static void
-resize_generations (void)
+void
+resizeGenerations (void)
{
uint32_t g;
+ W_ live, size, min_alloc, words;
+ const W_ max = RtsFlags.GcFlags.maxHeapSize;
+ const W_ gens = RtsFlags.GcFlags.generations;
- if (major_gc && RtsFlags.GcFlags.generations > 1) {
- W_ live, size, min_alloc, words;
- const W_ max = RtsFlags.GcFlags.maxHeapSize;
- const W_ gens = RtsFlags.GcFlags.generations;
-
- // live in the oldest generations
- if (oldest_gen->live_estimate != 0) {
- words = oldest_gen->live_estimate;
- } else {
- words = oldest_gen->n_words;
- }
- live = (words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W +
- oldest_gen->n_large_blocks +
- oldest_gen->n_compact_blocks;
+ // live in the oldest generations
+ if (oldest_gen->live_estimate != 0) {
+ words = oldest_gen->live_estimate;
+ } else {
+ words = oldest_gen->n_words;
+ }
+ live = (words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W +
+ oldest_gen->n_large_blocks +
+ oldest_gen->n_compact_blocks;
- // default max size for all generations except zero
- size = stg_max(live * RtsFlags.GcFlags.oldGenFactor,
- RtsFlags.GcFlags.minOldGenSize);
+ // default max size for all generations except zero
+ size = stg_max(live * RtsFlags.GcFlags.oldGenFactor,
+ RtsFlags.GcFlags.minOldGenSize);
- if (RtsFlags.GcFlags.heapSizeSuggestionAuto) {
- if (max > 0) {
- RtsFlags.GcFlags.heapSizeSuggestion = stg_min(max, size);
- } else {
- RtsFlags.GcFlags.heapSizeSuggestion = size;
- }
+ if (RtsFlags.GcFlags.heapSizeSuggestionAuto) {
+ if (max > 0) {
+ RtsFlags.GcFlags.heapSizeSuggestion = stg_min(max, size);
+ } else {
+ RtsFlags.GcFlags.heapSizeSuggestion = size;
}
+ }
- // minimum size for generation zero
- min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200,
- RtsFlags.GcFlags.minAllocAreaSize
- * (W_)n_capabilities);
-
- // Auto-enable compaction when the residency reaches a
- // certain percentage of the maximum heap size (default: 30%).
- if (RtsFlags.GcFlags.compact ||
- (max > 0 &&
- oldest_gen->n_blocks >
- (RtsFlags.GcFlags.compactThreshold * max) / 100)) {
- oldest_gen->mark = 1;
- oldest_gen->compact = 1;
+ // minimum size for generation zero
+ min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200,
+ RtsFlags.GcFlags.minAllocAreaSize
+ * (W_)n_capabilities);
+
+ // Auto-enable compaction when the residency reaches a
+ // certain percentage of the maximum heap size (default: 30%).
+ // Except when non-moving GC is enabled.
+ if (!RtsFlags.GcFlags.useNonmoving &&
+ (RtsFlags.GcFlags.compact ||
+ (max > 0 &&
+ oldest_gen->n_blocks >
+ (RtsFlags.GcFlags.compactThreshold * max) / 100))) {
+ oldest_gen->mark = 1;
+ oldest_gen->compact = 1;
// debugBelch("compaction: on\n", live);
- } else {
- oldest_gen->mark = 0;
- oldest_gen->compact = 0;
+ } else {
+ oldest_gen->mark = 0;
+ oldest_gen->compact = 0;
// debugBelch("compaction: off\n", live);
- }
+ }
- if (RtsFlags.GcFlags.sweep) {
- oldest_gen->mark = 1;
- }
+ if (RtsFlags.GcFlags.sweep) {
+ oldest_gen->mark = 1;
+ }
- // if we're going to go over the maximum heap size, reduce the
- // size of the generations accordingly. The calculation is
- // different if compaction is turned on, because we don't need
- // to double the space required to collect the old generation.
- if (max != 0) {
+ // if we're going to go over the maximum heap size, reduce the
+ // size of the generations accordingly. The calculation is
+ // different if compaction is turned on, because we don't need
+ // to double the space required to collect the old generation.
+ if (max != 0) {
+
+ // this test is necessary to ensure that the calculations
+ // below don't have any negative results - we're working
+ // with unsigned values here.
+ if (max < min_alloc) {
+ heapOverflow();
+ }
- // this test is necessary to ensure that the calculations
- // below don't have any negative results - we're working
- // with unsigned values here.
- if (max < min_alloc) {
- heapOverflow();
+ if (oldest_gen->compact) {
+ if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) {
+ size = (max - min_alloc) / ((gens - 1) * 2 - 1);
}
-
- if (oldest_gen->compact) {
- if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) {
- size = (max - min_alloc) / ((gens - 1) * 2 - 1);
- }
- } else {
- if ( (size * (gens - 1) * 2) + min_alloc > max ) {
- size = (max - min_alloc) / ((gens - 1) * 2);
- }
+ } else {
+ if ( (size * (gens - 1) * 2) + min_alloc > max ) {
+ size = (max - min_alloc) / ((gens - 1) * 2);
}
+ }
- if (size < live) {
- heapOverflow();
- }
+ if (size < live) {
+ heapOverflow();
}
+ }
#if 0
- debugBelch("live: %d, min_alloc: %d, size : %d, max = %d\n", live,
- min_alloc, size, max);
+ debugBelch("live: %d, min_alloc: %d, size : %d, max = %d\n", live,
+ min_alloc, size, max);
+ debugBelch("resize_gen: n_blocks: %lu, n_large_block: %lu, n_compact_blocks: %lu\n",
+ oldest_gen->n_blocks, oldest_gen->n_large_blocks, oldest_gen->n_compact_blocks);
+ debugBelch("resize_gen: max_blocks: %lu -> %lu\n", oldest_gen->max_blocks, oldest_gen->n_blocks);
#endif
- for (g = 0; g < gens; g++) {
- generations[g].max_blocks = size;
- }
+ for (g = 0; g < gens; g++) {
+ generations[g].max_blocks = size;
}
}
@@ -1841,21 +1948,16 @@ resize_nursery (void)
#if defined(DEBUG)
-static void gcCAFs(void)
+void gcCAFs(void)
{
- StgIndStatic *p, *prev;
+ uint32_t i = 0;
+ StgIndStatic *prev = NULL;
- const StgInfoTable *info;
- uint32_t i;
-
- i = 0;
- p = debug_caf_list;
- prev = NULL;
-
- for (p = debug_caf_list; p != (StgIndStatic*)END_OF_CAF_LIST;
- p = (StgIndStatic*)p->saved_info) {
-
- info = get_itbl((StgClosure*)p);
+ for (StgIndStatic *p = debug_caf_list;
+ p != (StgIndStatic*) END_OF_CAF_LIST;
+ p = (StgIndStatic*) p->saved_info)
+ {
+ const StgInfoTable *info = get_itbl((StgClosure*)p);
ASSERT(info->type == IND_STATIC);
// See Note [STATIC_LINK fields] in Storage.h
diff --git a/rts/sm/GC.h b/rts/sm/GC.h
index 43cc4ca8a1..bde006913b 100644
--- a/rts/sm/GC.h
+++ b/rts/sm/GC.h
@@ -17,9 +17,12 @@
#include "HeapAlloc.h"
-void GarbageCollect (uint32_t force_major_gc,
+void GarbageCollect (uint32_t collect_gen,
bool do_heap_census,
- uint32_t gc_type, Capability *cap, bool idle_cap[]);
+ bool deadlock_detect,
+ uint32_t gc_type,
+ Capability *cap,
+ bool idle_cap[]);
typedef void (*evac_fn)(void *user, StgClosure **root);
@@ -30,6 +33,8 @@ bool doIdleGCWork(Capability *cap, bool all);
extern uint32_t N;
extern bool major_gc;
+/* See Note [Deadlock detection under nonmoving collector]. */
+extern bool deadlock_detect_gc;
extern bdescr *mark_stack_bd;
extern bdescr *mark_stack_top_bd;
@@ -55,6 +60,8 @@ void gcWorkerThread (Capability *cap);
void initGcThreads (uint32_t from, uint32_t to);
void freeGcThreads (void);
+void resizeGenerations (void);
+
#if defined(THREADED_RTS)
void waitForGcThreads (Capability *cap, bool idle_cap[]);
void releaseGCThreads (Capability *cap, bool idle_cap[]);
diff --git a/rts/sm/GCAux.c b/rts/sm/GCAux.c
index 650dc2c1df..11080c1f22 100644
--- a/rts/sm/GCAux.c
+++ b/rts/sm/GCAux.c
@@ -60,6 +60,14 @@ isAlive(StgClosure *p)
// ignore closures in generations that we're not collecting.
bd = Bdescr((P_)q);
+ // isAlive is used when scavenging moving generations, before the mark
+ // phase. Because we don't know the liveness of objects before the mark phase
+ // we have to conservatively treat objects in the non-moving generation as
+ // alive here.
+ if (bd->flags & BF_NONMOVING) {
+ return p;
+ }
+
// if it's a pointer into to-space, then we're done
if (bd->flags & BF_EVACUATED) {
return p;
@@ -140,14 +148,14 @@ markCAFs (evac_fn evac, void *user)
StgIndStatic *c;
for (c = dyn_caf_list;
- c != (StgIndStatic*)END_OF_CAF_LIST;
+ ((StgWord) c | STATIC_FLAG_LIST) != (StgWord)END_OF_CAF_LIST;
c = (StgIndStatic *)c->static_link)
{
c = (StgIndStatic *)UNTAG_STATIC_LIST_PTR(c);
evac(user, &c->indirectee);
}
for (c = revertible_caf_list;
- c != (StgIndStatic*)END_OF_CAF_LIST;
+ ((StgWord) c | STATIC_FLAG_LIST) != (StgWord)END_OF_CAF_LIST;
c = (StgIndStatic *)c->static_link)
{
c = (StgIndStatic *)UNTAG_STATIC_LIST_PTR(c);
diff --git a/rts/sm/GCThread.h b/rts/sm/GCThread.h
index 66f7a7f84f..3012f52f28 100644
--- a/rts/sm/GCThread.h
+++ b/rts/sm/GCThread.h
@@ -83,6 +83,7 @@ typedef struct gen_workspace_ {
bdescr * todo_bd;
StgPtr todo_free; // free ptr for todo_bd
StgPtr todo_lim; // lim for todo_bd
+ struct NonmovingSegment *todo_seg; // only available for oldest gen workspace
WSDeque * todo_q;
bdescr * todo_overflow;
@@ -100,9 +101,6 @@ typedef struct gen_workspace_ {
bdescr * part_list;
StgWord n_part_blocks; // count of above
StgWord n_part_words;
-
- StgWord pad[1];
-
} gen_workspace ATTRIBUTE_ALIGNED(64);
// align so that computing gct->gens[n] is a shift, not a multiply
// fails if the size is <64, which is why we need the pad above
diff --git a/rts/sm/NonMoving.c b/rts/sm/NonMoving.c
new file mode 100644
index 0000000000..50cf784aab
--- /dev/null
+++ b/rts/sm/NonMoving.c
@@ -0,0 +1,1390 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2018
+ *
+ * Non-moving garbage collector and allocator
+ *
+ * ---------------------------------------------------------------------------*/
+
+#include "Rts.h"
+#include "RtsUtils.h"
+#include "Capability.h"
+#include "Printer.h"
+#include "Storage.h"
+// We call evacuate, which expects the thread-local gc_thread to be valid;
+// this is sometimes declared as a register variable, so it is necessary
+// to include the declaration so that the compiler doesn't clobber the register.
+#include "GCThread.h"
+#include "GCTDecl.h"
+#include "Schedule.h"
+
+#include "NonMoving.h"
+#include "NonMovingMark.h"
+#include "NonMovingSweep.h"
+#include "NonMovingCensus.h"
+#include "StablePtr.h" // markStablePtrTable
+#include "Schedule.h" // markScheduler
+#include "Weak.h" // dead_weak_ptr_list
+
+struct NonmovingHeap nonmovingHeap;
+
+uint8_t nonmovingMarkEpoch = 1;
+
+static void nonmovingBumpEpoch(void) {
+ nonmovingMarkEpoch = nonmovingMarkEpoch == 1 ? 2 : 1;
+}
+
+#if defined(THREADED_RTS)
+/*
+ * This mutex ensures that only one non-moving collection is active at a time.
+ */
+Mutex nonmoving_collection_mutex;
+
+OSThreadId mark_thread;
+bool concurrent_coll_running = false;
+Condition concurrent_coll_finished;
+Mutex concurrent_coll_finished_lock;
+#endif
+
+/*
+ * Note [Non-moving garbage collector]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * The sources rts/NonMoving*.c implement GHC's non-moving garbage collector
+ * for the oldest generation. In contrast to the throughput-oriented moving
+ * collector, the non-moving collector is designed to achieve low GC latencies
+ * on large heaps. It achieves low latency by way of a concurrent
+ * mark-and-sweep collection strategy on a specially-designed heap structure.
+ * While the design is described in detail in the design document found in
+ * docs/storage/nonmoving-gc, we briefly summarize the structure here.
+ *
+ *
+ * === Heap Structure ===
+ *
+ * The nonmoving heap (embodied by struct NonmovingHeap) consists of a family
+ * of allocators, each serving a range of allocation sizes. Each allocator
+ * consists of a set of *segments*, each of which contains fixed-size *blocks*
+ * (not to be confused with "blocks" provided by GHC's block allocator; this is
+ * admittedly an unfortunate overlap in terminology). These blocks are the
+ * backing store for the allocator. In addition to blocks, the segment also
+ * contains some header information (see struct NonmovingSegment in
+ * NonMoving.h). This header contains a *bitmap* encoding one byte per block
+ * (used by the collector to record liveness), as well as the index of the next
+ * unallocated block (and a *snapshot* of this field which will be described in
+ * the next section).
+ *
+ * Each allocator maintains three sets of segments:
+ *
+ * - A *current* segment for each capability; this is the segment which that
+ * capability will allocate into.
+ *
+ * - A pool of *active* segments, each of which contains at least one
+ * unallocated block. The allocator will take a segment from this pool when
+ * it fills its *current* segment.
+ *
+ * - A set of *filled* segments, which contain no unallocated blocks and will
+ * be collected during the next major GC cycle
+ *
+ * Storage for segments is allocated using the block allocator using an aligned
+ * group of NONMOVING_SEGMENT_BLOCKS blocks. This makes the task of locating
+ * the segment header for a closure a simple matter of bit-masking (as
+ * implemented by nonmovingGetSegment).
+ *
+ * In addition, to relieve pressure on the block allocator we keep a small pool
+ * of free segments around (nonmovingHeap.free) which can be pushed/popped
+ * to/from in a lock-free manner.
+ *
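
As an illustrative sketch (not part of the patch), the bit-masking mentioned above
for nonmovingGetSegment amounts to rounding an interior pointer down to the segment
boundary; ExSegment, EX_SEGMENT_SIZE and segment_of_ptr are hypothetical names, and
the 32KB segment size is an assumption used only for illustration.

    #include <stdint.h>

    #define EX_SEGMENT_SIZE (1UL << 15)   /* assumed stand-in for NONMOVING_SEGMENT_SIZE */

    struct ExSegment;                     /* stands in for struct NonmovingSegment */

    static inline struct ExSegment *segment_of_ptr(void *p)
    {
        /* Round the address down to the segment boundary; the segment header
         * lives at the start of the aligned block group. */
        return (struct ExSegment *) ((uintptr_t) p & ~(EX_SEGMENT_SIZE - 1));
    }
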
+ *
+ * === Allocation ===
+ *
+ * The allocator (as implemented by nonmovingAllocate) starts by identifying
+ * which allocator the request should be made against. It then allocates into
+ * its local current segment and bumps the next_free pointer to point to the
+ * next unallocated block (as indicated by the bitmap). If it finds the current
+ * segment is now full it moves it to the filled list and looks for a new
+ * segment to make current from a few sources:
+ *
+ * 1. the allocator's active list (see pop_active_segment)
+ * 2. the nonmoving heap's free block pool (see nonmovingPopFreeSegment)
+ * 3. allocate a new segment from the block allocator (see
+ * nonmovingAllocSegment)
+ *
+ * Note that allocation does *not* involve modifying the bitmap. The bitmap is
+ * only modified by the collector.
+ *
+ *
+ * === Snapshot invariant ===
+ *
+ * To safely collect in a concurrent setting, the collector relies on the
+ * notion of a *snapshot*. The snapshot is a hypothetical frozen state of the
+ * heap topology taken at the beginning of the major collection cycle.
+ * With this definition we require the following property of the mark phase,
+ * which we call the *snapshot invariant*,
+ *
+ * All objects that were reachable at the time the snapshot was collected
+ * must have their mark bits set at the end of the mark phase.
+ *
+ * As the mutator might change the topology of the heap while we are marking,
+ * this property requires some cooperation from the mutator to maintain.
+ * Specifically, we rely on a write barrier as described in Note [Update
+ * remembered set].
+ *
+ * To determine which objects existed when the snapshot was taken we
+ * record a snapshot of each segment's next_free pointer at the beginning of
+ * collection.
+ *
+ *
+ * === Collection ===
+ *
+ * Collection happens in a few phases, some of which occur during a
+ * stop-the-world period (marked with [STW]) and others which can occur
+ * concurrently with mutation and minor collection (marked with [CONC]):
+ *
+ * 1. [STW] Preparatory GC: Here we do a standard minor collection of the
+ * younger generations (which may evacuate things to the nonmoving heap).
+ * References from younger generations into the nonmoving heap are recorded
+ * in the mark queue (see Note [Aging under the non-moving collector] in
+ * this file).
+ *
+ * 2. [STW] Snapshot update: Here we update the segment snapshot metadata
+ * (see nonmovingPrepareMark) and move the filled segments to
+ * nonmovingHeap.sweep_list, which is the set of segments which we will
+ * sweep this GC cycle.
+ *
+ * 3. [STW] Root collection: Here we walk over a variety of root sources
+ * and add them to the mark queue (see nonmovingCollect).
+ *
+ * 4. [CONC] Concurrent marking: Here we do the majority of marking concurrently
+ * with mutator execution (but with the write barrier enabled; see
+ * Note [Update remembered set]).
+ *
+ * 5. [STW] Final sync: Here we interrupt the mutators, ask them to
+ * flush their final update remembered sets, and mark any new references
+ * we find.
+ *
+ * 6. [CONC] Sweep: Here we walk over the nonmoving segments on sweep_list
+ * and place them back on either the active, current, or filled list,
+ * depending upon how much live data they contain.
+ *
+ *
+ * === Marking ===
+ *
+ * Ignoring large and static objects, marking a closure is fairly
+ * straightforward (implemented in NonMovingMark.c:mark_closure):
+ *
+ * 1. Check whether the closure is in the non-moving generation; if not then
+ * we ignore it.
+ * 2. Find the segment containing the closure's block.
+ * 3. Check whether the closure's block is above $seg->next_free_snap; if so
+ * then the block was not allocated when we took the snapshot and therefore
+ * we don't need to mark it.
+ * 4. Check whether the block's bitmap byte is equal to nonmovingMarkEpoch. If
+ * so then we can stop as we have already marked it.
+ * 5. Push the closure's pointers to the mark queue.
+ * 6. Set the block's bitmap byte to nonmovingMarkEpoch.
+ *
+ * Note that the ordering of (5) and (6) is rather important, as described in
+ * Note [StgStack dirtiness flags and concurrent marking].
+ *
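
As an illustrative sketch (not part of the patch), steps 3 and 4 above reduce to the
following test; ex_needs_mark and its parameters are hypothetical, and the real logic
lives in NonMovingMark.c:mark_closure and the bitmap accessors in NonMoving.h.

    #include <stdbool.h>
    #include <stdint.h>

    /* Should the block at index blk_idx of a segment be marked this cycle? */
    static bool ex_needs_mark(const uint8_t *bitmap, unsigned int blk_idx,
                              unsigned int next_free_snap, uint8_t mark_epoch)
    {
        if (blk_idx >= next_free_snap)
            return false;   /* allocated after the snapshot: skip (step 3) */
        if (bitmap[blk_idx] == mark_epoch)
            return false;   /* already marked this cycle (step 4) */
        return true;        /* push its pointers, then set bitmap[blk_idx] = mark_epoch */
    }
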
+ *
+ * === Other references ===
+ *
+ * Apart from the design document in docs/storage/nonmoving-gc and the Ueno
+ * 2016 paper (TODO citation) from which it drew inspiration, there are a
+ * variety of other relevant Notes scattered throughout the tree:
+ *
+ * - Note [Concurrent non-moving collection] (NonMoving.c) describes
+ * concurrency control of the nonmoving collector
+ *
+ * - Note [Live data accounting in nonmoving collector] (NonMoving.c)
+ * describes how we track the quantity of live data in the nonmoving
+ * generation.
+ *
+ * - Note [Aging under the non-moving collector] (NonMoving.c) describes how
+ * we accommodate aging.
+ *
+ * - Note [Large objects in the non-moving collector] (NonMovingMark.c)
+ * describes how we track large objects.
+ *
+ * - Note [Update remembered set] (NonMovingMark.c) describes the function and
+ * implementation of the update remembered set used to realize the concurrent
+ * write barrier.
+ *
+ * - Note [Concurrent read barrier on deRefWeak#] (NonMovingMark.c) describes
+ * the read barrier on Weak# objects.
+ *
+ * - Note [Unintentional marking in resurrectThreads] (NonMovingMark.c) describes
+ * a tricky interaction between the update remembered set flush and weak
+ * finalization.
+ *
+ * - Note [Origin references in the nonmoving collector] (NonMovingMark.h)
+ * describes how we implement indirection short-cutting and the selector
+ * optimisation.
+ *
+ * - Note [StgStack dirtiness flags and concurrent marking] (TSO.h) describes
+ * the protocol for concurrent marking of stacks.
+ *
+ * - Note [Static objects under the nonmoving collector] (Storage.c) describes
+ * treatment of static objects.
+ *
+ *
+ * Note [Concurrent non-moving collection]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * Concurrency-control of non-moving garbage collection is a bit tricky. There
+ * are a few things to keep in mind:
+ *
+ * - Only one non-moving collection may be active at a time. This is enforced by the
+ * concurrent_coll_running flag, which is set when a collection is on-going. If
+ * we attempt to initiate a new collection while this is set we wait on the
+ * concurrent_coll_finished condition variable, which signals when the
+ * active collection finishes.
+ *
+ * - In between the mark and sweep phases the non-moving collector must synchronize
+ * with mutator threads to collect and mark their final update remembered
+ * sets. This is accomplished using
+ * stopAllCapabilitiesWith(SYNC_FLUSH_UPD_REM_SET). Capabilities are held
+ * until the final mark has concluded.
+ *
+ * Note that the possibility of concurrent minor and non-moving collections
+ * requires that we handle static objects a bit specially. See
+ * Note [Static objects under the nonmoving collector] in Storage.c
+ * for details.
+ *
+ *
+ * Note [Aging under the non-moving collector]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * The initial design of the non-moving collector mandated that all live data
+ * be evacuated to the non-moving heap prior to a major collection. This
+ * simplified certain bits of implementation and eased reasoning. However, it
+ * was (unsurprisingly) also found to result in significant amounts of
+ * unnecessary copying.
+ *
+ * Consequently, we now allow aging. Aging allows the preparatory GC leading up
+ * to a major collection to evacuate some objects into the young generation.
+ * However, this introduces the following tricky case that might arise after
+ * we have finished the preparatory GC:
+ *
+ * moving heap ┆ non-moving heap
+ * ───────────────┆──────────────────
+ * ┆
+ * B ←────────────── A ←─────────────── root
+ * │ ┆ ↖─────────────── gen1 mut_list
+ * ╰───────────────→ C
+ * ┆
+ *
+ * In this case C is clearly live, but the non-moving collector can only see
+ * this by walking through B, which lives in the moving heap. However, doing so
+ * would require that we synchronize with the mutator/minor GC to ensure that it
+ * isn't in the middle of moving B. What to do?
+ *
+ * The solution we use here is to teach the preparatory moving collector to
+ * "evacuate" objects it encounters in the non-moving heap by adding them to
+ * the mark queue. This is implemented by pushing the object to the update
+ * remembered set of the capability held by the evacuating gc_thread
+ * (implemented by markQueuePushClosureGC)
+ *
+ * Consequently collection of the case above would proceed as follows:
+ *
+ * 1. Initial state:
+ * * A lives in the non-moving heap and is reachable from the root set
+ * * A is on the oldest generation's mut_list, since it contains a pointer
+ * to B, which lives in a younger generation
+ * * B lives in the moving collector's from space
+ * * C lives in the non-moving heap
+ *
+ * 2. Preparatory GC: Scavenging mut_lists:
+ *
+ * The mut_list of the oldest generation is scavenged, resulting in B being
+ * evacuated (aged) into the moving collector's to-space.
+ *
+ * 3. Preparatory GC: Scavenge B
+ *
+ * B (now in to-space) is scavenged, resulting in evacuation of C.
+ * evacuate(C) pushes a reference to C to the mark queue.
+ *
+ * 4. Non-moving GC: C is marked
+ *
+ * The non-moving collector will come to C in the mark queue and mark it.
+ *
+ *
+ * Note [Deadlock detection under nonmoving collector]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * In GHC the garbage collector is responsible for identifying deadlocked
+ * programs. Providing for this responsibility is slightly tricky in the
+ * non-moving collector due to the existence of aging. In particular, the
+ * non-moving collector cannot traverse objects living in a young generation
+ * but reachable from the non-moving generation, as described in Note [Aging
+ * under the non-moving collector].
+ *
+ * However, this can pose trouble for deadlock detection since it means that we
+ * may conservatively mark dead closures as live. Consider this case:
+ *
+ * moving heap ┆ non-moving heap
+ * ───────────────┆──────────────────
+ * ┆
+ * MVAR_QUEUE ←───── TSO ←───────────── gen1 mut_list
+ * ↑ │ ╰────────↗ │
+ * │ │ ┆ │
+ * │ │ ┆ ↓
+ * │ ╰──────────→ MVAR
+ * ╰─────────────────╯
+ * ┆
+ *
+ * In this case we have a TSO blocked on a dead MVar. Because the MVAR_TSO_QUEUE on
+ * which it is blocked lives in the moving heap, the TSO is necessarily on the
+ * oldest generation's mut_list. As in Note [Aging under the non-moving
+ * collector], the MVAR_TSO_QUEUE will be evacuated. If MVAR_TSO_QUEUE is aged
+ * (e.g. evacuated to the young generation) then the MVAR will be added to the
+ * mark queue. Consequently, we will falsely conclude that the MVAR is still
+ * alive and fail to spot the deadlock.
+ *
+ * To avoid this sort of situation we disable aging when we are starting a
+ * major GC specifically for deadlock detection (as done by
+ * scheduleDetectDeadlock). This condition is recorded by the
+ * deadlock_detect_gc global variable declared in GC.h. Setting this has a few
+ * effects on the preparatory GC:
+ *
+ * - Evac.c:alloc_for_copy forces evacuation to the non-moving generation.
+ *
+ * - The evacuation logic usually responsible for pushing objects living in
+ * the non-moving heap to the mark queue is disabled. This is safe because
+ * we know that all live objects will be in the non-moving heap by the end
+ * of the preparatory moving collection.
+ *
+ *
+ * Note [Live data accounting in nonmoving collector]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * The nonmoving collector uses an approximate heuristic for reporting live
+ * data quantity. Specifically, during mark we record how much live data we
+ * find in nonmoving_live_words. At the end of mark we declare this amount to
+ * be how much live data we have in the nonmoving heap (by setting
+ * oldest_gen->live_estimate).
+ *
+ * In addition, we update oldest_gen->live_estimate every time we fill a
+ * segment. This, as well, is quite approximate: we assume that all blocks
+ * above next_free_snap are newly-allocated. In principle we could refer to the
+ * bitmap to count how many blocks we actually allocated but this too would be
+ * approximate due to concurrent collection and ultimately seems more costly
+ * than the problem demands.
+ *
+ */
+
+memcount nonmoving_live_words = 0;
+
+#if defined(THREADED_RTS)
+static void* nonmovingConcurrentMark(void *mark_queue);
+#endif
+static void nonmovingClearBitmap(struct NonmovingSegment *seg);
+static void nonmovingMark_(MarkQueue *mark_queue, StgWeak **dead_weaks, StgTSO **resurrected_threads);
+
+static void nonmovingInitSegment(struct NonmovingSegment *seg, uint8_t log_block_size)
+{
+ bdescr *bd = Bdescr((P_) seg);
+ seg->link = NULL;
+ seg->todo_link = NULL;
+ seg->next_free = 0;
+ nonmovingClearBitmap(seg);
+ bd->nonmoving_segment.log_block_size = log_block_size;
+ bd->nonmoving_segment.next_free_snap = 0;
+ bd->u.scan = nonmovingSegmentGetBlock(seg, 0);
+}
+
+// Add a segment to the free list.
+void nonmovingPushFreeSegment(struct NonmovingSegment *seg)
+{
+ // See Note [Live data accounting in nonmoving collector].
+ if (nonmovingHeap.n_free > NONMOVING_MAX_FREE) {
+ bdescr *bd = Bdescr((StgPtr) seg);
+ ACQUIRE_SM_LOCK;
+ ASSERT(oldest_gen->n_blocks >= bd->blocks);
+ ASSERT(oldest_gen->n_words >= BLOCK_SIZE_W * bd->blocks);
+ oldest_gen->n_blocks -= bd->blocks;
+ oldest_gen->n_words -= BLOCK_SIZE_W * bd->blocks;
+ freeGroup(bd);
+ RELEASE_SM_LOCK;
+ return;
+ }
+
+ while (true) {
+ struct NonmovingSegment *old = nonmovingHeap.free;
+ seg->link = old;
+ if (cas((StgVolatilePtr) &nonmovingHeap.free, (StgWord) old, (StgWord) seg) == (StgWord) old)
+ break;
+ }
+ __sync_add_and_fetch(&nonmovingHeap.n_free, 1);
+}
+
+static struct NonmovingSegment *nonmovingPopFreeSegment(void)
+{
+ while (true) {
+ struct NonmovingSegment *seg = nonmovingHeap.free;
+ if (seg == NULL) {
+ return NULL;
+ }
+ if (cas((StgVolatilePtr) &nonmovingHeap.free,
+ (StgWord) seg,
+ (StgWord) seg->link) == (StgWord) seg) {
+ __sync_sub_and_fetch(&nonmovingHeap.n_free, 1);
+ return seg;
+ }
+ }
+}
+
+unsigned int nonmovingBlockCountFromSize(uint8_t log_block_size)
+{
+ // We compute the overwhelmingly common size cases directly to avoid a very
+ // expensive integer division.
+ switch (log_block_size) {
+ case 3: return nonmovingBlockCount(3);
+ case 4: return nonmovingBlockCount(4);
+ case 5: return nonmovingBlockCount(5);
+ case 6: return nonmovingBlockCount(6);
+ case 7: return nonmovingBlockCount(7);
+ default: return nonmovingBlockCount(log_block_size);
+ }
+}
+
+/*
+ * Request a fresh segment from the free segment list or allocate one of the
+ * given node.
+ *
+ * Caller must hold SM_MUTEX (although we take the gc_alloc_block_sync spinlock
+ * under the assumption that we are in a GC context).
+ */
+static struct NonmovingSegment *nonmovingAllocSegment(uint32_t node)
+{
+ // First try taking something off of the free list
+ struct NonmovingSegment *ret;
+ ret = nonmovingPopFreeSegment();
+
+ // Nothing in the free list, allocate a new segment...
+ if (ret == NULL) {
+ // Take gc spinlock: another thread may be scavenging a moving
+ // generation and call `todo_block_full`
+ ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync);
+ bdescr *bd = allocAlignedGroupOnNode(node, NONMOVING_SEGMENT_BLOCKS);
+ // See Note [Live data accounting in nonmoving collector].
+ oldest_gen->n_blocks += bd->blocks;
+ oldest_gen->n_words += BLOCK_SIZE_W * bd->blocks;
+ RELEASE_SPIN_LOCK(&gc_alloc_block_sync);
+
+ for (StgWord32 i = 0; i < bd->blocks; ++i) {
+ initBdescr(&bd[i], oldest_gen, oldest_gen);
+ bd[i].flags = BF_NONMOVING;
+ }
+ ret = (struct NonmovingSegment *)bd->start;
+ }
+
+ // Check alignment
+ ASSERT(((uintptr_t)ret % NONMOVING_SEGMENT_SIZE) == 0);
+ return ret;
+}
+
+static inline unsigned long log2_floor(unsigned long x)
+{
+ return sizeof(unsigned long)*8 - 1 - __builtin_clzl(x);
+}
+
+static inline unsigned long log2_ceil(unsigned long x)
+{
+ unsigned long log = log2_floor(x);
+ return (x - (1 << log)) ? log + 1 : log;
+}
+
+// Advance a segment's next_free pointer. Returns true if the segment is full.
+static bool advance_next_free(struct NonmovingSegment *seg, const unsigned int blk_count)
+{
+ const uint8_t *bitmap = seg->bitmap;
+ ASSERT(blk_count == nonmovingSegmentBlockCount(seg));
+#if defined(NAIVE_ADVANCE_FREE)
+ // reference implementation
+ for (unsigned int i = seg->next_free+1; i < blk_count; i++) {
+ if (!bitmap[i]) {
+ seg->next_free = i;
+ return false;
+ }
+ }
+ seg->next_free = blk_count;
+ return true;
+#else
+ const uint8_t *c = memchr(&bitmap[seg->next_free+1], 0, blk_count - seg->next_free - 1);
+ if (c == NULL) {
+ seg->next_free = blk_count;
+ return true;
+ } else {
+ seg->next_free = c - bitmap;
+ return false;
+ }
+#endif
+}
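
As a worked example (not part of the patch), consider the memchr-based scan above on
an assumed 8-block segment with bitmap {1,0,0,1,1,0,1,1}, where non-zero entries are
blocks marked live by the previous mark phase and zero entries are free:

    After allocating into block 1 (next_free = 1): memchr(&bitmap[2], 0, 6)
    finds index 2, so next_free becomes 2 and the function returns false.

    After allocating into block 5 (next_free = 5): memchr(&bitmap[6], 0, 2)
    finds nothing, so next_free becomes blk_count (8) and the function returns
    true: the segment is full and will be pushed to the filled list.
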
+
+static struct NonmovingSegment *pop_active_segment(struct NonmovingAllocator *alloca)
+{
+ while (true) {
+ struct NonmovingSegment *seg = alloca->active;
+ if (seg == NULL) {
+ return NULL;
+ }
+ if (cas((StgVolatilePtr) &alloca->active,
+ (StgWord) seg,
+ (StgWord) seg->link) == (StgWord) seg) {
+ return seg;
+ }
+ }
+}
+
+/* Allocate a block in the nonmoving heap. Caller must hold SM_MUTEX. sz is in words */
+GNUC_ATTR_HOT
+void *nonmovingAllocate(Capability *cap, StgWord sz)
+{
+ unsigned int log_block_size = log2_ceil(sz * sizeof(StgWord));
+ unsigned int block_count = nonmovingBlockCountFromSize(log_block_size);
+
+ // The max we ever allocate is 3276 bytes (anything larger is a large
+ // object and not moved) which is covered by allocator 9.
+ ASSERT(log_block_size < NONMOVING_ALLOCA0 + NONMOVING_ALLOCA_CNT);
+
+ struct NonmovingAllocator *alloca = nonmovingHeap.allocators[log_block_size - NONMOVING_ALLOCA0];
+
+ // Allocate into current segment
+ struct NonmovingSegment *current = alloca->current[cap->no];
+ ASSERT(current); // current is never NULL
+ void *ret = nonmovingSegmentGetBlock_(current, log_block_size, current->next_free);
+ ASSERT(GET_CLOSURE_TAG(ret) == 0); // check alignment
+
+ // Advance the current segment's next_free or allocate a new segment if full
+ bool full = advance_next_free(current, block_count);
+ if (full) {
+        // Current segment is full: update the live data estimate, link the
+        // segment onto the filled list, then take an active segment if one
+        // exists, otherwise allocate a new segment.
+
+ // Update live data estimate.
+ // See Note [Live data accounting in nonmoving collector].
+ unsigned int new_blocks = block_count - nonmovingSegmentInfo(current)->next_free_snap;
+ unsigned int block_size = 1 << log_block_size;
+ atomic_inc(&oldest_gen->live_estimate, new_blocks * block_size / sizeof(W_));
+
+ // push the current segment to the filled list
+ nonmovingPushFilledSegment(current);
+
+ // first look for a new segment in the active list
+ struct NonmovingSegment *new_current = pop_active_segment(alloca);
+
+ // there are no active segments, allocate new segment
+ if (new_current == NULL) {
+ new_current = nonmovingAllocSegment(cap->node);
+ nonmovingInitSegment(new_current, log_block_size);
+ }
+
+ // make it current
+ new_current->link = NULL;
+ alloca->current[cap->no] = new_current;
+ }
+
+ return ret;
+}
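+
+// Worked example (illustrative): on a 64-bit platform a 5-word closure is
+// sz * sizeof(StgWord) = 40 bytes, log2_ceil(40) == 6, so it is served from
+// allocator index 6 - NONMOVING_ALLOCA0 = 3 (the 64-byte block class). The
+// common path merely hands out the block at current->next_free and advances
+// next_free; the segment-replacement path above runs only when a segment fills.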
+
+/* Allocate a nonmovingAllocator */
+static struct NonmovingAllocator *alloc_nonmoving_allocator(uint32_t n_caps)
+{
+ size_t allocator_sz =
+ sizeof(struct NonmovingAllocator) +
+ sizeof(void*) * n_caps; // current segment pointer for each capability
+ struct NonmovingAllocator *alloc =
+ stgMallocBytes(allocator_sz, "nonmovingInit");
+ memset(alloc, 0, allocator_sz);
+ return alloc;
+}
+
+static void free_nonmoving_allocator(struct NonmovingAllocator *alloc)
+{
+ stgFree(alloc);
+}
+
+void nonmovingInit(void)
+{
+ if (! RtsFlags.GcFlags.useNonmoving) return;
+#if defined(THREADED_RTS)
+ initMutex(&nonmoving_collection_mutex);
+ initCondition(&concurrent_coll_finished);
+ initMutex(&concurrent_coll_finished_lock);
+#endif
+ for (unsigned int i = 0; i < NONMOVING_ALLOCA_CNT; i++) {
+ nonmovingHeap.allocators[i] = alloc_nonmoving_allocator(n_capabilities);
+ }
+ nonmovingMarkInitUpdRemSet();
+}
+
+// Stop any nonmoving collection in preparation for RTS shutdown.
+void nonmovingStop(void)
+{
+ if (! RtsFlags.GcFlags.useNonmoving) return;
+#if defined(THREADED_RTS)
+ if (mark_thread) {
+ debugTrace(DEBUG_nonmoving_gc,
+ "waiting for nonmoving collector thread to terminate");
+ ACQUIRE_LOCK(&concurrent_coll_finished_lock);
+ waitCondition(&concurrent_coll_finished, &concurrent_coll_finished_lock);
+ }
+#endif
+}
+
+void nonmovingExit(void)
+{
+ if (! RtsFlags.GcFlags.useNonmoving) return;
+
+ // First make sure collector is stopped before we tear things down.
+ nonmovingStop();
+
+#if defined(THREADED_RTS)
+ closeMutex(&concurrent_coll_finished_lock);
+ closeCondition(&concurrent_coll_finished);
+ closeMutex(&nonmoving_collection_mutex);
+#endif
+
+ for (unsigned int i = 0; i < NONMOVING_ALLOCA_CNT; i++) {
+ free_nonmoving_allocator(nonmovingHeap.allocators[i]);
+ }
+}
+
+/*
+ * Assumes that no garbage collector or mutator threads are running to safely
+ * resize the nonmoving_allocators.
+ *
+ * Must hold sm_mutex.
+ */
+void nonmovingAddCapabilities(uint32_t new_n_caps)
+{
+ unsigned int old_n_caps = nonmovingHeap.n_caps;
+ struct NonmovingAllocator **allocs = nonmovingHeap.allocators;
+
+ for (unsigned int i = 0; i < NONMOVING_ALLOCA_CNT; i++) {
+ struct NonmovingAllocator *old = allocs[i];
+ allocs[i] = alloc_nonmoving_allocator(new_n_caps);
+
+ // Copy the old state
+ allocs[i]->filled = old->filled;
+ allocs[i]->active = old->active;
+ for (unsigned int j = 0; j < old_n_caps; j++) {
+ allocs[i]->current[j] = old->current[j];
+ }
+ stgFree(old);
+
+ // Initialize current segments for the new capabilities
+ for (unsigned int j = old_n_caps; j < new_n_caps; j++) {
+ allocs[i]->current[j] = nonmovingAllocSegment(capabilities[j]->node);
+ nonmovingInitSegment(allocs[i]->current[j], NONMOVING_ALLOCA0 + i);
+ allocs[i]->current[j]->link = NULL;
+ }
+ }
+ nonmovingHeap.n_caps = new_n_caps;
+}
+
+static inline void nonmovingClearBitmap(struct NonmovingSegment *seg)
+{
+ unsigned int n = nonmovingSegmentBlockCount(seg);
+ memset(seg->bitmap, 0, n);
+}
+
+/* Prepare the heap bitmaps and snapshot metadata for a mark */
+static void nonmovingPrepareMark(void)
+{
+ // See Note [Static objects under the nonmoving collector].
+ prev_static_flag = static_flag;
+ static_flag =
+ static_flag == STATIC_FLAG_A ? STATIC_FLAG_B : STATIC_FLAG_A;
+
+ // Should have been cleared by the last sweep
+ ASSERT(nonmovingHeap.sweep_list == NULL);
+
+ nonmovingBumpEpoch();
+ for (int alloca_idx = 0; alloca_idx < NONMOVING_ALLOCA_CNT; ++alloca_idx) {
+ struct NonmovingAllocator *alloca = nonmovingHeap.allocators[alloca_idx];
+
+ // Update current segments' snapshot pointers
+ for (uint32_t cap_n = 0; cap_n < n_capabilities; ++cap_n) {
+ struct NonmovingSegment *seg = alloca->current[cap_n];
+ nonmovingSegmentInfo(seg)->next_free_snap = seg->next_free;
+ }
+
+ // Update filled segments' snapshot pointers and move to sweep_list
+ uint32_t n_filled = 0;
+ struct NonmovingSegment *const filled = alloca->filled;
+ alloca->filled = NULL;
+ if (filled) {
+ struct NonmovingSegment *seg = filled;
+ while (true) {
+ n_filled++;
+ prefetchForRead(seg->link);
+ // Clear bitmap
+ prefetchForWrite(seg->link->bitmap);
+ nonmovingClearBitmap(seg);
+ // Set snapshot
+ nonmovingSegmentInfo(seg)->next_free_snap = seg->next_free;
+ if (seg->link)
+ seg = seg->link;
+ else
+ break;
+ }
+ // add filled segments to sweep_list
+ seg->link = nonmovingHeap.sweep_list;
+ nonmovingHeap.sweep_list = filled;
+ }
+
+ // N.B. It's not necessary to update snapshot pointers of active segments;
+ // they were set after they were swept and haven't seen any allocation
+ // since.
+ }
+
+ // Clear large object bits of existing large objects
+ for (bdescr *bd = nonmoving_large_objects; bd; bd = bd->link) {
+ bd->flags &= ~BF_MARKED;
+ }
+
+ // Add newly promoted large objects and clear mark bits
+ bdescr *next;
+ ASSERT(oldest_gen->scavenged_large_objects == NULL);
+ for (bdescr *bd = oldest_gen->large_objects; bd; bd = next) {
+ next = bd->link;
+ bd->flags |= BF_NONMOVING_SWEEPING;
+ bd->flags &= ~BF_MARKED;
+ dbl_link_onto(bd, &nonmoving_large_objects);
+ }
+ n_nonmoving_large_blocks += oldest_gen->n_large_blocks;
+ oldest_gen->large_objects = NULL;
+ oldest_gen->n_large_words = 0;
+ oldest_gen->n_large_blocks = 0;
+ nonmoving_live_words = 0;
+
+ // Clear compact object mark bits
+ for (bdescr *bd = nonmoving_compact_objects; bd; bd = bd->link) {
+ bd->flags &= ~BF_MARKED;
+ }
+
+ // Move new compact objects from younger generations to nonmoving_compact_objects
+ for (bdescr *bd = oldest_gen->compact_objects; bd; bd = next) {
+ next = bd->link;
+ bd->flags |= BF_NONMOVING_SWEEPING;
+ bd->flags &= ~BF_MARKED;
+ dbl_link_onto(bd, &nonmoving_compact_objects);
+ }
+ n_nonmoving_compact_blocks += oldest_gen->n_compact_blocks;
+ oldest_gen->n_compact_blocks = 0;
+ oldest_gen->compact_objects = NULL;
+ // TODO (osa): what about "in import" stuff??
+
+#if defined(DEBUG)
+ debug_caf_list_snapshot = debug_caf_list;
+ debug_caf_list = (StgIndStatic*)END_OF_CAF_LIST;
+#endif
+}
+
+// Mark weak pointers in the non-moving heap. They'll either end up in
+// dead_weak_ptr_list or stay in weak_ptr_list. Either way they need to be kept
+// during sweep. See `MarkWeak.c:markWeakPtrList` for the moving heap variant
+// of this.
+static void nonmovingMarkWeakPtrList(MarkQueue *mark_queue, StgWeak *dead_weak_ptr_list)
+{
+ for (StgWeak *w = oldest_gen->weak_ptr_list; w; w = w->link) {
+ markQueuePushClosure_(mark_queue, (StgClosure*)w);
+ // Do not mark finalizers and values here, those fields will be marked
+ // in `nonmovingMarkDeadWeaks` (for dead weaks) or
+ // `nonmovingTidyWeaks` (for live weaks)
+ }
+
+ // We need to mark dead_weak_ptr_list too. This is subtle:
+ //
+ // - By the beginning of this GC we evacuated all weaks to the non-moving
+ // heap (in `markWeakPtrList`)
+ //
+ // - During the scavenging of the moving heap we discovered that some of
+ // those weaks are dead and moved them to `dead_weak_ptr_list`. Note that
+ // because of the fact above _all weaks_ are in the non-moving heap at
+ // this point.
+ //
+ // - So, to be able to traverse `dead_weak_ptr_list` and run finalizers we
+ // need to mark it.
+ for (StgWeak *w = dead_weak_ptr_list; w; w = w->link) {
+ markQueuePushClosure_(mark_queue, (StgClosure*)w);
+ nonmovingMarkDeadWeak(mark_queue, w);
+ }
+}
+
+void nonmovingCollect(StgWeak **dead_weaks, StgTSO **resurrected_threads)
+{
+#if defined(THREADED_RTS)
+ // We can't start a new collection until the old one has finished
+    // We also don't run during the final GC
+ if (concurrent_coll_running || sched_state > SCHED_RUNNING) {
+ return;
+ }
+#endif
+
+ trace(TRACE_nonmoving_gc, "Starting nonmoving GC preparation");
+ resizeGenerations();
+
+ nonmovingPrepareMark();
+
+ // N.B. These should have been cleared at the end of the last sweep.
+ ASSERT(nonmoving_marked_large_objects == NULL);
+ ASSERT(n_nonmoving_marked_large_blocks == 0);
+ ASSERT(nonmoving_marked_compact_objects == NULL);
+ ASSERT(n_nonmoving_marked_compact_blocks == 0);
+
+ MarkQueue *mark_queue = stgMallocBytes(sizeof(MarkQueue), "mark queue");
+ initMarkQueue(mark_queue);
+ current_mark_queue = mark_queue;
+
+ // Mark roots
+ trace(TRACE_nonmoving_gc, "Marking roots for nonmoving GC");
+ markCAFs((evac_fn)markQueueAddRoot, mark_queue);
+ for (unsigned int n = 0; n < n_capabilities; ++n) {
+ markCapability((evac_fn)markQueueAddRoot, mark_queue,
+ capabilities[n], true/*don't mark sparks*/);
+ }
+ markScheduler((evac_fn)markQueueAddRoot, mark_queue);
+ nonmovingMarkWeakPtrList(mark_queue, *dead_weaks);
+ markStablePtrTable((evac_fn)markQueueAddRoot, mark_queue);
+
+ // Mark threads resurrected during moving heap scavenging
+ for (StgTSO *tso = *resurrected_threads; tso != END_TSO_QUEUE; tso = tso->global_link) {
+ markQueuePushClosure_(mark_queue, (StgClosure*)tso);
+ }
+ trace(TRACE_nonmoving_gc, "Finished marking roots for nonmoving GC");
+
+ // Roots marked, mark threads and weak pointers
+
+ // At this point all threads are moved to threads list (from old_threads)
+ // and all weaks are moved to weak_ptr_list (from old_weak_ptr_list) by
+ // the previous scavenge step, so we need to move them to "old" lists
+ // again.
+
+ // Fine to override old_threads because any live or resurrected threads are
+ // moved to threads or resurrected_threads lists.
+ ASSERT(oldest_gen->old_threads == END_TSO_QUEUE);
+ ASSERT(nonmoving_old_threads == END_TSO_QUEUE);
+ nonmoving_old_threads = oldest_gen->threads;
+ oldest_gen->threads = END_TSO_QUEUE;
+
+ // Make sure we don't lose any weak ptrs here. Weaks in old_weak_ptr_list
+ // will either be moved to `dead_weaks` (if dead) or `weak_ptr_list` (if
+ // alive).
+ ASSERT(oldest_gen->old_weak_ptr_list == NULL);
+ ASSERT(nonmoving_old_weak_ptr_list == NULL);
+ nonmoving_old_weak_ptr_list = oldest_gen->weak_ptr_list;
+ oldest_gen->weak_ptr_list = NULL;
+ trace(TRACE_nonmoving_gc, "Finished nonmoving GC preparation");
+
+ // We are now safe to start concurrent marking
+
+ // Note that in concurrent mark we can't use dead_weaks and
+ // resurrected_threads from the preparation to add new weaks and threads as
+ // that would cause races between minor collection and mark. So we only pass
+    // those lists to the mark function in the sequential case. In the
+    // concurrent case we allocate fresh lists.
+
+#if defined(THREADED_RTS)
+ // If we're interrupting or shutting down, do not let this capability go and
+ // run a STW collection. Reason: we won't be able to acquire this capability
+ // again for the sync if we let it go, because it'll immediately start doing
+    // a major GC, because that's what we do when exiting the scheduler (see
+ // exitScheduler()).
+ if (sched_state == SCHED_RUNNING) {
+ concurrent_coll_running = true;
+ nonmoving_write_barrier_enabled = true;
+ debugTrace(DEBUG_nonmoving_gc, "Starting concurrent mark thread");
+ createOSThread(&mark_thread, "non-moving mark thread",
+ nonmovingConcurrentMark, mark_queue);
+ } else {
+ nonmovingConcurrentMark(mark_queue);
+ }
+#else
+ // Use the weak and thread lists from the preparation for any new weaks and
+ // threads found to be dead in mark.
+ nonmovingMark_(mark_queue, dead_weaks, resurrected_threads);
+#endif
+}
+
+/* Propagate marks and tidy threads and weak pointers until no more weaks have been
+ * resuscitated
+ */
+static void nonmovingMarkThreadsWeaks(MarkQueue *mark_queue)
+{
+ while (true) {
+ // Propagate marks
+ nonmovingMark(mark_queue);
+
+ // Tidy threads and weaks
+ nonmovingTidyThreads();
+
+ if (! nonmovingTidyWeaks(mark_queue))
+ return;
+ }
+}
+
+#if defined(THREADED_RTS)
+static void* nonmovingConcurrentMark(void *data)
+{
+ MarkQueue *mark_queue = (MarkQueue*)data;
+ StgWeak *dead_weaks = NULL;
+ StgTSO *resurrected_threads = (StgTSO*)&stg_END_TSO_QUEUE_closure;
+ nonmovingMark_(mark_queue, &dead_weaks, &resurrected_threads);
+ return NULL;
+}
+
+// TODO: Not sure where to put this function.
+// Append w2 to the end of w1.
+static void appendWeakList( StgWeak **w1, StgWeak *w2 )
+{
+ while (*w1) {
+ w1 = &(*w1)->link;
+ }
+ *w1 = w2;
+}
+#endif
+
+static void nonmovingMark_(MarkQueue *mark_queue, StgWeak **dead_weaks, StgTSO **resurrected_threads)
+{
+ ACQUIRE_LOCK(&nonmoving_collection_mutex);
+ debugTrace(DEBUG_nonmoving_gc, "Starting mark...");
+
+ // Do concurrent marking; most of the heap will get marked here.
+ nonmovingMarkThreadsWeaks(mark_queue);
+
+#if defined(THREADED_RTS)
+ Task *task = newBoundTask();
+
+    // If at this point we've decided to exit, just return
+ if (sched_state > SCHED_RUNNING) {
+ // Note that we break our invariants here and leave segments in
+ // nonmovingHeap.sweep_list, don't free nonmoving_large_objects etc.
+ // However because we won't be running mark-sweep in the final GC this
+ // is OK.
+
+ // This is a RTS shutdown so we need to move our copy (snapshot) of
+ // weaks (nonmoving_old_weak_ptr_list and nonmoving_weak_ptr_list) to
+        // oldest_gen->weak_ptr_list so that we can run C finalizers in
+        // hs_exit_. Note that more weaks may have been added to
+        // oldest_gen->weak_ptr_list since we started the mark, so we need to
+        // append our list to its tail.
+ appendWeakList(&nonmoving_old_weak_ptr_list, nonmoving_weak_ptr_list);
+ appendWeakList(&oldest_gen->weak_ptr_list, nonmoving_old_weak_ptr_list);
+        // These lists won't be used again, so clearing them is not strictly
+        // necessary, but we do it anyway for hygiene.
+ nonmoving_old_weak_ptr_list = NULL;
+ nonmoving_weak_ptr_list = NULL;
+
+ goto finish;
+ }
+
+ // We're still running, request a sync
+ nonmovingBeginFlush(task);
+
+ bool all_caps_syncd;
+ do {
+ all_caps_syncd = nonmovingWaitForFlush();
+ nonmovingMarkThreadsWeaks(mark_queue);
+ } while (!all_caps_syncd);
+#endif
+
+ nonmovingResurrectThreads(mark_queue, resurrected_threads);
+
+ // No more resurrecting threads after this point
+
+ // Do last marking of weak pointers
+ while (true) {
+ // Propagate marks
+ nonmovingMark(mark_queue);
+
+ if (!nonmovingTidyWeaks(mark_queue))
+ break;
+ }
+
+ nonmovingMarkDeadWeaks(mark_queue, dead_weaks);
+
+ // Propagate marks
+ nonmovingMark(mark_queue);
+
+ // Now remove all dead objects from the mut_list to ensure that a younger
+ // generation collection doesn't attempt to look at them after we've swept.
+ nonmovingSweepMutLists();
+
+ debugTrace(DEBUG_nonmoving_gc,
+ "Done marking, resurrecting threads before releasing capabilities");
+
+
+ // Schedule finalizers and resurrect threads
+#if defined(THREADED_RTS)
+ // Just pick a random capability. Not sure if this is a good idea -- we use
+ // only one capability for all finalizers.
+ scheduleFinalizers(capabilities[0], *dead_weaks);
+ // Note that this mutates heap and causes running write barriers.
+ // See Note [Unintentional marking in resurrectThreads] in NonMovingMark.c
+ // for how we deal with this.
+ resurrectThreads(*resurrected_threads);
+#endif
+
+#if defined(DEBUG)
+ // Zap CAFs that we will sweep
+ nonmovingGcCafs();
+#endif
+
+ ASSERT(mark_queue->top->head == 0);
+ ASSERT(mark_queue->blocks->link == NULL);
+
+ // Update oldest_gen thread and weak lists
+    // Note that we need to append to these lists, as a minor GC running
+    // concurrently with our mark-sweep may have added entries to them.
+ {
+ StgTSO **threads = &oldest_gen->threads;
+ while (*threads != END_TSO_QUEUE) {
+ threads = &(*threads)->global_link;
+ }
+ *threads = nonmoving_threads;
+ nonmoving_threads = END_TSO_QUEUE;
+ nonmoving_old_threads = END_TSO_QUEUE;
+ }
+
+ {
+ StgWeak **weaks = &oldest_gen->weak_ptr_list;
+ while (*weaks) {
+ weaks = &(*weaks)->link;
+ }
+ *weaks = nonmoving_weak_ptr_list;
+ nonmoving_weak_ptr_list = NULL;
+ nonmoving_old_weak_ptr_list = NULL;
+ }
+
+ // Everything has been marked; allow the mutators to proceed
+#if defined(THREADED_RTS)
+ nonmoving_write_barrier_enabled = false;
+ nonmovingFinishFlush(task);
+#endif
+
+ current_mark_queue = NULL;
+ freeMarkQueue(mark_queue);
+ stgFree(mark_queue);
+
+ oldest_gen->live_estimate = nonmoving_live_words;
+ oldest_gen->n_old_blocks = 0;
+ resizeGenerations();
+
+ /****************************************************
+ * Sweep
+ ****************************************************/
+
+ traceConcSweepBegin();
+
+ // Because we can't mark large object blocks (no room for mark bit) we
+ // collect them in a map in mark_queue and we pass it here to sweep large
+ // objects
+ nonmovingSweepLargeObjects();
+ nonmovingSweepCompactObjects();
+ nonmovingSweepStableNameTable();
+
+ nonmovingSweep();
+ ASSERT(nonmovingHeap.sweep_list == NULL);
+ debugTrace(DEBUG_nonmoving_gc, "Finished sweeping.");
+ traceConcSweepEnd();
+#if defined(DEBUG)
+ if (RtsFlags.DebugFlags.nonmoving_gc)
+ nonmovingPrintAllocatorCensus();
+#endif
+
+ // TODO: Remainder of things done by GarbageCollect (update stats)
+
+#if defined(THREADED_RTS)
+finish:
+ boundTaskExiting(task);
+
+ // We are done...
+ mark_thread = 0;
+
+ // Signal that the concurrent collection is finished, allowing the next
+ // non-moving collection to proceed
+ concurrent_coll_running = false;
+ signalCondition(&concurrent_coll_finished);
+ RELEASE_LOCK(&nonmoving_collection_mutex);
+#endif
+}
+
+#if defined(DEBUG)
+
+// Use this with caution: this doesn't work correctly during the scavenge phase
+// when we're doing parallel scavenging. Use it in the mark phase or later
+// (when we no longer allocate).
+void assert_in_nonmoving_heap(StgPtr p)
+{
+ if (!HEAP_ALLOCED_GC(p))
+ return;
+
+ bdescr *bd = Bdescr(p);
+ if (bd->flags & BF_LARGE) {
+ // It should be in a capability (if it's not filled yet) or in non-moving heap
+ for (uint32_t cap = 0; cap < n_capabilities; ++cap) {
+ if (bd == capabilities[cap]->pinned_object_block) {
+ return;
+ }
+ }
+ ASSERT(bd->flags & BF_NONMOVING);
+ return;
+ }
+
+ // Search snapshot segments
+ for (struct NonmovingSegment *seg = nonmovingHeap.sweep_list; seg; seg = seg->link) {
+ if (p >= (P_)seg && p < (((P_)seg) + NONMOVING_SEGMENT_SIZE_W)) {
+ return;
+ }
+ }
+
+ for (int alloca_idx = 0; alloca_idx < NONMOVING_ALLOCA_CNT; ++alloca_idx) {
+ struct NonmovingAllocator *alloca = nonmovingHeap.allocators[alloca_idx];
+ // Search current segments
+ for (uint32_t cap_idx = 0; cap_idx < n_capabilities; ++cap_idx) {
+ struct NonmovingSegment *seg = alloca->current[cap_idx];
+ if (p >= (P_)seg && p < (((P_)seg) + NONMOVING_SEGMENT_SIZE_W)) {
+ return;
+ }
+ }
+
+ // Search active segments
+ int seg_idx = 0;
+ struct NonmovingSegment *seg = alloca->active;
+ while (seg) {
+ if (p >= (P_)seg && p < (((P_)seg) + NONMOVING_SEGMENT_SIZE_W)) {
+ return;
+ }
+ seg_idx++;
+ seg = seg->link;
+ }
+
+ // Search filled segments
+ seg_idx = 0;
+ seg = alloca->filled;
+ while (seg) {
+ if (p >= (P_)seg && p < (((P_)seg) + NONMOVING_SEGMENT_SIZE_W)) {
+ return;
+ }
+ seg_idx++;
+ seg = seg->link;
+ }
+ }
+
+ // We don't search free segments as they're unused
+
+ barf("%p is not in nonmoving heap\n", (void*)p);
+}
+
+void nonmovingPrintSegment(struct NonmovingSegment *seg)
+{
+ int num_blocks = nonmovingSegmentBlockCount(seg);
+ uint8_t log_block_size = nonmovingSegmentLogBlockSize(seg);
+
+ debugBelch("Segment with %d blocks of size 2^%d (%d bytes, %u words, scan: %p)\n",
+ num_blocks,
+ log_block_size,
+ 1 << log_block_size,
+ (unsigned int) ROUNDUP_BYTES_TO_WDS(1 << log_block_size),
+ (void*)Bdescr((P_)seg)->u.scan);
+
+ for (nonmoving_block_idx p_idx = 0; p_idx < seg->next_free; ++p_idx) {
+ StgClosure *p = (StgClosure*)nonmovingSegmentGetBlock(seg, p_idx);
+ if (nonmovingGetMark(seg, p_idx) != 0) {
+ debugBelch("%d (%p)* :\t", p_idx, (void*)p);
+ } else {
+ debugBelch("%d (%p) :\t", p_idx, (void*)p);
+ }
+ printClosure(p);
+ }
+
+ debugBelch("End of segment\n\n");
+}
+
+void nonmovingPrintAllocator(struct NonmovingAllocator *alloc)
+{
+ debugBelch("Allocator at %p\n", (void*)alloc);
+ debugBelch("Filled segments:\n");
+ for (struct NonmovingSegment *seg = alloc->filled; seg != NULL; seg = seg->link) {
+ debugBelch("%p ", (void*)seg);
+ }
+ debugBelch("\nActive segments:\n");
+ for (struct NonmovingSegment *seg = alloc->active; seg != NULL; seg = seg->link) {
+ debugBelch("%p ", (void*)seg);
+ }
+ debugBelch("\nCurrent segments:\n");
+ for (uint32_t i = 0; i < n_capabilities; ++i) {
+ debugBelch("%p ", alloc->current[i]);
+ }
+ debugBelch("\n");
+}
+
+void locate_object(P_ obj)
+{
+ // Search allocators
+ for (int alloca_idx = 0; alloca_idx < NONMOVING_ALLOCA_CNT; ++alloca_idx) {
+ struct NonmovingAllocator *alloca = nonmovingHeap.allocators[alloca_idx];
+ for (uint32_t cap = 0; cap < n_capabilities; ++cap) {
+ struct NonmovingSegment *seg = alloca->current[cap];
+ if (obj >= (P_)seg && obj < (((P_)seg) + NONMOVING_SEGMENT_SIZE_W)) {
+ debugBelch("%p is in current segment of capability %d of allocator %d at %p\n", obj, cap, alloca_idx, (void*)seg);
+ return;
+ }
+ }
+ int seg_idx = 0;
+ struct NonmovingSegment *seg = alloca->active;
+ while (seg) {
+ if (obj >= (P_)seg && obj < (((P_)seg) + NONMOVING_SEGMENT_SIZE_W)) {
+ debugBelch("%p is in active segment %d of allocator %d at %p\n", obj, seg_idx, alloca_idx, (void*)seg);
+ return;
+ }
+ seg_idx++;
+ seg = seg->link;
+ }
+
+ seg_idx = 0;
+ seg = alloca->filled;
+ while (seg) {
+ if (obj >= (P_)seg && obj < (((P_)seg) + NONMOVING_SEGMENT_SIZE_W)) {
+ debugBelch("%p is in filled segment %d of allocator %d at %p\n", obj, seg_idx, alloca_idx, (void*)seg);
+ return;
+ }
+ seg_idx++;
+ seg = seg->link;
+ }
+ }
+
+ struct NonmovingSegment *seg = nonmovingHeap.free;
+ int seg_idx = 0;
+ while (seg) {
+ if (obj >= (P_)seg && obj < (((P_)seg) + NONMOVING_SEGMENT_SIZE_W)) {
+ debugBelch("%p is in free segment %d at %p\n", obj, seg_idx, (void*)seg);
+ return;
+ }
+ seg_idx++;
+ seg = seg->link;
+ }
+
+ // Search nurseries
+ for (uint32_t nursery_idx = 0; nursery_idx < n_nurseries; ++nursery_idx) {
+ for (bdescr* nursery_block = nurseries[nursery_idx].blocks; nursery_block; nursery_block = nursery_block->link) {
+ if (obj >= nursery_block->start && obj <= nursery_block->start + nursery_block->blocks*BLOCK_SIZE_W) {
+ debugBelch("%p is in nursery %d\n", obj, nursery_idx);
+ return;
+ }
+ }
+ }
+
+ // Search generations
+ for (uint32_t g = 0; g < RtsFlags.GcFlags.generations - 1; ++g) {
+ generation *gen = &generations[g];
+ for (bdescr *blk = gen->blocks; blk; blk = blk->link) {
+ if (obj >= blk->start && obj < blk->free) {
+ debugBelch("%p is in generation %" FMT_Word32 " blocks\n", obj, g);
+ return;
+ }
+ }
+ for (bdescr *blk = gen->old_blocks; blk; blk = blk->link) {
+ if (obj >= blk->start && obj < blk->free) {
+ debugBelch("%p is in generation %" FMT_Word32 " old blocks\n", obj, g);
+ return;
+ }
+ }
+ }
+
+ // Search large objects
+ for (uint32_t g = 0; g < RtsFlags.GcFlags.generations - 1; ++g) {
+ generation *gen = &generations[g];
+ for (bdescr *large_block = gen->large_objects; large_block; large_block = large_block->link) {
+ if ((P_)large_block->start == obj) {
+ debugBelch("%p is in large blocks of generation %d\n", obj, g);
+ return;
+ }
+ }
+ }
+
+ for (bdescr *large_block = nonmoving_large_objects; large_block; large_block = large_block->link) {
+ if ((P_)large_block->start == obj) {
+ debugBelch("%p is in nonmoving_large_objects\n", obj);
+ return;
+ }
+ }
+
+ for (bdescr *large_block = nonmoving_marked_large_objects; large_block; large_block = large_block->link) {
+ if ((P_)large_block->start == obj) {
+ debugBelch("%p is in nonmoving_marked_large_objects\n", obj);
+ return;
+ }
+ }
+
+ // Search workspaces FIXME only works in non-threaded runtime
+#if !defined(THREADED_RTS)
+ for (uint32_t g = 0; g < RtsFlags.GcFlags.generations - 1; ++ g) {
+ gen_workspace *ws = &gct->gens[g];
+ for (bdescr *blk = ws->todo_bd; blk; blk = blk->link) {
+ if (obj >= blk->start && obj < blk->free) {
+ debugBelch("%p is in generation %" FMT_Word32 " todo bds\n", obj, g);
+ return;
+ }
+ }
+ for (bdescr *blk = ws->scavd_list; blk; blk = blk->link) {
+ if (obj >= blk->start && obj < blk->free) {
+ debugBelch("%p is in generation %" FMT_Word32 " scavd bds\n", obj, g);
+ return;
+ }
+ }
+ for (bdescr *blk = ws->todo_large_objects; blk; blk = blk->link) {
+ if (obj >= blk->start && obj < blk->free) {
+ debugBelch("%p is in generation %" FMT_Word32 " todo large bds\n", obj, g);
+ return;
+ }
+ }
+ }
+#endif
+}
+
+void nonmovingPrintSweepList()
+{
+ debugBelch("==== SWEEP LIST =====\n");
+ int i = 0;
+ for (struct NonmovingSegment *seg = nonmovingHeap.sweep_list; seg; seg = seg->link) {
+ debugBelch("%d: %p\n", i++, (void*)seg);
+ }
+ debugBelch("= END OF SWEEP LIST =\n");
+}
+
+void check_in_mut_list(StgClosure *p)
+{
+ for (uint32_t cap_n = 0; cap_n < n_capabilities; ++cap_n) {
+ for (bdescr *bd = capabilities[cap_n]->mut_lists[oldest_gen->no]; bd; bd = bd->link) {
+ for (StgPtr q = bd->start; q < bd->free; ++q) {
+ if (*((StgPtr**)q) == (StgPtr*)p) {
+ debugBelch("Object is in mut list of cap %d: %p\n", cap_n, capabilities[cap_n]->mut_lists[oldest_gen->no]);
+ return;
+ }
+ }
+ }
+ }
+
+ debugBelch("Object is not in a mut list\n");
+}
+
+void print_block_list(bdescr* bd)
+{
+ while (bd) {
+ debugBelch("%p, ", (void*)bd);
+ bd = bd->link;
+ }
+ debugBelch("\n");
+}
+
+void print_thread_list(StgTSO* tso)
+{
+ while (tso != END_TSO_QUEUE) {
+ printClosure((StgClosure*)tso);
+ tso = tso->global_link;
+ }
+}
+
+#endif
diff --git a/rts/sm/NonMoving.h b/rts/sm/NonMoving.h
new file mode 100644
index 0000000000..b3d4e14065
--- /dev/null
+++ b/rts/sm/NonMoving.h
@@ -0,0 +1,335 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2018
+ *
+ * Non-moving garbage collector and allocator
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+#if !defined(CMINUSMINUS)
+
+#include <string.h>
+#include "HeapAlloc.h"
+#include "NonMovingMark.h"
+
+#include "BeginPrivate.h"
+
+// Segments
+#define NONMOVING_SEGMENT_BITS 15 // 2^15 = 32kByte
+// Mask to find base of segment
+#define NONMOVING_SEGMENT_MASK ((1 << NONMOVING_SEGMENT_BITS) - 1)
+// In bytes
+#define NONMOVING_SEGMENT_SIZE (1 << NONMOVING_SEGMENT_BITS)
+// In words
+#define NONMOVING_SEGMENT_SIZE_W ((1 << NONMOVING_SEGMENT_BITS) / SIZEOF_VOID_P)
+// In blocks
+#define NONMOVING_SEGMENT_BLOCKS (NONMOVING_SEGMENT_SIZE / BLOCK_SIZE)
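+
+// For example, with the usual 4096-byte BLOCK_SIZE and 64-bit words these give
+// 32 KiB segments (NONMOVING_SEGMENT_SIZE == 32768), 4096-word segments
+// (NONMOVING_SEGMENT_SIZE_W) and 8 blocks per segment (NONMOVING_SEGMENT_BLOCKS);
+// clearing the low NONMOVING_SEGMENT_BITS bits of an interior pointer yields
+// the owning segment (see nonmovingGetSegment_unchecked below).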
+
+_Static_assert(NONMOVING_SEGMENT_SIZE % BLOCK_SIZE == 0,
+ "non-moving segment size must be multiple of block size");
+
+// The index of a block within a segment
+typedef uint16_t nonmoving_block_idx;
+
+// A non-moving heap segment
+struct NonmovingSegment {
+ struct NonmovingSegment *link; // for linking together segments into lists
+ struct NonmovingSegment *todo_link; // NULL when not in todo list
+ nonmoving_block_idx next_free; // index of the next unallocated block
+ uint8_t bitmap[]; // liveness bitmap
+ // After the liveness bitmap comes the data blocks. Note that we need to
+ // ensure that the size of this struct (including the bitmap) is a multiple
+ // of the word size since GHC assumes that all object pointers are
+ // so-aligned.
+
+ // N.B. There are also bits of information which are stored in the
+ // NonmovingBlockInfo stored in the segment's block descriptor. Namely:
+ //
+ // * the block size can be found in nonmovingBlockInfo(seg)->log_block_size.
+ // * the next_free snapshot can be found in
+ // nonmovingBlockInfo(seg)->next_free_snap.
+ //
+ // This allows us to mark a nonmoving closure without bringing the
+ // NonmovingSegment header into cache.
+};
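+
+// Rough layout of a segment (illustrative, not to scale):
+//
+//   +--------+------------------+---------+---------+---------+-----+---------+
+//   | header | bitmap, 1B/block | padding | block 0 | block 1 | ... | block N |
+//   +--------+------------------+---------+---------+---------+-----+---------+
+//
+// The padding rounds the first block up to word alignment; see
+// nonmovingSegmentGetBlock_ below for the exact address computation.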
+
+// This is how we mark the end of todo lists. It is not NULL because
+// todo_link == NULL means the segment is not in a todo list.
+#define END_NONMOVING_TODO_LIST ((struct NonmovingSegment*)1)
+
+// A non-moving allocator for a particular block size
+struct NonmovingAllocator {
+ struct NonmovingSegment *filled;
+ struct NonmovingSegment *active;
+ // indexed by capability number
+ struct NonmovingSegment *current[];
+};
+
+// first allocator is of size 2^NONMOVING_ALLOCA0 (in bytes)
+#define NONMOVING_ALLOCA0 3
+
+// allocators cover block sizes from 2^NONMOVING_ALLOCA0 up to (but excluding)
+// 2^(NONMOVING_ALLOCA0 + NONMOVING_ALLOCA_CNT) (in bytes)
+#define NONMOVING_ALLOCA_CNT 12
+
+// maximum number of free segments to hold on to
+#define NONMOVING_MAX_FREE 16
+
+struct NonmovingHeap {
+ struct NonmovingAllocator *allocators[NONMOVING_ALLOCA_CNT];
+ // free segment list. This is a cache where we keep up to
+ // NONMOVING_MAX_FREE segments to avoid thrashing the block allocator.
+ // Note that segments in this list are still counted towards
+ // oldest_gen->n_blocks.
+ struct NonmovingSegment *free;
+ // how many segments in free segment list? accessed atomically.
+ unsigned int n_free;
+
+ // records the current length of the nonmovingAllocator.current arrays
+ unsigned int n_caps;
+
+ // The set of segments being swept in this GC. Segments are moved here from
+ // the filled list during preparation and moved back to either the filled,
+ // active, or free lists during sweep. Should be NULL before mark and
+ // after sweep.
+ struct NonmovingSegment *sweep_list;
+};
+
+extern struct NonmovingHeap nonmovingHeap;
+
+extern memcount nonmoving_live_words;
+
+#if defined(THREADED_RTS)
+extern bool concurrent_coll_running;
+#endif
+
+void nonmovingInit(void);
+void nonmovingStop(void);
+void nonmovingExit(void);
+
+
+// dead_weaks and resurrected_threads lists are used for two things:
+//
+// - The weaks and threads in those lists are found to be dead during
+// preparation, but the weaks will be used for finalization and threads will
+// be scheduled again (aka. resurrection) so we need to keep them alive in the
+// non-moving heap as well. So we treat them as roots and mark them.
+//
+// - In non-threaded runtime we add weaks and threads found to be dead in the
+// non-moving heap to those lists so that they'll be finalized and scheduled
+// as other weaks and threads. In threaded runtime we can't do this as that'd
+// cause races between a minor collection and non-moving collection. Instead
+// in non-moving heap we finalize the weaks and resurrect the threads
+// directly, but in a pause.
+//
+void nonmovingCollect(StgWeak **dead_weaks,
+ StgTSO **resurrected_threads);
+
+void *nonmovingAllocate(Capability *cap, StgWord sz);
+void nonmovingAddCapabilities(uint32_t new_n_caps);
+void nonmovingPushFreeSegment(struct NonmovingSegment *seg);
+
+
+INLINE_HEADER struct NonmovingSegmentInfo *nonmovingSegmentInfo(struct NonmovingSegment *seg) {
+ return &Bdescr((StgPtr) seg)->nonmoving_segment;
+}
+
+INLINE_HEADER uint8_t nonmovingSegmentLogBlockSize(struct NonmovingSegment *seg) {
+ return nonmovingSegmentInfo(seg)->log_block_size;
+}
+
+// Add a segment to the appropriate active list.
+INLINE_HEADER void nonmovingPushActiveSegment(struct NonmovingSegment *seg)
+{
+ struct NonmovingAllocator *alloc =
+ nonmovingHeap.allocators[nonmovingSegmentLogBlockSize(seg) - NONMOVING_ALLOCA0];
+ while (true) {
+ struct NonmovingSegment *current_active = (struct NonmovingSegment*)VOLATILE_LOAD(&alloc->active);
+ seg->link = current_active;
+ if (cas((StgVolatilePtr) &alloc->active, (StgWord) current_active, (StgWord) seg) == (StgWord) current_active) {
+ break;
+ }
+ }
+}
+
+// Add a segment to the appropriate filled list.
+INLINE_HEADER void nonmovingPushFilledSegment(struct NonmovingSegment *seg)
+{
+ struct NonmovingAllocator *alloc =
+ nonmovingHeap.allocators[nonmovingSegmentLogBlockSize(seg) - NONMOVING_ALLOCA0];
+ while (true) {
+ struct NonmovingSegment *current_filled = (struct NonmovingSegment*)VOLATILE_LOAD(&alloc->filled);
+ seg->link = current_filled;
+ if (cas((StgVolatilePtr) &alloc->filled, (StgWord) current_filled, (StgWord) seg) == (StgWord) current_filled) {
+ break;
+ }
+ }
+}
+// Assert that the pointer can be traced by the non-moving collector (e.g. in
+// mark phase). This means one of the following:
+//
+// - A static object
+// - A large object
+// - An object in the non-moving heap (e.g. in one of the segments)
+//
+void assert_in_nonmoving_heap(StgPtr p);
+
+// The block size of a given segment in bytes.
+INLINE_HEADER unsigned int nonmovingSegmentBlockSize(struct NonmovingSegment *seg)
+{
+ return 1 << nonmovingSegmentLogBlockSize(seg);
+}
+
+// How many blocks does a segment with the given block size have?
+INLINE_HEADER unsigned int nonmovingBlockCount(uint8_t log_block_size)
+{
+ unsigned int segment_data_size = NONMOVING_SEGMENT_SIZE - sizeof(struct NonmovingSegment);
+ segment_data_size -= segment_data_size % SIZEOF_VOID_P;
+ unsigned int blk_size = 1 << log_block_size;
+ // N.B. +1 accounts for the byte in the mark bitmap.
+ return segment_data_size / (blk_size + 1);
+}
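+
+// Worked example (assuming a 64-bit platform where the segment header occupies
+// 24 bytes): segment_data_size = 32768 - 24 = 32744, already a multiple of the
+// word size, so a segment of 2^3 = 8-byte blocks holds 32744 / (8 + 1) = 3638
+// blocks, preceded by a 3638-byte bitmap.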
+
+unsigned int nonmovingBlockCountFromSize(uint8_t log_block_size);
+
+// How many blocks does the given segment contain? Also the size of the bitmap.
+INLINE_HEADER unsigned int nonmovingSegmentBlockCount(struct NonmovingSegment *seg)
+{
+ return nonmovingBlockCountFromSize(nonmovingSegmentLogBlockSize(seg));
+}
+
+// Get a pointer to the given block index assuming that the block size is as
+// given (avoiding a potential cache miss when this information is already
+// available). The log_block_size argument must be equal to
+// nonmovingSegmentLogBlockSize(seg).
+INLINE_HEADER void *nonmovingSegmentGetBlock_(struct NonmovingSegment *seg, uint8_t log_block_size, nonmoving_block_idx i)
+{
+ ASSERT(log_block_size == nonmovingSegmentLogBlockSize(seg));
+ // Block size in bytes
+ unsigned int blk_size = 1 << log_block_size;
+ // Bitmap size in bytes
+ W_ bitmap_size = nonmovingBlockCountFromSize(log_block_size) * sizeof(uint8_t);
+ // Where the actual data starts (address of the first block).
+    // Use ROUNDUP_BYTES_TO_WDS to align to word size. Note that
+    // ROUNDUP_BYTES_TO_WDS returns a size in _words_, not in _bytes_, so we
+    // convert back to bytes by multiplying by the word size.
+ W_ data = ROUNDUP_BYTES_TO_WDS(((W_)seg) + sizeof(struct NonmovingSegment) + bitmap_size) * sizeof(W_);
+ return (void*)(data + i*blk_size);
+}
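+
+// Continuing the worked example above (8-byte blocks, 24-byte header, 3638
+// blocks): bitmap_size = 3638, the data area starts at byte offset
+// ROUNDUP(24 + 3638) = 3664 within the segment, and block i lives at offset
+// 3664 + 8*i; block 3637 ends exactly at the 32768-byte segment boundary.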
+
+// Get a pointer to the given block index.
+INLINE_HEADER void *nonmovingSegmentGetBlock(struct NonmovingSegment *seg, nonmoving_block_idx i)
+{
+ return nonmovingSegmentGetBlock_(seg, nonmovingSegmentLogBlockSize(seg), i);
+}
+
+// Get the segment which a closure resides in. Assumes that pointer points into
+// non-moving heap.
+INLINE_HEADER struct NonmovingSegment *nonmovingGetSegment_unchecked(StgPtr p)
+{
+ const uintptr_t mask = ~NONMOVING_SEGMENT_MASK;
+ return (struct NonmovingSegment *) (((uintptr_t) p) & mask);
+}
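+
+// For instance (addresses purely illustrative): with 32 KiB segments, a closure
+// at 0x4200a938 lies in the segment based at 0x42008000; the low
+// NONMOVING_SEGMENT_BITS bits are simply masked off.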
+
+INLINE_HEADER struct NonmovingSegment *nonmovingGetSegment(StgPtr p)
+{
+ ASSERT(HEAP_ALLOCED_GC(p) && (Bdescr(p)->flags & BF_NONMOVING));
+ return nonmovingGetSegment_unchecked(p);
+}
+
+INLINE_HEADER nonmoving_block_idx nonmovingGetBlockIdx(StgPtr p)
+{
+ ASSERT(HEAP_ALLOCED_GC(p) && (Bdescr(p)->flags & BF_NONMOVING));
+ struct NonmovingSegment *seg = nonmovingGetSegment(p);
+ ptrdiff_t blk0 = (ptrdiff_t)nonmovingSegmentGetBlock(seg, 0);
+ ptrdiff_t offset = (ptrdiff_t)p - blk0;
+ return (nonmoving_block_idx) (offset >> nonmovingSegmentLogBlockSize(seg));
+}
+
+// TODO: Eliminate this
+extern uint8_t nonmovingMarkEpoch;
+
+INLINE_HEADER void nonmovingSetMark(struct NonmovingSegment *seg, nonmoving_block_idx i)
+{
+ seg->bitmap[i] = nonmovingMarkEpoch;
+}
+
+INLINE_HEADER uint8_t nonmovingGetMark(struct NonmovingSegment *seg, nonmoving_block_idx i)
+{
+ return seg->bitmap[i];
+}
+
+INLINE_HEADER void nonmovingSetClosureMark(StgPtr p)
+{
+ nonmovingSetMark(nonmovingGetSegment(p), nonmovingGetBlockIdx(p));
+}
+
+// TODO: Audit the uses of these
+/* Was the given closure marked this major GC cycle? */
+INLINE_HEADER bool nonmovingClosureMarkedThisCycle(StgPtr p)
+{
+ struct NonmovingSegment *seg = nonmovingGetSegment(p);
+ nonmoving_block_idx blk_idx = nonmovingGetBlockIdx(p);
+ return nonmovingGetMark(seg, blk_idx) == nonmovingMarkEpoch;
+}
+
+INLINE_HEADER bool nonmovingClosureMarked(StgPtr p)
+{
+ struct NonmovingSegment *seg = nonmovingGetSegment(p);
+ nonmoving_block_idx blk_idx = nonmovingGetBlockIdx(p);
+ return nonmovingGetMark(seg, blk_idx) != 0;
+}
+
+// Can be called during a major collection to determine whether a particular
+// segment is in the set of segments that will be swept this collection cycle.
+INLINE_HEADER bool nonmovingSegmentBeingSwept(struct NonmovingSegment *seg)
+{
+ struct NonmovingSegmentInfo *seginfo = nonmovingSegmentInfo(seg);
+ unsigned int n = nonmovingBlockCountFromSize(seginfo->log_block_size);
+ return seginfo->next_free_snap >= n;
+}
+
+// Can be called during a major collection to determine whether a particular
+// closure lives in a segment that will be swept this collection cycle.
+// Note that this returns true for both large and normal objects.
+INLINE_HEADER bool nonmovingClosureBeingSwept(StgClosure *p)
+{
+ bdescr *bd = Bdescr((StgPtr) p);
+ if (HEAP_ALLOCED_GC(p)) {
+ if (bd->flags & BF_NONMOVING_SWEEPING) {
+ return true;
+ } else if (bd->flags & BF_NONMOVING) {
+ struct NonmovingSegment *seg = nonmovingGetSegment((StgPtr) p);
+ return nonmovingSegmentBeingSwept(seg);
+ } else {
+ // outside of the nonmoving heap
+ return false;
+ }
+ } else {
+ // a static object
+ return true;
+ }
+}
+
+INLINE_HEADER bool isNonmovingClosure(StgClosure *p)
+{
+ return !HEAP_ALLOCED_GC(p) || Bdescr((P_)p)->flags & BF_NONMOVING;
+}
+
+#if defined(DEBUG)
+
+void nonmovingPrintSegment(struct NonmovingSegment *seg);
+void nonmovingPrintAllocator(struct NonmovingAllocator *alloc);
+void locate_object(P_ obj);
+void nonmovingPrintSweepList(void);
+// Check if the object is in one of non-moving heap mut_lists
+void check_in_mut_list(StgClosure *p);
+void print_block_list(bdescr *bd);
+void print_thread_list(StgTSO* tso);
+
+#endif
+
+#include "EndPrivate.h"
+
+#endif // CMINUSMINUS
diff --git a/rts/sm/NonMovingCensus.c b/rts/sm/NonMovingCensus.c
new file mode 100644
index 0000000000..670d51263c
--- /dev/null
+++ b/rts/sm/NonMovingCensus.c
@@ -0,0 +1,129 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2018
+ *
+ * Non-moving garbage collector and allocator: Accounting census
+ *
+ * This is a simple space accounting census useful for characterising
+ * fragmentation in the nonmoving heap.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#include "Rts.h"
+#include "NonMoving.h"
+#include "Trace.h"
+#include "NonMovingCensus.h"
+
+// N.B. This may miss segments in the event of concurrent mutation (e.g. if a
+// mutator retires its current segment to the filled list).
+//
+// collect_live_words should only be set when we can guarantee that all mutators
+// and minor GCs are stopped; in that case it is safe to look at active and
+// current segments, so we can also collect statistics on live words.
+static inline struct NonmovingAllocCensus
+nonmovingAllocatorCensus_(struct NonmovingAllocator *alloc, bool collect_live_words)
+{
+ struct NonmovingAllocCensus census = {0, 0, 0, 0};
+
+ for (struct NonmovingSegment *seg = alloc->filled;
+ seg != NULL;
+ seg = seg->link)
+ {
+ unsigned int n = nonmovingSegmentBlockCount(seg);
+ census.n_filled_segs++;
+ census.n_live_blocks += n;
+ if (collect_live_words) {
+ for (unsigned int i=0; i < n; i++) {
+ StgClosure *c = (StgClosure *) nonmovingSegmentGetBlock(seg, i);
+ census.n_live_words += closure_sizeW(c);
+ }
+ }
+ }
+
+ for (struct NonmovingSegment *seg = alloc->active;
+ seg != NULL;
+ seg = seg->link)
+ {
+ census.n_active_segs++;
+ unsigned int n = nonmovingSegmentBlockCount(seg);
+ for (unsigned int i=0; i < n; i++) {
+ if (nonmovingGetMark(seg, i)) {
+ StgClosure *c = (StgClosure *) nonmovingSegmentGetBlock(seg, i);
+ if (collect_live_words)
+ census.n_live_words += closure_sizeW(c);
+ census.n_live_blocks++;
+ }
+ }
+ }
+
+ for (unsigned int cap=0; cap < n_capabilities; cap++)
+ {
+ struct NonmovingSegment *seg = alloc->current[cap];
+ unsigned int n = nonmovingSegmentBlockCount(seg);
+ for (unsigned int i=0; i < n; i++) {
+ if (nonmovingGetMark(seg, i)) {
+ StgClosure *c = (StgClosure *) nonmovingSegmentGetBlock(seg, i);
+ if (collect_live_words)
+ census.n_live_words += closure_sizeW(c);
+ census.n_live_blocks++;
+ }
+ }
+ }
+ return census;
+}
+
+/* This must not be used when mutators are active since it assumes that
+ * all blocks in nonmoving heap are valid closures.
+ */
+struct NonmovingAllocCensus
+nonmovingAllocatorCensusWithWords(struct NonmovingAllocator *alloc)
+{
+ return nonmovingAllocatorCensus_(alloc, true);
+}
+
+struct NonmovingAllocCensus
+nonmovingAllocatorCensus(struct NonmovingAllocator *alloc)
+{
+ return nonmovingAllocatorCensus_(alloc, false);
+}
+
+
+void nonmovingPrintAllocatorCensus()
+{
+ if (!RtsFlags.GcFlags.useNonmoving)
+ return;
+
+ for (int i=0; i < NONMOVING_ALLOCA_CNT; i++) {
+ struct NonmovingAllocCensus census =
+ nonmovingAllocatorCensus(nonmovingHeap.allocators[i]);
+
+ uint32_t blk_size = 1 << (i + NONMOVING_ALLOCA0);
+ // We define occupancy as the fraction of space that is used for useful
+ // data (that is, live and not slop).
+ double occupancy = 100.0 * census.n_live_words * sizeof(W_)
+ / (census.n_live_blocks * blk_size);
+ if (census.n_live_blocks == 0) occupancy = 100;
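+        // e.g. (illustrative) 1000 live 64-byte blocks holding 5000 live words
+        // on a 64-bit platform give 100 * 5000*8 / (1000*64) = 62.5% occupancy.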
+ (void) occupancy; // silence warning if !DEBUG
+ debugTrace(DEBUG_nonmoving_gc, "Allocator %d (%d bytes - %d bytes): "
+ "%d active segs, %d filled segs, %d live blocks, %d live words "
+ "(%2.1f%% occupancy)",
+ i, 1 << (i + NONMOVING_ALLOCA0 - 1), 1 << (i + NONMOVING_ALLOCA0),
+ census.n_active_segs, census.n_filled_segs, census.n_live_blocks, census.n_live_words,
+ occupancy);
+ }
+}
+
+void nonmovingTraceAllocatorCensus()
+{
+#if defined(TRACING)
+ if (!RtsFlags.GcFlags.useNonmoving && !TRACE_nonmoving_gc)
+ return;
+
+ for (int i=0; i < NONMOVING_ALLOCA_CNT; i++) {
+ const struct NonmovingAllocCensus census =
+ nonmovingAllocatorCensus(nonmovingHeap.allocators[i]);
+ const uint32_t log_blk_size = i + NONMOVING_ALLOCA0;
+ traceNonmovingHeapCensus(log_blk_size, &census);
+ }
+#endif
+}
diff --git a/rts/sm/NonMovingCensus.h b/rts/sm/NonMovingCensus.h
new file mode 100644
index 0000000000..7a66dc9b69
--- /dev/null
+++ b/rts/sm/NonMovingCensus.h
@@ -0,0 +1,28 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2018
+ *
+ * Non-moving garbage collector and allocator: Accounting census
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+#include "NonMoving.h"
+
+struct NonmovingAllocCensus {
+ uint32_t n_active_segs;
+ uint32_t n_filled_segs;
+ uint32_t n_live_blocks;
+ uint32_t n_live_words;
+};
+
+
+struct NonmovingAllocCensus
+nonmovingAllocatorCensusWithWords(struct NonmovingAllocator *alloc);
+
+struct NonmovingAllocCensus
+nonmovingAllocatorCensus(struct NonmovingAllocator *alloc);
+
+void nonmovingPrintAllocatorCensus(void);
+void nonmovingTraceAllocatorCensus(void);
diff --git a/rts/sm/NonMovingMark.c b/rts/sm/NonMovingMark.c
new file mode 100644
index 0000000000..03e342806a
--- /dev/null
+++ b/rts/sm/NonMovingMark.c
@@ -0,0 +1,1958 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2018
+ *
+ * Non-moving garbage collector and allocator: Mark phase
+ *
+ * ---------------------------------------------------------------------------*/
+
+#include "Rts.h"
+// We call evacuate, which expects the thread-local gc_thread to be valid;
+// this is sometimes declared as a register variable, so it is necessary to
+// include the declaration so that the compiler doesn't clobber the register.
+#include "NonMovingMark.h"
+#include "NonMovingShortcut.h"
+#include "NonMoving.h"
+#include "BlockAlloc.h" /* for countBlocks */
+#include "HeapAlloc.h"
+#include "Task.h"
+#include "Trace.h"
+#include "HeapUtils.h"
+#include "Printer.h"
+#include "Schedule.h"
+#include "Weak.h"
+#include "STM.h"
+#include "MarkWeak.h"
+#include "sm/Storage.h"
+#include "CNF.h"
+
+static void mark_closure (MarkQueue *queue, const StgClosure *p, StgClosure **origin);
+static void mark_tso (MarkQueue *queue, StgTSO *tso);
+static void mark_stack (MarkQueue *queue, StgStack *stack);
+static void mark_PAP_payload (MarkQueue *queue,
+ StgClosure *fun,
+ StgClosure **payload,
+ StgWord size);
+
+// How many Array# entries to add to the mark queue at once?
+#define MARK_ARRAY_CHUNK_LENGTH 128
+
+/* Note [Large objects in the non-moving collector]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * The nonmoving collector keeps a separate list of its large objects, apart from
+ * oldest_gen->large_objects. There are two reasons for this:
+ *
+ * 1. oldest_gen is mutated by minor collections, which happen concurrently with
+ * marking
+ * 2. the non-moving collector needs a consistent picture
+ *
+ * At the beginning of a major collection, nonmovingCollect takes the objects in
+ * oldest_gen->large_objects (which includes all large objects evacuated by the
+ * moving collector) and adds them to nonmoving_large_objects. This is the set
+ * of large objects that will being collected in the current major GC cycle.
+ *
+ * As the concurrent mark phase proceeds, the large objects in
+ * nonmoving_large_objects that are found to be live are moved to
+ * nonmoving_marked_large_objects. During sweep we discard all objects that remain
+ * in nonmoving_large_objects and move everything in nonmoving_marked_large_objects
+ * back to nonmoving_large_objects.
+ *
+ * During minor collections large objects will accumulate on
+ * oldest_gen->large_objects, where they will be picked up by the nonmoving
+ * collector and moved to nonmoving_large_objects during the next major GC.
+ * When this happens the block gets its BF_NONMOVING_SWEEPING flag set to
+ * indicate that it is part of the snapshot and consequently should be marked by
+ * the nonmoving mark phase.
+ */
+
+bdescr *nonmoving_large_objects = NULL;
+bdescr *nonmoving_marked_large_objects = NULL;
+memcount n_nonmoving_large_blocks = 0;
+memcount n_nonmoving_marked_large_blocks = 0;
+
+bdescr *nonmoving_compact_objects = NULL;
+bdescr *nonmoving_marked_compact_objects = NULL;
+memcount n_nonmoving_compact_blocks = 0;
+memcount n_nonmoving_marked_compact_blocks = 0;
+
+#if defined(THREADED_RTS)
+/* Protects everything above. Furthermore, we only set the BF_MARKED bit of
+ * large object blocks when this is held. This ensures that the write barrier
+ * (e.g. finish_upd_rem_set_mark) and the collector (mark_closure) don't try to
+ * move the same large object to nonmoving_marked_large_objects more than once.
+ */
+static Mutex nonmoving_large_objects_mutex;
+// Note that we don't need a similar lock for compact objects because we never
+// mark a compact object eagerly in a write barrier; all compact objects are
+// marked by the mark thread, so there can't be any races here.
+#endif
+
+/*
+ * Where we keep our threads during collection: we must have a snapshot of the
+ * threads that lived in the nonmoving heap at the time the snapshot was taken
+ * in order to resurrect them safely.
+ */
+StgTSO *nonmoving_old_threads = END_TSO_QUEUE;
+/* Same for weak pointers */
+StgWeak *nonmoving_old_weak_ptr_list = NULL;
+/* Because we can "tidy" thread and weak lists concurrently with a minor GC we
+ * need to move marked threads and weaks to these lists until we pause for sync.
+ * Then we move them to oldest_gen lists. */
+StgTSO *nonmoving_threads = END_TSO_QUEUE;
+StgWeak *nonmoving_weak_ptr_list = NULL;
+
+#if defined(DEBUG)
+// TODO (osa): Document
+StgIndStatic *debug_caf_list_snapshot = (StgIndStatic*)END_OF_CAF_LIST;
+#endif
+
+/* Note [Update remembered set]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * The concurrent non-moving collector uses a remembered set to ensure
+ * that its marking is consistent with the snapshot invariant defined in
+ * the design. This remembered set, known as the update remembered set,
+ * records all pointers that have been overwritten since the beginning
+ * of the concurrent mark. This ensures that concurrent mutation cannot hide
+ * pointers to live objects from the nonmoving garbage collector.
+ *
+ * The update remembered set is maintained via a write barrier that
+ * is enabled whenever a concurrent mark is active. This write barrier
+ * can be found in a number of places:
+ *
+ * - In rts/Primops.cmm in primops responsible for modifying mutable closures
+ * (e.g. MVARs, MUT_VARs, etc.)
+ *
+ *  - In rts/STM.c, where STM operations mutate TVars and their watch queues
+ *
+ * - In the dirty_* functions found in rts/Storage.c where we dirty MVARs,
+ * MUT_VARs, TSOs and STACKs. STACK is a somewhat special case, as described
+ * in Note [StgStack dirtiness flags and concurrent marking] in TSO.h.
+ *
+ * - In the code generated by the STG code generator for pointer array writes
+ *
+ * - In thunk updates (e.g. updateWithIndirection) to ensure that the free
+ * variables of the original thunk remain reachable.
+ *
+ * There is also a read barrier to handle weak references, as described in
+ * Note [Concurrent read barrier on deRefWeak#].
+ *
+ * The representation of the update remembered set is the same as that of
+ * the mark queue. For efficiency, each capability maintains its own local
+ * accumulator of remembered set entries. When a capability fills its
+ * accumulator it is linked in to the global remembered set
+ * (upd_rem_set_block_list), where it is consumed by the mark phase.
+ *
+ * The mark phase is responsible for freeing update remembered set block
+ * allocations.
+ *
+ * Note that we eagerly flush update remembered sets during minor GCs as
+ * described in Note [Eager update remembered set flushing].
+ *
+ *
+ * Note [Eager update remembered set flushing]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * We eagerly flush update remembered sets during minor GCs to avoid scenarios
+ * like the following which could result in long sync pauses:
+ *
+ * 1. We start a major GC, all thread stacks are added to the mark queue.
+ * 2. The concurrent mark thread starts.
+ * 3. The mutator is allowed to resume. One mutator thread T is scheduled and
+ *    pushes its stack to its local update remembered set.
+ * 4. The mark thread eventually encounters the mutator thread's stack but
+ * sees that it has already been marked; skips it.
+ * 5. Thread T continues running but does not push enough to its update
+ * remembered set to require a flush.
+ * 6. Eventually the mark thread finishes marking and requests a final sync.
+ * 7. The thread T flushes its update remembered set.
+ * 8. We find that a large fraction of the heap (namely the subset that is
+ * only reachable from the thread T's stack) needs to be marked, incurring
+ * a large sync pause
+ *
+ * We avoid this by periodically (during minor GC) forcing a flush of the
+ * update remembered set.
+ *
+ * A better (but more complex) approach that would be worthwhile trying in the
+ * future would be to rather do the following:
+ *
+ * 1. Concurrent mark phase is on-going
+ * 2. Mark thread runs out of things to mark
+ * 3. Mark thread sends a signal to capabilities requesting that they send
+ * their update remembered sets without suspending their execution
+ * 4. The mark thread marks everything it was sent; runs out of things to mark
+ * 5. Mark thread initiates a sync
+ * 6. Capabilities send their final update remembered sets and suspend execution
+ * 7. Mark thread marks everything it was sent
+ * 8. Mark thread allows capabilities to resume.
+ *
+ * However, this is obviously a fair amount of complexity and so far the
+ * periodic eager flushing approach has been sufficient.
+ *
+ *
+ * Note [Concurrent read barrier on deRefWeak#]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * In general the non-moving GC assumes that all pointers reachable from a
+ * marked object are themselves marked (or in the mark queue). However,
+ * weak pointers are an obvious exception to this rule. In particular,
+ * deRefWeakPtr# allows the mutator to turn a weak reference into a strong
+ * reference. This interacts badly with concurrent collection. For
+ * instance, consider this program:
+ *
+ * f :: a -> b -> IO b
+ * f k v = do
+ * -- assume that k and v are the only references to the
+ * -- closures to which they refer.
+ * weak <- mkWeakPtr k v Nothing
+ *
+ * -- N.B. k is now technically dead since the only reference to it is
+ * -- weak, but we've not yet had a chance to tombstone the WeakPtr
+ * -- (which will happen in the course of major GC).
+ * performMajorGC
+ * -- Now we are running concurrently with the mark...
+ *
+ * Just x <- deRefWeak weak
+ *      -- We have now introduced a reference to `v`, which will not be
+ *      -- marked: the only reference to `v` when the snapshot was taken
+ *      -- was via a WeakPtr.
+ * return x
+ *
+ */
+static Mutex upd_rem_set_lock;
+bdescr *upd_rem_set_block_list = NULL;
+
+#if defined(THREADED_RTS)
+/* Used during the mark/sweep phase transition to track how many capabilities
+ * have pushed their update remembered sets. Protected by upd_rem_set_lock.
+ */
+static volatile StgWord upd_rem_set_flush_count = 0;
+#endif
+
+
+/* Signaled by each capability when it has flushed its update remembered set */
+static Condition upd_rem_set_flushed_cond;
+
+/* Indicates to mutators that the write barrier must be respected. Set while
+ * concurrent mark is running.
+ */
+StgWord nonmoving_write_barrier_enabled = false;
+
+/* Used to provide the current mark queue to the young generation
+ * collector for scavenging.
+ */
+MarkQueue *current_mark_queue = NULL;
+
+/* Initialise update remembered set data structures */
+void nonmovingMarkInitUpdRemSet() {
+ initMutex(&upd_rem_set_lock);
+ initCondition(&upd_rem_set_flushed_cond);
+#if defined(THREADED_RTS)
+ initMutex(&nonmoving_large_objects_mutex);
+#endif
+}
+
+#if defined(THREADED_RTS) && defined(DEBUG)
+static uint32_t markQueueLength(MarkQueue *q);
+#endif
+static void init_mark_queue_(MarkQueue *queue);
+
+/* Transfers the given capability's update-remembered set to the global
+ * remembered set.
+ *
+ * Really the argument type should be UpdRemSet* but this would be rather
+ * inconvenient without polymorphism.
+ */
+void nonmovingAddUpdRemSetBlocks(MarkQueue *rset)
+{
+ if (markQueueIsEmpty(rset)) return;
+
+ // find the tail of the queue
+ bdescr *start = rset->blocks;
+ bdescr *end = start;
+ while (end->link != NULL)
+ end = end->link;
+
+ // add the blocks to the global remembered set
+ ACQUIRE_LOCK(&upd_rem_set_lock);
+ end->link = upd_rem_set_block_list;
+ upd_rem_set_block_list = start;
+ RELEASE_LOCK(&upd_rem_set_lock);
+
+ // Reset remembered set
+ ACQUIRE_SM_LOCK;
+ init_mark_queue_(rset);
+ rset->is_upd_rem_set = true;
+ RELEASE_SM_LOCK;
+}
+
+#if defined(THREADED_RTS)
+/* Called by capabilities to flush their update remembered sets when
+ * synchronising with the non-moving collector as it transitions from mark to
+ * sweep phase.
+ */
+void nonmovingFlushCapUpdRemSetBlocks(Capability *cap)
+{
+ debugTrace(DEBUG_nonmoving_gc,
+ "Capability %d flushing update remembered set: %d",
+ cap->no, markQueueLength(&cap->upd_rem_set.queue));
+ traceConcUpdRemSetFlush(cap);
+ nonmovingAddUpdRemSetBlocks(&cap->upd_rem_set.queue);
+ atomic_inc(&upd_rem_set_flush_count, 1);
+ signalCondition(&upd_rem_set_flushed_cond);
+    // After this, mutation will remain suspended until nonmovingFinishFlush
+    // releases the capabilities.
+}
+
+/* Request that all capabilities flush their update remembered sets and suspend
+ * execution until further notice.
+ */
+void nonmovingBeginFlush(Task *task)
+{
+ debugTrace(DEBUG_nonmoving_gc, "Starting update remembered set flush...");
+ traceConcSyncBegin();
+ upd_rem_set_flush_count = 0;
+ stopAllCapabilitiesWith(NULL, task, SYNC_FLUSH_UPD_REM_SET);
+
+ // XXX: We may have been given a capability via releaseCapability (i.e. a
+ // task suspended due to a foreign call) in which case our requestSync
+ // logic won't have been hit. Make sure that everyone so far has flushed.
+ // Ideally we want to mark asynchronously with syncing.
+ for (uint32_t i = 0; i < n_capabilities; i++) {
+ nonmovingFlushCapUpdRemSetBlocks(capabilities[i]);
+ }
+}
+
+/* Wait until a capability has flushed its update remembered set. Returns true
+ * if all capabilities have flushed.
+ */
+bool nonmovingWaitForFlush()
+{
+ ACQUIRE_LOCK(&upd_rem_set_lock);
+ debugTrace(DEBUG_nonmoving_gc, "Flush count %d", upd_rem_set_flush_count);
+ bool finished = upd_rem_set_flush_count == n_capabilities;
+ if (!finished) {
+ waitCondition(&upd_rem_set_flushed_cond, &upd_rem_set_lock);
+ }
+ RELEASE_LOCK(&upd_rem_set_lock);
+ return finished;
+}
+
+/* Note [Unintentional marking in resurrectThreads]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * In both moving and non-moving collectors threads found to be unreachable are
+ * evacuated/marked and then resurrected with resurrectThreads. resurrectThreads
+ * raises an exception in the unreachable thread via raiseAsync, which does
+ * mutations on the heap. These mutations add entries to the UpdRemSet of the
+ * thread's capability. Here's an example backtrace where this happens:
+ *
+ * #0 updateRemembSetPushClosure
+ * #1 0x000000000072b363 in dirty_TVAR
+ * #2 0x00000000007162e5 in remove_watch_queue_entries_for_trec
+ * #3 0x0000000000717098 in stmAbortTransaction
+ * #4 0x000000000070c6eb in raiseAsync
+ * #5 0x000000000070b473 in throwToSingleThreaded__
+ * #6 0x000000000070b4ab in throwToSingleThreaded
+ * #7 0x00000000006fce82 in resurrectThreads
+ * #8 0x00000000007215db in nonmovingMark_
+ * #9 0x0000000000721438 in nonmovingConcurrentMark
+ * #10 0x00007f1ee81cd6db in start_thread
+ * #11 0x00007f1ee850688f in clone
+ *
+ * However we don't really want to run write barriers when calling
+ * resurrectThreads here, because we're in a GC pause, and overwritten values
+ * are definitely gone forever (as opposed to being inserted in a marked object
+ * or kept in registers and used later).
+ *
+ * If we don't reset the UpdRemSets when this happens, then in the next mark we
+ * see the objects that the previous mark's resurrectThreads added to the
+ * UpdRemSets, and mark those. This keeps unreachable objects alive, which
+ * breaks weak finalization and thread resurrection (both of which rely on
+ * things becoming unreachable). As an example, stm048 fails when we get this
+ * wrong: when we do raiseAsync on a thread that was blocked on an STM
+ * transaction we mutate a TVAR_WATCH_QUEUE, which has a reference to the TSO
+ * that was running the STM transaction. If the TSO becomes unreachable again
+ * in the next GC we don't realize this, because it was added to an UpdRemSet
+ * in the previous GC's mark phase, because of raiseAsync.
+ *
+ * To fix this we clear all UpdRemSets in nonmovingFinishFlush, right before
+ * releasing capabilities. This is somewhat inefficient (we allow adding objects
+ * to UpdRemSets, only to later reset them), but the only case where we add to
+ * UpdRemSets during mark is resurrectThreads, and I don't think we do so many
+ * resurrections in a thread that we fill the UpdRemSets and allocate new blocks. So
+ * pushing an UpdRemSet in this case is really fast, and resetting is even
+ * faster (we just update a pointer).
+ *
+ * TODO (osa): What if we actually marked UpdRemSets in this case, in the mark
+ * loop? Would that work? Or what would break?
+ */
+
+/* Notify capabilities that the synchronisation is finished; they may resume
+ * execution.
+ */
+void nonmovingFinishFlush(Task *task)
+{
+ // See Note [Unintentional marking in resurrectThreads]
+ for (uint32_t i = 0; i < n_capabilities; i++) {
+ reset_upd_rem_set(&capabilities[i]->upd_rem_set);
+ }
+ // Also reset upd_rem_set_block_list in case some of the UpdRemSets were
+ // filled and we flushed them.
+ freeChain_lock(upd_rem_set_block_list);
+ upd_rem_set_block_list = NULL;
+
+ debugTrace(DEBUG_nonmoving_gc, "Finished update remembered set flush...");
+ traceConcSyncEnd();
+ releaseAllCapabilities(n_capabilities, NULL, task);
+}
+#endif
+
+/*********************************************************
+ * Pushing to either the mark queue or remembered set
+ *********************************************************/
+
+STATIC_INLINE void
+push (MarkQueue *q, const MarkQueueEnt *ent)
+{
+ // Are we at the end of the block?
+ if (q->top->head == MARK_QUEUE_BLOCK_ENTRIES) {
+ // Yes, this block is full.
+ if (q->is_upd_rem_set) {
+ nonmovingAddUpdRemSetBlocks(q);
+ } else {
+ // allocate a fresh block.
+ ACQUIRE_SM_LOCK;
+ bdescr *bd = allocGroup(MARK_QUEUE_BLOCKS);
+ bd->link = q->blocks;
+ q->blocks = bd;
+ q->top = (MarkQueueBlock *) bd->start;
+ q->top->head = 0;
+ RELEASE_SM_LOCK;
+ }
+ }
+
+ q->top->entries[q->top->head] = *ent;
+ q->top->head++;
+}
+
+/* A variant of push to be used by the minor GC when it encounters a reference
+ * to an object in the non-moving heap. In contrast to the other push
+ * operations this uses the gc_alloc_block_sync spinlock instead of the
+ * SM_LOCK to allocate new blocks in the event that the mark queue is full.
+ */
+void
+markQueuePushClosureGC (MarkQueue *q, StgClosure *p)
+{
+ /* We should not make it here if we are doing a deadlock detect GC.
+ * See Note [Deadlock detection under nonmoving collector].
+ */
+ ASSERT(!deadlock_detect_gc);
+
+ // Are we at the end of the block?
+ if (q->top->head == MARK_QUEUE_BLOCK_ENTRIES) {
+ // Yes, this block is full.
+ // allocate a fresh block.
+ ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync);
+ bdescr *bd = allocGroup(MARK_QUEUE_BLOCKS);
+ bd->link = q->blocks;
+ q->blocks = bd;
+ q->top = (MarkQueueBlock *) bd->start;
+ q->top->head = 0;
+ RELEASE_SPIN_LOCK(&gc_alloc_block_sync);
+ }
+
+ MarkQueueEnt ent = {
+ .mark_closure = {
+ .p = UNTAG_CLOSURE(p),
+ .origin = NULL,
+ }
+ };
+ q->top->entries[q->top->head] = ent;
+ q->top->head++;
+}
+
+static inline
+void push_closure (MarkQueue *q,
+ StgClosure *p,
+ StgClosure **origin)
+{
+#if defined(DEBUG)
+ ASSERT(!HEAP_ALLOCED_GC(p) || (Bdescr((StgPtr) p)->gen == oldest_gen));
+ ASSERT(LOOKS_LIKE_CLOSURE_PTR(p));
+ // Commenting out: too slow
+ // if (RtsFlags.DebugFlags.sanity) {
+ // assert_in_nonmoving_heap((P_)p);
+ // if (origin)
+ // assert_in_nonmoving_heap((P_)origin);
+ // }
+#endif
+
+ // This must be true as origin points to a pointer and therefore must be
+ // word-aligned. However, we check this as otherwise we would confuse this
+ // with a mark_array entry
+ ASSERT(((uintptr_t) origin & 0x3) == 0);
+
+ MarkQueueEnt ent = {
+ .mark_closure = {
+ .p = p,
+ .origin = origin,
+ }
+ };
+ push(q, &ent);
+}
+
+static
+void push_array (MarkQueue *q,
+ const StgMutArrPtrs *array,
+ StgWord start_index)
+{
+ // TODO: Push this into callers where they already have the Bdescr
+ if (HEAP_ALLOCED_GC(array) && (Bdescr((StgPtr) array)->gen != oldest_gen))
+ return;
+
+ MarkQueueEnt ent = {
+ .mark_array = {
+ .array = array,
+ .start_index = (start_index << 16) | 0x3,
+ }
+ };
+ push(q, &ent);
+}
+
+static
+void push_thunk_srt (MarkQueue *q, const StgInfoTable *info)
+{
+ const StgThunkInfoTable *thunk_info = itbl_to_thunk_itbl(info);
+ if (thunk_info->i.srt) {
+ push_closure(q, (StgClosure*)GET_SRT(thunk_info), NULL);
+ }
+}
+
+static
+void push_fun_srt (MarkQueue *q, const StgInfoTable *info)
+{
+ const StgFunInfoTable *fun_info = itbl_to_fun_itbl(info);
+ if (fun_info->i.srt) {
+ push_closure(q, (StgClosure*)GET_FUN_SRT(fun_info), NULL);
+ }
+}
+
+/*********************************************************
+ * Pushing to the update remembered set
+ *
+ * upd_rem_set_push_* functions are directly called by
+ * mutators and need to check whether the value is in
+ * non-moving heap.
+ *********************************************************/
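+
+// For illustration, a typical mutator-side barrier call site (a sketch only;
+// the real call sites live elsewhere in the RTS, e.g. dirty_TVAR in the
+// backtrace in the Note above) looks roughly like
+//
+//     if (nonmoving_write_barrier_enabled) {
+//         updateRemembSetPushClosure(cap, old_value);
+//     }
+//
+// where old_value is the pointer about to be overwritten, which must remain
+// visible to the concurrent mark.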
+
+// Check if the object is traced by the non-moving collector. This holds in any
+// of the following cases:
+//
+// - Object is in the non-moving heap
+// - Object is a large object (BF_LARGE) marked as BF_NONMOVING
+// - Object is static (HEAP_ALLOCED_GC(obj) == false)
+//
+static
+bool check_in_nonmoving_heap(StgClosure *p) {
+ if (HEAP_ALLOCED_GC(p)) {
+ // This works for both large and small objects:
+ return Bdescr((P_)p)->flags & BF_NONMOVING;
+ } else {
+ return true; // a static object
+ }
+}
+
+/* Push the free variables of a (now-evaluated) thunk to the
+ * update remembered set.
+ */
+inline void updateRemembSetPushThunk(Capability *cap, StgThunk *thunk)
+{
+ const StgInfoTable *info;
+ do {
+ info = get_volatile_itbl((StgClosure *) thunk);
+ } while (info->type == WHITEHOLE);
+ updateRemembSetPushThunkEager(cap, (StgThunkInfoTable *) info, thunk);
+}
+
+/* Push the free variables of a thunk to the update remembered set.
+ * This is called by the thunk update code (e.g. updateWithIndirection) before
+ * we update the indirectee to ensure that the thunk's free variables remain
+ * visible to the concurrent collector.
+ *
+ * See Note [Update remembered set].
+ */
+void updateRemembSetPushThunkEager(Capability *cap,
+ const StgThunkInfoTable *info,
+ StgThunk *thunk)
+{
+ /* N.B. info->i.type mustn't be WHITEHOLE */
+ MarkQueue *queue = &cap->upd_rem_set.queue;
+ switch (info->i.type) {
+ case THUNK:
+ case THUNK_1_0:
+ case THUNK_0_1:
+ case THUNK_2_0:
+ case THUNK_1_1:
+ case THUNK_0_2:
+ {
+ push_thunk_srt(queue, &info->i);
+
+ for (StgWord i = 0; i < info->i.layout.payload.ptrs; i++) {
+ if (check_in_nonmoving_heap(thunk->payload[i])) {
+ // Don't bother to push origin; it makes the barrier needlessly
+ // expensive with little benefit.
+ push_closure(queue, thunk->payload[i], NULL);
+ }
+ }
+ break;
+ }
+ case AP:
+ {
+ StgAP *ap = (StgAP *) thunk;
+ if (check_in_nonmoving_heap(ap->fun)) {
+ push_closure(queue, ap->fun, NULL);
+ }
+ mark_PAP_payload(queue, ap->fun, ap->payload, ap->n_args);
+ break;
+ }
+ case THUNK_SELECTOR:
+ case BLACKHOLE:
+ // TODO: This is right, right?
+ break;
+ // The selector optimization performed by the nonmoving mark may have
+ // overwritten a thunk which we are updating with an indirection.
+ case IND:
+ {
+ StgInd *ind = (StgInd *) thunk;
+ if (check_in_nonmoving_heap(ind->indirectee)) {
+ push_closure(queue, ind->indirectee, NULL);
+ }
+ break;
+ }
+ default:
+ barf("updateRemembSetPushThunk: invalid thunk pushed: p=%p, type=%d",
+ thunk, info->i.type);
+ }
+}
+
+void updateRemembSetPushThunk_(StgRegTable *reg, StgThunk *p)
+{
+ updateRemembSetPushThunk(regTableToCapability(reg), p);
+}
+
+inline void updateRemembSetPushClosure(Capability *cap, StgClosure *p)
+{
+ if (check_in_nonmoving_heap(p)) {
+ MarkQueue *queue = &cap->upd_rem_set.queue;
+ push_closure(queue, p, NULL);
+ }
+}
+
+void updateRemembSetPushClosure_(StgRegTable *reg, struct StgClosure_ *p)
+{
+ updateRemembSetPushClosure(regTableToCapability(reg), p);
+}
+
+STATIC_INLINE bool needs_upd_rem_set_mark(StgClosure *p)
+{
+ // TODO: Deduplicate with mark_closure
+ bdescr *bd = Bdescr((StgPtr) p);
+ if (bd->gen != oldest_gen) {
+ return false;
+ } else if (bd->flags & BF_LARGE) {
+ if (! (bd->flags & BF_NONMOVING_SWEEPING)) {
+ return false;
+ } else {
+ return ! (bd->flags & BF_MARKED);
+ }
+ } else {
+ struct NonmovingSegment *seg = nonmovingGetSegment((StgPtr) p);
+ nonmoving_block_idx block_idx = nonmovingGetBlockIdx((StgPtr) p);
+ return nonmovingGetMark(seg, block_idx) != nonmovingMarkEpoch;
+ }
+}
+
+/* Set the mark bit; only to be called *after* we have fully marked the closure */
+STATIC_INLINE void finish_upd_rem_set_mark(StgClosure *p)
+{
+ bdescr *bd = Bdescr((StgPtr) p);
+ if (bd->flags & BF_LARGE) {
+ // Someone else may have already marked it.
+ ACQUIRE_LOCK(&nonmoving_large_objects_mutex);
+ if (! (bd->flags & BF_MARKED)) {
+ bd->flags |= BF_MARKED;
+ dbl_link_remove(bd, &nonmoving_large_objects);
+ dbl_link_onto(bd, &nonmoving_marked_large_objects);
+ n_nonmoving_large_blocks -= bd->blocks;
+ n_nonmoving_marked_large_blocks += bd->blocks;
+ }
+ RELEASE_LOCK(&nonmoving_large_objects_mutex);
+ } else {
+ struct NonmovingSegment *seg = nonmovingGetSegment((StgPtr) p);
+ nonmoving_block_idx block_idx = nonmovingGetBlockIdx((StgPtr) p);
+ nonmovingSetMark(seg, block_idx);
+ }
+}
+
+void updateRemembSetPushTSO(Capability *cap, StgTSO *tso)
+{
+ if (needs_upd_rem_set_mark((StgClosure *) tso)) {
+ debugTrace(DEBUG_nonmoving_gc, "upd_rem_set: TSO %p", tso);
+ mark_tso(&cap->upd_rem_set.queue, tso);
+ finish_upd_rem_set_mark((StgClosure *) tso);
+ }
+}
+
+void updateRemembSetPushStack(Capability *cap, StgStack *stack)
+{
+ // N.B. caller responsible for checking nonmoving_write_barrier_enabled
+ if (needs_upd_rem_set_mark((StgClosure *) stack)) {
+ StgWord8 marking = stack->marking;
+ // See Note [StgStack dirtiness flags and concurrent marking]
+ if (cas_word8(&stack->marking, marking, nonmovingMarkEpoch)
+ != nonmovingMarkEpoch) {
+ // We have claimed the right to mark the stack.
+ debugTrace(DEBUG_nonmoving_gc, "upd_rem_set: STACK %p", stack->sp);
+ mark_stack(&cap->upd_rem_set.queue, stack);
+ finish_upd_rem_set_mark((StgClosure *) stack);
+ return;
+ } else {
+ // The concurrent GC has claimed the right to mark the stack.
+ // Wait until it finishes marking before proceeding with
+ // mutation.
+ while (needs_upd_rem_set_mark((StgClosure *) stack)) {
+#if defined(PARALLEL_GC)
+ busy_wait_nop(); // TODO: Spinning here is unfortunate
+#endif
+ }
+ return;
+ }
+ }
+}
+
+/*********************************************************
+ * Pushing to the mark queue
+ *********************************************************/
+
+void markQueuePush (MarkQueue *q, const MarkQueueEnt *ent)
+{
+ push(q, ent);
+}
+
+void markQueuePushClosure (MarkQueue *q,
+ StgClosure *p,
+ StgClosure **origin)
+{
+ // TODO: Push this into callers where they already have the Bdescr
+ if (check_in_nonmoving_heap(p)) {
+ push_closure(q, p, origin);
+ }
+}
+
+/* TODO: Do we really never want to specify the origin here? */
+void markQueueAddRoot (MarkQueue* q, StgClosure** root)
+{
+ markQueuePushClosure(q, *root, NULL);
+}
+
+/* Push a closure to the mark queue without origin information */
+void markQueuePushClosure_ (MarkQueue *q, StgClosure *p)
+{
+ markQueuePushClosure(q, p, NULL);
+}
+
+void markQueuePushFunSrt (MarkQueue *q, const StgInfoTable *info)
+{
+ push_fun_srt(q, info);
+}
+
+void markQueuePushThunkSrt (MarkQueue *q, const StgInfoTable *info)
+{
+ push_thunk_srt(q, info);
+}
+
+void markQueuePushArray (MarkQueue *q,
+ const StgMutArrPtrs *array,
+ StgWord start_index)
+{
+ push_array(q, array, start_index);
+}
+
+/*********************************************************
+ * Popping from the mark queue
+ *********************************************************/
+
+// Returns invalid MarkQueueEnt if queue is empty.
+static MarkQueueEnt markQueuePop_ (MarkQueue *q)
+{
+ MarkQueueBlock *top;
+
+again:
+ top = q->top;
+
+ // Are we at the beginning of the block?
+ if (top->head == 0) {
+ // Is this the first block of the queue?
+ if (q->blocks->link == NULL) {
+ // Yes, therefore queue is empty...
+ MarkQueueEnt none = { .null_entry = { .p = NULL } };
+ return none;
+ } else {
+ // No, unwind to the previous block and try popping again...
+ bdescr *old_block = q->blocks;
+ q->blocks = old_block->link;
+ q->top = (MarkQueueBlock*)q->blocks->start;
+ ACQUIRE_SM_LOCK;
+ freeGroup(old_block); // TODO: hold on to a block to avoid repeated allocation/deallocation?
+ RELEASE_SM_LOCK;
+ goto again;
+ }
+ }
+
+ top->head--;
+ MarkQueueEnt ent = top->entries[top->head];
+ return ent;
+}
+
+static MarkQueueEnt markQueuePop (MarkQueue *q)
+{
+#if MARK_PREFETCH_QUEUE_DEPTH == 0
+ return markQueuePop_(q);
+#else
+ unsigned int i = q->prefetch_head;
+ while (nonmovingMarkQueueEntryType(&q->prefetch_queue[i]) == NULL_ENTRY) {
+ MarkQueueEnt new = markQueuePop_(q);
+ if (nonmovingMarkQueueEntryType(&new) == NULL_ENTRY) {
+ // Mark queue is empty; look for any valid entries in the prefetch
+ // queue
+ for (unsigned int j = (i+1) % MARK_PREFETCH_QUEUE_DEPTH;
+ j != i;
+ j = (j+1) % MARK_PREFETCH_QUEUE_DEPTH)
+ {
+ if (nonmovingMarkQueueEntryType(&q->prefetch_queue[j]) != NULL_ENTRY) {
+ i = j;
+ goto done;
+ }
+ }
+ return new;
+ }
+
+ // The entry may not be a MARK_CLOSURE but it doesn't matter, our
+ // MarkQueueEnt encoding always places the pointer to the object to be
+ // marked first.
+ prefetchForRead(&new.mark_closure.p->header.info);
+ prefetchForRead(Bdescr((StgPtr) new.mark_closure.p));
+ q->prefetch_queue[i] = new;
+ i = (i + 1) % MARK_PREFETCH_QUEUE_DEPTH;
+ }
+
+ done:
+ ;
+ MarkQueueEnt ret = q->prefetch_queue[i];
+ q->prefetch_queue[i].null_entry.p = NULL;
+ q->prefetch_head = i;
+ return ret;
+#endif
+}
+
+/*********************************************************
+ * Creating and destroying MarkQueues and UpdRemSets
+ *********************************************************/
+
+/* Must hold sm_mutex. */
+static void init_mark_queue_ (MarkQueue *queue)
+{
+ bdescr *bd = allocGroup(MARK_QUEUE_BLOCKS);
+ queue->blocks = bd;
+ queue->top = (MarkQueueBlock *) bd->start;
+ queue->top->head = 0;
+#if MARK_PREFETCH_QUEUE_DEPTH > 0
+ memset(&queue->prefetch_queue, 0, sizeof(queue->prefetch_queue));
+ queue->prefetch_head = 0;
+#endif
+}
+
+/* Must hold sm_mutex. */
+void initMarkQueue (MarkQueue *queue)
+{
+ init_mark_queue_(queue);
+ queue->is_upd_rem_set = false;
+}
+
+/* Must hold sm_mutex. */
+void init_upd_rem_set (UpdRemSet *rset)
+{
+ init_mark_queue_(&rset->queue);
+ rset->queue.is_upd_rem_set = true;
+}
+
+void reset_upd_rem_set (UpdRemSet *rset)
+{
+ // UpdRemSets always have one block for the mark queue. This assertion is
+ // here so that we remember to update this code if we change that.
+ ASSERT(rset->queue.blocks->link == NULL);
+ rset->queue.top->head = 0;
+}
+
+void freeMarkQueue (MarkQueue *queue)
+{
+ freeChain_lock(queue->blocks);
+}
+
+#if defined(THREADED_RTS) && defined(DEBUG)
+static uint32_t
+markQueueLength (MarkQueue *q)
+{
+ uint32_t n = 0;
+ for (bdescr *block = q->blocks; block; block = block->link) {
+ MarkQueueBlock *queue = (MarkQueueBlock*)block->start;
+ n += queue->head;
+ }
+ return n;
+}
+#endif
+
+
+/*********************************************************
+ * Marking
+ *********************************************************/
+
+/*
+ * N.B. Mutation of TRecHeaders is completely unprotected by any write
+ * barrier. Consequently it's quite important that we deeply mark
+ * any outstanding transactions.
+ */
+static void
+mark_trec_header (MarkQueue *queue, StgTRecHeader *trec)
+{
+ while (trec != NO_TREC) {
+ StgTRecChunk *chunk = trec->current_chunk;
+ markQueuePushClosure_(queue, (StgClosure *) trec);
+ markQueuePushClosure_(queue, (StgClosure *) chunk);
+ while (chunk != END_STM_CHUNK_LIST) {
+ for (StgWord i=0; i < chunk->next_entry_idx; i++) {
+ TRecEntry *ent = &chunk->entries[i];
+ markQueuePushClosure_(queue, (StgClosure *) ent->tvar);
+ markQueuePushClosure_(queue, ent->expected_value);
+ markQueuePushClosure_(queue, ent->new_value);
+ }
+ chunk = chunk->prev_chunk;
+ }
+ trec = trec->enclosing_trec;
+ }
+}
+
+static void
+mark_tso (MarkQueue *queue, StgTSO *tso)
+{
+ // TODO: Clear dirty if contains only old gen objects
+
+ if (tso->bound != NULL) {
+ markQueuePushClosure_(queue, (StgClosure *) tso->bound->tso);
+ }
+
+ markQueuePushClosure_(queue, (StgClosure *) tso->blocked_exceptions);
+ markQueuePushClosure_(queue, (StgClosure *) tso->bq);
+ mark_trec_header(queue, tso->trec);
+ markQueuePushClosure_(queue, (StgClosure *) tso->stackobj);
+ markQueuePushClosure_(queue, (StgClosure *) tso->_link);
+ if ( tso->why_blocked == BlockedOnMVar
+ || tso->why_blocked == BlockedOnMVarRead
+ || tso->why_blocked == BlockedOnBlackHole
+ || tso->why_blocked == BlockedOnMsgThrowTo
+ || tso->why_blocked == NotBlocked
+ ) {
+ markQueuePushClosure_(queue, tso->block_info.closure);
+ }
+}
+
+static void
+do_push_closure (StgClosure **p, void *user)
+{
+ MarkQueue *queue = (MarkQueue *) user;
+ // TODO: Origin? need reference to containing closure
+ markQueuePushClosure_(queue, *p);
+}
+
+static void
+mark_large_bitmap (MarkQueue *queue,
+ StgClosure **p,
+ StgLargeBitmap *large_bitmap,
+ StgWord size)
+{
+ walk_large_bitmap(do_push_closure, p, large_bitmap, size, queue);
+}
+
+static void
+mark_small_bitmap (MarkQueue *queue, StgClosure **p, StgWord size, StgWord bitmap)
+{
+ while (size > 0) {
+ if ((bitmap & 1) == 0) {
+ // TODO: Origin?
+ markQueuePushClosure(queue, *p, NULL);
+ }
+ p++;
+ bitmap = bitmap >> 1;
+ size--;
+ }
+}
+
+static GNUC_ATTR_HOT
+void mark_PAP_payload (MarkQueue *queue,
+ StgClosure *fun,
+ StgClosure **payload,
+ StgWord size)
+{
+ const StgFunInfoTable *fun_info = get_fun_itbl(UNTAG_CONST_CLOSURE(fun));
+ ASSERT(fun_info->i.type != PAP);
+ StgPtr p = (StgPtr) payload;
+
+ StgWord bitmap;
+ switch (fun_info->f.fun_type) {
+ case ARG_GEN:
+ bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
+ goto small_bitmap;
+ case ARG_GEN_BIG:
+ mark_large_bitmap(queue, payload, GET_FUN_LARGE_BITMAP(fun_info), size);
+ break;
+ case ARG_BCO:
+ mark_large_bitmap(queue, payload, BCO_BITMAP(fun), size);
+ break;
+ default:
+ bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
+ small_bitmap:
+ mark_small_bitmap(queue, (StgClosure **) p, size, bitmap);
+ break;
+ }
+}
+
+/* Helper for mark_stack; returns next stack frame. */
+static StgPtr
+mark_arg_block (MarkQueue *queue, const StgFunInfoTable *fun_info, StgClosure **args)
+{
+ StgWord bitmap, size;
+
+ StgPtr p = (StgPtr)args;
+ switch (fun_info->f.fun_type) {
+ case ARG_GEN:
+ bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
+ size = BITMAP_SIZE(fun_info->f.b.bitmap);
+ goto small_bitmap;
+ case ARG_GEN_BIG:
+ size = GET_FUN_LARGE_BITMAP(fun_info)->size;
+ mark_large_bitmap(queue, (StgClosure**)p, GET_FUN_LARGE_BITMAP(fun_info), size);
+ p += size;
+ break;
+ default:
+ bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
+ size = BITMAP_SIZE(stg_arg_bitmaps[fun_info->f.fun_type]);
+ small_bitmap:
+ mark_small_bitmap(queue, (StgClosure**)p, size, bitmap);
+ p += size;
+ break;
+ }
+ return p;
+}
+
+static GNUC_ATTR_HOT void
+mark_stack_ (MarkQueue *queue, StgPtr sp, StgPtr spBottom)
+{
+ ASSERT(sp <= spBottom);
+
+ while (sp < spBottom) {
+ const StgRetInfoTable *info = get_ret_itbl((StgClosure *)sp);
+ switch (info->i.type) {
+ case UPDATE_FRAME:
+ {
+ // See Note [upd-black-hole] in rts/Scav.c
+ StgUpdateFrame *frame = (StgUpdateFrame *) sp;
+ markQueuePushClosure_(queue, frame->updatee);
+ sp += sizeofW(StgUpdateFrame);
+ continue;
+ }
+
+ // small bitmap (< 32 entries, or 64 on a 64-bit machine)
+ case CATCH_STM_FRAME:
+ case CATCH_RETRY_FRAME:
+ case ATOMICALLY_FRAME:
+ case UNDERFLOW_FRAME:
+ case STOP_FRAME:
+ case CATCH_FRAME:
+ case RET_SMALL:
+ {
+ StgWord bitmap = BITMAP_BITS(info->i.layout.bitmap);
+ StgWord size = BITMAP_SIZE(info->i.layout.bitmap);
+ // NOTE: the payload starts immediately after the info-ptr, we
+ // don't have an StgHeader in the same sense as a heap closure.
+ sp++;
+ mark_small_bitmap(queue, (StgClosure **) sp, size, bitmap);
+ sp += size;
+ }
+ follow_srt:
+ if (info->i.srt) {
+ markQueuePushClosure_(queue, (StgClosure*)GET_SRT(info));
+ }
+ continue;
+
+ case RET_BCO: {
+ sp++;
+ markQueuePushClosure_(queue, *(StgClosure**)sp);
+ StgBCO *bco = (StgBCO *)*sp;
+ sp++;
+ StgWord size = BCO_BITMAP_SIZE(bco);
+ mark_large_bitmap(queue, (StgClosure **) sp, BCO_BITMAP(bco), size);
+ sp += size;
+ continue;
+ }
+
+ // large bitmap (> 32 entries, or > 64 on a 64-bit machine)
+ case RET_BIG:
+ {
+ StgWord size;
+
+ size = GET_LARGE_BITMAP(&info->i)->size;
+ sp++;
+ mark_large_bitmap(queue, (StgClosure **) sp, GET_LARGE_BITMAP(&info->i), size);
+ sp += size;
+ // and don't forget to follow the SRT
+ goto follow_srt;
+ }
+
+ case RET_FUN:
+ {
+ StgRetFun *ret_fun = (StgRetFun *)sp;
+ const StgFunInfoTable *fun_info;
+
+ markQueuePushClosure_(queue, ret_fun->fun);
+ fun_info = get_fun_itbl(UNTAG_CLOSURE(ret_fun->fun));
+ sp = mark_arg_block(queue, fun_info, ret_fun->payload);
+ goto follow_srt;
+ }
+
+ default:
+ barf("mark_stack: weird activation record found on stack: %d", (int)(info->i.type));
+ }
+ }
+}
+
+static GNUC_ATTR_HOT void
+mark_stack (MarkQueue *queue, StgStack *stack)
+{
+ // TODO: Clear dirty if contains only old gen objects
+
+ mark_stack_(queue, stack->sp, stack->stack + stack->stack_size);
+}
+
+/* See Note [Static objects under the nonmoving collector].
+ *
+ * Returns true if the object needs to be marked.
+ */
+static bool
+bump_static_flag(StgClosure **link_field, StgClosure *q STG_UNUSED)
+{
+ while (1) {
+ StgWord link = (StgWord) *link_field;
+ StgWord new = (link & ~STATIC_BITS) | static_flag;
+ if ((link & STATIC_BITS) == static_flag)
+ return false;
+ else if (cas((StgVolatilePtr) link_field, link, new) == link) {
+ return true;
+ }
+ }
+}
+
+static GNUC_ATTR_HOT void
+mark_closure (MarkQueue *queue, const StgClosure *p0, StgClosure **origin)
+{
+ StgClosure *p = (StgClosure*)p0;
+
+ try_again:
+ ;
+ bdescr *bd = NULL;
+ StgClosure *p_next = NULL;
+ StgWord tag = GET_CLOSURE_TAG(p);
+ p = UNTAG_CLOSURE(p);
+
+# define PUSH_FIELD(obj, field) \
+ markQueuePushClosure(queue, \
+ (StgClosure *) (obj)->field, \
+ (StgClosure **) &(obj)->field)
+
+ if (!HEAP_ALLOCED_GC(p)) {
+ const StgInfoTable *info = get_itbl(p);
+ StgHalfWord type = info->type;
+
+ if (type == CONSTR_0_1 || type == CONSTR_0_2 || type == CONSTR_NOCAF) {
+ // no need to put these on the static linked list, they don't need
+ // to be marked.
+ return;
+ }
+
+ switch (type) {
+
+ case THUNK_STATIC:
+ if (info->srt != 0) {
+ if (bump_static_flag(THUNK_STATIC_LINK((StgClosure *)p), p)) {
+ markQueuePushThunkSrt(queue, info); // TODO this function repeats the check above
+ }
+ }
+ goto done;
+
+ case FUN_STATIC:
+ if (info->srt != 0 || info->layout.payload.ptrs != 0) {
+ if (bump_static_flag(STATIC_LINK(info, (StgClosure *)p), p)) {
+ markQueuePushFunSrt(queue, info); // TODO this function repeats the check above
+
+ // a FUN_STATIC can also be an SRT, so it may have pointer
+ // fields. See Note [SRTs] in CmmBuildInfoTables, specifically
+ // the [FUN] optimisation.
+ // TODO (osa) I don't understand this comment
+ for (StgHalfWord i = 0; i < info->layout.payload.ptrs; ++i) {
+ PUSH_FIELD(p, payload[i]);
+ }
+ }
+ }
+ goto done;
+
+ case IND_STATIC:
+ if (bump_static_flag(IND_STATIC_LINK((StgClosure *)p), p)) {
+ PUSH_FIELD((StgInd *) p, indirectee);
+ }
+ goto done;
+
+ case CONSTR:
+ case CONSTR_1_0:
+ case CONSTR_2_0:
+ case CONSTR_1_1:
+ if (bump_static_flag(STATIC_LINK(info, (StgClosure *)p), p)) {
+ for (StgHalfWord i = 0; i < info->layout.payload.ptrs; ++i) {
+ PUSH_FIELD(p, payload[i]);
+ }
+ }
+ goto done;
+
+ case WHITEHOLE:
+ while (get_volatile_itbl(p)->type == WHITEHOLE);
+ // busy_wait_nop(); // FIXME
+ goto try_again;
+
+ default:
+ barf("mark_closure(static): strange closure type %d", (int)(info->type));
+ }
+ }
+
+ bd = Bdescr((StgPtr) p);
+
+ if (bd->gen != oldest_gen) {
+ // Here we have an object living outside of the non-moving heap. While
+ // we likely evacuated nearly everything to the nonmoving heap during
+ // preparation there are nevertheless a few ways in which we might trace
+ // a reference into younger generations:
+ //
+ // * a mutable object might have been updated
+ // * we might have aged an object
+ goto done;
+ }
+
+ ASSERTM(LOOKS_LIKE_CLOSURE_PTR(p), "invalid closure, info=%p", p->header.info);
+
+ ASSERT(!IS_FORWARDING_PTR(p->header.info));
+
+ // N.B. only the first block of a compact region is guaranteed to carry
+ // BF_NONMOVING; consequently we must separately check for BF_COMPACT.
+ if (bd->flags & (BF_COMPACT | BF_NONMOVING)) {
+
+ if (bd->flags & BF_COMPACT) {
+ StgCompactNFData *str = objectGetCompact((StgClosure*)p);
+ bd = Bdescr((P_)str);
+
+ if (! (bd->flags & BF_NONMOVING_SWEEPING)) {
+ // Not in the snapshot
+ return;
+ }
+ if (bd->flags & BF_MARKED) {
+ goto done;
+ }
+ } else if (bd->flags & BF_LARGE) {
+ if (! (bd->flags & BF_NONMOVING_SWEEPING)) {
+ // Not in the snapshot
+ goto done;
+ }
+ if (bd->flags & BF_MARKED) {
+ goto done;
+ }
+
+ // Mark contents
+ p = (StgClosure*)bd->start;
+ } else {
+ struct NonmovingSegment *seg = nonmovingGetSegment((StgPtr) p);
+ nonmoving_block_idx block_idx = nonmovingGetBlockIdx((StgPtr) p);
+
+ /* We don't mark blocks that,
+ * - were not live at the time that the snapshot was taken, or
+ * - we have already marked this cycle
+ */
+ uint8_t mark = nonmovingGetMark(seg, block_idx);
+ /* Don't mark things we've already marked (since we may loop) */
+ if (mark == nonmovingMarkEpoch)
+ goto done;
+
+ StgClosure *snapshot_loc =
+ (StgClosure *) nonmovingSegmentGetBlock(seg, nonmovingSegmentInfo(seg)->next_free_snap);
+ if (p >= snapshot_loc && mark == 0) {
+ /*
+ * In this case we are looking at a block that wasn't allocated
+ * at the time that the snapshot was taken. We mustn't trace
+ * things above the allocation pointer that aren't marked since
+ * they may not be valid objects.
+ */
+ goto done;
+ }
+ }
+ }
+
+ // A pinned object that is still attached to a capability (because it's not
+ // filled yet). No need to trace it: pinned objects can't contain pointers.
+ else if (bd->flags & BF_PINNED) {
+#if defined(DEBUG)
+ bool found_it = false;
+ for (uint32_t i = 0; i < n_capabilities; ++i) {
+ if (capabilities[i]->pinned_object_block == bd) {
+ found_it = true;
+ break;
+ }
+ }
+ ASSERT(found_it);
+#endif
+ return; // we don't update origin here! TODO(osa): explain this
+ }
+
+ else {
+ barf("Strange closure in nonmoving mark: %p", p);
+ }
+
+ /////////////////////////////////////////////////////
+ // Trace pointers
+ /////////////////////////////////////////////////////
+
+ const StgInfoTable *info = get_itbl(p);
+ switch (info->type) {
+
+ case MVAR_CLEAN:
+ case MVAR_DIRTY: {
+ StgMVar *mvar = (StgMVar *) p;
+ PUSH_FIELD(mvar, head);
+ PUSH_FIELD(mvar, tail);
+ PUSH_FIELD(mvar, value);
+ break;
+ }
+
+ case TVAR: {
+ StgTVar *tvar = ((StgTVar *)p);
+ PUSH_FIELD(tvar, current_value);
+ PUSH_FIELD(tvar, first_watch_queue_entry);
+ break;
+ }
+
+ case FUN_2_0:
+ markQueuePushFunSrt(queue, info);
+ PUSH_FIELD(p, payload[1]);
+ PUSH_FIELD(p, payload[0]);
+ break;
+
+ case THUNK_2_0: {
+ StgThunk *thunk = (StgThunk *) p;
+ markQueuePushThunkSrt(queue, info);
+ PUSH_FIELD(thunk, payload[1]);
+ PUSH_FIELD(thunk, payload[0]);
+ break;
+ }
+
+ case CONSTR_2_0:
+ PUSH_FIELD(p, payload[1]);
+ PUSH_FIELD(p, payload[0]);
+ break;
+
+ case THUNK_1_0:
+ markQueuePushThunkSrt(queue, info);
+ PUSH_FIELD((StgThunk *) p, payload[0]);
+ break;
+
+ case FUN_1_0:
+ markQueuePushFunSrt(queue, info);
+ PUSH_FIELD(p, payload[0]);
+ break;
+
+ case CONSTR_1_0:
+ PUSH_FIELD(p, payload[0]);
+ break;
+
+ case THUNK_0_1:
+ markQueuePushThunkSrt(queue, info);
+ break;
+
+ case FUN_0_1:
+ markQueuePushFunSrt(queue, info);
+ break;
+
+ case CONSTR_0_1:
+ case CONSTR_0_2:
+ break;
+
+ case THUNK_0_2:
+ markQueuePushThunkSrt(queue, info);
+ break;
+
+ case FUN_0_2:
+ markQueuePushFunSrt(queue, info);
+ break;
+
+ case THUNK_1_1:
+ markQueuePushThunkSrt(queue, info);
+ PUSH_FIELD((StgThunk *) p, payload[0]);
+ break;
+
+ case FUN_1_1:
+ markQueuePushFunSrt(queue, info);
+ PUSH_FIELD(p, payload[0]);
+ break;
+
+ case CONSTR_1_1:
+ PUSH_FIELD(p, payload[0]);
+ break;
+
+ case FUN:
+ markQueuePushFunSrt(queue, info);
+ goto gen_obj;
+
+ case THUNK: {
+ markQueuePushThunkSrt(queue, info);
+ for (StgWord i = 0; i < info->layout.payload.ptrs; i++) {
+ StgClosure **field = &((StgThunk *) p)->payload[i];
+ markQueuePushClosure(queue, *field, field);
+ }
+ break;
+ }
+
+ gen_obj:
+ case CONSTR:
+ case CONSTR_NOCAF:
+ case WEAK:
+ case PRIM:
+ {
+ for (StgWord i = 0; i < info->layout.payload.ptrs; i++) {
+ StgClosure **field = &((StgClosure *) p)->payload[i];
+ markQueuePushClosure(queue, *field, field);
+ }
+ break;
+ }
+
+ case BCO: {
+ StgBCO *bco = (StgBCO *)p;
+ PUSH_FIELD(bco, instrs);
+ PUSH_FIELD(bco, literals);
+ PUSH_FIELD(bco, ptrs);
+ break;
+ }
+
+
+ case IND: {
+ PUSH_FIELD((StgInd *) p, indirectee);
+ if (origin != NULL) {
+ p_next = ((StgInd*)p)->indirectee;
+ }
+ break;
+ }
+
+ case BLACKHOLE: {
+ PUSH_FIELD((StgInd *) p, indirectee);
+ StgClosure *indirectee = ((StgInd*)p)->indirectee;
+ if (GET_CLOSURE_TAG(indirectee) == 0 || origin == NULL) {
+ // do nothing
+ } else {
+ p_next = indirectee;
+ }
+ break;
+ }
+
+ case MUT_VAR_CLEAN:
+ case MUT_VAR_DIRTY:
+ PUSH_FIELD((StgMutVar *)p, var);
+ break;
+
+ case BLOCKING_QUEUE: {
+ StgBlockingQueue *bq = (StgBlockingQueue *)p;
+ PUSH_FIELD(bq, bh);
+ PUSH_FIELD(bq, owner);
+ PUSH_FIELD(bq, queue);
+ PUSH_FIELD(bq, link);
+ break;
+ }
+
+ case THUNK_SELECTOR:
+ if (RtsFlags.GcFlags.nonmovingSelectorOpt) {
+ nonmoving_eval_thunk_selector(queue, (StgSelector*)p, origin);
+ } else {
+ PUSH_FIELD((StgSelector *) p, selectee);
+ }
+ break;
+
+ case AP_STACK: {
+ StgAP_STACK *ap = (StgAP_STACK *)p;
+ PUSH_FIELD(ap, fun);
+ mark_stack_(queue, (StgPtr) ap->payload, (StgPtr) ap->payload + ap->size);
+ break;
+ }
+
+ case PAP: {
+ StgPAP *pap = (StgPAP *) p;
+ PUSH_FIELD(pap, fun);
+ mark_PAP_payload(queue, pap->fun, pap->payload, pap->n_args);
+ break;
+ }
+
+ case AP: {
+ StgAP *ap = (StgAP *) p;
+ PUSH_FIELD(ap, fun);
+ mark_PAP_payload(queue, ap->fun, ap->payload, ap->n_args);
+ break;
+ }
+
+ case ARR_WORDS:
+ // nothing to follow
+ break;
+
+ case MUT_ARR_PTRS_CLEAN:
+ case MUT_ARR_PTRS_DIRTY:
+ case MUT_ARR_PTRS_FROZEN_CLEAN:
+ case MUT_ARR_PTRS_FROZEN_DIRTY:
+ // TODO: Check this against Scav.c
+ markQueuePushArray(queue, (StgMutArrPtrs *) p, 0);
+ break;
+
+ case SMALL_MUT_ARR_PTRS_CLEAN:
+ case SMALL_MUT_ARR_PTRS_DIRTY:
+ case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
+ case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY: {
+ StgSmallMutArrPtrs *arr = (StgSmallMutArrPtrs *) p;
+ for (StgWord i = 0; i < arr->ptrs; i++) {
+ StgClosure **field = &arr->payload[i];
+ markQueuePushClosure(queue, *field, field);
+ }
+ break;
+ }
+
+ case TSO:
+ mark_tso(queue, (StgTSO *) p);
+ break;
+
+ case STACK: {
+ // See Note [StgStack dirtiness flags and concurrent marking]
+ StgStack *stack = (StgStack *) p;
+ StgWord8 marking = stack->marking;
+
+ // N.B. stack->marking must be != nonmovingMarkEpoch unless
+ // someone has already marked it.
+ if (cas_word8(&stack->marking, marking, nonmovingMarkEpoch)
+ != nonmovingMarkEpoch) {
+ // We have claimed the right to mark the stack.
+ mark_stack(queue, stack);
+ } else {
+ // A mutator has already started marking the stack; we just let it
+ // do its thing and move on. There's no reason to wait; we know that
+ // the stack will be fully marked before we sweep due to the final
+ // post-mark synchronization. Most importantly, we do not set its
+ // mark bit, the mutator is responsible for this.
+ goto done;
+ }
+ break;
+ }
+
+ case MUT_PRIM: {
+ for (StgHalfWord p_idx = 0; p_idx < info->layout.payload.ptrs; ++p_idx) {
+ StgClosure **field = &p->payload[p_idx];
+ markQueuePushClosure(queue, *field, field);
+ }
+ break;
+ }
+
+ case TREC_CHUNK: {
+ // TODO: Should we abort here? This should have already been marked
+ // when we dirtied the TSO
+ StgTRecChunk *tc = ((StgTRecChunk *) p);
+ PUSH_FIELD(tc, prev_chunk);
+ TRecEntry *end = &tc->entries[tc->next_entry_idx];
+ for (TRecEntry *e = &tc->entries[0]; e < end; e++) {
+ markQueuePushClosure_(queue, (StgClosure *) e->tvar);
+ markQueuePushClosure_(queue, (StgClosure *) e->expected_value);
+ markQueuePushClosure_(queue, (StgClosure *) e->new_value);
+ }
+ break;
+ }
+
+ case WHITEHOLE:
+ while (get_volatile_itbl(p)->type == WHITEHOLE);
+ goto try_again;
+
+ case COMPACT_NFDATA:
+ break;
+
+ default:
+ barf("mark_closure: unimplemented/strange closure type %d @ %p",
+ info->type, p);
+ }
+
+# undef PUSH_FIELD
+
+ /* Set the mark bit: it's important that we do this only after we actually push
+ * the object's pointers since in the case of marking stacks there may be a
+ * mutator waiting for us to finish so it can start execution.
+ */
+ if (bd->flags & BF_COMPACT) {
+ StgCompactNFData *str = objectGetCompact((StgClosure*)p);
+ dbl_link_remove(bd, &nonmoving_compact_objects);
+ dbl_link_onto(bd, &nonmoving_marked_compact_objects);
+ StgWord blocks = str->totalW / BLOCK_SIZE_W;
+ n_nonmoving_compact_blocks -= blocks;
+ n_nonmoving_marked_compact_blocks += blocks;
+ bd->flags |= BF_MARKED;
+ } else if (bd->flags & BF_LARGE) {
+ /* Marking a large object isn't idempotent since we move it to
+ * nonmoving_marked_large_objects; to ensure that we don't repeatedly
+ * mark a large object, we only set BF_MARKED on large objects in the
+ * nonmoving heap while holding nonmoving_large_objects_mutex
+ */
+ ACQUIRE_LOCK(&nonmoving_large_objects_mutex);
+ if (! (bd->flags & BF_MARKED)) {
+ // Remove the object from nonmoving_large_objects and link it to
+ // nonmoving_marked_large_objects
+ dbl_link_remove(bd, &nonmoving_large_objects);
+ dbl_link_onto(bd, &nonmoving_marked_large_objects);
+ n_nonmoving_large_blocks -= bd->blocks;
+ n_nonmoving_marked_large_blocks += bd->blocks;
+ bd->flags |= BF_MARKED;
+ }
+ RELEASE_LOCK(&nonmoving_large_objects_mutex);
+ } else if (bd->flags & BF_NONMOVING) {
+ // TODO: Kill repetition
+ struct NonmovingSegment *seg = nonmovingGetSegment((StgPtr) p);
+ nonmoving_block_idx block_idx = nonmovingGetBlockIdx((StgPtr) p);
+ nonmovingSetMark(seg, block_idx);
+ nonmoving_live_words += nonmovingSegmentBlockSize(seg) / sizeof(W_);
+ }
+
+ // If we found an indirection to shortcut, keep going.
+ if (p_next) {
+ p = p_next;
+ goto try_again;
+ }
+
+done:
+ if (origin != NULL && (!HEAP_ALLOCED(p) || bd->flags & BF_NONMOVING)) {
+ if (UNTAG_CLOSURE((StgClosure*)p0) != p && *origin == p0) {
+ if (cas((StgVolatilePtr)origin, (StgWord)p0, (StgWord)TAG_CLOSURE(tag, p)) == (StgWord)p0) {
+ // debugBelch("Thunk optimization successful\n");
+ }
+ }
+ }
+}
+
+/* This is the main mark loop.
+ * Invariants:
+ *
+ * a. nonmovingPrepareMark has been called.
+ * b. the nursery has been fully evacuated into the non-moving generation.
+ * c. the mark queue has been seeded with a set of roots.
+ *
+ */
+GNUC_ATTR_HOT void
+nonmovingMark (MarkQueue *queue)
+{
+ traceConcMarkBegin();
+ debugTrace(DEBUG_nonmoving_gc, "Starting mark pass");
+ unsigned int count = 0;
+ while (true) {
+ count++;
+ MarkQueueEnt ent = markQueuePop(queue);
+
+ switch (nonmovingMarkQueueEntryType(&ent)) {
+ case MARK_CLOSURE:
+ mark_closure(queue, ent.mark_closure.p, ent.mark_closure.origin);
+ break;
+ case MARK_ARRAY: {
+ const StgMutArrPtrs *arr = ent.mark_array.array;
+ StgWord start = ent.mark_array.start_index >> 16;
+ StgWord end = start + MARK_ARRAY_CHUNK_LENGTH;
+ if (end < arr->ptrs) {
+ markQueuePushArray(queue, ent.mark_array.array, end);
+ } else {
+ end = arr->ptrs;
+ }
+ for (StgWord i = start; i < end; i++) {
+ markQueuePushClosure_(queue, arr->payload[i]);
+ }
+ break;
+ }
+ case NULL_ENTRY:
+ // Perhaps the update remembered set has more to mark...
+ if (upd_rem_set_block_list) {
+ ACQUIRE_LOCK(&upd_rem_set_lock);
+ bdescr *old = queue->blocks;
+ queue->blocks = upd_rem_set_block_list;
+ queue->top = (MarkQueueBlock *) queue->blocks->start;
+ upd_rem_set_block_list = NULL;
+ RELEASE_LOCK(&upd_rem_set_lock);
+
+ ACQUIRE_SM_LOCK;
+ freeGroup(old);
+ RELEASE_SM_LOCK;
+ } else {
+ // Nothing more to do
+ debugTrace(DEBUG_nonmoving_gc, "Finished mark pass: %d", count);
+ traceConcMarkEnd(count);
+ return;
+ }
+ }
+ }
+}
+
+// A variant of `isAlive` that works for non-moving heap. Used for:
+//
+// - Collecting weak pointers; checking key of a weak pointer.
+// - Resurrecting threads; checking if a thread is dead.
+// - Sweeping object lists: large_objects, mut_list, stable_name_table.
+//
+// This may only be used after a full mark but before nonmovingSweep as it
+// relies on the correctness of the next_free_snap and mark bitmaps.
+bool nonmovingIsAlive (StgClosure *p)
+{
+ // Ignore static closures. See comments in `isAlive`.
+ if (!HEAP_ALLOCED_GC(p)) {
+ return true;
+ }
+
+ bdescr *bd = Bdescr((P_)p);
+
+ // All non-static objects in the non-moving heap should be marked as
+ // BF_NONMOVING
+ ASSERT(bd->flags & BF_NONMOVING);
+
+ if (bd->flags & (BF_COMPACT | BF_LARGE)) {
+ if (bd->flags & BF_COMPACT) {
+ StgCompactNFData *str = objectGetCompact((StgClosure*)p);
+ bd = Bdescr((P_)str);
+ }
+ return (bd->flags & BF_NONMOVING_SWEEPING) == 0
+ // the large object wasn't in the snapshot and therefore wasn't marked
+ || (bd->flags & BF_MARKED) != 0;
+ // The object was marked
+ } else {
+ struct NonmovingSegment *seg = nonmovingGetSegment((StgPtr) p);
+ nonmoving_block_idx i = nonmovingGetBlockIdx((StgPtr) p);
+ uint8_t mark = nonmovingGetMark(seg, i);
+ if (i >= nonmovingSegmentInfo(seg)->next_free_snap) {
+ // If the object is allocated after next_free_snap then one of the
+ // following must be true:
+ //
+ // * if its mark is 0 then the block was not allocated last time
+ // the segment was swept; however, it may have been allocated since
+ // then and therefore we must conclude that the block is alive.
+ //
+ // * if its mark is equal to nonmovingMarkEpoch then we found that
+ // the object was alive in the snapshot of the current GC (recall
+ // that this function may only be used after a mark).
+ // Consequently we must conclude that the object is still alive.
+ //
+ // * if its mark is not equal to nonmovingMarkEpoch then we found
+ // that the object was not reachable in the last snapshot.
+ // Assuming that the mark is complete we can conclude that the
+ // object is dead since the snapshot invariant guarantees that
+ // all objects alive in the snapshot would be marked.
+ //
+ return mark == nonmovingMarkEpoch || mark == 0;
+ } else {
+ // If the object is below next_free_snap then the snapshot
+ // invariant guarantees that it is marked if reachable.
+ return mark == nonmovingMarkEpoch;
+ }
+ }
+}
+
+// Check whether a snapshotted object is alive. That is, for an object that we
+// know to be in the snapshot: is its mark bit set? It is imperative that the
+// object is in the snapshot (e.g. was in the nonmoving heap at the time that
+// the snapshot was taken) since we assume that its mark bit reflects its
+// reachability.
+//
+// This is used when
+//
+// - Collecting weak pointers; checking key of a weak pointer.
+// - Resurrecting threads; checking if a thread is dead.
+// - Sweeping object lists: large_objects, mut_list, stable_name_table.
+//
+static bool nonmovingIsNowAlive (StgClosure *p)
+{
+ // Ignore static closures. See comments in `isAlive`.
+ if (!HEAP_ALLOCED_GC(p)) {
+ return true;
+ }
+
+ bdescr *bd = Bdescr((P_)p);
+
+ // All non-static objects in the non-moving heap should be marked as
+ // BF_NONMOVING
+ ASSERT(bd->flags & BF_NONMOVING);
+
+ if (bd->flags & BF_LARGE) {
+ return (bd->flags & BF_NONMOVING_SWEEPING) == 0
+ // the large object wasn't in the snapshot and therefore wasn't marked
+ || (bd->flags & BF_MARKED) != 0;
+ // The object was marked
+ } else {
+ return nonmovingClosureMarkedThisCycle((P_)p);
+ }
+}
+
+// Non-moving heap variant of `tidyWeakList`
+bool nonmovingTidyWeaks (struct MarkQueue_ *queue)
+{
+ bool did_work = false;
+
+ StgWeak **last_w = &nonmoving_old_weak_ptr_list;
+ StgWeak *next_w;
+ for (StgWeak *w = nonmoving_old_weak_ptr_list; w != NULL; w = next_w) {
+ if (w->header.info == &stg_DEAD_WEAK_info) {
+ // finalizeWeak# was called on the weak
+ next_w = w->link;
+ *last_w = next_w;
+ continue;
+ }
+
+ // Otherwise it's a live weak
+ ASSERT(w->header.info == &stg_WEAK_info);
+
+ if (nonmovingIsNowAlive(w->key)) {
+ nonmovingMarkLiveWeak(queue, w);
+ did_work = true;
+
+ // remove this weak ptr from old_weak_ptr list
+ *last_w = w->link;
+ next_w = w->link;
+
+ // and put it on the weak ptr list
+ w->link = nonmoving_weak_ptr_list;
+ nonmoving_weak_ptr_list = w;
+ } else {
+ last_w = &(w->link);
+ next_w = w->link;
+ }
+ }
+
+ return did_work;
+}
+
+void nonmovingMarkDeadWeak (struct MarkQueue_ *queue, StgWeak *w)
+{
+ if (w->cfinalizers != &stg_NO_FINALIZER_closure) {
+ markQueuePushClosure_(queue, w->value);
+ }
+ markQueuePushClosure_(queue, w->finalizer);
+}
+
+void nonmovingMarkLiveWeak (struct MarkQueue_ *queue, StgWeak *w)
+{
+ ASSERT(nonmovingClosureMarkedThisCycle((P_)w));
+ markQueuePushClosure_(queue, w->value);
+ markQueuePushClosure_(queue, w->finalizer);
+ markQueuePushClosure_(queue, w->cfinalizers);
+}
+
+// When we're done with marking, any weak pointers with non-marked keys will be
+// considered "dead". We mark values and finalizers of such weaks, and then
+// schedule them for finalization in `scheduleFinalizers` (which we run during
+// synchronization).
+void nonmovingMarkDeadWeaks (struct MarkQueue_ *queue, StgWeak **dead_weaks)
+{
+ StgWeak *next_w;
+ for (StgWeak *w = nonmoving_old_weak_ptr_list; w; w = next_w) {
+ ASSERT(!nonmovingClosureMarkedThisCycle((P_)(w->key)));
+ nonmovingMarkDeadWeak(queue, w);
+ next_w = w ->link;
+ w->link = *dead_weaks;
+ *dead_weaks = w;
+ }
+}
+
+// Non-moving heap variant of `tidyThreadList`
+void nonmovingTidyThreads ()
+{
+ StgTSO *next;
+ StgTSO **prev = &nonmoving_old_threads;
+ for (StgTSO *t = nonmoving_old_threads; t != END_TSO_QUEUE; t = next) {
+
+ next = t->global_link;
+
+ // N.B. This thread is in old_threads, consequently we *know* it is in
+ // the snapshot and it is therefore safe to rely on the bitmap to
+ // determine its reachability.
+ if (nonmovingIsNowAlive((StgClosure*)t)) {
+ // alive
+ *prev = next;
+
+ // move this thread onto threads list
+ t->global_link = nonmoving_threads;
+ nonmoving_threads = t;
+ } else {
+ // not alive (yet): leave this thread on the old_threads list
+ prev = &(t->global_link);
+ }
+ }
+}
+
+void nonmovingResurrectThreads (struct MarkQueue_ *queue, StgTSO **resurrected_threads)
+{
+ StgTSO *next;
+ for (StgTSO *t = nonmoving_old_threads; t != END_TSO_QUEUE; t = next) {
+ next = t->global_link;
+
+ switch (t->what_next) {
+ case ThreadKilled:
+ case ThreadComplete:
+ continue;
+ default:
+ markQueuePushClosure_(queue, (StgClosure*)t);
+ t->global_link = *resurrected_threads;
+ *resurrected_threads = t;
+ }
+ }
+}
+
+#if defined(DEBUG)
+
+void printMarkQueueEntry (MarkQueueEnt *ent)
+{
+ switch(nonmovingMarkQueueEntryType(ent)) {
+ case MARK_CLOSURE:
+ debugBelch("Closure: ");
+ printClosure(ent->mark_closure.p);
+ break;
+ case MARK_ARRAY:
+ debugBelch("Array\n");
+ break;
+ case NULL_ENTRY:
+ debugBelch("End of mark\n");
+ break;
+ }
+}
+
+void printMarkQueue (MarkQueue *q)
+{
+ debugBelch("======== MARK QUEUE ========\n");
+ for (bdescr *block = q->blocks; block; block = block->link) {
+ MarkQueueBlock *queue = (MarkQueueBlock*)block->start;
+ for (uint32_t i = 0; i < queue->head; ++i) {
+ printMarkQueueEntry(&queue->entries[i]);
+ }
+ }
+ debugBelch("===== END OF MARK QUEUE ====\n");
+}
+
+#endif
diff --git a/rts/sm/NonMovingMark.h b/rts/sm/NonMovingMark.h
new file mode 100644
index 0000000000..fe150f47cb
--- /dev/null
+++ b/rts/sm/NonMovingMark.h
@@ -0,0 +1,205 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2018
+ *
+ * Non-moving garbage collector and allocator: Mark phase
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+#include "Hash.h"
+#include "Task.h"
+#include "NonMoving.h"
+
+#include "BeginPrivate.h"
+
+#include "Hash.h"
+
+enum EntryType {
+ NULL_ENTRY = 0,
+ MARK_CLOSURE,
+ MARK_ARRAY
+};
+
+/* Note [Origin references in the nonmoving collector]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * To implement indirection short-cutting and the selector optimisation the
+ * collector needs to know where it found references, so it can update the
+ * reference if it later turns out that it points to an indirection. For this
+ * reason, each mark queue entry contains two things:
+ *
+ * - a pointer to the object to be marked (p), and
+ *
+ * - a pointer to the field where we found the reference (origin)
+ *
+ * Note that the origin pointer is an interior pointer: it points not to a
+ * valid closure (with info table pointer) but rather to a field inside a closure.
+ * Since such references can't be safely scavenged we establish the invariant
+ * that the origin pointer may only point to a field of an object living in the
+ * nonmoving heap, where no scavenging is needed.
+ *
+ */
+
+typedef struct {
+ // Which kind of mark queue entry we have is determined by the low bits of
+ // the second word: they must be zero in the case of a mark_closure entry
+ // (since the second word of a mark_closure entry points to a pointer and
+ // pointers must be word-aligned). In the case of a mark_array we set them
+ // to 0x3 (the value of start_index is shifted to the left to accommodate
+ // this). null_entry where p==NULL is used to indicate the end of the queue.
+ union {
+ struct {
+ void *p; // must be NULL
+ } null_entry;
+ struct {
+ StgClosure *p; // the object to be marked
+ StgClosure **origin; // field where this reference was found.
+ // See Note [Origin references in the nonmoving collector]
+ } mark_closure;
+ struct {
+ const StgMutArrPtrs *array;
+ StgWord start_index; // start index is shifted to the left by 16 bits
+ } mark_array;
+ };
+} MarkQueueEnt;
+
+INLINE_HEADER enum EntryType nonmovingMarkQueueEntryType(MarkQueueEnt *ent)
+{
+ if (ent->null_entry.p == NULL) {
+ return NULL_ENTRY;
+ } else if (((uintptr_t) ent->mark_closure.origin & TAG_BITS) == 0) {
+ return MARK_CLOSURE;
+ } else {
+ ASSERT((ent->mark_array.start_index & TAG_BITS) == 0x3);
+ return MARK_ARRAY;
+ }
+}
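+
+/* For illustration only (a sketch, not code used elsewhere): a MARK_ARRAY
+ * entry for start index i is encoded and decoded roughly as
+ *
+ *     ent.mark_array.start_index = (i << 16) | 0x3;   // tag the low bits
+ *     StgWord i_again = ent.mark_array.start_index >> 16;
+ *
+ * mirroring push_array and the MARK_ARRAY case of the mark loop in
+ * NonMovingMark.c.
+ */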
+
+typedef struct {
+ // index of first *unused* queue entry
+ uint32_t head;
+
+ MarkQueueEnt entries[];
+} MarkQueueBlock;
+
+// How far ahead in mark queue to prefetch?
+#define MARK_PREFETCH_QUEUE_DEPTH 5
+
+/* The mark queue is not capable of concurrent read or write.
+ *
+ * invariants:
+ *
+ * a. top == blocks->start;
+ * b. there is always a valid MarkQueueBlock, although it may be empty
+ * (e.g. top->head == 0).
+ */
+typedef struct MarkQueue_ {
+ // A singly linked list of blocks, each containing a MarkQueueBlock.
+ bdescr *blocks;
+
+ // Cached value of blocks->start.
+ MarkQueueBlock *top;
+
+ // Is this a mark queue or a capability-local update remembered set?
+ bool is_upd_rem_set;
+
+#if MARK_PREFETCH_QUEUE_DEPTH > 0
+ // A ring-buffer of entries which we will mark next
+ MarkQueueEnt prefetch_queue[MARK_PREFETCH_QUEUE_DEPTH];
+ // The first free slot in prefetch_queue.
+ uint8_t prefetch_head;
+#endif
+} MarkQueue;
+
+/* While it shares its representation with MarkQueue, UpdRemSet differs in
+ * behavior when pushing; namely full chunks are immediately pushed to the
+ * global update remembered set, not accumulated into a chain. We make this
+ * distinction apparent in the types.
+ */
+typedef struct {
+ MarkQueue queue;
+} UpdRemSet;
+
+// Number of blocks to allocate for a mark queue
+#define MARK_QUEUE_BLOCKS 16
+
+// The length of MarkQueueBlock.entries
+#define MARK_QUEUE_BLOCK_ENTRIES ((MARK_QUEUE_BLOCKS * BLOCK_SIZE - sizeof(MarkQueueBlock)) / sizeof(MarkQueueEnt))
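+// For example, assuming the usual 4096-byte blocks on a 64-bit machine
+// (16-byte MarkQueueEnt, 8-byte MarkQueueBlock header), this works out to
+// (16 * 4096 - 8) / 16 = 4095 entries per 16-block group (integer division).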
+
+extern bdescr *nonmoving_large_objects, *nonmoving_marked_large_objects,
+ *nonmoving_compact_objects, *nonmoving_marked_compact_objects;
+extern memcount n_nonmoving_large_blocks, n_nonmoving_marked_large_blocks,
+ n_nonmoving_compact_blocks, n_nonmoving_marked_compact_blocks;
+
+extern StgTSO *nonmoving_old_threads;
+extern StgWeak *nonmoving_old_weak_ptr_list;
+extern StgTSO *nonmoving_threads;
+extern StgWeak *nonmoving_weak_ptr_list;
+
+#if defined(DEBUG)
+extern StgIndStatic *debug_caf_list_snapshot;
+#endif
+
+extern MarkQueue *current_mark_queue;
+extern bdescr *upd_rem_set_block_list;
+
+
+void nonmovingMarkInitUpdRemSet(void);
+
+void init_upd_rem_set(UpdRemSet *rset);
+void reset_upd_rem_set(UpdRemSet *rset);
+void updateRemembSetPushClosure(Capability *cap, StgClosure *p);
+void updateRemembSetPushThunk(Capability *cap, StgThunk *p);
+void updateRemembSetPushTSO(Capability *cap, StgTSO *tso);
+void updateRemembSetPushStack(Capability *cap, StgStack *stack);
+
+#if defined(THREADED_RTS)
+void nonmovingFlushCapUpdRemSetBlocks(Capability *cap);
+void nonmovingBeginFlush(Task *task);
+bool nonmovingWaitForFlush(void);
+void nonmovingFinishFlush(Task *task);
+#endif
+
+void markQueueAddRoot(MarkQueue* q, StgClosure** root);
+
+void initMarkQueue(MarkQueue *queue);
+void freeMarkQueue(MarkQueue *queue);
+void nonmovingMark(struct MarkQueue_ *restrict queue);
+
+bool nonmovingTidyWeaks(struct MarkQueue_ *queue);
+void nonmovingTidyThreads(void);
+void nonmovingMarkDeadWeaks(struct MarkQueue_ *queue, StgWeak **dead_weak_ptr_list);
+void nonmovingResurrectThreads(struct MarkQueue_ *queue, StgTSO **resurrected_threads);
+bool nonmovingIsAlive(StgClosure *p);
+void nonmovingMarkDeadWeak(struct MarkQueue_ *queue, StgWeak *w);
+void nonmovingMarkLiveWeak(struct MarkQueue_ *queue, StgWeak *w);
+void nonmovingAddUpdRemSetBlocks(struct MarkQueue_ *rset);
+
+void markQueuePush(MarkQueue *q, const MarkQueueEnt *ent);
+void markQueuePushClosureGC(MarkQueue *q, StgClosure *p);
+void markQueuePushClosure(MarkQueue *q,
+ StgClosure *p,
+ StgClosure **origin);
+void markQueuePushClosure_(MarkQueue *q, StgClosure *p);
+void markQueuePushThunkSrt(MarkQueue *q, const StgInfoTable *info);
+void markQueuePushFunSrt(MarkQueue *q, const StgInfoTable *info);
+void markQueuePushArray(MarkQueue *q, const StgMutArrPtrs *array, StgWord start_index);
+void updateRemembSetPushThunkEager(Capability *cap,
+ const StgThunkInfoTable *orig_info,
+ StgThunk *thunk);
+
+INLINE_HEADER bool markQueueIsEmpty(MarkQueue *q)
+{
+ return (q->blocks == NULL) || (q->top->head == 0 && q->blocks->link == NULL);
+}
+
+#if defined(DEBUG)
+
+void printMarkQueueEntry(MarkQueueEnt *ent);
+void printMarkQueue(MarkQueue *q);
+
+#endif
+
+#include "EndPrivate.h"
diff --git a/rts/sm/NonMovingScav.c b/rts/sm/NonMovingScav.c
new file mode 100644
index 0000000000..9583c7baf9
--- /dev/null
+++ b/rts/sm/NonMovingScav.c
@@ -0,0 +1,389 @@
+#include "Rts.h"
+#include "RtsUtils.h"
+#include "NonMoving.h"
+#include "NonMovingScav.h"
+#include "Capability.h"
+#include "Scav.h"
+#include "Evac.h"
+#include "GCThread.h" // for GCUtils.h
+#include "GCUtils.h"
+#include "Printer.h"
+#include "MarkWeak.h" // scavengeLiveWeak
+
+void
+nonmovingScavengeOne (StgClosure *q)
+{
+ ASSERT(LOOKS_LIKE_CLOSURE_PTR(q));
+ StgPtr p = (StgPtr)q;
+ const StgInfoTable *info = get_itbl(q);
+ const bool saved_eager_promotion = gct->eager_promotion;
+
+ switch (info->type) {
+
+ case MVAR_CLEAN:
+ case MVAR_DIRTY:
+ {
+ StgMVar *mvar = ((StgMVar *)p);
+ gct->eager_promotion = false;
+ evacuate((StgClosure **)&mvar->head);
+ evacuate((StgClosure **)&mvar->tail);
+ evacuate((StgClosure **)&mvar->value);
+ gct->eager_promotion = saved_eager_promotion;
+ if (gct->failed_to_evac) {
+ mvar->header.info = &stg_MVAR_DIRTY_info;
+ } else {
+ mvar->header.info = &stg_MVAR_CLEAN_info;
+ }
+ break;
+ }
+
+ case TVAR:
+ {
+ StgTVar *tvar = ((StgTVar *)p);
+ gct->eager_promotion = false;
+ evacuate((StgClosure **)&tvar->current_value);
+ evacuate((StgClosure **)&tvar->first_watch_queue_entry);
+ gct->eager_promotion = saved_eager_promotion;
+ if (gct->failed_to_evac) {
+ tvar->header.info = &stg_TVAR_DIRTY_info;
+ } else {
+ tvar->header.info = &stg_TVAR_CLEAN_info;
+ }
+ break;
+ }
+
+ case FUN_2_0:
+ scavenge_fun_srt(info);
+ evacuate(&((StgClosure *)p)->payload[1]);
+ evacuate(&((StgClosure *)p)->payload[0]);
+ break;
+
+ case THUNK_2_0:
+ scavenge_thunk_srt(info);
+ evacuate(&((StgThunk *)p)->payload[1]);
+ evacuate(&((StgThunk *)p)->payload[0]);
+ break;
+
+ case CONSTR_2_0:
+ evacuate(&((StgClosure *)p)->payload[1]);
+ evacuate(&((StgClosure *)p)->payload[0]);
+ break;
+
+ case THUNK_1_0:
+ scavenge_thunk_srt(info);
+ evacuate(&((StgThunk *)p)->payload[0]);
+ break;
+
+ case FUN_1_0:
+ scavenge_fun_srt(info);
+ FALLTHROUGH;
+ case CONSTR_1_0:
+ evacuate(&((StgClosure *)p)->payload[0]);
+ break;
+
+ case THUNK_0_1:
+ scavenge_thunk_srt(info);
+ break;
+
+ case FUN_0_1:
+ scavenge_fun_srt(info);
+ FALLTHROUGH;
+ case CONSTR_0_1:
+ break;
+
+ case THUNK_0_2:
+ scavenge_thunk_srt(info);
+ break;
+
+ case FUN_0_2:
+ scavenge_fun_srt(info);
+ FALLTHROUGH;
+ case CONSTR_0_2:
+ break;
+
+ case THUNK_1_1:
+ scavenge_thunk_srt(info);
+ evacuate(&((StgThunk *)p)->payload[0]);
+ break;
+
+ case FUN_1_1:
+ scavenge_fun_srt(info);
+ FALLTHROUGH;
+ case CONSTR_1_1:
+ evacuate(&q->payload[0]);
+ break;
+
+ case FUN:
+ scavenge_fun_srt(info);
+ goto gen_obj;
+
+ case THUNK:
+ {
+ scavenge_thunk_srt(info);
+ StgPtr end = (P_)((StgThunk *)p)->payload + info->layout.payload.ptrs;
+ for (p = (P_)((StgThunk *)p)->payload; p < end; p++) {
+ evacuate((StgClosure **)p);
+ }
+ break;
+ }
+
+ case WEAK:
+ {
+ // We must evacuate the key since it may refer to an object in the
+ // moving heap which may be long gone by the time we call
+ // nonmovingTidyWeaks.
+ StgWeak *weak = (StgWeak *) p;
+ gct->eager_promotion = true;
+ evacuate(&weak->key);
+ gct->eager_promotion = saved_eager_promotion;
+ goto gen_obj;
+ }
+
+ gen_obj:
+ case CONSTR:
+ case CONSTR_NOCAF:
+ case PRIM:
+ {
+ StgPtr end = (P_)((StgClosure *)p)->payload + info->layout.payload.ptrs;
+ for (p = (P_)((StgClosure *)p)->payload; p < end; p++) {
+ evacuate((StgClosure **)p);
+ }
+ break;
+ }
+
+ case BCO: {
+ StgBCO *bco = (StgBCO *)p;
+ evacuate((StgClosure **)&bco->instrs);
+ evacuate((StgClosure **)&bco->literals);
+ evacuate((StgClosure **)&bco->ptrs);
+ break;
+ }
+
+ case MUT_VAR_CLEAN:
+ case MUT_VAR_DIRTY:
+ gct->eager_promotion = false;
+ evacuate(&((StgMutVar *)p)->var);
+ gct->eager_promotion = saved_eager_promotion;
+ if (gct->failed_to_evac) {
+ ((StgClosure *)q)->header.info = &stg_MUT_VAR_DIRTY_info;
+ } else {
+ ((StgClosure *)q)->header.info = &stg_MUT_VAR_CLEAN_info;
+ }
+ break;
+
+ case BLOCKING_QUEUE:
+ {
+ StgBlockingQueue *bq = (StgBlockingQueue *)p;
+
+ gct->eager_promotion = false;
+ evacuate(&bq->bh);
+ evacuate((StgClosure**)&bq->owner);
+ evacuate((StgClosure**)&bq->queue);
+ evacuate((StgClosure**)&bq->link);
+ gct->eager_promotion = saved_eager_promotion;
+
+ if (gct->failed_to_evac) {
+ bq->header.info = &stg_BLOCKING_QUEUE_DIRTY_info;
+ } else {
+ bq->header.info = &stg_BLOCKING_QUEUE_CLEAN_info;
+ }
+ break;
+ }
+
+ case THUNK_SELECTOR:
+ {
+ StgSelector *s = (StgSelector *)p;
+ evacuate(&s->selectee);
+ break;
+ }
+
+ // A chunk of stack saved in a heap object
+ case AP_STACK:
+ {
+ StgAP_STACK *ap = (StgAP_STACK *)p;
+
+ evacuate(&ap->fun);
+ scavenge_stack((StgPtr)ap->payload, (StgPtr)ap->payload + ap->size);
+ break;
+ }
+
+ case PAP:
+ p = scavenge_PAP((StgPAP *)p);
+ break;
+
+ case AP:
+ scavenge_AP((StgAP *)p);
+ break;
+
+ case ARR_WORDS:
+ // nothing to follow
+ break;
+
+ case MUT_ARR_PTRS_CLEAN:
+ case MUT_ARR_PTRS_DIRTY:
+ {
+ gct->eager_promotion = false;
+ scavenge_mut_arr_ptrs((StgMutArrPtrs*)p);
+ gct->eager_promotion = saved_eager_promotion;
+ if (gct->failed_to_evac) {
+ ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_DIRTY_info;
+ } else {
+ ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_CLEAN_info;
+ }
+ gct->failed_to_evac = true; // always put it on the mutable list.
+ break;
+ }
+
+ case MUT_ARR_PTRS_FROZEN_CLEAN:
+ case MUT_ARR_PTRS_FROZEN_DIRTY:
+ // follow everything
+ {
+ scavenge_mut_arr_ptrs((StgMutArrPtrs*)p);
+
+ if (gct->failed_to_evac) {
+ ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN_DIRTY_info;
+ } else {
+ ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN_CLEAN_info;
+ }
+ break;
+ }
+
+ case SMALL_MUT_ARR_PTRS_CLEAN:
+ case SMALL_MUT_ARR_PTRS_DIRTY:
+ // follow everything
+ {
+ StgPtr next = p + small_mut_arr_ptrs_sizeW((StgSmallMutArrPtrs*)p);
+ gct->eager_promotion = false;
+ for (p = (P_)((StgSmallMutArrPtrs *)p)->payload; p < next; p++) {
+ evacuate((StgClosure **)p);
+ }
+ gct->eager_promotion = saved_eager_promotion;
+
+ if (gct->failed_to_evac) {
+ ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_DIRTY_info;
+ } else {
+ ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_CLEAN_info;
+ }
+ gct->failed_to_evac = true; // always put it on the mutable list.
+ break;
+ }
+
+ case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
+ case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
+ // follow everything
+ {
+ StgPtr next = p + small_mut_arr_ptrs_sizeW((StgSmallMutArrPtrs*)p);
+ for (p = (P_)((StgSmallMutArrPtrs *)p)->payload; p < next; p++) {
+ evacuate((StgClosure **)p);
+ }
+
+ if (gct->failed_to_evac) {
+ ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_FROZEN_DIRTY_info;
+ } else {
+ ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_FROZEN_CLEAN_info;
+ }
+ break;
+ }
+
+ case TSO:
+ {
+ scavengeTSO((StgTSO *)p);
+ break;
+ }
+
+ case STACK:
+ {
+ StgStack *stack = (StgStack*)p;
+
+ gct->eager_promotion = false;
+ scavenge_stack(stack->sp, stack->stack + stack->stack_size);
+ gct->eager_promotion = saved_eager_promotion;
+ stack->dirty = gct->failed_to_evac;
+ break;
+ }
+
+ case MUT_PRIM:
+ {
+ StgPtr end = (P_)((StgClosure *)p)->payload + info->layout.payload.ptrs;
+ gct->eager_promotion = false;
+ for (p = (P_)((StgClosure *)p)->payload; p < end; p++) {
+ evacuate((StgClosure **)p);
+ }
+ gct->eager_promotion = saved_eager_promotion;
+ gct->failed_to_evac = true; // mutable
+ break;
+ }
+
+ case TREC_CHUNK:
+ {
+ StgWord i;
+ StgTRecChunk *tc = ((StgTRecChunk *) p);
+ TRecEntry *e = &(tc -> entries[0]);
+ gct->eager_promotion = false;
+ evacuate((StgClosure **)&tc->prev_chunk);
+ for (i = 0; i < tc -> next_entry_idx; i ++, e++ ) {
+ evacuate((StgClosure **)&e->tvar);
+ evacuate((StgClosure **)&e->expected_value);
+ evacuate((StgClosure **)&e->new_value);
+ }
+ gct->eager_promotion = saved_eager_promotion;
+ gct->failed_to_evac = true; // mutable
+ break;
+ }
+
+ case IND:
+ case BLACKHOLE:
+ case IND_STATIC:
+ evacuate(&((StgInd *)p)->indirectee);
+ break;
+
+ case COMPACT_NFDATA:
+ scavenge_compact((StgCompactNFData*)p);
+ break;
+
+ default:
+ barf("nonmoving scavenge: unimplemented/strange closure type %d @ %p",
+ info->type, p);
+ }
+
+ if (gct->failed_to_evac) {
+ // Mutable object or points to a younger object, add to the mut_list
+ gct->failed_to_evac = false;
+ if (oldest_gen->no > 0) {
+ recordMutableGen_GC(q, oldest_gen->no);
+ }
+ }
+}
+
+/* Scavenge objects evacuated into a nonmoving segment by a minor GC */
+void
+scavengeNonmovingSegment (struct NonmovingSegment *seg)
+{
+ const StgWord blk_size = nonmovingSegmentBlockSize(seg);
+ gct->evac_gen_no = oldest_gen->no;
+ gct->failed_to_evac = false;
+
+ // scavenge objects between scan and free_ptr whose bitmap bits are 0
+ bdescr *seg_block = Bdescr((P_)seg);
+
+ ASSERT(seg_block->u.scan >= (P_)nonmovingSegmentGetBlock(seg, 0));
+ ASSERT(seg_block->u.scan <= (P_)nonmovingSegmentGetBlock(seg, seg->next_free));
+
+ StgPtr scan_end = (P_)nonmovingSegmentGetBlock(seg, seg->next_free);
+ if (seg_block->u.scan == scan_end)
+ return;
+
+ nonmoving_block_idx p_idx = nonmovingGetBlockIdx(seg_block->u.scan);
+ while (seg_block->u.scan < scan_end) {
+ StgClosure *p = (StgClosure*)seg_block->u.scan;
+
+ // bit set = was allocated in a previous GC, no need to scavenge
+ // bit not set = new allocation, so scavenge
+ if (nonmovingGetMark(seg, p_idx) == 0) {
+ nonmovingScavengeOne(p);
+ }
+
+ p_idx++;
+ seg_block->u.scan = (P_)(((uint8_t*)seg_block->u.scan) + blk_size);
+ }
+}
diff --git a/rts/sm/NonMovingScav.h b/rts/sm/NonMovingScav.h
new file mode 100644
index 0000000000..021385e1e9
--- /dev/null
+++ b/rts/sm/NonMovingScav.h
@@ -0,0 +1,10 @@
+#pragma once
+
+#include "NonMoving.h"
+
+#include "BeginPrivate.h"
+
+void nonmovingScavengeOne(StgClosure *p);
+void scavengeNonmovingSegment(struct NonmovingSegment *seg);
+
+#include "EndPrivate.h"
diff --git a/rts/sm/NonMovingShortcut.c b/rts/sm/NonMovingShortcut.c
new file mode 100644
index 0000000000..83c4857677
--- /dev/null
+++ b/rts/sm/NonMovingShortcut.c
@@ -0,0 +1,326 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2019
+ *
+ * Non-moving garbage collector and allocator:
+ * Indirection short-cutting and the selector optimisation
+ *
+ * ---------------------------------------------------------------------------*/
+
+#include "Rts.h"
+#include "GC.h"
+#include "SMPClosureOps.h"
+#include "NonMovingMark.h"
+#include "NonMovingShortcut.h"
+#include "Printer.h"
+
+#define MAX_THUNK_SELECTOR_DEPTH 16
+
+//#define SELECTOR_OPT_DEBUG
+
+#if defined(SELECTOR_OPT_DEBUG)
+static void
+print_selector_chain(StgClosure *p)
+{
+ debugBelch("Selector chain: %p", (void*)p);
+ StgClosure *next = p->payload[0];
+ while (next != NULL) {
+ debugBelch(", %p", next);
+ next = next->payload[0];
+ }
+ debugBelch("\n");
+}
+#endif
+
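+// Overwrite every selector in `chain` with an indirection to `val`, unlocking
+// each closure as it is updated, and finally (if `origin` is given) try to CAS
+// the original reference from `p0` to `val`.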
+static void
+update_selector_chain(
+ StgClosure *chain,
+ StgClosure **origin,
+ StgSelector * const p0,
+ StgClosure * const val
+) {
+ ASSERT(val != NULL);
+
+ // Make sure we don't introduce non-moving-to-moving pointers here.
+ ASSERT(isNonmovingClosure(val));
+
+    // We can't handle this case because we don't know the closure's info
+    // pointer until after we have locked it.
+ ASSERT(chain != val);
+
+#if defined(SELECTOR_OPT_DEBUG)
+ if (chain != NULL) {
+ print_selector_chain(chain);
+ debugBelch("Value: ");
+ printClosure(val);
+ }
+#endif
+
+ while (chain) {
+ // debugBelch("chain: %p\n", (void*)chain);
+
+ StgClosure *next = chain->payload[0];
+
+ // We only update closures in the non-moving heap
+ ASSERT(isNonmovingClosure(chain));
+
+ ((StgInd*)chain)->indirectee = val;
+ unlockClosure((StgClosure*)chain, &stg_IND_info);
+
+ chain = next;
+ }
+
+ if (origin != NULL && (StgClosure*)p0 != val) {
+ cas((StgVolatilePtr)origin, (StgWord)p0, (StgWord)val);
+ }
+}
+
+// Returns the value of the selector thunk. The value is a non-moving closure.
+// If it's not possible to evaluate the selector thunk, the return value will be
+// the selector itself.
+static StgClosure*
+nonmoving_eval_thunk_selector_(
+ MarkQueue *queue,
+ StgSelector * const p0,
+ StgClosure ** const origin,
+ int depth
+) {
+ // This function should only be called on non-moving objects.
+ ASSERT(HEAP_ALLOCED_GC((P_)p0) && isNonmovingClosure((StgClosure*)p0));
+
+ markQueuePushClosure(queue, (StgClosure*)p0, NULL);
+
+ // INVARIANT: A non-moving object. Locked (below).
+ StgClosure *p = (StgClosure*)p0;
+
+    // Chain of non-moving selectors to update. These will be overwritten with
+    // INDs to the value once we reach one. INVARIANT: All objects in the chain
+    // are locked, and in the non-moving heap.
+ StgClosure *chain = NULL;
+
+ // Variables to update: p.
+selector_changed:
+ ;
+
+ // debugBelch("Selector changed: %p\n", (void*)p);
+
+ // Lock the selector to avoid concurrent modification in mutators
+ const StgInfoTable *selector_info_ptr = lockClosure((StgClosure*)p);
+ StgInfoTable *selector_info_tbl = INFO_PTR_TO_STRUCT(selector_info_ptr);
+
+ if (selector_info_tbl->type != THUNK_SELECTOR) {
+        // Selector updated in the meantime, or we reached a value. Update
+        // the chain.
+ unlockClosure(p, selector_info_ptr);
+ update_selector_chain(chain, origin, p0, p);
+ return p;
+ }
+
+    // The closure is locked and it's a selector thunk. If the selectee is a
+    // CONSTR we do the selection here, and the selected value will be the
+    // value of this selector thunk.
+ //
+ // Two cases:
+ //
+ // - If the selected value is also a selector thunk, then we loop and
+ // evaluate it. The final value will be the value of both the current
+ // selector and the selected value (which is also a selector thunk).
+ //
+ // - If the selectee is a selector thunk, we recursively evaluate it (up to
+ // a certain depth, specified with MAX_THUNK_SELECTOR_DEPTH), then do the
+ // selection on the value of it.
+
+ //
+ // Do the selection
+ //
+
+ uint32_t field = selector_info_tbl->layout.selector_offset;
+ StgClosure *selectee = UNTAG_CLOSURE(((StgSelector*)p)->selectee);
+
+ // Variables to update: selectee
+selectee_changed:
+ // debugBelch("Selectee changed: %p\n", (void*)selectee);
+
+ if (!isNonmovingClosure(selectee)) {
+ // The selectee is a moving object, and it may be moved by a concurrent
+ // minor GC while we read the info table and fields, so don't try to
+ // read the fields, just update the chain.
+ unlockClosure(p, selector_info_ptr);
+ update_selector_chain(chain, origin, p0, p);
+ return p;
+ }
+
+ // Selectee is a non-moving object, mark it.
+ markQueuePushClosure(queue, selectee, NULL);
+
+ const StgInfoTable *selectee_info_tbl = get_itbl(selectee);
+ switch (selectee_info_tbl->type) {
+ case WHITEHOLE: {
+ // Probably a loop. Abort.
+ unlockClosure(p, selector_info_ptr);
+ update_selector_chain(chain, origin, p0, p);
+ return p;
+ }
+
+ case CONSTR:
+ case CONSTR_1_0:
+ case CONSTR_0_1:
+ case CONSTR_2_0:
+ case CONSTR_1_1:
+ case CONSTR_0_2:
+ case CONSTR_NOCAF: {
+ // Selectee is a constructor in the non-moving heap.
+ // Select the field.
+
+ // Check that the size is in range.
+ ASSERT(field < (StgWord32)(selectee_info_tbl->layout.payload.ptrs +
+ selectee_info_tbl->layout.payload.nptrs));
+
+ StgClosure *val = UNTAG_CLOSURE(selectee->payload[field]);
+
+ // `val` is the value of this selector thunk. We need to check a
+ // few cases:
+ //
+ // - If `val` is in the moving heap, we stop here and update the
+ // chain. All updated objects should be added to the mut_list.
+ // (TODO (osa): What happens if the value is evacuated as we do
+ // this?)
+ //
+ // - If `val` is in the non-moving heap, we check if it's also a
+ // selector. If it is we add it to the chain and loop.
+
+ // Follow indirections. Variables to update: `val`.
+ val_changed:
+ if (!isNonmovingClosure(val)) {
+ // The selected value is a moving object, so we won't be
+ // updating the chain to this object.
+ unlockClosure(p, selector_info_ptr);
+ update_selector_chain(chain, origin, p0, p);
+ return p;
+ }
+
+ switch (get_itbl(val)->type) {
+ case IND:
+ case IND_STATIC:
+ ;
+ // Follow the indirection
+ StgClosure *indirectee = UNTAG_CLOSURE(((StgInd*)val)->indirectee);
+ if (isNonmovingClosure(indirectee)) {
+ val = UNTAG_CLOSURE(((StgInd*)val)->indirectee);
+ goto val_changed;
+ } else {
+ unlockClosure(p, selector_info_ptr);
+ update_selector_chain(chain, origin, p0, p);
+ return p;
+ }
+ case THUNK_SELECTOR:
+ // Value of the selector thunk is again a selector thunk in the
+ // non-moving heap. Add the current selector to the chain and
+ // loop.
+ p->payload[0] = chain;
+ chain = p;
+ p = val;
+ goto selector_changed;
+ default:
+ // Found a value, add the current selector to the chain and
+ // update it.
+ p->payload[0] = chain;
+ chain = p;
+ update_selector_chain(chain, origin, p0, val);
+ return val;
+ }
+ }
+
+ case IND:
+ case IND_STATIC: {
+ StgClosure *indirectee = UNTAG_CLOSURE(((StgInd *)selectee)->indirectee);
+ if (isNonmovingClosure(indirectee)) {
+ selectee = indirectee;
+ goto selectee_changed;
+ } else {
+ unlockClosure(p, selector_info_ptr);
+ update_selector_chain(chain, origin, p0, p);
+ return p;
+ }
+ }
+
+ case BLACKHOLE: {
+ StgClosure *indirectee = ((StgInd*)selectee)->indirectee;
+
+ if (!isNonmovingClosure(UNTAG_CLOSURE(indirectee))) {
+ unlockClosure(p, selector_info_ptr);
+ update_selector_chain(chain, origin, p0, p);
+ return p;
+ }
+
+ // Establish whether this BH has been updated, and is now an
+ // indirection, as in evacuate().
+ if (GET_CLOSURE_TAG(indirectee) == 0) {
+ const StgInfoTable *i = indirectee->header.info;
+ if (i == &stg_TSO_info
+ || i == &stg_WHITEHOLE_info
+ || i == &stg_BLOCKING_QUEUE_CLEAN_info
+ || i == &stg_BLOCKING_QUEUE_DIRTY_info) {
+ unlockClosure(p, selector_info_ptr);
+ update_selector_chain(chain, origin, p0, p);
+ return (StgClosure*)p;
+ }
+ ASSERT(i != &stg_IND_info); // TODO not sure about this part
+ }
+
+ // It's an indirection, follow it.
+ selectee = UNTAG_CLOSURE(indirectee);
+ goto selectee_changed;
+ }
+
+ case AP:
+ case AP_STACK:
+ case THUNK:
+ case THUNK_1_0:
+ case THUNK_0_1:
+ case THUNK_2_0:
+ case THUNK_1_1:
+ case THUNK_0_2:
+ case THUNK_STATIC: {
+ // Not evaluated yet
+ unlockClosure(p, selector_info_ptr);
+ update_selector_chain(chain, origin, p0, p);
+ return (StgClosure*)p;
+ }
+
+ case THUNK_SELECTOR: {
+ // Selectee is a selector thunk. Evaluate it if we haven't reached
+ // the recursion limit yet.
+ if (depth < MAX_THUNK_SELECTOR_DEPTH) {
+ StgClosure *new_selectee =
+ UNTAG_CLOSURE(nonmoving_eval_thunk_selector_(
+ queue, (StgSelector*)selectee, NULL, depth+1));
+ ASSERT(isNonmovingClosure(new_selectee));
+ if (selectee == new_selectee) {
+ unlockClosure(p, selector_info_ptr);
+ update_selector_chain(chain, origin, p0, p);
+ return (StgClosure*)p;
+ } else {
+ selectee = new_selectee;
+ goto selectee_changed;
+ }
+ } else {
+ // Recursion limit reached
+ unlockClosure(p, selector_info_ptr);
+ update_selector_chain(chain, origin, p0, p);
+ return (StgClosure*)p;
+ }
+ }
+
+ default: {
+ barf("nonmoving_eval_thunk_selector: strange selectee %d",
+ (int)(selectee_info_tbl->type));
+ }
+ }
+}
+
+void
+nonmoving_eval_thunk_selector(MarkQueue *queue, StgSelector *p, StgClosure **origin)
+{
+ nonmoving_eval_thunk_selector_(queue, p, origin, 0);
+}
diff --git a/rts/sm/NonMovingShortcut.h b/rts/sm/NonMovingShortcut.h
new file mode 100644
index 0000000000..72297884aa
--- /dev/null
+++ b/rts/sm/NonMovingShortcut.h
@@ -0,0 +1,17 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2019
+ *
+ * Non-moving garbage collector and allocator:
+ * Indirection short-cutting and the selector optimisation
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+#include "BeginPrivate.h"
+
+void
+nonmoving_eval_thunk_selector(MarkQueue *queue, StgSelector *p, StgClosure **origin);
+
+#include "EndPrivate.h"
diff --git a/rts/sm/NonMovingSweep.c b/rts/sm/NonMovingSweep.c
new file mode 100644
index 0000000000..cf5fcd70d7
--- /dev/null
+++ b/rts/sm/NonMovingSweep.c
@@ -0,0 +1,402 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2018
+ *
+ * Non-moving garbage collector and allocator: Sweep phase
+ *
+ * ---------------------------------------------------------------------------*/
+
+#include "Rts.h"
+#include "NonMovingSweep.h"
+#include "NonMoving.h"
+#include "NonMovingMark.h" // for nonmovingIsAlive
+#include "Capability.h"
+#include "GCThread.h" // for GCUtils.h
+#include "GCUtils.h"
+#include "Storage.h"
+#include "Trace.h"
+#include "StableName.h"
+#include "CNF.h" // compactFree
+
+// On which list should a particular segment be placed?
+enum SweepResult {
+ SEGMENT_FREE, // segment is empty: place on free list
+ SEGMENT_PARTIAL, // segment is partially filled: place on active list
+ SEGMENT_FILLED // segment is full: place on filled list
+};
+
+// Determine which list a marked segment should be placed on and initialize
+// next_free indices as appropriate.
+GNUC_ATTR_HOT static enum SweepResult
+nonmovingSweepSegment(struct NonmovingSegment *seg)
+{
+ bool found_free = false;
+ bool found_live = false;
+
+ for (nonmoving_block_idx i = 0;
+ i < nonmovingSegmentBlockCount(seg);
+ ++i)
+ {
+ if (seg->bitmap[i] == nonmovingMarkEpoch) {
+ found_live = true;
+ } else if (!found_free) {
+ found_free = true;
+ seg->next_free = i;
+ nonmovingSegmentInfo(seg)->next_free_snap = i;
+ Bdescr((P_)seg)->u.scan = (P_)nonmovingSegmentGetBlock(seg, i);
+ seg->bitmap[i] = 0;
+ } else {
+ seg->bitmap[i] = 0;
+ }
+
+ if (found_free && found_live) {
+            // zero the remaining dead objects' mark bits
+ for (; i < nonmovingSegmentBlockCount(seg); ++i) {
+ if (seg->bitmap[i] != nonmovingMarkEpoch) {
+ seg->bitmap[i] = 0;
+ }
+ }
+ return SEGMENT_PARTIAL;
+ }
+ }
+
+ if (found_live) {
+ return SEGMENT_FILLED;
+ } else {
+ ASSERT(seg->next_free == 0);
+ ASSERT(nonmovingSegmentInfo(seg)->next_free_snap == 0);
+ return SEGMENT_FREE;
+ }
+}
+
+#if defined(DEBUG)
+
+void nonmovingGcCafs()
+{
+ uint32_t i = 0;
+ StgIndStatic *next;
+
+ for (StgIndStatic *caf = debug_caf_list_snapshot;
+ caf != (StgIndStatic*) END_OF_CAF_LIST;
+ caf = next)
+ {
+ next = (StgIndStatic*)caf->saved_info;
+
+ const StgInfoTable *info = get_itbl((StgClosure*)caf);
+ ASSERT(info->type == IND_STATIC);
+
+ StgWord flag = ((StgWord) caf->static_link) & STATIC_BITS;
+ if (flag != 0 && flag != static_flag) {
+ debugTrace(DEBUG_gccafs, "CAF gc'd at 0x%p", caf);
+ SET_INFO((StgClosure*)caf, &stg_GCD_CAF_info); // stub it
+ } else {
+ // CAF is alive, move it back to the debug_caf_list
+ ++i;
+ debugTrace(DEBUG_gccafs, "CAF alive at 0x%p", caf);
+ ACQUIRE_SM_LOCK; // debug_caf_list is global, locked by sm_mutex
+ caf->saved_info = (const StgInfoTable*)debug_caf_list;
+ debug_caf_list = caf;
+ RELEASE_SM_LOCK;
+ }
+ }
+
+ debugTrace(DEBUG_gccafs, "%d CAFs live", i);
+ debug_caf_list_snapshot = (StgIndStatic*)END_OF_CAF_LIST;
+}
+
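+// Sanity-checking helpers: scrub memory that ought to be dead so that any
+// lingering references to it are caught by later heap checks.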
+static void
+clear_segment(struct NonmovingSegment* seg)
+{
+ size_t end = ((size_t)seg) + NONMOVING_SEGMENT_SIZE;
+ memset(&seg->bitmap, 0, end - (size_t)&seg->bitmap);
+}
+
+static void
+clear_segment_free_blocks(struct NonmovingSegment* seg)
+{
+ unsigned int block_size = nonmovingSegmentBlockSize(seg);
+ for (unsigned int p_idx = 0; p_idx < nonmovingSegmentBlockCount(seg); ++p_idx) {
+ // after mark, so bit not set == dead
+ if (nonmovingGetMark(seg, p_idx) == 0) {
+ memset(nonmovingSegmentGetBlock(seg, p_idx), 0, block_size);
+ }
+ }
+}
+
+#endif
+
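+// Walk sweep_list, classify each segment with nonmovingSweepSegment, and move
+// it to the free, active, or filled list accordingly.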
+GNUC_ATTR_HOT void nonmovingSweep(void)
+{
+ while (nonmovingHeap.sweep_list) {
+ struct NonmovingSegment *seg = nonmovingHeap.sweep_list;
+
+        // Pushing the segment onto one of the free/active/filled lists
+        // updates its link field, so advance sweep_list first
+ nonmovingHeap.sweep_list = seg->link;
+
+ enum SweepResult ret = nonmovingSweepSegment(seg);
+
+ switch (ret) {
+ case SEGMENT_FREE:
+ IF_DEBUG(sanity, clear_segment(seg));
+ nonmovingPushFreeSegment(seg);
+ break;
+ case SEGMENT_PARTIAL:
+ IF_DEBUG(sanity, clear_segment_free_blocks(seg));
+ nonmovingPushActiveSegment(seg);
+ break;
+ case SEGMENT_FILLED:
+ nonmovingPushFilledSegment(seg);
+ break;
+ default:
+ barf("nonmovingSweep: weird sweep return: %d\n", ret);
+ }
+ }
+}
+
+/* Must a closure remain on the mutable list?
+ *
+ * A closure must remain if any of the following applies:
+ *
+ *  1. it contains references to a younger generation
+ *  2. it's a mutable closure (e.g. mutable array or MUT_PRIM)
+ *
+ * is_closure_clean returns true when neither applies, i.e. the closure may be
+ * dropped from the mutable list.
+ */
+static bool is_closure_clean(StgClosure *p)
+{
+ const StgInfoTable *info = get_itbl(p);
+
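+// A field is considered clean if it points outside the heap or into the
+// oldest generation, i.e. it cannot be a reference to a younger generation.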
+#define CLEAN(ptr) (!HEAP_ALLOCED((StgClosure*) ptr) || Bdescr((StgPtr) ptr)->gen == oldest_gen)
+
+ switch (info->type) {
+ case MVAR_CLEAN:
+ case MVAR_DIRTY:
+ {
+ StgMVar *mvar = ((StgMVar *)p);
+ if (!CLEAN(mvar->head)) goto dirty_MVAR;
+ if (!CLEAN(mvar->tail)) goto dirty_MVAR;
+ if (!CLEAN(mvar->value)) goto dirty_MVAR;
+ mvar->header.info = &stg_MVAR_CLEAN_info;
+ return true;
+
+dirty_MVAR:
+ mvar->header.info = &stg_MVAR_DIRTY_info;
+ return false;
+ }
+
+ case TVAR:
+ {
+ StgTVar *tvar = ((StgTVar *)p);
+ if (!CLEAN(tvar->current_value)) goto dirty_TVAR;
+ if (!CLEAN(tvar->first_watch_queue_entry)) goto dirty_TVAR;
+ tvar->header.info = &stg_TVAR_CLEAN_info;
+ return true;
+
+dirty_TVAR:
+ tvar->header.info = &stg_TVAR_DIRTY_info;
+ return false;
+ }
+
+ case THUNK:
+ case THUNK_1_0:
+ case THUNK_0_1:
+ case THUNK_1_1:
+ case THUNK_0_2:
+ case THUNK_2_0:
+ {
+ StgPtr end = (StgPtr)((StgThunk *)p)->payload + info->layout.payload.ptrs;
+ for (StgPtr q = (StgPtr)((StgThunk *)p)->payload; q < end; q++) {
+ if (!CLEAN(*q)) return false;
+ }
+ return true;
+ }
+
+ case FUN:
+ case FUN_1_0: // hardly worth specialising these guys
+ case FUN_0_1:
+ case FUN_1_1:
+ case FUN_0_2:
+ case FUN_2_0:
+ case CONSTR:
+ case CONSTR_NOCAF:
+ case CONSTR_1_0:
+ case CONSTR_0_1:
+ case CONSTR_1_1:
+ case CONSTR_0_2:
+ case CONSTR_2_0:
+ case PRIM:
+ {
+ StgPtr end = (StgPtr)((StgClosure *)p)->payload + info->layout.payload.ptrs;
+ for (StgPtr q = (StgPtr)((StgClosure *)p)->payload; q < end; q++) {
+ if (!CLEAN(*q)) return false;
+ }
+ return true;
+ }
+
+ case WEAK:
+ return false; // TODO
+
+ case MUT_VAR_CLEAN:
+ case MUT_VAR_DIRTY:
+ if (!CLEAN(((StgMutVar *)p)->var)) {
+ p->header.info = &stg_MUT_VAR_DIRTY_info;
+ return false;
+ } else {
+ p->header.info = &stg_MUT_VAR_CLEAN_info;
+ return true;
+ }
+
+ case BLOCKING_QUEUE:
+ {
+ StgBlockingQueue *bq = (StgBlockingQueue *)p;
+
+ if (!CLEAN(bq->bh)) goto dirty_BLOCKING_QUEUE;
+ if (!CLEAN(bq->owner)) goto dirty_BLOCKING_QUEUE;
+ if (!CLEAN(bq->queue)) goto dirty_BLOCKING_QUEUE;
+ if (!CLEAN(bq->link)) goto dirty_BLOCKING_QUEUE;
+ bq->header.info = &stg_BLOCKING_QUEUE_CLEAN_info;
+ return true;
+
+dirty_BLOCKING_QUEUE:
+ bq->header.info = &stg_BLOCKING_QUEUE_DIRTY_info;
+ return false;
+ }
+
+ case THUNK_SELECTOR:
+ return CLEAN(((StgSelector *) p)->selectee);
+
+ case ARR_WORDS:
+ return true;
+
+ default:
+ // TODO: the rest
+ return false;
+ }
+#undef CLEAN
+}
+
+/* N.B. This happens during the pause so we own all capabilities. */
+void nonmovingSweepMutLists()
+{
+ for (uint32_t n = 0; n < n_capabilities; n++) {
+ Capability *cap = capabilities[n];
+ bdescr *old_mut_list = cap->mut_lists[oldest_gen->no];
+ cap->mut_lists[oldest_gen->no] = allocBlockOnNode_sync(cap->node);
+ for (bdescr *bd = old_mut_list; bd; bd = bd->link) {
+ for (StgPtr p = bd->start; p < bd->free; p++) {
+ StgClosure **q = (StgClosure**)p;
+ if (nonmovingIsAlive(*q) && !is_closure_clean(*q)) {
+ recordMutableCap(*q, cap, oldest_gen->no);
+ }
+ }
+ }
+ freeChain_lock(old_mut_list);
+ }
+}
+
+/* A variant of freeChain_lock that holds the lock while freeing at most max_dur
+ * blocks at a time, to ensure that we don't starve other lock users (e.g. the
+ * mutator).
+ */
+static void freeChain_lock_max(bdescr *bd, int max_dur)
+{
+ ACQUIRE_SM_LOCK;
+ bdescr *next_bd;
+ int i = 0;
+ while (bd != NULL) {
+ next_bd = bd->link;
+ freeGroup(bd);
+ bd = next_bd;
+ if (i == max_dur) {
+ RELEASE_SM_LOCK;
+ yieldThread();
+ ACQUIRE_SM_LOCK;
+ i = 0;
+ }
+ i++;
+ }
+ RELEASE_SM_LOCK;
+}
+
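+// Free the large objects that were not marked, then install the marked list as
+// the new nonmoving_large_objects list.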
+void nonmovingSweepLargeObjects()
+{
+ freeChain_lock_max(nonmoving_large_objects, 10000);
+ nonmoving_large_objects = nonmoving_marked_large_objects;
+ n_nonmoving_large_blocks = n_nonmoving_marked_large_blocks;
+ nonmoving_marked_large_objects = NULL;
+ n_nonmoving_marked_large_blocks = 0;
+}
+
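+// Likewise for compact regions: free the unmarked compact objects and promote
+// the marked list.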
+void nonmovingSweepCompactObjects()
+{
+ bdescr *next;
+ ACQUIRE_SM_LOCK;
+ for (bdescr *bd = nonmoving_compact_objects; bd; bd = next) {
+ next = bd->link;
+ compactFree(((StgCompactNFDataBlock*)bd->start)->owner);
+ }
+ RELEASE_SM_LOCK;
+ nonmoving_compact_objects = nonmoving_marked_compact_objects;
+ n_nonmoving_compact_blocks = n_nonmoving_marked_compact_blocks;
+ nonmoving_marked_compact_objects = NULL;
+ n_nonmoving_marked_compact_blocks = 0;
+}
+
+// Helper for nonmovingSweepStableNameTable. Essentially nonmovingIsAlive,
+// but also handles objects that died in the moving heap; see
+// nonmovingSweepStableNameTable.
+static bool is_alive(StgClosure *p)
+{
+ if (!HEAP_ALLOCED_GC(p)) {
+ return true;
+ }
+
+ if (nonmovingClosureBeingSwept(p)) {
+ return nonmovingIsAlive(p);
+ } else {
+ // We don't want to sweep any stable names which weren't in the
+ // set of segments that we swept.
+ // See Note [Sweeping stable names in the concurrent collector]
+ return true;
+ }
+}
+
+void nonmovingSweepStableNameTable()
+{
+ // See comments in gcStableTables
+
+ /* Note [Sweeping stable names in the concurrent collector]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * When collecting concurrently we need to take care to avoid freeing
+     * stable names that we didn't sweep this collection cycle. For instance,
+ * consider the following situation:
+ *
+ * 1. We take a snapshot and start collection
+ * 2. A mutator allocates a new object, then makes a stable name for it
+ * 3. The mutator performs a minor GC and promotes the new object to the nonmoving heap
+ * 4. The GC thread gets to the sweep phase and, when traversing the stable
+ * name table, finds the new object unmarked. It then assumes that the
+ * object is dead and removes the stable name from the stable name table.
+ *
+ */
+
+ // FIXME: We can't use nonmovingIsAlive here without first using isAlive:
+ // a stable name can die during moving heap collection and we can't use
+ // nonmovingIsAlive on those objects. Inefficient.
+
+ stableNameLock();
+ FOR_EACH_STABLE_NAME(
+ p, {
+ if (p->sn_obj != NULL) {
+ if (!is_alive((StgClosure*)p->sn_obj)) {
+ p->sn_obj = NULL; // Just to make an assertion happy
+ freeSnEntry(p);
+ } else if (p->addr != NULL) {
+ if (!is_alive((StgClosure*)p->addr)) {
+ p->addr = NULL;
+ }
+ }
+ }
+ });
+ stableNameUnlock();
+}
diff --git a/rts/sm/NonMovingSweep.h b/rts/sm/NonMovingSweep.h
new file mode 100644
index 0000000000..24e9eccd5e
--- /dev/null
+++ b/rts/sm/NonMovingSweep.h
@@ -0,0 +1,31 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2018
+ *
+ * Non-moving garbage collector and allocator: Sweep phase
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+#include "NonMoving.h"
+#include "Hash.h"
+
+GNUC_ATTR_HOT void nonmovingSweep(void);
+
+// Remove unmarked entries in oldest generation mut_lists
+void nonmovingSweepMutLists(void);
+
+// Remove unmarked entries in oldest generation scavenged_large_objects list
+void nonmovingSweepLargeObjects(void);
+
+// Remove unmarked entries in oldest generation compact_objects list
+void nonmovingSweepCompactObjects(void);
+
+// Remove dead entries in the stable name table
+void nonmovingSweepStableNameTable(void);
+
+#if defined(DEBUG)
+// The non-moving equivalent of the moving collector's gcCAFs.
+void nonmovingGcCafs(void);
+#endif
diff --git a/rts/sm/Sanity.c b/rts/sm/Sanity.c
index 3585bd93b4..23f0fc57b4 100644
--- a/rts/sm/Sanity.c
+++ b/rts/sm/Sanity.c
@@ -29,6 +29,8 @@
#include "Arena.h"
#include "RetainerProfile.h"
#include "CNF.h"
+#include "sm/NonMoving.h"
+#include "sm/NonMovingMark.h"
#include "Profiling.h" // prof_arena
/* -----------------------------------------------------------------------------
@@ -40,6 +42,9 @@ static void checkLargeBitmap ( StgPtr payload, StgLargeBitmap*, uint32_t );
static void checkClosureShallow ( const StgClosure * );
static void checkSTACK (StgStack *stack);
+static W_ countNonMovingSegments ( struct NonmovingSegment *segs );
+static W_ countNonMovingHeap ( struct NonmovingHeap *heap );
+
/* -----------------------------------------------------------------------------
Check stack sanity
-------------------------------------------------------------------------- */
@@ -478,6 +483,41 @@ void checkHeapChain (bdescr *bd)
}
}
+/* -----------------------------------------------------------------------------
+ * Check nonmoving heap sanity
+ *
+ * After a concurrent sweep the nonmoving heap can be checked for validity.
+ * -------------------------------------------------------------------------- */
+
+static void checkNonmovingSegments (struct NonmovingSegment *seg)
+{
+ while (seg != NULL) {
+ const nonmoving_block_idx count = nonmovingSegmentBlockCount(seg);
+ for (nonmoving_block_idx i=0; i < count; i++) {
+ if (seg->bitmap[i] == nonmovingMarkEpoch) {
+ StgPtr p = nonmovingSegmentGetBlock(seg, i);
+ checkClosure((StgClosure *) p);
+ } else if (i < nonmovingSegmentInfo(seg)->next_free_snap){
+ seg->bitmap[i] = 0;
+ }
+ }
+ seg = seg->link;
+ }
+}
+
+void checkNonmovingHeap (const struct NonmovingHeap *heap)
+{
+ for (unsigned int i=0; i < NONMOVING_ALLOCA_CNT; i++) {
+ const struct NonmovingAllocator *alloc = heap->allocators[i];
+ checkNonmovingSegments(alloc->filled);
+ checkNonmovingSegments(alloc->active);
+ for (unsigned int cap=0; cap < n_capabilities; cap++) {
+ checkNonmovingSegments(alloc->current[cap]);
+ }
+ }
+}
+
+
void
checkHeapChunk(StgPtr start, StgPtr end)
{
@@ -632,9 +672,9 @@ checkGlobalTSOList (bool checkTSOs)
stack = tso->stackobj;
while (1) {
- if (stack->dirty & 1) {
- ASSERT(Bdescr((P_)stack)->gen_no == 0 || (stack->dirty & TSO_MARKED));
- stack->dirty &= ~TSO_MARKED;
+ if (stack->dirty & STACK_DIRTY) {
+ ASSERT(Bdescr((P_)stack)->gen_no == 0 || (stack->dirty & STACK_SANE));
+ stack->dirty &= ~STACK_SANE;
}
frame = (StgUnderflowFrame*) (stack->stack + stack->stack_size
- sizeofW(StgUnderflowFrame));
@@ -669,7 +709,7 @@ checkMutableList( bdescr *mut_bd, uint32_t gen )
((StgTSO *)p)->flags |= TSO_MARKED;
break;
case STACK:
- ((StgStack *)p)->dirty |= TSO_MARKED;
+ ((StgStack *)p)->dirty |= STACK_SANE;
break;
}
}
@@ -766,16 +806,42 @@ static void checkGeneration (generation *gen,
uint32_t n;
gen_workspace *ws;
- ASSERT(countBlocks(gen->blocks) == gen->n_blocks);
+ //ASSERT(countBlocks(gen->blocks) == gen->n_blocks);
ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
#if defined(THREADED_RTS)
- // heap sanity checking doesn't work with SMP, because we can't
- // zero the slop (see Updates.h). However, we can sanity-check
- // the heap after a major gc, because there is no slop.
+ // heap sanity checking doesn't work with SMP for two reasons:
+ // * we can't zero the slop (see Updates.h). However, we can sanity-check
+ // the heap after a major gc, because there is no slop.
+ //
+ // * the nonmoving collector may be mutating its large object lists, unless we
+ // were in fact called by the nonmoving collector.
if (!after_major_gc) return;
#endif
+ if (RtsFlags.GcFlags.useNonmoving && gen == oldest_gen) {
+ ASSERT(countNonMovingSegments(nonmovingHeap.free) == (W_) nonmovingHeap.n_free * NONMOVING_SEGMENT_BLOCKS);
+ ASSERT(countBlocks(nonmoving_large_objects) == n_nonmoving_large_blocks);
+ ASSERT(countBlocks(nonmoving_marked_large_objects) == n_nonmoving_marked_large_blocks);
+
+ // Compact regions
+ // Accounting here is tricky due to the fact that the CNF allocation
+ // code modifies generation->n_compact_blocks directly. However, most
+ // objects being swept by the nonmoving GC are tracked in
+ // nonmoving_*_compact_objects. Consequently we can only maintain a very loose
+ // sanity invariant here.
+ uint32_t counted_cnf_blocks = 0;
+ counted_cnf_blocks += countCompactBlocks(nonmoving_marked_compact_objects);
+ counted_cnf_blocks += countCompactBlocks(nonmoving_compact_objects);
+ counted_cnf_blocks += countCompactBlocks(oldest_gen->compact_objects);
+
+ uint32_t total_cnf_blocks = 0;
+ total_cnf_blocks += n_nonmoving_compact_blocks + oldest_gen->n_compact_blocks;
+ total_cnf_blocks += n_nonmoving_marked_compact_blocks;
+
+ ASSERT(counted_cnf_blocks == total_cnf_blocks);
+ }
+
checkHeapChain(gen->blocks);
for (n = 0; n < n_capabilities; n++) {
@@ -824,6 +890,15 @@ markCompactBlocks(bdescr *bd)
}
}
+static void
+markNonMovingSegments(struct NonmovingSegment *seg)
+{
+ while (seg) {
+ markBlocks(Bdescr((P_)seg));
+ seg = seg->link;
+ }
+}
+
// If memInventory() calculates that we have a memory leak, this
// function will try to find the block(s) that are leaking by marking
// all the ones that we know about, and search through memory to find
@@ -834,7 +909,7 @@ markCompactBlocks(bdescr *bd)
static void
findMemoryLeak (void)
{
- uint32_t g, i;
+ uint32_t g, i, j;
for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
for (i = 0; i < n_capabilities; i++) {
markBlocks(capabilities[i]->mut_lists[g]);
@@ -854,6 +929,27 @@ findMemoryLeak (void)
for (i = 0; i < n_capabilities; i++) {
markBlocks(gc_threads[i]->free_blocks);
markBlocks(capabilities[i]->pinned_object_block);
+ markBlocks(capabilities[i]->upd_rem_set.queue.blocks);
+ }
+
+ if (RtsFlags.GcFlags.useNonmoving) {
+ markBlocks(upd_rem_set_block_list);
+ markBlocks(nonmoving_large_objects);
+ markBlocks(nonmoving_marked_large_objects);
+ markBlocks(nonmoving_compact_objects);
+ markBlocks(nonmoving_marked_compact_objects);
+ for (i = 0; i < NONMOVING_ALLOCA_CNT; i++) {
+ struct NonmovingAllocator *alloc = nonmovingHeap.allocators[i];
+ markNonMovingSegments(alloc->filled);
+ markNonMovingSegments(alloc->active);
+ for (j = 0; j < n_capabilities; j++) {
+ markNonMovingSegments(alloc->current[j]);
+ }
+ }
+ markNonMovingSegments(nonmovingHeap.sweep_list);
+ markNonMovingSegments(nonmovingHeap.free);
+ if (current_mark_queue)
+ markBlocks(current_mark_queue->blocks);
}
#if defined(PROFILING)
@@ -914,14 +1010,65 @@ void findSlop(bdescr *bd)
static W_
genBlocks (generation *gen)
{
- ASSERT(countBlocks(gen->blocks) == gen->n_blocks);
+ W_ ret = 0;
+ if (RtsFlags.GcFlags.useNonmoving && gen == oldest_gen) {
+ // See Note [Live data accounting in nonmoving collector].
+ ASSERT(countNonMovingHeap(&nonmovingHeap) == gen->n_blocks);
+ ret += countAllocdBlocks(nonmoving_large_objects);
+ ret += countAllocdBlocks(nonmoving_marked_large_objects);
+ ret += countAllocdCompactBlocks(nonmoving_compact_objects);
+ ret += countAllocdCompactBlocks(nonmoving_marked_compact_objects);
+ ret += countNonMovingHeap(&nonmovingHeap);
+ if (current_mark_queue)
+ ret += countBlocks(current_mark_queue->blocks);
+ } else {
+ ASSERT(countBlocks(gen->blocks) == gen->n_blocks);
+ ASSERT(countCompactBlocks(gen->compact_objects) == gen->n_compact_blocks);
+ ASSERT(countCompactBlocks(gen->compact_blocks_in_import) == gen->n_compact_blocks_in_import);
+ ret += gen->n_blocks;
+ }
+
ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
- ASSERT(countCompactBlocks(gen->compact_objects) == gen->n_compact_blocks);
- ASSERT(countCompactBlocks(gen->compact_blocks_in_import) == gen->n_compact_blocks_in_import);
- return gen->n_blocks + gen->n_old_blocks +
+
+ ret += gen->n_old_blocks +
countAllocdBlocks(gen->large_objects) +
countAllocdCompactBlocks(gen->compact_objects) +
countAllocdCompactBlocks(gen->compact_blocks_in_import);
+ return ret;
+}
+
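+// Block accounting for the nonmoving heap: walk each segment list and count
+// the blocks behind the segments.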
+static W_
+countNonMovingSegments(struct NonmovingSegment *segs)
+{
+ W_ ret = 0;
+ while (segs) {
+ ret += countBlocks(Bdescr((P_)segs));
+ segs = segs->link;
+ }
+ return ret;
+}
+
+static W_
+countNonMovingAllocator(struct NonmovingAllocator *alloc)
+{
+ W_ ret = countNonMovingSegments(alloc->filled)
+ + countNonMovingSegments(alloc->active);
+ for (uint32_t i = 0; i < n_capabilities; ++i) {
+ ret += countNonMovingSegments(alloc->current[i]);
+ }
+ return ret;
+}
+
+static W_
+countNonMovingHeap(struct NonmovingHeap *heap)
+{
+ W_ ret = 0;
+ for (int alloc_idx = 0; alloc_idx < NONMOVING_ALLOCA_CNT; alloc_idx++) {
+ ret += countNonMovingAllocator(heap->allocators[alloc_idx]);
+ }
+ ret += countNonMovingSegments(heap->sweep_list);
+ ret += countNonMovingSegments(heap->free);
+ return ret;
}
void
@@ -929,11 +1076,20 @@ memInventory (bool show)
{
uint32_t g, i;
W_ gen_blocks[RtsFlags.GcFlags.generations];
- W_ nursery_blocks, retainer_blocks,
- arena_blocks, exec_blocks, gc_free_blocks = 0;
+ W_ nursery_blocks = 0, retainer_blocks = 0,
+ arena_blocks = 0, exec_blocks = 0, gc_free_blocks = 0,
+ upd_rem_set_blocks = 0;
W_ live_blocks = 0, free_blocks = 0;
bool leak;
+#if defined(THREADED_RTS)
+ // Can't easily do a memory inventory: We might race with the nonmoving
+ // collector. In principle we could try to take nonmoving_collection_mutex
+ // and do an inventory if we have it but we don't currently implement this.
+ if (RtsFlags.GcFlags.useNonmoving)
+ return;
+#endif
+
// count the blocks we current have
for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
@@ -947,20 +1103,19 @@ memInventory (bool show)
gen_blocks[g] += genBlocks(&generations[g]);
}
- nursery_blocks = 0;
for (i = 0; i < n_nurseries; i++) {
ASSERT(countBlocks(nurseries[i].blocks) == nurseries[i].n_blocks);
nursery_blocks += nurseries[i].n_blocks;
}
for (i = 0; i < n_capabilities; i++) {
- gc_free_blocks += countBlocks(gc_threads[i]->free_blocks);
+ W_ n = countBlocks(gc_threads[i]->free_blocks);
+ gc_free_blocks += n;
if (capabilities[i]->pinned_object_block != NULL) {
nursery_blocks += capabilities[i]->pinned_object_block->blocks;
}
nursery_blocks += countBlocks(capabilities[i]->pinned_object_blocks);
}
- retainer_blocks = 0;
#if defined(PROFILING)
if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER) {
retainer_blocks = retainerStackBlocks();
@@ -976,12 +1131,19 @@ memInventory (bool show)
/* count the blocks on the free list */
free_blocks = countFreeList();
+ // count UpdRemSet blocks
+ for (i = 0; i < n_capabilities; ++i) {
+ upd_rem_set_blocks += countBlocks(capabilities[i]->upd_rem_set.queue.blocks);
+ }
+ upd_rem_set_blocks += countBlocks(upd_rem_set_block_list);
+
live_blocks = 0;
for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
live_blocks += gen_blocks[g];
}
live_blocks += nursery_blocks +
- + retainer_blocks + arena_blocks + exec_blocks + gc_free_blocks;
+ + retainer_blocks + arena_blocks + exec_blocks + gc_free_blocks
+ + upd_rem_set_blocks;
#define MB(n) (((double)(n) * BLOCK_SIZE_W) / ((1024*1024)/sizeof(W_)))
@@ -1010,6 +1172,8 @@ memInventory (bool show)
gc_free_blocks, MB(gc_free_blocks));
debugBelch(" free : %5" FMT_Word " blocks (%6.1lf MB)\n",
free_blocks, MB(free_blocks));
+ debugBelch(" UpdRemSet : %5" FMT_Word " blocks (%6.1lf MB)\n",
+ upd_rem_set_blocks, MB(upd_rem_set_blocks));
debugBelch(" total : %5" FMT_Word " blocks (%6.1lf MB)\n",
live_blocks + free_blocks, MB(live_blocks+free_blocks));
if (leak) {
diff --git a/rts/sm/Sanity.h b/rts/sm/Sanity.h
index 9227e6fd18..b6f2054383 100644
--- a/rts/sm/Sanity.h
+++ b/rts/sm/Sanity.h
@@ -31,6 +31,7 @@ void checkStaticObjects ( StgClosure* static_objects );
void checkStackChunk ( StgPtr sp, StgPtr stack_end );
StgOffset checkStackFrame ( StgPtr sp );
StgOffset checkClosure ( const StgClosure* p );
+void checkNonmovingHeap ( const struct NonmovingHeap *heap );
void checkRunQueue (Capability *cap);
diff --git a/rts/sm/Scav.c b/rts/sm/Scav.c
index c486cd96c5..501d958aae 100644
--- a/rts/sm/Scav.c
+++ b/rts/sm/Scav.c
@@ -62,8 +62,8 @@
#include "Hash.h"
#include "sm/MarkWeak.h"
-
-static void scavenge_stack (StgPtr p, StgPtr stack_end);
+#include "sm/NonMoving.h" // for nonmoving_set_closure_mark_bit
+#include "sm/NonMovingScav.h"
static void scavenge_large_bitmap (StgPtr p,
StgLargeBitmap *large_bitmap,
@@ -76,6 +76,15 @@ static void scavenge_large_bitmap (StgPtr p,
# define scavenge_block(a) scavenge_block1(a)
# define scavenge_mutable_list(bd,g) scavenge_mutable_list1(bd,g)
# define scavenge_capability_mut_lists(cap) scavenge_capability_mut_Lists1(cap)
+# define scavengeTSO(tso) scavengeTSO1(tso)
+# define scavenge_stack(p, stack_end) scavenge_stack1(p, stack_end)
+# define scavenge_fun_srt(info) scavenge_fun_srt1(info)
+# define scavenge_thunk_srt(info) scavenge_thunk_srt1(info)
+# define scavenge_mut_arr_ptrs(info) scavenge_mut_arr_ptrs1(info)
+# define scavenge_PAP(pap) scavenge_PAP1(pap)
+# define scavenge_AP(ap) scavenge_AP1(ap)
+# define scavenge_compact(str) scavenge_compact1(str)
#endif
static void do_evacuate(StgClosure **p, void *user STG_UNUSED)
@@ -87,7 +96,7 @@ static void do_evacuate(StgClosure **p, void *user STG_UNUSED)
Scavenge a TSO.
-------------------------------------------------------------------------- */
-static void
+void
scavengeTSO (StgTSO *tso)
{
bool saved_eager;
@@ -165,7 +174,10 @@ evacuate_hash_entry(MapHashData *dat, StgWord key, const void *value)
SET_GCT(old_gct);
}
-static void
+/* Here we scavenge the sharing-preservation hash-table, which may contain keys
+ * living in from-space.
+ */
+void
scavenge_compact(StgCompactNFData *str)
{
bool saved_eager;
@@ -198,7 +210,7 @@ scavenge_compact(StgCompactNFData *str)
Mutable arrays of pointers
-------------------------------------------------------------------------- */
-static StgPtr scavenge_mut_arr_ptrs (StgMutArrPtrs *a)
+StgPtr scavenge_mut_arr_ptrs (StgMutArrPtrs *a)
{
W_ m;
bool any_failed;
@@ -348,14 +360,14 @@ scavenge_PAP_payload (StgClosure *fun, StgClosure **payload, StgWord size)
return p;
}
-STATIC_INLINE GNUC_ATTR_HOT StgPtr
+GNUC_ATTR_HOT StgPtr
scavenge_PAP (StgPAP *pap)
{
evacuate(&pap->fun);
return scavenge_PAP_payload (pap->fun, pap->payload, pap->n_args);
}
-STATIC_INLINE StgPtr
+StgPtr
scavenge_AP (StgAP *ap)
{
evacuate(&ap->fun);
@@ -366,7 +378,7 @@ scavenge_AP (StgAP *ap)
Scavenge SRTs
-------------------------------------------------------------------------- */
-STATIC_INLINE GNUC_ATTR_HOT void
+GNUC_ATTR_HOT void
scavenge_thunk_srt(const StgInfoTable *info)
{
StgThunkInfoTable *thunk_info;
@@ -380,7 +392,7 @@ scavenge_thunk_srt(const StgInfoTable *info)
}
}
-STATIC_INLINE GNUC_ATTR_HOT void
+GNUC_ATTR_HOT void
scavenge_fun_srt(const StgInfoTable *info)
{
StgFunInfoTable *fun_info;
@@ -1570,10 +1582,10 @@ static void
scavenge_mutable_list(bdescr *bd, generation *gen)
{
StgPtr p, q;
- uint32_t gen_no;
- gen_no = gen->no;
+ uint32_t gen_no = gen->no;
gct->evac_gen_no = gen_no;
+
for (; bd != NULL; bd = bd->link) {
for (q = bd->start; q < bd->free; q++) {
p = (StgPtr)*q;
@@ -1648,7 +1660,10 @@ scavenge_mutable_list(bdescr *bd, generation *gen)
;
}
- if (scavenge_one(p)) {
+ if (RtsFlags.GcFlags.useNonmoving && major_gc && gen == oldest_gen) {
+ // We can't use scavenge_one here as we need to scavenge SRTs
+ nonmovingScavengeOne((StgClosure *)p);
+ } else if (scavenge_one(p)) {
// didn't manage to promote everything, so put the
// object back on the list.
recordMutableGen_GC((StgClosure *)p,gen_no);
@@ -1660,7 +1675,14 @@ scavenge_mutable_list(bdescr *bd, generation *gen)
void
scavenge_capability_mut_lists (Capability *cap)
{
- uint32_t g;
+ // In a major GC only the nonmoving heap's mut_list is a root
+ if (RtsFlags.GcFlags.useNonmoving && major_gc) {
+ uint32_t g = oldest_gen->no;
+ scavenge_mutable_list(cap->saved_mut_lists[g], oldest_gen);
+ freeChain_sync(cap->saved_mut_lists[g]);
+ cap->saved_mut_lists[g] = NULL;
+ return;
+ }
/* Mutable lists from each generation > N
* we want to *scavenge* these roots, not evacuate them: they're not
@@ -1668,7 +1690,7 @@ scavenge_capability_mut_lists (Capability *cap)
* Also do them in reverse generation order, for the usual reason:
* namely to reduce the likelihood of spurious old->new pointers.
*/
- for (g = RtsFlags.GcFlags.generations-1; g > N; g--) {
+ for (uint32_t g = RtsFlags.GcFlags.generations-1; g > N; g--) {
scavenge_mutable_list(cap->saved_mut_lists[g], &generations[g]);
freeChain_sync(cap->saved_mut_lists[g]);
cap->saved_mut_lists[g] = NULL;
@@ -1795,7 +1817,7 @@ scavenge_large_bitmap( StgPtr p, StgLargeBitmap *large_bitmap, StgWord size )
AP_STACK_UPDs, since these are just sections of copied stack.
-------------------------------------------------------------------------- */
-static void
+void
scavenge_stack(StgPtr p, StgPtr stack_end)
{
const StgRetInfoTable* info;
@@ -2038,6 +2060,16 @@ loop:
for (g = RtsFlags.GcFlags.generations-1; g >= 0; g--) {
ws = &gct->gens[g];
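+ // Scavenge any nonmoving segments queued for scavenging before looking
+ // at the ordinary todo blocks.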
+ if (ws->todo_seg != END_NONMOVING_TODO_LIST) {
+ struct NonmovingSegment *seg = ws->todo_seg;
+ ASSERT(seg->todo_link);
+ ws->todo_seg = seg->todo_link;
+ seg->todo_link = NULL;
+ scavengeNonmovingSegment(seg);
+ did_something = true;
+ break;
+ }
+
gct->scan_bd = NULL;
// If we have a scan block with some work to do,
diff --git a/rts/sm/Scav.h b/rts/sm/Scav.h
index 21ca691bff..94250bcf7a 100644
--- a/rts/sm/Scav.h
+++ b/rts/sm/Scav.h
@@ -17,10 +17,26 @@
void scavenge_loop (void);
void scavenge_capability_mut_lists (Capability *cap);
+void scavengeTSO (StgTSO *tso);
+void scavenge_stack (StgPtr p, StgPtr stack_end);
+void scavenge_fun_srt (const StgInfoTable *info);
+void scavenge_thunk_srt (const StgInfoTable *info);
+StgPtr scavenge_mut_arr_ptrs (StgMutArrPtrs *a);
+StgPtr scavenge_PAP (StgPAP *pap);
+StgPtr scavenge_AP (StgAP *ap);
+void scavenge_compact (StgCompactNFData *str);
#if defined(THREADED_RTS)
void scavenge_loop1 (void);
void scavenge_capability_mut_Lists1 (Capability *cap);
+void scavengeTSO1 (StgTSO *tso);
+void scavenge_stack1 (StgPtr p, StgPtr stack_end);
+void scavenge_fun_srt1 (const StgInfoTable *info);
+void scavenge_thunk_srt1 (const StgInfoTable *info);
+StgPtr scavenge_mut_arr_ptrs1 (StgMutArrPtrs *a);
+StgPtr scavenge_PAP1 (StgPAP *pap);
+StgPtr scavenge_AP1 (StgAP *ap);
+void scavenge_compact1 (StgCompactNFData *str);
#endif
#include "EndPrivate.h"
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index 0130a08f7c..f04b3c5929 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -29,6 +29,7 @@
#include "Trace.h"
#include "GC.h"
#include "Evac.h"
+#include "NonMoving.h"
#if defined(ios_HOST_OS)
#include "Hash.h"
#endif
@@ -82,7 +83,7 @@ Mutex sm_mutex;
static void allocNurseries (uint32_t from, uint32_t to);
static void assignNurseriesToCapabilities (uint32_t from, uint32_t to);
-static void
+void
initGeneration (generation *gen, int g)
{
gen->no = g;
@@ -170,6 +171,18 @@ initStorage (void)
}
oldest_gen->to = oldest_gen;
+ // The nonmoving heap uses oldest_gen, so it must be initialized after oldest_gen
+ nonmovingInit();
+
+#if defined(THREADED_RTS)
+ // nonmovingAddCapabilities allocates segments, which requires taking the GC
+ // sync lock, so initialize the lock before calling nonmovingAddCapabilities
+ initSpinLock(&gc_alloc_block_sync);
+#endif
+
+ if (RtsFlags.GcFlags.useNonmoving)
+ nonmovingAddCapabilities(n_capabilities);
+
/* The oldest generation has one step. */
if (RtsFlags.GcFlags.compact || RtsFlags.GcFlags.sweep) {
if (RtsFlags.GcFlags.generations == 1) {
@@ -195,9 +208,6 @@ initStorage (void)
exec_block = NULL;
-#if defined(THREADED_RTS)
- initSpinLock(&gc_alloc_block_sync);
-#endif
N = 0;
for (n = 0; n < n_numa_nodes; n++) {
@@ -271,6 +281,14 @@ void storageAddCapabilities (uint32_t from, uint32_t to)
}
}
+ // Initialize NonmovingAllocators and UpdRemSets
+ if (RtsFlags.GcFlags.useNonmoving) {
+ nonmovingAddCapabilities(to);
+ for (i = 0; i < to; ++i) {
+ init_upd_rem_set(&capabilities[i]->upd_rem_set);
+ }
+ }
+
#if defined(THREADED_RTS) && defined(CC_LLVM_BACKEND) && (CC_SUPPORTS_TLS == 0)
newThreadLocalKey(&gctKey);
#endif
@@ -282,6 +300,7 @@ void storageAddCapabilities (uint32_t from, uint32_t to)
void
exitStorage (void)
{
+ nonmovingExit();
updateNurseriesStats();
stat_exit();
}
@@ -302,7 +321,8 @@ freeStorage (bool free_heap)
}
/* -----------------------------------------------------------------------------
- Note [CAF management].
+ Note [CAF management]
+ ~~~~~~~~~~~~~~~~~~~~~
The entry code for every CAF does the following:
@@ -337,6 +357,7 @@ freeStorage (bool free_heap)
------------------
Note [atomic CAF entry]
+ ~~~~~~~~~~~~~~~~~~~~~~~
With THREADED_RTS, newCAF() is required to be atomic (see
#5558). This is because if two threads happened to enter the same
@@ -350,6 +371,7 @@ freeStorage (bool free_heap)
------------------
Note [GHCi CAFs]
+ ~~~~~~~~~~~~~~~~
For GHCI, we have additional requirements when dealing with CAFs:
@@ -369,6 +391,51 @@ freeStorage (bool free_heap)
-- SDM 29/1/01
+ ------------------
+ Note [Static objects under the nonmoving collector]
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ Static object management is a bit tricky under the nonmoving collector as we
+ need to maintain a bit more state than in the moving collector. In
+ particular, the moving collector uses the low bits of the STATIC_LINK field
+ to determine whether the object has been moved to the scavenger's work list
+ (see Note [STATIC_LINK fields] in Storage.h).
+
+ However, the nonmoving collector also needs a place to keep its mark bit.
+ This is problematic as we therefore need at least three bits of state
+ but can assume only two bits are available in STATIC_LINK (due to 32-bit
+ systems).
+
+ To accommodate this we move handling of static objects entirely to the
+ oldest generation when the nonmoving collector is in use. To do this safely
+ and efficiently we allocate the blackhole created by lockCAF() directly in
+ the non-moving heap. This means that the moving collector can completely
+ ignore static objects in minor collections since they are guaranteed not to
+ have any references into the moving heap. Of course, the blackhole itself
+ likely will contain a reference into the moving heap but this is
+ significantly easier to handle, being a heap-allocated object (see Note
+ [Aging under the non-moving collector] in NonMoving.c for details).
+
+ During the moving phase of a major collection we treat static objects
+ as we do any other reference into the non-moving heap by pushing them
+ to the non-moving mark queue (see Note [Aging under the non-moving
+ collector]).
+
+ This allows the non-moving collector to have full control over the flags
+ in STATIC_LINK, which it uses as described in Note [STATIC_LINK fields].
+ This is implemented by NonMovingMark.c:bump_static_flag.
+
+ In short, the plan is:
+
+ - lockCAF allocates its blackhole in the nonmoving heap. This is important
+ to ensure that we do not need to place the static object on the mut_list
+ lest we would need some way to ensure that it is evacuated only once during
+ a moving collection.
+
+ - evacuate_static_object merely pushes objects to the mark queue
+
+ - the nonmoving collector uses the flags in STATIC_LINK as its mark bit.
+
-------------------------------------------------------------------------- */
STATIC_INLINE StgInd *
@@ -402,11 +469,36 @@ lockCAF (StgRegTable *reg, StgIndStatic *caf)
// successfully claimed by us; overwrite with IND_STATIC
#endif
+ // Push anything that will become unreachable after the update onto the
+ // UpdRemSet to maintain the snapshot invariant
+ const StgInfoTable *orig_info_tbl = INFO_PTR_TO_STRUCT(orig_info);
+ // OSA: Assertions to make sure my understanding of static thunks is correct
+ ASSERT(orig_info_tbl->type == THUNK_STATIC);
+ // Secondly I think static thunks can't have payload: anything that they
+ // reference should be in SRTs
+ ASSERT(orig_info_tbl->layout.payload.ptrs == 0);
+ // Because the payload is empty we just push the SRT
+ IF_NONMOVING_WRITE_BARRIER_ENABLED {
+ StgThunkInfoTable *thunk_info = itbl_to_thunk_itbl(orig_info_tbl);
+ if (thunk_info->i.srt) {
+ updateRemembSetPushClosure(cap, GET_SRT(thunk_info));
+ }
+ }
+
// For the benefit of revertCAFs(), save the original info pointer
caf->saved_info = orig_info;
// Allocate the blackhole indirection closure
- bh = (StgInd *)allocate(cap, sizeofW(*bh));
+ if (RtsFlags.GcFlags.useNonmoving) {
+ // See Note [Static objects under the nonmoving collector].
+ ACQUIRE_SM_LOCK;
+ bh = (StgInd *)nonmovingAllocate(cap, sizeofW(*bh));
+ RELEASE_SM_LOCK;
+ recordMutableCap((StgClosure*)bh,
+ regTableToCapability(reg), oldest_gen->no);
+ } else {
+ bh = (StgInd *)allocate(cap, sizeofW(*bh));
+ }
bh->indirectee = (StgClosure *)cap->r.rCurrentTSO;
SET_HDR(bh, &stg_CAF_BLACKHOLE_info, caf->header.prof.ccs);
// Ensure that above writes are visible before we introduce reference as CAF indirectee.
@@ -448,7 +540,9 @@ newCAF(StgRegTable *reg, StgIndStatic *caf)
else
{
// Put this CAF on the mutable list for the old generation.
- if (oldest_gen->no != 0) {
+ // N.B. the nonmoving collector works a bit differently: see
+ // Note [Static objects under the nonmoving collector].
+ if (oldest_gen->no != 0 && !RtsFlags.GcFlags.useNonmoving) {
recordMutableCap((StgClosure*)caf,
regTableToCapability(reg), oldest_gen->no);
}
@@ -525,7 +619,9 @@ StgInd* newGCdCAF (StgRegTable *reg, StgIndStatic *caf)
if (!bh) return NULL;
// Put this CAF on the mutable list for the old generation.
- if (oldest_gen->no != 0) {
+ // N.B. the nonmoving collector works a bit differently:
+ // see Note [Static objects under the nonmoving collector].
+ if (oldest_gen->no != 0 && !RtsFlags.GcFlags.useNonmoving) {
recordMutableCap((StgClosure*)caf,
regTableToCapability(reg), oldest_gen->no);
}
@@ -1073,6 +1169,27 @@ allocatePinned (Capability *cap, W_ n)
Write Barriers
-------------------------------------------------------------------------- */
+/* These write barriers on heavily mutated objects serve two purposes:
+ *
+ * - Efficient maintenance of the generational invariant: Record whether or not
+ * we have added a particular mutable object to mut_list as they may contain
+ * references to younger generations.
+ *
+ * - Maintenance of the nonmoving collector's snapshot invariant: Record objects
+ * which are about to no longer be reachable due to mutation.
+ *
+ * In each case we record whether the object has been added to the mutable list
+ * by way of either the info pointer or a dedicated "dirty" flag. The GC will
+ * clear this flag and remove the object from mut_list (or rather, not re-add it)
+ * if it finds that the object contains no references into any younger generation.
+ *
+ * Note that all dirty objects will be marked as clean during preparation for a
+ * concurrent collection. Consequently, we can use the dirtiness flag to determine
+ * whether or not we need to add overwritten pointers to the update remembered
+ * set (since we need only write the value prior to the first update to maintain
+ * the snapshot invariant).
+ */
+
/*
This is the write barrier for MUT_VARs, a.k.a. IORefs. A
MUT_VAR_CLEAN object is not on the mutable list; a MUT_VAR_DIRTY
@@ -1080,25 +1197,39 @@ allocatePinned (Capability *cap, W_ n)
and is put on the mutable list.
*/
void
-dirty_MUT_VAR(StgRegTable *reg, StgClosure *p)
+dirty_MUT_VAR(StgRegTable *reg, StgMutVar *mvar, StgClosure *old)
{
Capability *cap = regTableToCapability(reg);
// No barrier required here as no other heap object fields are read. See
// note [Heap memory barriers] in SMP.h.
- if (p->header.info == &stg_MUT_VAR_CLEAN_info) {
- p->header.info = &stg_MUT_VAR_DIRTY_info;
- recordClosureMutated(cap,p);
+ if (mvar->header.info == &stg_MUT_VAR_CLEAN_info) {
+ mvar->header.info = &stg_MUT_VAR_DIRTY_info;
+ recordClosureMutated(cap, (StgClosure *) mvar);
+ IF_NONMOVING_WRITE_BARRIER_ENABLED {
+ updateRemembSetPushClosure_(reg, old);
+ }
}
}
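To make the barrier protocol above concrete, here is a minimal sketch (not part of this patch) of how a C-level caller could drive the extended barrier: the value being overwritten is read before the store and handed to dirty_MUT_VAR, so the barrier can push it to the update remembered set when a concurrent mark is in progress. The helper name my_write_mut_var is hypothetical; it assumes StgMutVar's var field and the usual RTS headers in scope.

// Hypothetical helper, for illustration only; not an RTS function.
static void my_write_mut_var(StgRegTable *reg, StgMutVar *mv, StgClosure *new_val)
{
    StgClosure *old_val = mv->var;    // the value we are about to overwrite
    dirty_MUT_VAR(reg, mv, old_val);  // barrier sees the old value first
    mv->var = new_val;                // only then perform the store
}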
+/*
+ * This is the write barrier for TVARs.
+ * old is the pointer that we overwrote, which is required by the concurrent
+ * garbage collector. Note that, while StgTVars contain multiple pointers, we
+ * only overwrite one per dirty_TVAR call, so we only need to take one old
+ * pointer argument.
+ */
void
-dirty_TVAR(Capability *cap, StgTVar *p)
+dirty_TVAR(Capability *cap, StgTVar *p,
+ StgClosure *old)
{
// No barrier required here as no other heap object fields are read. See
// note [Heap memory barriers] in SMP.h.
if (p->header.info == &stg_TVAR_CLEAN_info) {
p->header.info = &stg_TVAR_DIRTY_info;
recordClosureMutated(cap,(StgClosure*)p);
+ IF_NONMOVING_WRITE_BARRIER_ENABLED {
+ updateRemembSetPushClosure(cap, old);
+ }
}
}
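Because dirty_TVAR takes exactly one overwritten pointer, a caller that replaces more than one pointer field of a TVar invokes the barrier once per field, each time passing that field's old value. A rough sketch of the pattern (hypothetical helper, not part of this patch; it assumes StgTVar's current_value and first_watch_queue_entry fields):

// Hypothetical helper, for illustration only.
static void my_update_tvar(Capability *cap, StgTVar *tv,
                           StgClosure *new_val, StgTVarWatchQueue *new_wq)
{
    dirty_TVAR(cap, tv, tv->current_value);  // push old value of the first field
    tv->current_value = new_val;

    dirty_TVAR(cap, tv, (StgClosure *) tv->first_watch_queue_entry);
    tv->first_watch_queue_entry = new_wq;    // second field, same pattern
}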
@@ -1113,6 +1244,9 @@ setTSOLink (Capability *cap, StgTSO *tso, StgTSO *target)
if (tso->dirty == 0) {
tso->dirty = 1;
recordClosureMutated(cap,(StgClosure*)tso);
+ IF_NONMOVING_WRITE_BARRIER_ENABLED {
+ updateRemembSetPushClosure(cap, (StgClosure *) tso->_link);
+ }
}
tso->_link = target;
}
@@ -1123,6 +1257,9 @@ setTSOPrev (Capability *cap, StgTSO *tso, StgTSO *target)
if (tso->dirty == 0) {
tso->dirty = 1;
recordClosureMutated(cap,(StgClosure*)tso);
+ IF_NONMOVING_WRITE_BARRIER_ENABLED {
+ updateRemembSetPushClosure(cap, (StgClosure *) tso->block_info.prev);
+ }
}
tso->block_info.prev = target;
}
@@ -1134,15 +1271,49 @@ dirty_TSO (Capability *cap, StgTSO *tso)
tso->dirty = 1;
recordClosureMutated(cap,(StgClosure*)tso);
}
+
+ IF_NONMOVING_WRITE_BARRIER_ENABLED {
+ updateRemembSetPushTSO(cap, tso);
+ }
}
void
dirty_STACK (Capability *cap, StgStack *stack)
{
- if (stack->dirty == 0) {
- stack->dirty = 1;
+ // First push to upd_rem_set before we set stack->dirty, since the
+ // nonmoving collector may already be marking the stack.
+ IF_NONMOVING_WRITE_BARRIER_ENABLED {
+ updateRemembSetPushStack(cap, stack);
+ }
+
+ if (! (stack->dirty & STACK_DIRTY)) {
+ stack->dirty = STACK_DIRTY;
recordClosureMutated(cap,(StgClosure*)stack);
}
+
+}
+
+/*
+ * This is the concurrent collector's write barrier for MVARs. In the other
+ * write barriers above, this is folded into the dirty_* functions. However, in
+ * the case of MVars we need to separate the act of adding the MVar to the
+ * mutable list from that of adding its fields to the update remembered set.
+ *
+ * Specifically, the wakeup loop in stg_putMVarzh wants to freely mutate the
+ * pointers of the MVar but needs to keep it locked, meaning we cannot yet add
+ * it to the mutable list; otherwise the assertion checking for clean MVars on
+ * the mutable list would fail.
+ */
+void
+update_MVAR(StgRegTable *reg, StgClosure *p, StgClosure *old_val)
+{
+ Capability *cap = regTableToCapability(reg);
+ IF_NONMOVING_WRITE_BARRIER_ENABLED {
+ StgMVar *mvar = (StgMVar *) p;
+ updateRemembSetPushClosure(cap, old_val);
+ updateRemembSetPushClosure(cap, (StgClosure *) mvar->head);
+ updateRemembSetPushClosure(cap, (StgClosure *) mvar->tail);
+ }
}
/*
@@ -1154,9 +1325,11 @@ dirty_STACK (Capability *cap, StgStack *stack)
 such as Chameneos and cheap-concurrency.
*/
void
-dirty_MVAR(StgRegTable *reg, StgClosure *p)
+dirty_MVAR(StgRegTable *reg, StgClosure *p, StgClosure *old_val)
{
- recordClosureMutated(regTableToCapability(reg),p);
+ Capability *cap = regTableToCapability(reg);
+ update_MVAR(reg, p, old_val);
+ recordClosureMutated(cap, p);
}
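As a rough sketch of the split motivated above (a hypothetical C-level stand-in for the Cmm wakeup loop in stg_putMVarzh, not part of this patch; it assumes StgMVar's value field): overwrites made while the MVar must stay locked use only update_MVAR, and the final write goes through dirty_MVAR, which additionally puts the MVar on the mutable list.

// Hypothetical illustration only; the real caller is written in Cmm.
static void my_mvar_wakeup(StgRegTable *reg, StgMVar *mvar,
                           StgClosure *val1, StgClosure *val2)
{
    // While the MVar remains locked: push the overwritten field (and the
    // queue pointers) to the update remembered set, but do not touch the
    // mutable list yet.
    update_MVAR(reg, (StgClosure *) mvar, mvar->value);
    mvar->value = val1;

    // Final overwrite: dirty_MVAR pushes the old fields (via update_MVAR)
    // and also records the MVar on the mutable list.
    dirty_MVAR(reg, (StgClosure *) mvar, mvar->value);
    mvar->value = val2;
}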
/* -----------------------------------------------------------------------------
@@ -1232,8 +1405,8 @@ W_ countOccupied (bdescr *bd)
W_ genLiveWords (generation *gen)
{
- return gen->n_words + gen->n_large_words +
- gen->n_compact_blocks * BLOCK_SIZE_W;
+ return (gen->live_estimate ? gen->live_estimate : gen->n_words) +
+ gen->n_large_words + gen->n_compact_blocks * BLOCK_SIZE_W;
}
W_ genLiveBlocks (generation *gen)
@@ -1289,9 +1462,9 @@ calcNeeded (bool force_major, memcount *blocks_needed)
for (uint32_t g = 0; g < RtsFlags.GcFlags.generations; g++) {
generation *gen = &generations[g];
- W_ blocks = gen->n_blocks // or: gen->n_words / BLOCK_SIZE_W (?)
- + gen->n_large_blocks
- + gen->n_compact_blocks;
+ W_ blocks = gen->live_estimate ? (gen->live_estimate / BLOCK_SIZE_W) : gen->n_blocks;
+ blocks += gen->n_large_blocks
+ + gen->n_compact_blocks;
// we need at least this much space
needed += blocks;
@@ -1309,7 +1482,7 @@ calcNeeded (bool force_major, memcount *blocks_needed)
// mark stack:
needed += gen->n_blocks / 100;
}
- if (gen->compact) {
+ if (gen->compact || (RtsFlags.GcFlags.useNonmoving && gen == oldest_gen)) {
continue; // no additional space needed for compaction
} else {
needed += gen->n_blocks;
@@ -1408,6 +1581,7 @@ void flushExec (W_ len, AdjustorExecutable exec_addr)
__clear_cache((void*)begin, (void*)end);
# endif
#elif defined(__GNUC__)
+ /* For all other platforms, fall back to a libgcc builtin. */
unsigned char* begin = (unsigned char*)exec_addr;
unsigned char* end = begin + len;
# if GCC_HAS_BUILTIN_CLEAR_CACHE
diff --git a/rts/sm/Storage.h b/rts/sm/Storage.h
index aaa44428b3..cdb9720650 100644
--- a/rts/sm/Storage.h
+++ b/rts/sm/Storage.h
@@ -17,6 +17,7 @@
-------------------------------------------------------------------------- */
void initStorage(void);
+void initGeneration(generation *gen, int g);
void exitStorage(void);
void freeStorage(bool free_heap);
@@ -46,8 +47,9 @@ extern Mutex sm_mutex;
The write barrier for MVARs and TVARs
-------------------------------------------------------------------------- */
-void dirty_MVAR(StgRegTable *reg, StgClosure *p);
-void dirty_TVAR(Capability *cap, StgTVar *p);
+void update_MVAR(StgRegTable *reg, StgClosure *p, StgClosure *old_val);
+void dirty_MVAR(StgRegTable *reg, StgClosure *p, StgClosure *old);
+void dirty_TVAR(Capability *cap, StgTVar *p, StgClosure *old);
/* -----------------------------------------------------------------------------
Nursery manipulation