Diffstat (limited to 'rts/sm/GCThread.h')
-rw-r--r--  rts/sm/GCThread.h  209
1 file changed, 209 insertions, 0 deletions
diff --git a/rts/sm/GCThread.h b/rts/sm/GCThread.h
new file mode 100644
index 0000000000..bb206db64c
--- /dev/null
+++ b/rts/sm/GCThread.h
@@ -0,0 +1,209 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team 1998-2008
+ *
+ * Generational garbage collector
+ *
+ * Documentation on the architecture of the Garbage Collector can be
+ * found in the online commentary:
+ *
+ * http://ghc.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+#include "WSDeque.h"
+#include "GetTime.h" // for Ticks
+
+#include "BeginPrivate.h"
+
+/* -----------------------------------------------------------------------------
+ General scheme
+
+ ToDo: move this to the wiki when the implementation is done.
+
+ We're only going to try to parallelise the copying GC for now. The
+ plan is as follows.
+
+ Each thread has a gc_thread structure (see below) which holds its
+ thread-local data. We'll keep a pointer to this in a thread-local
+ variable, or possibly in a register.
+
+ In the gc_thread structure is a gen_workspace for each generation. The
+ primary purpose of the gen_workspace is to hold evacuated objects;
+ when an object is evacuated, it is copied to the "todo" block in
+ the thread's workspace for the appropriate generation. When the todo
+ block is full, it is pushed to the global gen->todos list, which
+ is protected by a lock.  (In fact, we interpose a one-place buffer
+ here to reduce contention.)
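+
+ In outline, the push path looks something like the sketch below.
+ (This is illustrative only: pushFullTodoBlock is a made-up name,
+ and the todo_buffer and todo_lock fields stand in for whatever
+ the one-place buffer and its lock actually are.)
+
+     static void pushFullTodoBlock (generation *gen, bdescr *bd)
+     {
+         // try the lock-free one-place buffer first; cas() returns
+         // the old value, so non-NULL means the slot was taken
+         if (cas((StgVolatilePtr)&gen->todo_buffer,
+                 (StgWord)NULL, (StgWord)bd) != (StgWord)NULL) {
+             // buffer occupied: fall back to the locked list
+             ACQUIRE_SPIN_LOCK(&gen->todo_lock);
+             bd->link = gen->todos;
+             gen->todos = bd;
+             RELEASE_SPIN_LOCK(&gen->todo_lock);
+         }
+     }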
+
+ A thread repeatedly grabs a block of work from one of the
+ gen->todos lists, scavenges it, and keeps the scavenged block on
+ its own ws->scavd_list (this is to avoid unnecessary contention
+ when returning the completed buffers to the generation: we can
+ just collect them all later).
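+
+ In outline, each GC thread's scavenging loop is roughly the
+ following (a sketch: grabTodoBlock is a made-up stand-in for
+ however a block is obtained from one of the gen->todos lists,
+ and ws is the workspace for that block's generation):
+
+     bdescr *bd;
+     while ((bd = grabTodoBlock(gct)) != NULL) {
+         scavenge_block(bd);        // may evacuate more objects
+         // keep the finished block locally; the whole list is
+         // handed back to the generation at the end of GC
+         bd->link = ws->scavd_list;
+         ws->scavd_list = bd;
+         ws->n_scavd_blocks++;
+     }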
+
+ When there is no global work to do, we start scavenging the todo
+ blocks in the workspaces. This is where the scan_bd field comes
+ in: we can scavenge the todo block that we are currently filling.
+ When we have scavenged its contents (up to todo_bd->free), we
+ don't want to move the block immediately to the scavd_list,
+ because it is probably only partially full. So we remember that
+ we have scanned up to this point by saving the block in
+ ws->scan_bd, with the current scan pointer in ws->scan. Later,
+ when more objects have been copied into this block, we can come
+ back and scan the rest. When we visit this workspace again in
+ the future, scan_bd may still be the same as todo_bd, or it might
+ be different: if enough objects were copied into this block that
+ it filled up, then we will have allocated a new todo block, but
+ *not* pushed the old one to the generation, because it is only
+ partially scanned.
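+
+ As a sketch, resuming a partially-scanned todo block looks
+ something like this (scavengeOneObject is a made-up helper that
+ scavenges one object and returns its size in words; scan is the
+ scan-pointer field described above):
+
+     ws->scan_bd = ws->todo_bd;
+     while (ws->scan < ws->todo_free) {
+         ws->scan += scavengeOneObject(ws->scan);
+     }
+     // leave the block in place: more objects may yet be copied
+     // into it, and we will pick up again from ws->scan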
+
+ The reason to leave scanning the todo blocks until last is that
+ we want to keep working on full blocks for as long as possible.
+ ------------------------------------------------------------------------- */
+
+
+/* -----------------------------------------------------------------------------
+ Generation Workspace
+
+ A generation workspace exists for each generation for each GC
+ thread. The GC thread takes a block from the generation's todos
+ list into its scan_bd and then scans it. Objects referred to
+ by those in the scan block are copied into the todo or scavd blocks
+ of the relevant generation.
+
+ ------------------------------------------------------------------------- */
+
+typedef struct gen_workspace_ {
+ generation * gen; // the gen for this workspace
+ struct gc_thread_ * my_gct; // the gc_thread that contains this workspace
+
+ // where objects to be scavenged go
+ bdescr * todo_bd;
+ StgPtr todo_free; // free ptr for todo_bd
+ StgPtr todo_lim; // lim for todo_bd
+
+ WSDeque * todo_q; // queue of todo blocks (work-stealing deque)
+ bdescr * todo_overflow; // todo blocks that overflowed todo_q
+ uint32_t n_todo_overflow; // count of blocks in todo_overflow
+
+ // where large objects to be scavenged go
+ bdescr * todo_large_objects;
+
+ // Objects that have already been scavenged.
+ bdescr * scavd_list;
+ StgWord n_scavd_blocks; // count of blocks in this list
+ StgWord n_scavd_words; // count of words in this list
+
+ // Partially-full, scavenged, blocks
+ bdescr * part_list;
+ StgWord n_part_blocks; // count of blocks in the above list
+ StgWord n_part_words; // count of words in those blocks
+
+ StgWord pad[1];
+
+} gen_workspace ATTRIBUTE_ALIGNED(64);
+// aligned so that computing gct->gens[n] is a shift, not a multiply;
+// this fails if the size is < 64, which is why we need the pad above
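+
+// The invariant the comment above relies on could be checked at
+// compile time roughly as follows (illustrative only, using C11
+// _Static_assert; the file itself carries no such assertion):
+//
+//   _Static_assert((sizeof(gen_workspace) &
+//                   (sizeof(gen_workspace) - 1)) == 0,
+//                  "gen_workspace size must be a power of two so "
+//                  "that indexing gct->gens[n] compiles to a shift");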
+
+/* ----------------------------------------------------------------------------
+ GC thread object
+
+ Every GC thread has one of these. It contains all the
+ generation-specific workspaces and other GC-thread-local
+ information. At some later point it may be useful to move this
+ into the TLS store of the GC threads.
+ ------------------------------------------------------------------------- */
+
+typedef struct gc_thread_ {
+ Capability *cap;
+
+#if defined(THREADED_RTS)
+ OSThreadId id; // The OS thread that this struct belongs to
+ SpinLock gc_spin;
+ SpinLock mut_spin;
+ volatile StgWord wakeup; // NB not StgWord8; only StgWord is guaranteed atomic
+#endif
+ uint32_t thread_index; // a zero-based index identifying the thread
+
+ bdescr * free_blocks; // a buffer of free blocks for this thread,
+ // so it can allocate during GC without
+ // taking the block allocator's spin lock
+
+ // These two lists are chained through the STATIC_LINK() fields of static
+ // objects. Pointers are tagged with the current static_flag, so before
+ // following a pointer, untag it with UNTAG_STATIC_LIST_PTR().
+ StgClosure* static_objects; // live static objects
+ StgClosure* scavenged_static_objects; // static objects scavenged so far
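+
+ // Walking the live list looks roughly like this sketch (the
+ // per-object scavenging work is elided):
+ //
+ //   StgClosure *p = gct->static_objects;
+ //   while (p != END_OF_STATIC_OBJECT_LIST) {
+ //       p = UNTAG_STATIC_LIST_PTR(p); // untag before following
+ //       ... scavenge the static object p ...
+ //       p = *STATIC_LINK(get_itbl(p), p);
+ //   }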
+
+ W_ gc_count; // number of GCs this thread has done
+
+ // block that is currently being scanned
+ bdescr * scan_bd;
+
+ // Remembered sets on this CPU. Each GC thread has its own
+ // private per-generation remembered sets, so it can add an item
+ // to the remembered set without taking a lock. The mut_lists
+ // array on a gc_thread is the same as the one on the
+ // corresponding Capability; we stash it here too for easy access
+ // during GC; see recordMutableGen_GC().
+ bdescr ** mut_lists;
+
+ // --------------------
+ // evacuate flags
+
+ uint32_t evac_gen_no; // Youngest generation that objects
+ // should be evacuated to in
+ // evacuate(). (Logically an
+ // argument to evacuate, but it's
+ // static a lot of the time so we
+ // optimise it into a per-thread
+ // variable).
+
+ bool failed_to_evac; // failure to evacuate an object typically
+ // causes it to be recorded in the mutable
+ // object list
+
+ bool eager_promotion; // forces promotion to the evac gen
+ // instead of the to-space
+ // corresponding to the object
+
+ W_ thunk_selector_depth; // used to avoid unbounded recursion in
+ // evacuate() for THUNK_SELECTOR
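+
+ // A sketch of the guard this supports (exact logic elided;
+ // MAX_THUNK_SELECTOR_DEPTH is a constant in Evac.c):
+ //
+ //   if (gct->thunk_selector_depth >= MAX_THUNK_SELECTOR_DEPTH) {
+ //       // too deep: evacuate the selector thunk as-is rather
+ //       // than trying to evaluate it
+ //   }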
+
+ // -------------------
+ // stats
+
+ W_ copied; // words copied by this thread
+ W_ scanned; // words scanned by this thread
+ W_ any_work; // number of times we looked for work
+ W_ no_work; // number of times we found no work
+ W_ scav_find_work; // number of work-search passes
+
+ Time gc_start_cpu; // process CPU time at the start of this GC
+ Time gc_sync_start_elapsed; // elapsed time at the start of GC sync
+ Time gc_start_elapsed; // elapsed time at the start of this GC
+ W_ gc_start_faults; // page faults at the start of this GC
+
+ // -------------------
+ // workspaces
+
+ // array of workspaces, indexed by gen->abs_no. This is placed
+ // directly at the end of the gc_thread structure so that we can get from
+ // the gc_thread pointer to a workspace using only pointer
+ // arithmetic, no memory access. This happens in the inner loop
+ // of the GC, see Evac.c:alloc_for_copy().
+ gen_workspace gens[];
+} gc_thread;
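+
+// For example, the workspace lookup on the evacuation fast path is
+// pure address arithmetic, roughly (a sketch of the pattern in
+// Evac.c:alloc_for_copy()):
+//
+//   gen_workspace *ws = &gct->gens[gen_no]; // a shift and an add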
+
+
+extern uint32_t n_gc_threads; // number of GC threads
+
+extern gc_thread **gc_threads; // one entry per GC thread, indexed
+ // by thread_index
+
+#if defined(THREADED_RTS) && defined(llvm_CC_FLAVOR)
+extern ThreadLocalKey gctKey;
+#endif
+
+#include "EndPrivate.h"