diff options
Diffstat (limited to 'rts/sm/GCThread.h')
| -rw-r--r-- | rts/sm/GCThread.h | 209 |
1 file changed, 209 insertions, 0 deletions
diff --git a/rts/sm/GCThread.h b/rts/sm/GCThread.h new file mode 100644 index 0000000000..bb206db64c --- /dev/null +++ b/rts/sm/GCThread.h @@ -0,0 +1,209 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team 1998-2008 + * + * Generational garbage collector + * + * Documentation on the architecture of the Garbage Collector can be + * found in the online commentary: + * + * http://ghc.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC + * + * ---------------------------------------------------------------------------*/ + +#pragma once + +#include "WSDeque.h" +#include "GetTime.h" // for Ticks + +#include "BeginPrivate.h" + +/* ----------------------------------------------------------------------------- + General scheme + + ToDo: move this to the wiki when the implementation is done. + + We're only going to try to parallelise the copying GC for now. The + Plan is as follows. + + Each thread has a gc_thread structure (see below) which holds its + thread-local data. We'll keep a pointer to this in a thread-local + variable, or possibly in a register. + + In the gc_thread structure is a gen_workspace for each generation. The + primary purpose of the gen_workspace is to hold evacuated objects; + when an object is evacuated, it is copied to the "todo" block in + the thread's workspace for the appropriate generation. When the todo + block is full, it is pushed to the global gen->todos list, which + is protected by a lock. (in fact we intervene a one-place buffer + here to reduce contention). + + A thread repeatedly grabs a block of work from one of the + gen->todos lists, scavenges it, and keeps the scavenged block on + its own ws->scavd_list (this is to avoid unnecessary contention + returning the completed buffers back to the generation: we can just + collect them all later). + + When there is no global work to do, we start scavenging the todo + blocks in the workspaces. 
This is where the scan_bd field comes + in: we can scan the contents of the todo block, when we have + scavenged the contents of the todo block (up to todo_bd->free), we + don't want to move this block immediately to the scavd_list, + because it is probably only partially full. So we remember that we + have scanned up to this point by saving the block in ws->scan_bd, + with the current scan pointer in ws->scan. Later, when more + objects have been copied to this block, we can come back and scan + the rest. When we visit this workspace again in the future, + scan_bd may still be the same as todo_bd, or it might be different: + if enough objects were copied into this block that it filled up, + then we will have allocated a new todo block, but *not* pushed the + old one to the generation, because it is partially scanned. + + The reason to leave scanning the todo blocks until last is that we + want to deal with full blocks as far as possible. + ------------------------------------------------------------------------- */ + + +/* ----------------------------------------------------------------------------- + Generation Workspace + + A generation workspace exists for each generation for each GC + thread. The GC thread takes a block from the todos list of the + generation into the scanbd and then scans it. Objects referred to + by those in the scan block are copied into the todo or scavd blocks + of the relevant generation. 
+ + ------------------------------------------------------------------------- */ + +typedef struct gen_workspace_ { + generation * gen; // the gen for this workspace + struct gc_thread_ * my_gct; // the gc_thread that contains this workspace + + // where objects to be scavenged go + bdescr * todo_bd; + StgPtr todo_free; // free ptr for todo_bd + StgPtr todo_lim; // lim for todo_bd + + WSDeque * todo_q; + bdescr * todo_overflow; + uint32_t n_todo_overflow; + + // where large objects to be scavenged go + bdescr * todo_large_objects; + + // Objects that have already been scavenged. + bdescr * scavd_list; + StgWord n_scavd_blocks; // count of blocks in this list + StgWord n_scavd_words; + + // Partially-full, scavenged, blocks + bdescr * part_list; + StgWord n_part_blocks; // count of above + StgWord n_part_words; + + StgWord pad[1]; + +} gen_workspace ATTRIBUTE_ALIGNED(64); +// align so that computing gct->gens[n] is a shift, not a multiply +// fails if the size is <64, which is why we need the pad above + +/* ---------------------------------------------------------------------------- + GC thread object + + Every GC thread has one of these. It contains all the generation + specific workspaces and other GC thread local information. At some + later point it maybe useful to move this other into the TLS store + of the GC threads + ------------------------------------------------------------------------- */ + +typedef struct gc_thread_ { + Capability *cap; + +#if defined(THREADED_RTS) + OSThreadId id; // The OS thread that this struct belongs to + SpinLock gc_spin; + SpinLock mut_spin; + volatile StgWord wakeup; // NB not StgWord8; only StgWord is guaranteed atomic +#endif + uint32_t thread_index; // a zero based index identifying the thread + + bdescr * free_blocks; // a buffer of free blocks for this thread + // during GC without accessing the block + // allocators spin lock. + + // These two lists are chained through the STATIC_LINK() fields of static + // objects. 
Pointers are tagged with the current static_flag, so before + // following a pointer, untag it with UNTAG_STATIC_LIST_PTR(). + StgClosure* static_objects; // live static objects + StgClosure* scavenged_static_objects; // static objects scavenged so far + + W_ gc_count; // number of GCs this thread has done + + // block that is currently being scanned + bdescr * scan_bd; + + // Remembered sets on this CPU. Each GC thread has its own + // private per-generation remembered sets, so it can add an item + // to the remembered set without taking a lock. The mut_lists + // array on a gc_thread is the same as the one on the + // corresponding Capability; we stash it here too for easy access + // during GC; see recordMutableGen_GC(). + bdescr ** mut_lists; + + // -------------------- + // evacuate flags + + uint32_t evac_gen_no; // Youngest generation that objects + // should be evacuated to in + // evacuate(). (Logically an + // argument to evacuate, but it's + // static a lot of the time so we + // optimise it into a per-thread + // variable). + + bool failed_to_evac; // failure to evacuate an object typically + // Causes it to be recorded in the mutable + // object list + + bool eager_promotion; // forces promotion to the evac gen + // instead of the to-space + // corresponding to the object + + W_ thunk_selector_depth; // used to avoid unbounded recursion in + // evacuate() for THUNK_SELECTOR + + // ------------------- + // stats + + W_ copied; + W_ scanned; + W_ any_work; + W_ no_work; + W_ scav_find_work; + + Time gc_start_cpu; // process CPU time + Time gc_sync_start_elapsed; // start of GC sync + Time gc_start_elapsed; // process elapsed time + W_ gc_start_faults; + + // ------------------- + // workspaces + + // array of workspaces, indexed by gen->abs_no. This is placed + // directly at the end of the gc_thread structure so that we can get from + // the gc_thread pointer to a workspace using only pointer + // arithmetic, no memory access. 
This happens in the inner loop + // of the GC, see Evac.c:alloc_for_copy(). + gen_workspace gens[]; +} gc_thread; + + +extern uint32_t n_gc_threads; + +extern gc_thread **gc_threads; + +#if defined(THREADED_RTS) && defined(llvm_CC_FLAVOR) +extern ThreadLocalKey gctKey; +#endif + +#include "EndPrivate.h" |
