53 files changed, 5718 insertions, 0 deletions
diff --git a/rts/include/rts/Adjustor.h b/rts/include/rts/Adjustor.h
new file mode 100644
index 0000000000..8965c7c8bb
--- /dev/null
+++ b/rts/include/rts/Adjustor.h
@@ -0,0 +1,22 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2009
+ *
+ * Adjustor API
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ *   https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * -------------------------------------------------------------------------- */
+
+#pragma once
+
+/* Creating and destroying an adjustor thunk */
+void* createAdjustor (int cconv,
+                      StgStablePtr hptr,
+                      StgFunPtr wptr,
+                      char *typeString);
+
+void freeHaskellFunctionPtr (void* ptr);
diff --git a/rts/include/rts/BlockSignals.h b/rts/include/rts/BlockSignals.h
new file mode 100644
index 0000000000..46b0b0f562
--- /dev/null
+++ b/rts/include/rts/BlockSignals.h
@@ -0,0 +1,34 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2009
+ *
+ * RTS signal handling
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ *   https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+/* Used by runProcess() in the process package
+ */
+
+/*
+ * Function: blockUserSignals()
+ *
+ * Temporarily block the delivery of further console events. Needed to
+ * avoid race conditions when GCing the queue of outstanding handlers or
+ * when emptying the queue by running the handlers.
+ *
+ */
+void blockUserSignals(void);
+
+/*
+ * Function: unblockUserSignals()
+ *
+ * The inverse of blockUserSignals(); re-enable the delivery of console events.
+ */
+void unblockUserSignals(void);
diff --git a/rts/include/rts/Bytecodes.h b/rts/include/rts/Bytecodes.h
new file mode 100644
index 0000000000..b97d4d4f60
--- /dev/null
+++ b/rts/include/rts/Bytecodes.h
@@ -0,0 +1,109 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2009
+ *
+ * Bytecode definitions.
+ *
+ * ---------------------------------------------------------------------------*/
+
+/* --------------------------------------------------------------------------
+ * Instructions
+ *
+ * Notes:
+ * o CASEFAIL is generated by the compiler whenever it tests an "irrefutable"
+ *   pattern which fails.  If we don't see too many of these, we could
+ *   optimise out the redundant test.
+ * ------------------------------------------------------------------------*/
+
+/* NOTE:
+
+   THIS FILE IS INCLUDED IN HASKELL SOURCES (ghc/compiler/GHC/ByteCode/Asm.hs).
+   DO NOT PUT C-SPECIFIC STUFF IN HERE!
+
+   I hope that's clear :-)
+*/
+
+#define bci_STKCHECK                    1
+#define bci_PUSH_L                      2
+#define bci_PUSH_LL                     3
+#define bci_PUSH_LLL                    4
+#define bci_PUSH8                       5
+#define bci_PUSH16                      6
+#define bci_PUSH32                      7
+#define bci_PUSH8_W                     8
+#define bci_PUSH16_W                    9
+#define bci_PUSH32_W                    10
+#define bci_PUSH_G                      11
+#define bci_PUSH_ALTS                   12
+#define bci_PUSH_ALTS_P                 13
+#define bci_PUSH_ALTS_N                 14
+#define bci_PUSH_ALTS_F                 15
+#define bci_PUSH_ALTS_D                 16
+#define bci_PUSH_ALTS_L                 17
+#define bci_PUSH_ALTS_V                 18
+#define bci_PUSH_PAD8                   19
+#define bci_PUSH_PAD16                  20
+#define bci_PUSH_PAD32                  21
+#define bci_PUSH_UBX8                   22
+#define bci_PUSH_UBX16                  23
+#define bci_PUSH_UBX32                  24
+#define bci_PUSH_UBX                    25
+#define bci_PUSH_APPLY_N                26
+#define bci_PUSH_APPLY_F                27
+#define bci_PUSH_APPLY_D                28
+#define bci_PUSH_APPLY_L                29
+#define bci_PUSH_APPLY_V                30
+#define bci_PUSH_APPLY_P                31
+#define bci_PUSH_APPLY_PP               32
+#define bci_PUSH_APPLY_PPP              33
+#define bci_PUSH_APPLY_PPPP             34
+#define bci_PUSH_APPLY_PPPPP            35
+#define bci_PUSH_APPLY_PPPPPP           36
+/* #define bci_PUSH_APPLY_PPPPPPP          37 */
+#define bci_SLIDE                       38
+#define bci_ALLOC_AP                    39
+#define bci_ALLOC_AP_NOUPD              40
+#define bci_ALLOC_PAP                   41
+#define bci_MKAP                        42
+#define bci_MKPAP                       43
+#define bci_UNPACK                      44
+#define bci_PACK                        45
+#define bci_TESTLT_I                    46
+#define bci_TESTEQ_I                    47
+#define bci_TESTLT_F                    48
+#define bci_TESTEQ_F                    49
+#define bci_TESTLT_D                    50
+#define bci_TESTEQ_D                    51
+#define bci_TESTLT_P                    52
+#define bci_TESTEQ_P                    53
+#define bci_CASEFAIL                    54
+#define bci_JMP                         55
+#define bci_CCALL                       56
+#define bci_SWIZZLE                     57
+#define bci_ENTER                       58
+#define bci_RETURN                      59
+#define bci_RETURN_P                    60
+#define bci_RETURN_N                    61
+#define bci_RETURN_F                    62
+#define bci_RETURN_D                    63
+#define bci_RETURN_L                    64
+#define bci_RETURN_V                    65
+#define bci_BRK_FUN                     66
+#define bci_TESTLT_W                    67
+#define bci_TESTEQ_W                    68
+
+#define bci_RETURN_T                    69
+#define bci_PUSH_ALTS_T                 70
+/* If you need to go past 255 then you will run into the flags */
+
+/* If you need to go below 0x0100 then you will run into the instructions */
+#define bci_FLAG_LARGE_ARGS 0x8000
+
+/* If a BCO definitely requires less than this many words of stack,
+   don't include an explicit STKCHECK insn in it.  The interpreter
+   will check for this many words of stack before running each BCO,
+   rendering an explicit check unnecessary in the majority of
+   cases. */
+#define INTERP_STACK_CHECK_THRESH 50
+
+/*-------------------------------------------------------------------------*/
diff --git a/rts/include/rts/Config.h b/rts/include/rts/Config.h
new file mode 100644
index 0000000000..289950af45
--- /dev/null
+++ b/rts/include/rts/Config.h
@@ -0,0 +1,52 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2009
+ *
+ * Rts settings.
+ *
+ * NOTE: assumes #include "ghcconfig.h"
+ *
+ * NB: THIS FILE IS INCLUDED IN NON-C CODE AND DATA!  #defines only please.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ *   https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+#if defined(TICKY_TICKY) && defined(THREADED_RTS)
+#error TICKY_TICKY is incompatible with THREADED_RTS
+#endif
+
+/*
+ * Whether the runtime system will use libbfd for debugging purposes.
+ */
+#if defined(DEBUG) && defined(HAVE_BFD_H) && defined(HAVE_LIBBFD) && !defined(_WIN32)
+#define USING_LIBBFD 1
+#endif
+
+/* DEBUG and PROFILING both imply TRACING */
+#if defined(DEBUG) || defined(PROFILING)
+#if !defined(TRACING)
+#define TRACING
+#endif
+#endif
+
+/* DEBUG implies TICKY_TICKY */
+#if defined(DEBUG)
+#if !defined(TICKY_TICKY)
+#define TICKY_TICKY
+#endif
+#endif
+
+
+/* -----------------------------------------------------------------------------
+   Signals - supported on non-PAR versions of the runtime.  See RtsSignals.h.
+   -------------------------------------------------------------------------- */
+
+#define RTS_USER_SIGNALS 1
+
+/* Profile spin locks */
+
+#define PROF_SPIN
diff --git a/rts/include/rts/Constants.h b/rts/include/rts/Constants.h
new file mode 100644
index 0000000000..9cbe47752e
--- /dev/null
+++ b/rts/include/rts/Constants.h
@@ -0,0 +1,340 @@
+/* ----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2009
+ *
+ * Constants
+ *
+ * NOTE: this information is used by both the compiler and the RTS.
+ * Some of it is tweakable, and some of it must be kept up to date
+ * with various other parts of the system.
+ *
+ * Constants which are derived automatically from other definitions in
+ * the system (eg. structure sizes) are generated into the file
+ * DerivedConstants.h by a C program (utils/deriveConstants).
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ *   https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * -------------------------------------------------------------------------- */
+
+#pragma once
+
+/* -----------------------------------------------------------------------------
+   Minimum closure sizes
+
+   This is the minimum number of words in the payload of a heap-allocated
+   closure, so that the closure has two bits in the bitmap for mark-compact
+   collection.
+
+   See Note [Mark bits in mark-compact collector] in rts/sm/Compact.h
+   -------------------------------------------------------------------------- */
+
+#define MIN_PAYLOAD_SIZE 1
+
+/* -----------------------------------------------------------------------------
+   Constants to do with specialised closure types.
+   -------------------------------------------------------------------------- */
+
+/* We have some pre-compiled selector thunks defined in rts/StgStdThunks.hc.
+ * This constant defines the highest selectee index that we can replace with a
+ * reference to the pre-compiled code.
+ */
+
+#define MAX_SPEC_SELECTEE_SIZE 15
+
+/* Vector-apply thunks.  These thunks just push their free variables
+ * on the stack and enter the first one.  They're a bit like PAPs, but
+ * don't have a dynamic size.  We've pre-compiled a few to save
+ * space.
+ */
+
+#define MAX_SPEC_AP_SIZE       7
+
+/* Specialised FUN/THUNK/CONSTR closure types */
+
+#define MAX_SPEC_THUNK_SIZE    2
+#define MAX_SPEC_FUN_SIZE      2
+#define MAX_SPEC_CONSTR_SIZE   2
+
+/* Range of built-in table of static small int-like and char-like closures.
+ *
+ *   NB. This corresponds with the number of actual INTLIKE/CHARLIKE
+ *   closures defined in rts/StgMiscClosures.cmm.
+ */
+#define MAX_INTLIKE             255
+#define MIN_INTLIKE             (-16)
+
+#define MAX_CHARLIKE            255
+#define MIN_CHARLIKE            0
+
+/* Each byte in the card table for an StgMutArrPtrs covers
+ * (1<<MUT_ARR_PTRS_CARD_BITS) elements in the array.  To find a good
+ * value for this, I used the benchmarks nofib/gc/hash,
+ * nofib/gc/graph, and nofib/gc/gc_bench.
+ */
+#define MUT_ARR_PTRS_CARD_BITS 7
+
+/* -----------------------------------------------------------------------------
+   STG Registers.
+
+   Note that in MachRegs.h we define how many of these registers are
+   *real* machine registers, and not just offsets in the Register Table.
+   -------------------------------------------------------------------------- */
+
+#define MAX_VANILLA_REG 10
+#define MAX_FLOAT_REG   6
+#define MAX_DOUBLE_REG  6
+#define MAX_LONG_REG    1
+#define MAX_XMM_REG     6
+
+/* -----------------------------------------------------------------------------
+   Semi-Tagging constants
+
+   Old Comments about this stuff:
+
+   Tags for indirection nodes and ``other'' (probably unevaluated) nodes;
+   normal-form values of algebraic data types will have tags 0, 1, ...
+
+   @INFO_IND_TAG@ is different from @INFO_OTHER_TAG@ just so we can count
+   how often we bang into indirection nodes; that's all.  (WDP 95/11)
+
+   ToDo: find out if we need any of this.
+   -------------------------------------------------------------------------- */
+
+#define INFO_OTHER_TAG          (-1)
+#define INFO_IND_TAG            (-2)
+#define INFO_FIRST_TAG          0
+
+/* -----------------------------------------------------------------------------
+   How much C stack to reserve for local temporaries when in the STG
+   world.  Used in StgCRun.c.
+   -------------------------------------------------------------------------- */
+
+#define RESERVED_C_STACK_BYTES (2048 * SIZEOF_LONG)
+
+/* -----------------------------------------------------------------------------
+   How large is the stack frame saved by StgRun?
+   Used in StgCRun.c.
+
+   The size has to be enough to save the registers (see StgCRun)
+   plus padding if the result is not 16 byte aligned.
+   See the Note [Stack Alignment on X86] in StgCRun.c for details.
+
+   -------------------------------------------------------------------------- */
+#if defined(x86_64_HOST_ARCH)
+#  if defined(mingw32_HOST_OS)
+#    define STG_RUN_STACK_FRAME_SIZE 144
+#  else
+#    define STG_RUN_STACK_FRAME_SIZE 48
+#  endif
+#endif
+
+/* -----------------------------------------------------------------------------
+   StgRun related labels shared between StgCRun.c and StgStartup.cmm.
+   -------------------------------------------------------------------------- */
+
+#if defined(LEADING_UNDERSCORE)
+#define STG_RUN "_StgRun"
+#define STG_RUN_JMP _StgRunJmp
+#define STG_RETURN "_StgReturn"
+#else
+#define STG_RUN "StgRun"
+#define STG_RUN_JMP StgRunJmp
+#define STG_RETURN "StgReturn"
+#endif
+
+/* -----------------------------------------------------------------------------
+   How much Haskell stack space to reserve for the saving of registers
+   etc. in the case of a stack/heap overflow.
+
+   This must be large enough to accommodate the largest stack frame
+   pushed in one of the heap check fragments in HeapStackCheck.hc
+   (ie. currently the generic heap checks - 3 words for StgRetDyn,
+   18 words for the saved registers, see StgMacros.h).
+   -------------------------------------------------------------------------- */
+
+#define RESERVED_STACK_WORDS 21
+
+/* -----------------------------------------------------------------------------
+   The limit on the size of the stack check performed when we enter an
+   AP_STACK, in words.  See raiseAsync() and bug #1466.
+   -------------------------------------------------------------------------- */
+
+#define AP_STACK_SPLIM 1024
+
+/* -----------------------------------------------------------------------------
+   Storage manager constants
+   -------------------------------------------------------------------------- */
+
+/* The size of a block (2^BLOCK_SHIFT bytes) */
+#define BLOCK_SHIFT  12
+
+/* The size of a megablock (2^MBLOCK_SHIFT bytes) */
+#define MBLOCK_SHIFT   20
+
+/* -----------------------------------------------------------------------------
+   Bitmap/size fields (used in info tables)
+   -------------------------------------------------------------------------- */
+
+/* In a 32-bit bitmap field, we use 5 bits for the size, and 27 bits
+ * for the bitmap.  If the bitmap requires more than 27 bits, then we
+ * store it in a separate array, and leave a pointer in the bitmap
+ * field.  On a 64-bit machine, the sizes are extended accordingly.
+ */
+#if SIZEOF_VOID_P == 4
+#define BITMAP_SIZE_MASK     0x1f
+#define BITMAP_BITS_SHIFT    5
+#elif SIZEOF_VOID_P == 8
+#define BITMAP_SIZE_MASK     0x3f
+#define BITMAP_BITS_SHIFT    6
+#else
+#error unknown SIZEOF_VOID_P
+#endif
+
+/* -----------------------------------------------------------------------------
+   Lag/Drag/Void constants
+   -------------------------------------------------------------------------- */
+
+/*
+  An LDV word is divided into 3 parts: state bits (LDV_STATE_MASK), creation
+  time bits (LDV_CREATE_MASK), and last use time bits (LDV_LAST_MASK).
+ */
+#if SIZEOF_VOID_P == 8
+#define LDV_SHIFT               30
+#define LDV_STATE_MASK          0x1000000000000000
+#define LDV_CREATE_MASK         0x0FFFFFFFC0000000
+#define LDV_LAST_MASK           0x000000003FFFFFFF
+#define LDV_STATE_CREATE        0x0000000000000000
+#define LDV_STATE_USE           0x1000000000000000
+#else
+#define LDV_SHIFT               15
+#define LDV_STATE_MASK          0x40000000
+#define LDV_CREATE_MASK         0x3FFF8000
+#define LDV_LAST_MASK           0x00007FFF
+#define LDV_STATE_CREATE        0x00000000
+#define LDV_STATE_USE           0x40000000
+#endif /* SIZEOF_VOID_P */
+
+/* -----------------------------------------------------------------------------
+   TSO related constants
+   -------------------------------------------------------------------------- */
+
+/*
+ * Constants for the what_next field of a TSO, which indicates how it
+ * is to be run.
+ */
+#define ThreadRunGHC    1       /* return to address on top of stack */
+#define ThreadInterpret 2       /* interpret this thread */
+#define ThreadKilled    3       /* thread has died, don't run it */
+#define ThreadComplete  4       /* thread has finished */
+
+/*
+ * Constants for the why_blocked field of a TSO
+ * NB. keep these in sync with GHC/Conc/Sync.hs: threadStatus
+ */
+#define NotBlocked          0
+#define BlockedOnMVar       1
+#define BlockedOnMVarRead   14 /* TODO: renumber me, see #9003 */
+#define BlockedOnBlackHole  2
+#define BlockedOnRead       3
+#define BlockedOnWrite      4
+#define BlockedOnDelay      5
+#define BlockedOnSTM        6
+
+/* Win32 only: */
+#define BlockedOnDoProc     7
+
+/* Only relevant for THREADED_RTS: */
+#define BlockedOnCCall      10
+#define BlockedOnCCall_Interruptible 11
+   /* same as above but permit killing the worker thread */
+
+/* Involved in a message sent to tso->msg_cap */
+#define BlockedOnMsgThrowTo 12
+
+/* The thread is not on any run queues, but can be woken up
+   by tryWakeupThread() */
+#define ThreadMigrating     13
+
+/* Lightweight non-deadlock checked version of MVar.  Used for the why_blocked
+   field of a TSO. Threads blocked for this reason are not forcibly released by
+   the GC, as we expect them to be unblocked in the future based on outstanding
+   IO events.  */
+#define BlockedOnIOCompletion  15
+
+/* Next number is 16.  */
+
+/*
+ * These constants are returned to the scheduler by a thread that has
+ * stopped for one reason or another.  See typedef StgThreadReturnCode
+ * in TSO.h.
+ */
+#define HeapOverflow   1                /* might also be StackOverflow */
+#define StackOverflow  2
+#define ThreadYielding 3
+#define ThreadBlocked  4
+#define ThreadFinished 5
+
+/*
+ * Flags for the tso->flags field.
+ */
+
+/*
+ * TSO_LOCKED is set when a TSO is locked to a particular Capability.
+ */
+#define TSO_LOCKED  2
+
+/*
+ * TSO_BLOCKEX: the TSO is blocking exceptions
+ *
+ * TSO_INTERRUPTIBLE: the TSO can be interrupted if it blocks
+ * interruptibly (eg. with BlockedOnMVar).
+ *
+ * TSO_STOPPED_ON_BREAKPOINT: the thread is currently stopped in a breakpoint
+ */
+#define TSO_BLOCKEX       4
+#define TSO_INTERRUPTIBLE 8
+#define TSO_STOPPED_ON_BREAKPOINT 16
+
+/*
+ * Used by the sanity checker to check whether TSOs are on the correct
+ * mutable list.
+ */
+#define TSO_MARKED 64
+
+/*
+ * Used to communicate between stackSqueeze() and
+ * threadStackOverflow() that a thread's stack was squeezed and the
+ * stack may not need to be expanded.
+ */
+#define TSO_SQUEEZED 128
+
+/*
+ * Enables the AllocationLimitExceeded exception when the thread's
+ * allocation limit goes negative.
+ */
+#define TSO_ALLOC_LIMIT 256
+
+/*
+ * The number of times we spin in a spin lock before yielding (see
+ * #3758).  To tune this value, use the benchmark in #3758: run the
+ * server with -N2 and the client both on a dual-core.  Also make sure
+ * that the chosen value doesn't slow down any of the parallel
+ * benchmarks in nofib/parallel.
+ */
+#define SPIN_COUNT 1000
+
+/* -----------------------------------------------------------------------------
+   Spare workers per Capability in the threaded RTS
+
+   No more than MAX_SPARE_WORKERS will be kept in the thread pool
+   associated with each Capability.
+   -------------------------------------------------------------------------- */
+
+#define MAX_SPARE_WORKERS 6
+
+/*
+ * The maximum number of NUMA nodes we support.  This is a fixed limit so that
+ * we can have static arrays of this size in the RTS for speed.
+ */
+#define MAX_NUMA_NODES 16
diff --git a/rts/include/rts/EventLogFormat.h b/rts/include/rts/EventLogFormat.h
new file mode 100644
index 0000000000..4a2b339a9e
--- /dev/null
+++ b/rts/include/rts/EventLogFormat.h
@@ -0,0 +1,246 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2008-2009
+ *
+ * Event log format
+ *
+ * The log format is designed to be extensible: old tools should be
+ * able to parse (but not necessarily understand all of) new versions
+ * of the format, and new tools will be able to understand old log
+ * files.
+ *
+ * The canonical documentation for the event log format and record layouts is
+ * the "Eventlog encodings" section of the GHC User's Guide.
+ *
+ * To add a new event
+ * ------------------
+ *
+ *  - In this file:
+ *    - give it a new number, add a new #define EVENT_XXX
+ *      below. Do not reuse event ids from deprecated event types.
+ *
+ *  - In EventLog.c
+ *    - add it to the EventDesc array
+ *    - emit the event type in initEventLogging()
+ *    - emit the new event in postEvent_()
+ *    - generate the event itself by calling postEvent() somewhere
+ *
+ *  - Describe the meaning and encoding of the event in the users guide
+ *    (docs/user_guide/eventlog-formats.rst)
+ *
+ *  - In the Haskell code to parse the event log file:
+ *    - add types and code to read the new event
+ *
+ * -------------------------------------------------------------------------- */
+
+#pragma once
+
+/*
+ * Markers for begin/end of the Header.
+ */
+#define EVENT_HEADER_BEGIN    0x68647262 /* 'h' 'd' 'r' 'b' */
+#define EVENT_HEADER_END      0x68647265 /* 'h' 'd' 'r' 'e' */
+
+#define EVENT_DATA_BEGIN      0x64617462 /* 'd' 'a' 't' 'b' */
+#define EVENT_DATA_END        0xffff
+
+/*
+ * Markers for begin/end of the list of Event Types in the Header.
+ * Header, Event Type, Begin = hetb
+ * Header, Event Type, End = hete
+ */
+#define EVENT_HET_BEGIN       0x68657462 /* 'h' 'e' 't' 'b' */
+#define EVENT_HET_END         0x68657465 /* 'h' 'e' 't' 'e' */
+
+#define EVENT_ET_BEGIN        0x65746200 /* 'e' 't' 'b' 0 */
+#define EVENT_ET_END          0x65746500 /* 'e' 't' 'e' 0 */
+
+/*
+ * Types of event
+ */
+#define EVENT_CREATE_THREAD        0 /* (thread)               */
+#define EVENT_RUN_THREAD           1 /* (thread)               */
+#define EVENT_STOP_THREAD          2 /* (thread, status, blockinfo) */
+#define EVENT_THREAD_RUNNABLE      3 /* (thread)               */
+#define EVENT_MIGRATE_THREAD       4 /* (thread, new_cap)      */
+/* 5, 6, 7 deprecated */
+#define EVENT_THREAD_WAKEUP        8 /* (thread, other_cap)    */
+#define EVENT_GC_START             9 /* ()                     */
+#define EVENT_GC_END              10 /* ()                     */
+#define EVENT_REQUEST_SEQ_GC      11 /* ()                     */
+#define EVENT_REQUEST_PAR_GC      12 /* ()                     */
+/* 13, 14 deprecated */
+#define EVENT_CREATE_SPARK_THREAD 15 /* (spark_thread)         */
+#define EVENT_LOG_MSG             16 /* (message ...)          */
+/* 17 deprecated */
+#define EVENT_BLOCK_MARKER        18 /* (size, end_time, capability) */
+#define EVENT_USER_MSG            19 /* (message ...)          */
+#define EVENT_GC_IDLE             20 /* () */
+#define EVENT_GC_WORK             21 /* () */
+#define EVENT_GC_DONE             22 /* () */
+/* 23, 24 used by eden */
+#define EVENT_CAPSET_CREATE       25 /* (capset, capset_type)  */
+#define EVENT_CAPSET_DELETE       26 /* (capset)               */
+#define EVENT_CAPSET_ASSIGN_CAP   27 /* (capset, cap)          */
+#define EVENT_CAPSET_REMOVE_CAP   28 /* (capset, cap)          */
+/* the RTS identifier is in the form of "GHC-version rts_way"  */
+#define EVENT_RTS_IDENTIFIER      29 /* (capset, name_version_string) */
+/* the vectors in these events are null separated strings             */
+#define EVENT_PROGRAM_ARGS        30 /* (capset, commandline_vector)  */
+#define EVENT_PROGRAM_ENV         31 /* (capset, environment_vector)  */
+#define EVENT_OSPROCESS_PID       32 /* (capset, pid)          */
+#define EVENT_OSPROCESS_PPID      33 /* (capset, parent_pid)   */
+#define EVENT_SPARK_COUNTERS      34 /* (crt,dud,ovf,cnv,gcd,fiz,rem) */
+#define EVENT_SPARK_CREATE        35 /* ()                     */
+#define EVENT_SPARK_DUD           36 /* ()                     */
+#define EVENT_SPARK_OVERFLOW      37 /* ()                     */
+#define EVENT_SPARK_RUN           38 /* ()                     */
+#define EVENT_SPARK_STEAL         39 /* (victim_cap)           */
+#define EVENT_SPARK_FIZZLE        40 /* ()                     */
+#define EVENT_SPARK_GC            41 /* ()                     */
+#define EVENT_INTERN_STRING       42 /* (string, id) {not used by ghc} */
+#define EVENT_WALL_CLOCK_TIME     43 /* (capset, unix_epoch_seconds, nanoseconds) */
+#define EVENT_THREAD_LABEL        44 /* (thread, name_string)  */
+#define EVENT_CAP_CREATE          45 /* (cap)                  */
+#define EVENT_CAP_DELETE          46 /* (cap)                  */
+#define EVENT_CAP_DISABLE         47 /* (cap)                  */
+#define EVENT_CAP_ENABLE          48 /* (cap)                  */
+#define EVENT_HEAP_ALLOCATED      49 /* (heap_capset, alloc_bytes) */
+#define EVENT_HEAP_SIZE           50 /* (heap_capset, size_bytes) */
+#define EVENT_HEAP_LIVE           51 /* (heap_capset, live_bytes) */
+#define EVENT_HEAP_INFO_GHC       52 /* (heap_capset, n_generations,
+                                         max_heap_size, alloc_area_size,
+                                         mblock_size, block_size) */
+#define EVENT_GC_STATS_GHC        53 /* (heap_capset, generation,
+                                         copied_bytes, slop_bytes, frag_bytes,
+                                         par_n_threads,
+                                         par_max_copied,
+                                         par_tot_copied, par_balanced_copied) */
+#define EVENT_GC_GLOBAL_SYNC      54 /* ()                     */
+#define EVENT_TASK_CREATE         55 /* (taskID, cap, tid)       */
+#define EVENT_TASK_MIGRATE        56 /* (taskID, cap, new_cap)   */
+#define EVENT_TASK_DELETE         57 /* (taskID)                 */
+#define EVENT_USER_MARKER         58 /* (marker_name) */
+#define EVENT_HACK_BUG_T9003      59 /* Hack: see trac #9003 */
+
+/* Range 60 - 80 is used by eden for parallel tracing
+ * see http://www.mathematik.uni-marburg.de/~eden/
+ */
+
+#define EVENT_MEM_RETURN         90 /* (cap, current_mblocks, needed_mblocks, returned_mblocks) */
+#define EVENT_BLOCKS_SIZE        91 /* (heapcapset, size_bytes) */
+
+/* Range 100 - 139 is reserved for Mercury. */
+
+/* Range 140 - 159 is reserved for Perf events. */
+
+/* Range 160 - 180 is reserved for cost-centre heap profiling events. */
+
+#define EVENT_HEAP_PROF_BEGIN              160
+#define EVENT_HEAP_PROF_COST_CENTRE        161
+#define EVENT_HEAP_PROF_SAMPLE_BEGIN       162
+#define EVENT_HEAP_PROF_SAMPLE_COST_CENTRE 163
+#define EVENT_HEAP_PROF_SAMPLE_STRING      164
+#define EVENT_HEAP_PROF_SAMPLE_END         165
+#define EVENT_HEAP_BIO_PROF_SAMPLE_BEGIN   166
+#define EVENT_PROF_SAMPLE_COST_CENTRE      167
+#define EVENT_PROF_BEGIN                   168
+#define EVENT_IPE                          169
+
+#define EVENT_USER_BINARY_MSG              181
+
+#define EVENT_CONC_MARK_BEGIN              200
+#define EVENT_CONC_MARK_END                201
+#define EVENT_CONC_SYNC_BEGIN              202
+#define EVENT_CONC_SYNC_END                203
+#define EVENT_CONC_SWEEP_BEGIN             204
+#define EVENT_CONC_SWEEP_END               205
+#define EVENT_CONC_UPD_REM_SET_FLUSH       206
+#define EVENT_NONMOVING_HEAP_CENSUS        207
+
+#define EVENT_TICKY_COUNTER_DEF            210
+#define EVENT_TICKY_COUNTER_SAMPLE         211
+#define EVENT_TICKY_COUNTER_BEGIN_SAMPLE   212
+
+/*
+ * The highest event code +1 that ghc itself emits. Note that some event
+ * ranges higher than this are reserved but not currently emitted by ghc.
+ * This must match the size of the EventDesc[] array in EventLog.c
+ */
+#define NUM_GHC_EVENT_TAGS        213
+
+#if 0  /* DEPRECATED EVENTS: */
+/* we don't actually need to record the thread, it's implicit */
+#define EVENT_RUN_SPARK            5 /* (thread)               */
+#define EVENT_STEAL_SPARK          6 /* (thread, victim_cap)   */
+/* shutdown replaced by EVENT_CAP_DELETE */
+#define EVENT_SHUTDOWN             7 /* ()                     */
+/* ghc changed how it handles sparks so these are no longer applicable */
+#define EVENT_CREATE_SPARK        13 /* (cap, thread) */
+#define EVENT_SPARK_TO_THREAD     14 /* (cap, thread, spark_thread) */
+#define EVENT_STARTUP             17 /* (num_capabilities)     */
+/* these are used by eden but are replaced by new alternatives for ghc */
+#define EVENT_VERSION             23 /* (version_string) */
+#define EVENT_PROGRAM_INVOCATION  24 /* (commandline_string) */
+#endif
+
+/*
+ * Status values for EVENT_STOP_THREAD
+ *
+ * 1-5 are the StgRun return values (from rts/include/rts/Constants.h):
+ *
+ * #define HeapOverflow   1
+ * #define StackOverflow  2
+ * #define ThreadYielding 3
+ * #define ThreadBlocked  4
+ * #define ThreadFinished 5
+ * #define ForeignCall                  6
+ * #define BlockedOnMVar                7
+ * #define BlockedOnBlackHole           8
+ * #define BlockedOnRead                9
+ * #define BlockedOnWrite               10
+ * #define BlockedOnDelay               11
+ * #define BlockedOnSTM                 12
+ * #define BlockedOnDoProc              13
+ * #define BlockedOnCCall               -- not used (see ForeignCall)
+ * #define BlockedOnCCall_Interruptible -- not used (see ForeignCall)
+ * #define BlockedOnMsgThrowTo          16
+ */
+#define THREAD_SUSPENDED_FOREIGN_CALL 6
+
+/*
+ * Capset type values for EVENT_CAPSET_CREATE
+ */
+#define CAPSET_TYPE_CUSTOM      1  /* reserved for end-user applications */
+#define CAPSET_TYPE_OSPROCESS   2  /* caps belong to the same OS process */
+#define CAPSET_TYPE_CLOCKDOMAIN 3  /* caps share a local clock/time      */
+
+/*
+ * Heap profile breakdown types. See EVENT_HEAP_PROF_BEGIN.
+ */
+typedef enum {
+    HEAP_PROF_BREAKDOWN_COST_CENTRE = 0x1,
+    HEAP_PROF_BREAKDOWN_MODULE,
+    HEAP_PROF_BREAKDOWN_CLOSURE_DESCR,
+    HEAP_PROF_BREAKDOWN_TYPE_DESCR,
+    HEAP_PROF_BREAKDOWN_RETAINER,
+    HEAP_PROF_BREAKDOWN_BIOGRAPHY,
+    HEAP_PROF_BREAKDOWN_CLOSURE_TYPE,
+    HEAP_PROF_BREAKDOWN_INFO_TABLE
+} HeapProfBreakdown;
+
+#if !defined(EVENTLOG_CONSTANTS_ONLY)
+
+typedef StgWord16 EventTypeNum;
+typedef StgWord64 EventTimestamp; /* in nanoseconds */
+typedef StgWord32 EventThreadID;
+typedef StgWord16 EventCapNo;
+typedef StgWord16 EventPayloadSize; /* variable-size events */
+typedef StgWord16 EventThreadStatus; /* status for EVENT_STOP_THREAD */
+typedef StgWord32 EventCapsetID;
+typedef StgWord16 EventCapsetType;   /* types for EVENT_CAPSET_CREATE */
+typedef StgWord64 EventTaskId;         /* for EVENT_TASK_* */
+typedef StgWord64 EventKernelThreadId; /* for EVENT_TASK_CREATE */
+
+#define EVENT_PAYLOAD_SIZE_MAX STG_WORD16_MAX
+#endif
diff --git a/rts/include/rts/EventLogWriter.h b/rts/include/rts/EventLogWriter.h
new file mode 100644
index 0000000000..73a2aec64c
--- /dev/null
+++ b/rts/include/rts/EventLogWriter.h
@@ -0,0 +1,75 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2008-2017
+ *
+ * Support for fast binary event logging.
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ *   https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+#include <stddef.h>
+#include <stdbool.h>
+
+/*
+ *  Abstraction for writing eventlog data.
+ */
+typedef struct {
+    // Initialize an EventLogWriter (may be NULL)
+    void (* initEventLogWriter) (void);
+
+    // Write a series of events, returning true on success.
+    // Note that this may be called by multiple threads simultaneously.
+    // The writer is responsible for concurrency control.
+    bool (* writeEventLog) (void *eventlog, size_t eventlog_size);
+
+    // Flush possibly existing buffers (may be NULL)
+    // Note that this may be called by multiple threads simultaneously.
+    // The writer is responsible for concurrency control.
+    void (* flushEventLog) (void);
+
+    // Close an initialized EventLogWriter (may be NULL)
+    void (* stopEventLogWriter) (void);
+} EventLogWriter;
+
+/*
+ * An EventLogWriter which writes eventlogs to
+ * a file `program.eventlog`.
+ */
+extern const EventLogWriter FileEventLogWriter;
+
+enum EventLogStatus {
+  /* The runtime system wasn't compiled with eventlog support. */
+  EVENTLOG_NOT_SUPPORTED,
+  /* An EventLogWriter has not yet been configured */
+  EVENTLOG_NOT_CONFIGURED,
+  /* An EventLogWriter has been configured and is running. */
+  EVENTLOG_RUNNING,
+};
+
+/*
+ * Query whether the current runtime system supports eventlogging.
+ */
+enum EventLogStatus eventLogStatus(void);
+
+/*
+ * Initialize event logging using the given EventLogWriter.
+ * Returns true on success or false if an EventLogWriter is already configured
+ * or eventlogging isn't supported by the runtime.
+ */
+bool startEventLogging(const EventLogWriter *writer);
+
+/*
+ * Stop event logging and destroy the current EventLogWriter.
+ */
+void endEventLogging(void);
+
+/*
+ * Flush the eventlog. cap can be NULL if one is not held.
+ */
+void flushEventLog(Capability **cap);
diff --git a/rts/include/rts/ExecPage.h b/rts/include/rts/ExecPage.h
new file mode 100644
index 0000000000..4261b71259
--- /dev/null
+++ b/rts/include/rts/ExecPage.h
@@ -0,0 +1,18 @@
+/*
+ * Utilities for managing dynamically-allocated executable pages.
+ */
+
+#pragma once
+
+typedef struct {
+    char contents;
+} ExecPage;
+
+/* Allocate a writable page. */
+ExecPage *allocateExecPage(void);
+
+/* Make a page previously allocated by allocateExecPage executable. */
+void freezeExecPage(ExecPage *page);
+
+/* Free a page previously allocated by allocateExecPage. */
+void freeExecPage(ExecPage *page);
diff --git a/rts/include/rts/FileLock.h b/rts/include/rts/FileLock.h
new file mode 100644
index 0000000000..3d8056d7a0
--- /dev/null
+++ b/rts/include/rts/FileLock.h
@@ -0,0 +1,37 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2007-2009
+ *
+ * File locking support as required by Haskell
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ *   https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * ---------------------------------------------------------------------------*/
+
+/* Note [RTS File locking]
+ * ~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * The Haskell report dictates certain file locking behaviour.
+ * This is specified in the Haskell98 report under: 21.2.3  File locking
+ *
+ * GHC does not rely on the platform it's on to implement this.
+ * Instead we keep track of locked files in a data structure in
+ * the RTS. This file provides the interface to this data structure.
+ *
+ * In the base libraries we then use this interface to "lock" files.
+ * This means it's very much still possible for users outside of the
+ * rts/base library to open the files in question even if they are
+ * locked.
+ * */
+
+#pragma once
+
+#include "Stg.h"
+
+/* No valid FD would be negative, so use a word instead of int so the value
+   is compatible with a Windows handle.  */
+int  lockFile(StgWord64 id, StgWord64 dev, StgWord64 ino, int for_writing);
+int  unlockFile(StgWord64 id);
diff --git a/rts/include/rts/Flags.h b/rts/include/rts/Flags.h
new file mode 100644
index 0000000000..11e7bfdaa7
--- /dev/null
+++ b/rts/include/rts/Flags.h
@@ -0,0 +1,330 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2009
+ *
+ * Datatypes that hold the command-line flag settings.
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ *   https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include "stg/Types.h"
+#include "Time.h"
+
+/* For defaults, see the @initRtsFlagsDefaults@ routine. */
+
+/* Note [Synchronization of flags and base APIs]
+ *
+ * We provide accessors to RTS flags in base. (GHC.RTS module)
+ * The API should be updated whenever RTS flags are modified.
+ */
+
+/* See Note [Synchronization of flags and base APIs] */
+typedef struct _GC_FLAGS {
+    FILE   *statsFile;
+    uint32_t  giveStats;
+#define NO_GC_STATS         0
+#define COLLECT_GC_STATS 1
+#define ONELINE_GC_STATS 2
+#define SUMMARY_GC_STATS 3
+#define VERBOSE_GC_STATS 4
+
+    uint32_t     maxStkSize;         /* in *words* */
+    uint32_t     initialStkSize;     /* in *words* */
+    uint32_t     stkChunkSize;       /* in *words* */
+    uint32_t     stkChunkBufferSize; /* in *words* */
+
+    uint32_t     maxHeapSize;        /* in *blocks* */
+    uint32_t     minAllocAreaSize;   /* in *blocks* */
+    uint32_t     largeAllocLim;      /* in *blocks* */
+    uint32_t     nurseryChunkSize;   /* in *blocks* */
+    uint32_t     minOldGenSize;      /* in *blocks* */
+    uint32_t     heapSizeSuggestion; /* in *blocks* */
+    bool heapSizeSuggestionAuto;
+    double  oldGenFactor;
+    double  returnDecayFactor;
+    double  pcFreeHeap;
+
+    bool         useNonmoving; // default = false
+    bool         nonmovingSelectorOpt; // Do selector optimization in the
+                                       // non-moving heap, default = false
+    uint32_t     generations;
+    bool squeezeUpdFrames;
+
+    bool compact;               /* True <=> "compact all the time" */
+    double  compactThreshold;
+
+    bool sweep;                 /* use "mostly mark-sweep" instead of copying
+                                 * for the oldest generation */
+    bool ringBell;
+
+    Time    idleGCDelayTime;    /* units: TIME_RESOLUTION */
+    Time    interIdleGCWait;    /* units: TIME_RESOLUTION */
+    bool doIdleGC;
+
+    Time    longGCSync;         /* units: TIME_RESOLUTION */
+
+    StgWord heapBase;           /* address to ask the OS for memory */
+
+    StgWord allocLimitGrace;    /* units: *blocks*
+                                 * After an AllocationLimitExceeded
+                                 * exception has been raised, how much
+                                 * extra space is given to the thread
+                                 * to handle the exception before we
+                                 * raise it again.
+                                 */
+    StgWord heapLimitGrace;     /* units: *blocks*
+                                 * After a HeapOverflow exception has
+                                 * been raised, how much extra space is
+                                 * given to the thread to handle the
+                                 * exception before we raise it again.
+                                 */
+
+    bool numa;                   /* Use NUMA */
+    StgWord numaMask;
+} GC_FLAGS;
+
+/* See Note [Synchronization of flags and base APIs] */
+typedef struct _DEBUG_FLAGS {
+    /* flags to control debugging output & extra checking in various subsystems */
+    bool scheduler;      /* 's' */
+    bool interpreter;    /* 'i' */
+    bool weak;           /* 'w' */
+    bool gccafs;         /* 'G' */
+    bool gc;             /* 'g' */
+    bool nonmoving_gc;   /* 'n' */
+    bool block_alloc;    /* 'b' */
+    bool sanity;         /* 'S'   warning: might be expensive! */
+    bool zero_on_gc;     /* 'Z' */
+    bool stable;         /* 't' */
+    bool prof;           /* 'p' */
+    bool linker;         /* 'l'   the object linker */
+    bool apply;          /* 'a' */
+    bool stm;            /* 'm' */
+    bool squeeze;        /* 'z'  stack squeezing & lazy blackholing */
+    bool hpc;            /* 'c' coverage */
+    bool sparks;         /* 'r' */
+    bool numa;           /* '--debug-numa' */
+    bool compact;        /* 'C' */
+} DEBUG_FLAGS;
+
+/* See Note [Synchronization of flags and base APIs] */
+typedef struct _COST_CENTRE_FLAGS {
+    uint32_t    doCostCentres;
+# define COST_CENTRES_NONE      0
+# define COST_CENTRES_SUMMARY   1
+# define COST_CENTRES_VERBOSE   2 /* incl. serial time profile */
+# define COST_CENTRES_ALL       3
+# define COST_CENTRES_JSON      4
+
+    int profilerTicks;   /* derived */
+    int msecsPerTick;    /* derived */
+    char const *outputFileNameStem;
+} COST_CENTRE_FLAGS;
+
+/* See Note [Synchronization of flags and base APIs] */
+typedef struct _PROFILING_FLAGS {
+    uint32_t doHeapProfile;
+# define NO_HEAP_PROFILING      0 /* N.B. Used as indexes into arrays */
+# define HEAP_BY_CCS            1
+# define HEAP_BY_MOD            2
+# define HEAP_BY_DESCR          4
+# define HEAP_BY_TYPE           5
+# define HEAP_BY_RETAINER       6
+# define HEAP_BY_LDV            7
+
+# define HEAP_BY_CLOSURE_TYPE   8
+# define HEAP_BY_INFO_TABLE     9
+
+    Time        heapProfileInterval; /* time between samples */
+    uint32_t    heapProfileIntervalTicks; /* ticks between samples (derived) */
+    bool        startHeapProfileAtStartup; /* true if we start profiling from program startup */
+
+
+    bool        showCCSOnException;
+
+    uint32_t    maxRetainerSetSize;
+
+    uint32_t    ccsLength;
+
+    const char*         modSelector;
+    const char*         descrSelector;
+    const char*         typeSelector;
+    const char*         ccSelector;
+    const char*         ccsSelector;
+    const char*         retainerSelector;
+    const char*         bioSelector;
+
+} PROFILING_FLAGS;
+
+#define TRACE_NONE      0
+#define TRACE_EVENTLOG  1
+#define TRACE_STDERR    2
+
+/* See Note [Synchronization of flags and base APIs] */
+typedef struct _TRACE_FLAGS {
+    int tracing;
+    bool timestamp;      /* show timestamp in stderr output */
+    bool scheduler;      /* trace scheduler events */
+    bool gc;             /* trace GC events */
+    bool nonmoving_gc;   /* trace nonmoving GC events */
+    bool sparks_sampled; /* trace spark events by a sampled method */
+    bool sparks_full;    /* trace spark events 100% accurately */
+    bool ticky;          /* trace ticky-ticky samples */
+    bool user;           /* trace user events (emitted from Haskell code) */
+    Time eventlogFlushTime;  /* Time between forced eventlog flushes (or 0 if disabled) */
+    int eventlogFlushTicks;
+    char *trace_output;  /* output filename for eventlog */
+} TRACE_FLAGS;
+
+/* See Note [Synchronization of flags and base APIs] */
+typedef struct _CONCURRENT_FLAGS {
+    Time ctxtSwitchTime;         /* units: TIME_RESOLUTION */
+    int ctxtSwitchTicks;         /* derived */
+} CONCURRENT_FLAGS;
+
+/*
+ * The tickInterval is the time interval between "ticks", ie.
+ * timer signals (see Timer.{c,h}).  It is the frequency at
+ * which we sample CCCS for profiling.
+ *
+ * It is changed by the +RTS -V<secs> flag.
+ */
+#define DEFAULT_TICK_INTERVAL USToTime(10000)
+
+/*
+ * When linkerAlwaysPic is true, the runtime linker assumes that all object
+ * files were compiled with -fPIC -fexternal-dynamic-refs and loads them
+ * anywhere in the address space.
+ * Note that there is no 32bit darwin system we can realistically expect to
+ * run on or compile for.
+ */
+#if defined(darwin_HOST_OS) || defined(aarch64_HOST_ARCH) || defined(arm_HOST_ARCH)
+#define DEFAULT_LINKER_ALWAYS_PIC true
+#else
+#define DEFAULT_LINKER_ALWAYS_PIC false
+#endif
+
+/* Which I/O Manager to use in the target program.  */
+typedef enum _IO_MANAGER { IO_MNGR_NATIVE, IO_MNGR_POSIX } IO_MANAGER;
+
+/* See Note [Synchronization of flags and base APIs] */
+typedef struct _MISC_FLAGS {
+    Time    tickInterval;        /* units: TIME_RESOLUTION */
+    bool install_signal_handlers;
+    bool install_seh_handlers;
+    bool generate_dump_file;
+    bool generate_stack_trace;
+    bool machineReadable;
+    bool disableDelayedOsMemoryReturn; /* See Note [MADV_FREE and MADV_DONTNEED].
+                                          It's in `MiscFlags` instead of
+                                          `GcFlags` because if GHC used madvise()
+                                          memory management for non-GC related
+                                          tasks in the future, we'd respect it
+                                          there as well. */
+    bool internalCounters;       /* See Note [Internal Counter Stats] */
+    bool linkerAlwaysPic;        /* Assume the object code is always PIC */
+    StgWord linkerMemBase;       /* address to ask the OS for memory
+                                  * for the linker, NULL ==> off */
+    IO_MANAGER ioManager;        /* The I/O manager to use.  */
+    uint32_t numIoWorkerThreads; /* Number of I/O worker threads to use.  */
+} MISC_FLAGS;
+
+/* See Note [Synchronization of flags and base APIs] */
+typedef struct _PAR_FLAGS {
+  uint32_t       nCapabilities;  /* number of threads to run simultaneously */
+  bool           migrate;        /* migrate threads between capabilities */
+  uint32_t       maxLocalSparks;
+  bool           parGcEnabled;   /* enable parallel GC */
+  uint32_t       parGcGen;       /* do parallel GC in this generation
+                                  * and higher only */
+  bool           parGcLoadBalancingEnabled;
+                                 /* enable load-balancing in the
+                                  * parallel GC */
+  uint32_t       parGcLoadBalancingGen;
+                                 /* do load-balancing in this
+                                  * generation and higher only */
+
+  uint32_t       parGcNoSyncWithIdle;
+                                 /* if a Capability has been idle for
+                                  * this many GCs, do not try to wake
+                                  * it up when doing a
+                                  * non-load-balancing parallel GC.
+                                  * (zero disables) */
+
+  uint32_t       parGcThreads;
+                                 /* Use this many threads for parallel
+                                  * GC (default: use all nNodes). */
+
+  bool           setAffinity;    /* force thread affinity with CPUs */
+} PAR_FLAGS;
+
+/* See Note [Synchronization of flags and base APIs] */
+typedef struct _TICKY_FLAGS {
+    bool showTickyStats;
+    FILE   *tickyFile;
+} TICKY_FLAGS;
+
+/* Put them together: */
+
+/* See Note [Synchronization of flags and base APIs] */
+typedef struct _RTS_FLAGS {
+    /* The first portion of RTS_FLAGS is invariant. */
+    GC_FLAGS          GcFlags;
+    CONCURRENT_FLAGS  ConcFlags;
+    MISC_FLAGS        MiscFlags;
+    DEBUG_FLAGS       DebugFlags;
+    COST_CENTRE_FLAGS CcFlags;
+    PROFILING_FLAGS   ProfFlags;
+    TRACE_FLAGS       TraceFlags;
+    TICKY_FLAGS       TickyFlags;
+    PAR_FLAGS         ParFlags;
+} RTS_FLAGS;
+
+#if defined(COMPILING_RTS_MAIN)
+extern DLLIMPORT RTS_FLAGS RtsFlags;
+#elif IN_STG_CODE
+/* Note [RtsFlags is a pointer in STG code]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * When compiling with IN_STG_CODE the RtsFlags symbol is defined as a pointer.
+ * This is necessary because the C code generator can't generate '&label'.
+ */
+extern RTS_FLAGS RtsFlags[];
+#else
+extern RTS_FLAGS RtsFlags;
+#endif
+
+/*
+ * The printf formats are here, so we are less likely to make
+ * overly-long filenames (with disastrous results).  No more than 128
+ * chars, please!
+ */
+
+#define STATS_FILENAME_MAXLEN 128
+
+#define GR_FILENAME_FMT         "%0.124s.gr"
+#define HP_FILENAME_FMT         "%0.124s.hp"
+#define LIFE_FILENAME_FMT       "%0.122s.life"
+#define PROF_FILENAME_FMT       "%0.122s.prof"
+#define PROF_FILENAME_FMT_GUM   "%0.118s.%03d.prof"
+#define QP_FILENAME_FMT         "%0.124s.qp"
+#define STAT_FILENAME_FMT       "%0.122s.stat"
+#define TICKY_FILENAME_FMT      "%0.121s.ticky"
+#define TIME_FILENAME_FMT       "%0.122s.time"
+#define TIME_FILENAME_FMT_GUM   "%0.118s.%03d.time"
+
+/* an "int" so as to match normal "argc" */
+/* Now defined in Stg.h (lib/std/cbits need these too.)
+extern int     prog_argc;
+extern char  **prog_argv;
+*/
+extern int      rts_argc;  /* ditto */
+extern char   **rts_argv;
diff --git a/rts/include/rts/ForeignExports.h b/rts/include/rts/ForeignExports.h
new file mode 100644
index 0000000000..aeb524aebf
--- /dev/null
+++ b/rts/include/rts/ForeignExports.h
@@ -0,0 +1,38 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1995-2009
+ *
+ * Interface to the RTS's foreign export tracking code.
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ *   https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+struct _ObjectCode;
+
+/* N.B. See Note [Tracking foreign exports] in
+ * rts/ForeignExports.c. */
+struct ForeignExportsList {
+      /* a link field for linking these together into lists.
+       */
+    struct ForeignExportsList *next;
+      /* the length of ->exports */
+    int n_entries;
+      /* if the RTS linker loaded the module,
+       * to which ObjectCode these exports belong. */
+    struct _ObjectCode *oc;
+      /* if the RTS linker loaded the module,
+       * this points to an array of length ->n_entries
+       * recording the StablePtr for each export. */
+    StgStablePtr **stable_ptrs;
+      /* the exported closures; an array of length ->n_entries. */
+    StgPtr exports[];
+};
+
+void registerForeignExports(struct ForeignExportsList *exports);
+
diff --git a/rts/include/rts/GetTime.h b/rts/include/rts/GetTime.h
new file mode 100644
index 0000000000..53207ce307
--- /dev/null
+++ b/rts/include/rts/GetTime.h
@@ -0,0 +1,16 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1995-2009
+ *
+ * Interface to the RTS time
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ *   https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+StgWord64 getMonotonicNSec (void);
diff --git a/rts/include/rts/Globals.h b/rts/include/rts/Globals.h
new file mode 100644
index 0000000000..bd3aa637db
--- /dev/null
+++ b/rts/include/rts/Globals.h
@@ -0,0 +1,36 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2006-2009
+ *
+ * The RTS stores some "global" values on behalf of libraries, so that
+ * some libraries can ensure that certain top-level things are shared
+ * even when multiple versions of the library are loaded.  e.g. see
+ * Data.Typeable and GHC.Conc.
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ *   https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+#define mkStoreAccessorPrototype(name)                                  \
+    StgStablePtr                                                        \
+    getOrSet##name(StgStablePtr ptr);
+
+mkStoreAccessorPrototype(GHCConcSignalSignalHandlerStore)
+mkStoreAccessorPrototype(GHCConcWindowsPendingDelaysStore)
+mkStoreAccessorPrototype(GHCConcWindowsIOManagerThreadStore)
+mkStoreAccessorPrototype(GHCConcWindowsProddingStore)
+mkStoreAccessorPrototype(SystemEventThreadEventManagerStore)
+mkStoreAccessorPrototype(SystemEventThreadIOManagerThreadStore)
+mkStoreAccessorPrototype(SystemTimerThreadEventManagerStore)
+mkStoreAccessorPrototype(SystemTimerThreadIOManagerThreadStore)
+mkStoreAccessorPrototype(LibHSghcFastStringTable)
+mkStoreAccessorPrototype(LibHSghcGlobalHasPprDebug)
+mkStoreAccessorPrototype(LibHSghcGlobalHasNoDebugOutput)
+mkStoreAccessorPrototype(LibHSghcGlobalHasNoStateHack)
+extern HsInt ghc_unique_counter;
+extern HsInt ghc_unique_inc;
diff --git a/rts/include/rts/Hpc.h b/rts/include/rts/Hpc.h
new file mode 100644
index 0000000000..85d00ca485
--- /dev/null
+++ b/rts/include/rts/Hpc.h
@@ -0,0 +1,34 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2008-2009
+ *
+ * Haskell Program Coverage
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ *   https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * -------------------------------------------------------------------------- */
+
+#pragma once
+
+// Simple linked list of modules
+typedef struct _HpcModuleInfo {
+  char *modName;                // name of module
+  StgWord32 tickCount;          // number of ticks
+  StgWord32 hashNo;             // Hash number for this module's mix info
+  StgWord64 *tixArr;            // tix Array; local for this module
+  bool from_file;               // data was read from the .tix file
+  struct _HpcModuleInfo *next;
+} HpcModuleInfo;
+
+void hs_hpc_module (char *modName,
+                    StgWord32 modCount,
+                    StgWord32 modHashNo,
+                    StgWord64 *tixArr);
+
+HpcModuleInfo * hs_hpc_rootModule (void);
+
+void startupHpc(void);
+void exitHpc(void);
diff --git a/rts/include/rts/IOInterface.h b/rts/include/rts/IOInterface.h
new file mode 100644
index 0000000000..9a646cc5cf
--- /dev/null
+++ b/rts/include/rts/IOInterface.h
@@ -0,0 +1,38 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2009
+ *
+ * IO Manager functionality in the RTS
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ *   https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * -------------------------------------------------------------------------- */
+
+#pragma once
+
+#if defined(mingw32_HOST_OS)
+
+#define IO_MANAGER_WAKEUP 0xffffffff
+#define IO_MANAGER_DIE    0xfffffffe
+/* spurious wakeups are returned as zero.  */
+/* console events are ((event<<1) | 1).  */
+
+int  rts_InstallConsoleEvent ( int action, StgStablePtr *handler );
+void rts_ConsoleHandlerDone  ( int ev );
+extern StgInt console_handler;
+
+void *   getIOManagerEvent  (void);
+HsWord32 readIOManagerEvent (void);
+void     sendIOManagerEvent (HsWord32 event);
+
+#else
+
+void     setIOManagerControlFd   (uint32_t cap_no, int fd);
+void     setTimerManagerControlFd(int fd);
+void     setIOManagerWakeupFd   (int fd);
+
+#endif
+
diff --git a/rts/include/rts/IPE.h b/rts/include/rts/IPE.h
new file mode 100644
index 0000000000..81a6d553d0
--- /dev/null
+++ b/rts/include/rts/IPE.h
@@ -0,0 +1,35 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2017-2018
+ *
+ * IPE API
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ *   https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * -------------------------------------------------------------------------- */
+
+#pragma once
+
+
+typedef struct InfoProv_{
+    char * table_name;
+    char * closure_desc;
+    char * ty_desc;
+    char * label;
+    char * module;
+    char * srcloc;
+} InfoProv;
+
+typedef struct InfoProvEnt_ {
+    StgInfoTable * info;
+    InfoProv prov;
+    struct InfoProvEnt_ *link;
+} InfoProvEnt;
+
+extern InfoProvEnt * RTS_VAR(IPE_LIST);               // registered IP list
+
+void registerInfoProvList(InfoProvEnt **cc_list);
+InfoProvEnt * lookupIPE(StgInfoTable *info);
diff --git a/rts/include/rts/Libdw.h b/rts/include/rts/Libdw.h
new file mode 100644
index 0000000000..d7bd55d06e
--- /dev/null
+++ b/rts/include/rts/Libdw.h
@@ -0,0 +1,97 @@
+/* ---------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2014-2015
+ *
+ * Producing DWARF-based stacktraces with libdw.
+ *
+ * --------------------------------------------------------------------------*/
+
+#pragma once
+
+// for FILE
+#include <stdio.h>
+
+// Chunk capacity
+// This is rather arbitrary
+#define BACKTRACE_CHUNK_SZ 256
+
+/*
+ * Note [Chunked stack representation]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * Consider the stack,
+ *     main                   calls                        (bottom of stack)
+ *       func1                which in turn calls
+ *         func2              which calls
+ *          func3             which calls
+ *            func4           which calls
+ *              func5         which calls
+ *                func6       which calls
+ *                  func7     which requests a backtrace   (top of stack)
+ *
+ * This would produce the Backtrace (using a smaller chunk size of three for
+ * illustrative purposes),
+ *
+ * Backtrace     /----> Chunk         /----> Chunk         /----> Chunk
+ * last --------/       next --------/       next --------/       next
+ * n_frames=8           n_frames=2           n_frames=3           n_frames=3
+ *                      ~~~~~~~~~~           ~~~~~~~~~~           ~~~~~~~~~~
+ *                      func1                func4                func7
+ *                      main                 func3                func6
+ *                                           func2                func5
+ *
+ */
+
+/* A chunk of code addresses from an execution stack
+ *
+ * The first address in this list corresponds to the stack frame
+ * nearest to the "top" of the stack.
+ */
+typedef struct BacktraceChunk_ {
+    StgWord n_frames;                      // number of frames in this chunk
+    struct BacktraceChunk_ *next;          // the chunk following this one
+    StgPtr frames[BACKTRACE_CHUNK_SZ];     // the code addresses from the
+                                           // frames
+} __attribute__((packed)) BacktraceChunk;
+
+/* A chunked list of code addresses from an execution stack
+ *
+ * This structure is optimized for append operations since we append O(stack
+ * depth) times yet typically only traverse the stack trace once. Consequently,
+ * the "top" stack frame (that is, the one where we started unwinding) can be
+ * found in the last chunk. Yes, this is a bit inconsistent with the ordering
+ * within a chunk. See Note [Chunked stack representation] for a depiction.
+ */
+typedef struct Backtrace_ {
+    StgWord n_frames;        // Total number of frames in the backtrace
+    BacktraceChunk *last;    // The first chunk of frames (corresponding to the
+                             // bottom of the stack)
+} Backtrace;
+
+/* Various information describing the location of an address */
+typedef struct Location_ {
+    const char *object_file;
+    const char *function;
+
+    // lineno and colno are only valid if source_file /= NULL
+    const char *source_file;
+    StgWord32 lineno;
+    StgWord32 colno;
+} __attribute__((packed)) Location;
+
+struct LibdwSession_;
+typedef struct LibdwSession_ LibdwSession;
+
+/* Free a backtrace */
+void backtraceFree(Backtrace *bt);
+
+/* Request a backtrace of the current stack state.
+ * May return NULL if a backtrace can't be acquired. */
+Backtrace *libdwGetBacktrace(LibdwSession *session);
+
+/* Lookup Location information for the given address.
+ * Returns 0 if successful, 1 if address could not be found. */
+int libdwLookupLocation(LibdwSession *session, Location *loc, StgPtr pc);
+
+/* Pretty-print a backtrace to the given FILE */
+void libdwPrintBacktrace(LibdwSession *session, FILE *file, Backtrace *bt);
diff --git a/rts/include/rts/LibdwPool.h b/rts/include/rts/LibdwPool.h
new file mode 100644
index 0000000000..76ff41c8c7
--- /dev/null
+++ b/rts/include/rts/LibdwPool.h
@@ -0,0 +1,19 @@
+/* ---------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2015-2016
+ *
+ * A pool of libdw sessions
+ *
+ * --------------------------------------------------------------------------*/
+
+#pragma once
+
+/* Claim a session from the pool */
+LibdwSession *libdwPoolTake(void);
+
+/* Return a session to the pool */
+void libdwPoolRelease(LibdwSession *sess);
+
+/* Free any sessions in the pool forcing a reload of any loaded debug
+ * information */
+void libdwPoolClear(void);
diff --git a/rts/include/rts/Linker.h b/rts/include/rts/Linker.h
new file mode 100644
index 0000000000..1f3719c0c7
--- /dev/null
+++ b/rts/include/rts/Linker.h
@@ -0,0 +1,114 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2009
+ *
+ * RTS Object Linker
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ *   https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+#if defined(mingw32_HOST_OS)
+typedef wchar_t pathchar;
+#define PATH_FMT "ls"
+#else
+typedef char    pathchar;
+#define PATH_FMT "s"
+#endif
+
+/* Initialize the object linker. Equivalent to initLinker_(1). */
+void initLinker (void);
+
+/* Initialize the object linker.
+ * The retain_cafs argument is:
+ *
+ *   non-zero => Retain CAFs unconditionally in linked Haskell code.
+ *               Note that this prevents any code from being unloaded.
+ *               It should not be necessary unless you are GHCi or
+ *               hs-plugins, which needs to be able to call any function
+ *               in the compiled code.
+ *
+ *   zero     => Do not retain CAFs.  Everything reachable from foreign
+ *               exports will be retained, due to the StablePtrs
+ *               created by the module initialisation code.  unloadObj
+ *               frees these StablePtrs, which will allow the CAFs to
+ *               be GC'd and the code to be removed.
+ */
+void initLinker_ (int retain_cafs);
+
+/* insert a symbol in the hash table */
+HsInt insertSymbol(pathchar* obj_name, char* key, void* data);
+
+/* lookup a symbol in the hash table */
+void *lookupSymbol( char *lbl );
+
+/* See Linker.c Note [runtime-linker-phases] */
+typedef enum {
+    OBJECT_LOADED,
+    OBJECT_NEEDED,
+    OBJECT_RESOLVED,
+    OBJECT_UNLOADED,
+    OBJECT_DONT_RESOLVE,
+    OBJECT_NOT_LOADED     /* The object was either never loaded or has been
+                             fully unloaded */
+} OStatus;
+
+/* check object load status */
+OStatus getObjectLoadStatus( pathchar *path );
+
+/* delete an object from the pool */
+HsInt unloadObj( pathchar *path );
+
+/* purge an object's symbols from the symbol table, but don't unload it */
+HsInt purgeObj( pathchar *path );
+
+/* add an obj (populate the global symbol table, but don't resolve yet) */
+HsInt loadObj( pathchar *path );
+
+/* add an arch (populate the global symbol table, but don't resolve yet) */
+HsInt loadArchive( pathchar *path );
+
+/* resolve all the currently unlinked objects in memory */
+HsInt resolveObjs( void );
+
+/* Load an .so using the system linker.
+   Returns a handle that can be passed to dlsym() or NULL on error.
+
+   In the case of error, stores the error message in errmsg. The caller
+   is responsible for freeing it. */
+void *loadNativeObj( pathchar *path, char **errmsg );
+
+/* Mark the .so loaded with the system linker for unloading.
+   The RTS will unload it when all the references to the .so disappear from
+   the heap.
+   Takes the handle returned from loadNativeObj() as an argument. */
+HsInt unloadNativeObj( void *handle );
+
+/* load a dynamic library */
+const char *addDLL( pathchar* dll_name );
+
+/* add a path to the library search path */
+HsPtr addLibrarySearchPath(pathchar* dll_path);
+
+/* removes a directory from the search path,
+   path must have been added using addLibrarySearchPath */
+HsBool removeLibrarySearchPath(HsPtr dll_path_index);
+
+/* give a warning about missing Windows patches that would make
+   the linker work better */
+void warnMissingKBLibraryPaths( void );
+
+/* -----------------------------------------------------------------------------
+* Searches the system directories to determine if there is a system DLL that
+* satisfies the given name. This prevents GHCi from linking against a static
+* library if a DLL is available.
+*/
+pathchar* findSystemLibrary(pathchar* dll_name);
+
+/* called by the initialization code for a module, not a user API */
+StgStablePtr foreignExportStablePtr (StgPtr p);
diff --git a/rts/include/rts/Main.h b/rts/include/rts/Main.h
new file mode 100644
index 0000000000..05924ad92b
--- /dev/null
+++ b/rts/include/rts/Main.h
@@ -0,0 +1,18 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2009
+ *
+ * Entry point for standalone Haskell programs.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+/* -----------------------------------------------------------------------------
+ * The entry point for Haskell programs that use a Haskell main function
+ * -------------------------------------------------------------------------- */
+
+int hs_main (int argc, char *argv[],     // program args
+             StgClosure *main_closure,   // closure for Main.main
+             RtsConfig rts_config)       // RTS configuration
+   GNUC3_ATTRIBUTE(__noreturn__);        // declared as never returning
diff --git a/rts/include/rts/Messages.h b/rts/include/rts/Messages.h
new file mode 100644
index 0000000000..dbaf37bbc7
--- /dev/null
+++ b/rts/include/rts/Messages.h
@@ -0,0 +1,104 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2009
+ *
+ * Message API for use inside the RTS.  All messages generated by the
+ * RTS should go through one of the functions declared here, and we
+ * also provide hooks so that messages from the RTS can be redirected
+ * as appropriate.
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ *   https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+#include <stdarg.h>
+
+#if defined(mingw32_HOST_OS) && !defined(__clang__)
+/* On Win64, if we say "printf" then gcc thinks we are going to use
+   MS format specifiers like %I64d rather than %llu */
+#define PRINTF gnu_printf
+#else
+/* However, on OS X, "gnu_printf" isn't recognised */
+#define PRINTF printf
+#endif
+
+/* -----------------------------------------------------------------------------
+ * Message generation
+ * -------------------------------------------------------------------------- */
+
+/*
+ * A fatal internal error: this is for errors that probably indicate
+ * bugs in the RTS or compiler.  We normally output bug reporting
+ * instructions along with the error message.
+ *
+ * barf() invokes (*fatalInternalErrorFn)().  This function is not
+ * expected to return.
+ */
+void barf(const char *s, ...)            // s: PRINTF-style format string
+   GNUC3_ATTRIBUTE(__noreturn__)         // declared as never returning
+   GNUC3_ATTRIBUTE(format(PRINTF, 1, 2)); // format is arg 1, varargs from arg 2
+
+void vbarf(const char *s, va_list ap)
+   GNUC3_ATTRIBUTE(__noreturn__);
+
+// declared in Rts.h:
+// extern void _assertFail(const char *filename, unsigned int linenum)
+//    GNUC3_ATTRIBUTE(__noreturn__);
+
+/*
+ * An error condition which is caused by and/or can be corrected by
+ * the user.
+ *
+ * errorBelch() invokes (*errorMsgFn)().
+ */
+void errorBelch(const char *s, ...)
+   GNUC3_ATTRIBUTE(format (PRINTF, 1, 2));
+
+void verrorBelch(const char *s, va_list ap);
+
+/*
+ * An error condition which is caused by and/or can be corrected by
+ * the user, and which has an associated error condition reported
+ * by the system (in errno on Unix, and GetLastError() on Windows).
+ * The system error message is appended to the message generated
+ * from the supplied format string.
+ *
+ * sysErrorBelch() invokes (*sysErrorMsgFn)().
+ */
+void sysErrorBelch(const char *s, ...)
+   GNUC3_ATTRIBUTE(format (PRINTF, 1, 2));
+
+void vsysErrorBelch(const char *s, va_list ap);
+
+/*
+ * A debugging message.  Debugging messages are generated either as a
+ * virtue of having DEBUG turned on, or by being explicitly selected
+ * via RTS options (eg. +RTS -Ds).
+ *
+ * debugBelch() invokes (*debugMsgFn)().
+ */
+void debugBelch(const char *s, ...)
+   GNUC3_ATTRIBUTE(format (PRINTF, 1, 2));
+
+void vdebugBelch(const char *s, va_list ap);
+
+
+/* Hooks for redirecting message generation (one per message family above): */
+typedef void RtsMsgFunction(const char *, va_list);
+
+extern RtsMsgFunction *fatalInternalErrorFn;  /* invoked by barf() */
+extern RtsMsgFunction *debugMsgFn;            /* invoked by debugBelch() */
+extern RtsMsgFunction *errorMsgFn;            /* invoked by errorBelch() */
+extern RtsMsgFunction *sysErrorMsgFn;         /* invoked by sysErrorBelch() */
+
+/* Default stdio implementation of the message hooks: */
+
+extern RtsMsgFunction rtsFatalInternalErrorFn;
+extern RtsMsgFunction rtsDebugMsgFn;
+extern RtsMsgFunction rtsErrorMsgFn;
+extern RtsMsgFunction rtsSysErrorMsgFn;
diff --git a/rts/include/rts/NonMoving.h b/rts/include/rts/NonMoving.h
new file mode 100644
index 0000000000..35a04fe940
--- /dev/null
+++ b/rts/include/rts/NonMoving.h
@@ -0,0 +1,43 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2018-2019
+ *
+ * Non-moving garbage collector
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ *   https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * -------------------------------------------------------------------------- */
+
+#pragma once
+
+// Forward declaration for Stg.h
+struct StgClosure_;
+struct StgThunk_;
+struct Capability_;
+
+/* This is called by the code generator */
+extern DLL_IMPORT_RTS
+void updateRemembSetPushClosure_(StgRegTable *reg, struct StgClosure_ *p);
+
+extern DLL_IMPORT_RTS
+void updateRemembSetPushThunk_(StgRegTable *reg, struct StgThunk_ *p);
+
+// Forward declaration for unregisterised backend.
+EF_(stg_copyArray_barrier);
+
+// Note that RTS code should not condition on this directly but rather
+// use the IF_NONMOVING_WRITE_BARRIER_ENABLED macro to ensure that
+// the barrier is eliminated in the non-threaded RTS.
+extern StgWord DLL_IMPORT_DATA_VAR(nonmoving_write_barrier_enabled);
+
+// `if` prefix for barrier code; rts/include/Cmm.h has a similar C-- macro.
+#if defined(THREADED_RTS)
+#define IF_NONMOVING_WRITE_BARRIER_ENABLED \
+    if (RTS_UNLIKELY(nonmoving_write_barrier_enabled))
+#else /* non-threaded RTS: the guarded statement is never executed */
+#define IF_NONMOVING_WRITE_BARRIER_ENABLED \
+    if (0)
+#endif
diff --git a/rts/include/rts/OSThreads.h b/rts/include/rts/OSThreads.h
new file mode 100644
index 0000000000..d24a1313a6
--- /dev/null
+++ b/rts/include/rts/OSThreads.h
@@ -0,0 +1,267 @@
+/* ---------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2001-2009
+ *
+ * Accessing OS threads functionality in a (mostly) OS-independent
+ * manner.
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ *   https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * --------------------------------------------------------------------------*/
+
+#pragma once
+
+#if defined(HAVE_PTHREAD_H) && !defined(mingw32_HOST_OS)
+
+#if defined(CMINUSMINUS)
+
+#define OS_ACQUIRE_LOCK(mutex) foreign "C" pthread_mutex_lock(mutex)
+#define OS_RELEASE_LOCK(mutex) foreign "C" pthread_mutex_unlock(mutex)
+#define OS_ASSERT_LOCK_HELD(mutex) /* nothing */
+
+#else
+
+#include <pthread.h>
+#include <errno.h>
+
+typedef struct {
+    pthread_cond_t cond;
+
+    // Which clock are pthread_cond_timedwait calls referenced against?
+    // N.B. Some older Darwin releases don't support clock_gettime. However, we
+    // do want to reference to CLOCK_MONOTONIC whenever possible as it is more
+    // robust against system time changes and is likely cheaper to query.
+#if defined(HAVE_CLOCK_GETTIME) && defined(HAVE_PTHREAD_CONDATTR_SETCLOCK)
+    clockid_t timeout_clk;
+#endif
+} Condition;
+typedef pthread_mutex_t Mutex;
+typedef pthread_t       OSThreadId;
+typedef pthread_key_t   ThreadLocalKey;
+
+#define OSThreadProcAttr /* nothing */
+
+#define INIT_COND_VAR       PTHREAD_COND_INITIALIZER
+
+#if defined(LOCK_DEBUG)
+#define LOCK_DEBUG_BELCH(what, mutex) \
+  debugBelch("%s(0x%p) %s %d\n", what, mutex, __FILE__, __LINE__)
+#else
+#define LOCK_DEBUG_BELCH(what, mutex) /* nothing */
+#endif
+
+/* Always check the result of lock and unlock: any failure is fatal (barf). */
+#define OS_ACQUIRE_LOCK(mutex) { \
+  LOCK_DEBUG_BELCH("ACQUIRE_LOCK", mutex); \
+  int __r = pthread_mutex_lock(mutex); \
+  if (__r != 0) { \
+    barf("ACQUIRE_LOCK failed (%s:%d): %d", __FILE__, __LINE__, __r); \
+  } }
+
+// Non-blocking; returns zero if the lock was acquired, else trylock's result.
+EXTERN_INLINE int OS_TRY_ACQUIRE_LOCK(pthread_mutex_t *mutex);
+EXTERN_INLINE int OS_TRY_ACQUIRE_LOCK(pthread_mutex_t *mutex)
+{
+    LOCK_DEBUG_BELCH("TRY_ACQUIRE_LOCK", mutex);
+    return pthread_mutex_trylock(mutex);
+}
+
+#define OS_RELEASE_LOCK(mutex) { /* braced: expands to ONE statement */ \
+  LOCK_DEBUG_BELCH("RELEASE_LOCK", mutex); \
+  if (pthread_mutex_unlock(mutex) != 0) { /* unlock failure is fatal */ \
+    barf("RELEASE_LOCK: I do not own this lock: %s %d", __FILE__,__LINE__); \
+  } }
+
+// Note: this assertion calls pthread_mutex_lock() on a mutex that
+// is already held by the calling thread.  The mutex should therefore
+// have been created with PTHREAD_MUTEX_ERRORCHECK, otherwise this
+// assertion will hang.  We always initialise mutexes with
+// PTHREAD_MUTEX_ERRORCHECK when DEBUG is on (see rts/posix/OSThreads.h).
+#define OS_ASSERT_LOCK_HELD(mutex) ASSERT(pthread_mutex_lock(mutex) == EDEADLK)
+
+#endif // CMINUSMINUS
+
+# elif defined(HAVE_WINDOWS_H)
+
+#if defined(CMINUSMINUS)
+
+/* We jump through a hoop here to get a CCall AcquireSRWLockExclusive
+   and ReleaseSRWLockExclusive, as that's what C-- wants. */
+
+#define OS_ACQUIRE_LOCK(mutex) foreign "stdcall" AcquireSRWLockExclusive(mutex)
+#define OS_RELEASE_LOCK(mutex) foreign "stdcall" ReleaseSRWLockExclusive(mutex)
+#define OS_ASSERT_LOCK_HELD(mutex) /* nothing */
+
+#else // CMINUSMINUS
+
+#include <windows.h>
+#include <synchapi.h>
+
+/* Use native condition variables coupled with SRW locks; these are more
+   efficient and incur a smaller overhead than emulating them with events.
+   See Note [SRW locks].  */
+typedef CONDITION_VARIABLE Condition;
+typedef DWORD OSThreadId;
+// don't be tempted to use HANDLE as the OSThreadId: there can be
+// many HANDLES to a given thread, so comparison would not work.
+typedef DWORD ThreadLocalKey;
+
+#define OSThreadProcAttr __stdcall
+
+#define INIT_COND_VAR  CONDITION_VARIABLE_INIT /* Condition is a struct here */
+
+/* Note [SRW locks]
+   We have a choice for implementing Mutexes on Windows.  Standard
+   Mutexes are kernel objects that require kernel calls to
+   acquire/release, whereas CriticalSections are spin-locks that block
+   in the kernel after spinning for a configurable number of times.
+   CriticalSections are *much* faster than Mutexes, however not as fast as
+   slim reader/writer locks.  CriticalSections also require a 48 byte structure
+   to provide lock re-entrancy.  We don't need that because the other primitives
+   used for other platforms don't have this, as such locks are used defensively
+   in the RTS in a way that we don't need re-entrancy.  This means that SRW's
+   8 byte size is much more appropriate.  With an 8 byte payload there's a
+   higher chance of it being in your cache line.  They're also a lot faster than
+   CriticalSections when multiple threads are involved.  CS requires setup and
+   teardown via kernel calls while SRWL is zero-initialized via
+   SRWLOCK_INIT assignment. */
+
+typedef SRWLOCK Mutex;
+
+#if defined(LOCK_DEBUG)
+// Debug variants log each lock operation.  Each one is a single comma
+// expression so that it still behaves as one statement after `if (...)`.
+#define OS_ACQUIRE_LOCK(mutex) \
+  (debugBelch("ACQUIRE_LOCK(0x%p) %s %d\n", mutex,__FILE__,__LINE__), \
+   AcquireSRWLockExclusive(mutex))
+#define OS_RELEASE_LOCK(mutex) \
+  (debugBelch("RELEASE_LOCK(0x%p) %s %d\n", mutex,__FILE__,__LINE__), \
+   ReleaseSRWLockExclusive(mutex))
+#else
+#define OS_ACQUIRE_LOCK(mutex)      AcquireSRWLockExclusive(mutex)
+#define OS_RELEASE_LOCK(mutex)      ReleaseSRWLockExclusive(mutex)
+#endif // LOCK_DEBUG
+
+// These do no logging, so they are shared by both configurations.
+// OS_TRY_ACQUIRE_LOCK evaluates to zero if the lock was acquired.
+#define OS_TRY_ACQUIRE_LOCK(mutex)  (TryAcquireSRWLockExclusive(mutex) == 0)
+#define OS_INIT_LOCK(mutex)         InitializeSRWLock(mutex)
+#define OS_CLOSE_LOCK(mutex)
+
+// I don't know how to do this.  TryEnterCriticalSection() doesn't do
+// the right thing.
+#define OS_ASSERT_LOCK_HELD(mutex) /* nothing */
+
+#endif // CMINUSMINUS
+
+# elif defined(THREADED_RTS)
+#  error "Threads not supported"
+# endif
+
+
+#if !defined(CMINUSMINUS)
+//
+// General thread operations
+//
+extern OSThreadId osThreadId      ( void );
+extern void shutdownThread        ( void )   GNUC3_ATTRIBUTE(__noreturn__);
+extern void yieldThread           ( void );
+
+typedef void* OSThreadProcAttr OSThreadProc(void *);
+
+extern int  createOSThread        ( OSThreadId* tid, char *name,
+                                    OSThreadProc *startProc, void *param);
+extern bool osThreadIsAlive       ( OSThreadId id );
+extern void interruptOSThread     ( OSThreadId id );
+extern void joinOSThread          ( OSThreadId id );
+
+//
+// Condition Variables
+//
+extern void initCondition         ( Condition* pCond );
+extern void closeCondition        ( Condition* pCond );
+extern void broadcastCondition    ( Condition* pCond );
+extern void signalCondition       ( Condition* pCond );
+extern void waitCondition         ( Condition* pCond, Mutex* pMut );
+// Returns false on timeout, true otherwise.
+extern bool timedWaitCondition    ( Condition* pCond, Mutex* pMut, Time timeout);
+
+//
+// Mutexes
+//
+extern void initMutex             ( Mutex* pMut );
+extern void closeMutex            ( Mutex* pMut );
+
+//
+// Thread-local storage
+//
+void  newThreadLocalKey (ThreadLocalKey *key);
+void *getThreadLocalVar (ThreadLocalKey *key);
+void  setThreadLocalVar (ThreadLocalKey *key, void *value);
+void  freeThreadLocalKey (ThreadLocalKey *key);
+
+// Processors and affinity
+void setThreadAffinity (uint32_t n, uint32_t m);
+void setThreadNode (uint32_t node);
+void releaseThreadNode (void);
+#endif // !CMINUSMINUS
+
+#if defined(THREADED_RTS)
+// Threaded RTS: the lock macros forward to the OS-specific OS_* versions.
+#define ACQUIRE_LOCK(l) OS_ACQUIRE_LOCK(l)
+#define TRY_ACQUIRE_LOCK(l) OS_TRY_ACQUIRE_LOCK(l)
+#define RELEASE_LOCK(l) OS_RELEASE_LOCK(l)
+#define ASSERT_LOCK_HELD(l) OS_ASSERT_LOCK_HELD(l)
+
+#else
+// Non-threaded RTS: locking expands to nothing; TRY_ACQUIRE_LOCK is always 0.
+#define ACQUIRE_LOCK(l)
+#define TRY_ACQUIRE_LOCK(l) 0
+#define RELEASE_LOCK(l)
+#define ASSERT_LOCK_HELD(l)
+
+#endif /* defined(THREADED_RTS) */
+
+#if !defined(CMINUSMINUS)
+//
+// Support for forkOS (defined regardless of THREADED_RTS, but does
+// nothing when !THREADED_RTS).
+//
+int forkOS_createThread ( HsStablePtr entry );
+
+//
+// Free any global resources created in OSThreads.
+//
+void freeThreadingResources(void);
+
+//
+// Returns the number of processor cores in the machine
+//
+uint32_t getNumberOfProcessors (void);
+
+//
+// Support for getting at the kernel thread Id for tracing/profiling.
+//
+// This stuff is optional and only used for tracing/profiling purposes, to
+// match up thread ids recorded by other tools. For example, on Linux and OSX
+// the pthread_t type is not the same as the kernel thread id, and system
+// profiling tools like Linux perf, and OSX's DTrace use the kernel thread Id.
+// So if we want to match up RTS tasks with kernel threads recorded by these
+// tools then we need to know the kernel thread Id, and this must be a separate
+// type from the OSThreadId.
+//
+// If the feature cannot be supported on an OS, it is OK to always return 0.
+// In particular it would almost certainly be meaningless on systems not using
+// a 1:1 threading model.
+
+// We use a common serialisable representation on all OSs
+// This is ok for Windows, OSX and Linux.
+typedef StgWord64 KernelThreadId;
+
+// Get the current kernel thread id
+KernelThreadId kernelThreadId (void);
+
+#endif /* !CMINUSMINUS */
diff --git a/rts/include/rts/Parallel.h b/rts/include/rts/Parallel.h
new file mode 100644
index 0000000000..7577a3967c
--- /dev/null
+++ b/rts/include/rts/Parallel.h
@@ -0,0 +1,16 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2009
+ *
+ * Parallelism-related functionality
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ *   https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * -------------------------------------------------------------------------- */
+
+#pragma once
+
+StgInt newSpark (StgRegTable *reg, StgClosure *p);
diff --git a/rts/include/rts/PosixSource.h b/rts/include/rts/PosixSource.h
new file mode 100644
index 0000000000..13fd7b0ff5
--- /dev/null
+++ b/rts/include/rts/PosixSource.h
@@ -0,0 +1,38 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2005
+ *
+ * Include this file into sources which should not need any non-Posix services.
+ * That includes most RTS C sources.
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+#include <ghcplatform.h>
+
+/* We aim for C99, so we need to define the following two macros consistently
+   with what POSIX/XOPEN provide for C99. Some OSes are particularly picky about
+   the right versions defined here, e.g. Solaris.
+   We also settle on lowest version of POSIX/XOPEN needed for proper C99 support
+   here which is POSIX.1-2001 compilation and Open Group Technical Standard,
+   Issue 6 (XPG6). XPG6 itself is a result of the merge of X/Open and POSIX
+   specification. It is also referred as IEEE Std. 1003.1-2001 or ISO/IEC
+   9945:2002 or UNIX 03 and SUSv3.
+   Please also see trac ticket #11757 for more information about switch
+   to C99/C11.
+
+   However, the use of `strnlen`, which is strictly speaking only available in
+   IEEE Std 1003.1-2008 (XPG7), requires lifting the bounds, to be able to
+   compile ghc on systems that are strict about enforcing the standard, e.g.
+   Apple's mobile platforms.
+
+   Oracle's Solaris 11 supports only up to XPG6, hence the ifdef.
+  */
+
+#if defined(solaris2_HOST_OS)
+#define _POSIX_C_SOURCE 200112L
+#define _XOPEN_SOURCE   600
+#else
+#define _POSIX_C_SOURCE 200809L
+#define _XOPEN_SOURCE   700
+#endif
diff --git a/rts/include/rts/PrimFloat.h b/rts/include/rts/PrimFloat.h
new file mode 100644
index 0000000000..26b6f7ceec
--- /dev/null
+++ b/rts/include/rts/PrimFloat.h
@@ -0,0 +1,17 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2009
+ *
+ * Primitive floating-point operations
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ *   https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+StgDouble __int_encodeDouble (I_ j, I_ e);
+StgFloat  __int_encodeFloat (I_ j, I_ e);
+StgDouble __word_encodeDouble (W_ j, I_ e);
+StgFloat  __word_encodeFloat (W_ j, I_ e);
diff --git a/rts/include/rts/Profiling.h b/rts/include/rts/Profiling.h
new file mode 100644
index 0000000000..b329a493db
--- /dev/null
+++ b/rts/include/rts/Profiling.h
@@ -0,0 +1,17 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2017-2018
+ *
+ * Cost-centre profiling API
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ *   https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * -------------------------------------------------------------------------- */
+
+#pragma once
+
+void registerCcList(CostCentre **cc_list);
+void registerCcsList(CostCentreStack **cc_list);
diff --git a/rts/include/rts/Signals.h b/rts/include/rts/Signals.h
new file mode 100644
index 0000000000..96f0756538
--- /dev/null
+++ b/rts/include/rts/Signals.h
@@ -0,0 +1,23 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2009
+ *
+ * RTS signal handling
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ *   https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+/* NB. #included in Haskell code, no prototypes in here. */
+
+/* arguments to stg_sig_install() */
+#define STG_SIG_DFL   (-1)
+#define STG_SIG_IGN   (-2)
+#define STG_SIG_ERR   (-3)
+#define STG_SIG_HAN   (-4)
+#define STG_SIG_RST   (-5)
diff --git a/rts/include/rts/SpinLock.h b/rts/include/rts/SpinLock.h
new file mode 100644
index 0000000000..c1fe6c866c
--- /dev/null
+++ b/rts/include/rts/SpinLock.h
@@ -0,0 +1,75 @@
+/* ----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2006-2009
+ *
+ * Spin locks
+ *
+ * These are simple spin-only locks as opposed to Mutexes which
+ * probably spin for a while before blocking in the kernel.  We use
+ * these when we are sure that all our threads are actively running on
+ * a CPU, eg. in the GC.
+ *
+ * TODO: measure whether we really need these, or whether Mutexes
+ * would do (and be a bit safer if a CPU becomes loaded).
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ *   https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * -------------------------------------------------------------------------- */
+
+#pragma once
+
+#if defined(THREADED_RTS)
+
+typedef struct SpinLock_
+{
+    StgWord   lock;  // 1 when free, 0 when held (see the functions below)
+#if defined(PROF_SPIN)
+    StgWord64 spin;  // incremented every time we spin in ACQUIRE_SPIN_LOCK
+    StgWord64 yield; // incremented every time we yield in ACQUIRE_SPIN_LOCK
+#endif
+} SpinLock;
+
+// PROF_SPIN enables counting the number of times we spin on a lock
+#if defined(PROF_SPIN)
+#define IF_PROF_SPIN(x) x
+#else
+#define IF_PROF_SPIN(x)
+#endif
+
+void acquire_spin_lock_slow_path(SpinLock * p);
+
+// acquire spin lock: try to swing the lock word from 1 (free) to 0 (held)
+INLINE_HEADER void ACQUIRE_SPIN_LOCK(SpinLock * p)
+{
+    const StgWord32 seen = cas((StgVolatilePtr)&p->lock, 1, 0);
+    // presumably cas yields the prior value, so 0 means the lock was taken
+    if (RTS_UNLIKELY(seen == 0)) acquire_spin_lock_slow_path(p);
+}
+
+// release spin lock: write the "free" value (1) back with a release store
+INLINE_HEADER void RELEASE_SPIN_LOCK(SpinLock * p)
+{
+    RELEASE_STORE(&p->lock, 1);
+}
+
+// Put a spin lock into its initial, unlocked state (lock word == 1).
+INLINE_HEADER void initSpinLock(SpinLock * p)
+{
+    // The profiling counters only exist, and are only reset, under PROF_SPIN.
+    IF_PROF_SPIN(p->spin = 0; p->yield = 0);
+    RELEASE_STORE(&p->lock, 1);
+}
+
+#else /* !THREADED_RTS */
+
+// Using macros here means we don't have to ensure the argument is in scope
+#define ACQUIRE_SPIN_LOCK(p) /* nothing */
+#define RELEASE_SPIN_LOCK(p) /* nothing */
+
+INLINE_HEADER void initSpinLock(void * p STG_UNUSED)
+{ /* nothing */ }
+
+#endif /* THREADED_RTS */
diff --git a/rts/include/rts/StableName.h b/rts/include/rts/StableName.h
new file mode 100644
index 0000000000..4e4f976dae
--- /dev/null
+++ b/rts/include/rts/StableName.h
@@ -0,0 +1,32 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2009
+ *
+ * Stable Names
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ *   https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+/* -----------------------------------------------------------------------------
+   PRIVATE from here.
+   -------------------------------------------------------------------------- */
+
+typedef struct {
+    StgPtr  addr;        // Haskell object when entry is in use, next free
+                         // entry (NULL when this is the last free entry)
+                         // otherwise. May be NULL temporarily during GC (when
+                         // pointee dies).
+
+    StgPtr  old;         // Old Haskell object, used during GC
+
+    StgClosure *sn_obj;  // The StableName object, or NULL when the entry is
+                         // free
+} snEntry;
+
+extern DLL_IMPORT_RTS snEntry *stable_name_table;
diff --git a/rts/include/rts/StablePtr.h b/rts/include/rts/StablePtr.h
new file mode 100644
index 0000000000..73cd5bed4d
--- /dev/null
+++ b/rts/include/rts/StablePtr.h
@@ -0,0 +1,39 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2009
+ *
+ * Stable Pointers
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ *   https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+ATTR_ALWAYS_INLINE EXTERN_INLINE StgPtr deRefStablePtr (StgStablePtr stable_ptr);
+StgStablePtr getStablePtr  (StgPtr p);
+
+/* -----------------------------------------------------------------------------
+   PRIVATE from here.
+   -------------------------------------------------------------------------- */
+
+typedef struct {
+    StgPtr addr;         // Haskell object when entry is in use, next free
+                         // entry (NULL when this is the last free entry)
+                         // otherwise.
+} spEntry;
+
+extern DLL_IMPORT_RTS spEntry *stable_ptr_table;
+
+ATTR_ALWAYS_INLINE EXTERN_INLINE
+StgPtr deRefStablePtr(StgStablePtr sp)
+{
+    // Read the table base with an acquire load: the table may have been
+    // enlarged recently, and we must observe the new one.
+    const spEntry *table = ACQUIRE_LOAD(&stable_ptr_table);
+    const spEntry *slot = table + (StgWord)sp;
+    return ACQUIRE_LOAD(&slot->addr); // acquire: referenced object is visible
+}
diff --git a/rts/include/rts/StaticPtrTable.h b/rts/include/rts/StaticPtrTable.h
new file mode 100644
index 0000000000..5753e957bd
--- /dev/null
+++ b/rts/include/rts/StaticPtrTable.h
@@ -0,0 +1,44 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2008-2009
+ *
+ * Initialization of the Static Pointer Table
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ *   https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * -------------------------------------------------------------------------- */
+
+#pragma once
+
+/** Inserts an entry in the Static Pointer Table.
+ *
+ * The key is a fingerprint computed from the static pointer and the spe_closure
+ * is a pointer to the closure defining the table entry.
+ *
+ * A stable pointer to the closure is made to prevent it from being garbage
+ * collected while the entry exists on the table.
+ *
+ * This function is called from the code generated by
+ * compiler/deSugar/StaticPtrTable.sptInitCode
+ *
+ * */
+void hs_spt_insert (StgWord64 key[2],void* spe_closure);
+
+/** Inserts an entry for a StgStablePtr in the Static Pointer Table.
+ *
+ * This function is called from the GHCi interpreter to insert
+ * SPT entries for bytecode objects.
+ *
+ * */
+void hs_spt_insert_stableptr(StgWord64 key[2], StgStablePtr *entry);
+
+/** Removes an entry from the Static Pointer Table.
+ *
+ * This function is called from the code generated by
+ * compiler/deSugar/StaticPtrTable.sptInitCode
+ *
+ * */
+void hs_spt_remove (StgWord64 key[2]);
diff --git a/rts/include/rts/TSANUtils.h b/rts/include/rts/TSANUtils.h
new file mode 100644
index 0000000000..72f4541a89
--- /dev/null
+++ b/rts/include/rts/TSANUtils.h
@@ -0,0 +1,67 @@
+/* ----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2006-2019
+ *
+ * Utilities for annotating "safe" data races for Thread Sanitizer
+ * -------------------------------------------------------------------------- */
+
+/*
+ * Note [ThreadSanitizer]
+ * ~~~~~~~~~~~~~~~~~~~~~~~
+ * ThreadSanitizer (abbreviated TSAN) is a library and set of compiler
+ * instrumentation (supported by both GCC and Clang) for checking C/C++ code
+ * for data races.
+ *
+ * In GHC we use it to check the runtime system implementation (but not yet
+ * generated code). TSAN requires that the checked program uses C++11-style
+ * atomics for all potentially-racing accesses. Note that we use the __atomic_*
+ * builtin operations but not the C11 _Atomic types to maintain compatibility
+ * with older compilers.
+ *
+ * In addition to the atomic operations themselves, TSAN provides a variety of
+ * annotation operations which can be used to annotate cases where the
+ * intended semantics are either ambiguous or intentionally racy (known as a
+ * *benign race*).
+ *
+ * Finally, there are a few benign races which we can't easily annotate. To
+ * silence these errors we have a suppressions file in rts/.tsan-suppressions.
+ * In general it's best to add suppressions only as a last resort, when the
+ * more precise annotation functions prove to be insufficient.
+ *
+ * Users guide: https://github.com/google/sanitizers/wiki/ThreadSanitizerCppManual
+ */
+
+#if defined(__SANITIZE_THREAD__)
+#define TSAN_ENABLED
+#elif defined(__has_feature)
+#if __has_feature(thread_sanitizer)
+#define TSAN_ENABLED
+#endif
+#endif
+
+#if defined(TSAN_ENABLED)
+#if !defined(HAVE_C11_ATOMICS)
+#error TSAN cannot be enabled without C11 atomics suppoort.
+#endif
+
+#define TSAN_ANNOTATE_HAPPENS_BEFORE(addr)                              \
+    AnnotateHappensBefore(__FILE__, __LINE__, (void*)(addr))
+#define TSAN_ANNOTATE_HAPPENS_AFTER(addr)                               \
+    AnnotateHappensAfter(__FILE__, __LINE__, (void*)(addr))
+#define TSAN_ANNOTATE_BENIGN_RACE_SIZED(addr,size,desc)                 \
+    AnnotateBenignRaceSized(__FILE__, __LINE__, (void*)(addr), size, desc)
+void AnnotateHappensBefore(const char* f, int l, void* addr);
+void AnnotateHappensAfter(const char* f, int l, void* addr);
+void AnnotateBenignRaceSized(const char *file,
+                             int line,
+                             const volatile void *mem,
+                             long size,
+                             const char *description);
+#else
+#define TSAN_ANNOTATE_HAPPENS_BEFORE(addr)
+#define TSAN_ANNOTATE_HAPPENS_AFTER(addr)
+#define TSAN_ANNOTATE_BENIGN_RACE_SIZED(addr,size,desc)
+#endif
+
+#define TSAN_ANNOTATE_BENIGN_RACE(addr,desc) /* size is that of *addr */ \
+    TSAN_ANNOTATE_BENIGN_RACE_SIZED((void*)(addr), sizeof(*(addr)), desc)
diff --git a/rts/include/rts/TTY.h b/rts/include/rts/TTY.h
new file mode 100644
index 0000000000..9892571a17
--- /dev/null
+++ b/rts/include/rts/TTY.h
@@ -0,0 +1,17 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2009
+ *
+ * POSIX TTY-related functionality
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ *   https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * -------------------------------------------------------------------------- */
+
+#pragma once
+
+void* __hscore_get_saved_termios(int fd);
+void  __hscore_set_saved_termios(int fd, void* ts);
diff --git a/rts/include/rts/Threads.h b/rts/include/rts/Threads.h
new file mode 100644
index 0000000000..51c11742ca
--- /dev/null
+++ b/rts/include/rts/Threads.h
@@ -0,0 +1,79 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team 1998-2009
+ *
+ * External API for the scheduler.  For most uses, the functions in
+ * RtsAPI.h should be enough.
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ *   https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+#if defined(HAVE_SYS_TYPES_H)
+#include <sys/types.h>
+#endif
+
+//
+// Creating threads
+//
+StgTSO *createThread (Capability *cap, W_ stack_size);
+
+// precondition:
+//   (*cap)->running_task != NULL
+//   (*cap)->running_task must be a bound task (e.g. newBoundTask() has been
+//                        called on that thread).
+void scheduleWaitThread (/* in    */ StgTSO *tso,
+                         /* out   */ HaskellObj* ret,
+                         /* inout */ Capability **cap);
+
+StgTSO *createGenThread       (Capability *cap, W_ stack_size,
+                               StgClosure *closure);
+StgTSO *createIOThread        (Capability *cap, W_ stack_size,
+                               StgClosure *closure);
+StgTSO *createStrictIOThread  (Capability *cap, W_ stack_size,
+                               StgClosure *closure);
+
+// Suspending/resuming threads around foreign calls
+void *        suspendThread (StgRegTable *, bool interruptible);
+StgRegTable * resumeThread  (void *);
+
+//
+// Thread operations from Threads.c
+//
+bool    eq_thread                        (StgPtr tso1, StgPtr tso2);
+int     cmp_thread                       (StgPtr tso1, StgPtr tso2);
+long    rts_getThreadId                  (StgPtr tso);
+void    rts_enableThreadAllocationLimit  (StgPtr tso);
+void    rts_disableThreadAllocationLimit (StgPtr tso);
+
+#if !defined(mingw32_HOST_OS)
+pid_t  forkProcess     (HsStablePtr *entry);
+#else
+pid_t  forkProcess     (HsStablePtr *entry)
+    GNU_ATTRIBUTE(__noreturn__);
+#endif
+
+HsBool rtsSupportsBoundThreads (void);
+
+// The number of Capabilities.
+// ToDo: I would like this to be private to the RTS and instead expose a
+// function getNumCapabilities(), but it is used in compiler/cbits/genSym.c
+extern unsigned int n_capabilities;
+
+// The number of Capabilities that are not disabled
+extern uint32_t enabled_capabilities;
+
+#if !IN_STG_CODE
+extern Capability MainCapability;
+#endif
+
+//
+// Change the number of capabilities (only supports increasing the
+// current value at the moment).
+//
+extern void setNumCapabilities (uint32_t new_);
diff --git a/rts/include/rts/Ticky.h b/rts/include/rts/Ticky.h
new file mode 100644
index 0000000000..93043d8514
--- /dev/null
+++ b/rts/include/rts/Ticky.h
@@ -0,0 +1,32 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2009
+ *
+ * TICKY_TICKY types
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ *   https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+/* -----------------------------------------------------------------------------
+   The StgEntCounter type - needed regardless of TICKY_TICKY
+   -------------------------------------------------------------------------- */
+
+typedef struct _StgEntCounter {
+  /* Using StgWord for everything, because both the C and asm code
+     generators make trouble if you try to pack things tighter */
+    StgWord     registeredp;    /* 0 == no, 1 == yes */
+    StgInt      arity;          /* arity (static info) */
+    StgInt      allocd;         /* # allocation of this closure */
+                                /* (rest of args are in registers) */
+    char        *str;           /* name of the thing */
+    char        *arg_kinds;     /* info about the args types */
+    StgInt      entry_count;    /* Trips to fast entry code */
+    StgInt      allocs;         /* number of allocations by this fun */
+    struct _StgEntCounter *link;/* link to chain them all together */
+} StgEntCounter;
diff --git a/rts/include/rts/Time.h b/rts/include/rts/Time.h
new file mode 100644
index 0000000000..ab291cd6c5
--- /dev/null
+++ b/rts/include/rts/Time.h
@@ -0,0 +1,45 @@
+/* ----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2004
+ *
+ * Time values in the RTS
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ *   https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * --------------------------------------------------------------------------*/
+
+#pragma once
+
+// For most time values in the RTS we use a fixed resolution of nanoseconds,
+// normalising the time we get from platform-dependent APIs to this
+// resolution.
+#define TIME_RESOLUTION 1000000000
+typedef int64_t Time;
+
+#define TIME_MAX HS_INT64_MAX
+
+#if TIME_RESOLUTION == 1000000000
+// I'm being lazy, but it's awkward to define fully general versions of these
+#define TimeToMS(t)      ((t) / 1000000)
+#define TimeToUS(t)      ((t) / 1000)
+#define TimeToNS(t)      (t)
+#define MSToTime(t)      ((Time)(t) * 1000000)
+#define USToTime(t)      ((Time)(t) * 1000)
+#define NSToTime(t)      ((Time)(t))
+#else
+#error Fix TimeToNS(), TimeToUS() etc.
+#endif
+
+#define SecondsToTime(t) ((Time)(t) * TIME_RESOLUTION)
+#define TimeToSeconds(t) ((t) / TIME_RESOLUTION)
+#define TimeToSecondsDbl(t) ((double)(t) / TIME_RESOLUTION)
+
+// Convert a floating-point number of seconds to a Time.  Unlike
+// SecondsToTime(), the scaling happens before the cast to Time.
+INLINE_HEADER Time fsecondsToTime (double t)
+{
+    return (Time)(TIME_RESOLUTION * t);
+}
+
+Time getProcessElapsedTime (void);
diff --git a/rts/include/rts/Timer.h b/rts/include/rts/Timer.h
new file mode 100644
index 0000000000..c60cd37590
--- /dev/null
+++ b/rts/include/rts/Timer.h
@@ -0,0 +1,18 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1995-2009
+ *
+ * Interface to the RTS timer signal (uses OS-dependent Ticker.h underneath)
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ *   https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+void startTimer (void);
+void stopTimer  (void);
+int rtsTimerSignal (void);
diff --git a/rts/include/rts/Types.h b/rts/include/rts/Types.h
new file mode 100644
index 0000000000..51e8f80e66
--- /dev/null
+++ b/rts/include/rts/Types.h
@@ -0,0 +1,31 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2009
+ *
+ * RTS-specific types.
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ *   https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+#include <stddef.h>
+#include <stdbool.h>
+
+// Deprecated, use uint32_t instead.
+typedef unsigned int nat __attribute__((deprecated));  /* uint32_t */
+
+/* ullong (64|128-bit) type: only include if needed (not ANSI) */
+#if defined(__GNUC__)
+#define LL(x) (x##LL)
+#else
+#define LL(x) (x##L)
+#endif
+
+typedef struct StgClosure_   StgClosure;
+typedef struct StgInfoTable_ StgInfoTable;
+typedef struct StgTSO_       StgTSO;
diff --git a/rts/include/rts/Utils.h b/rts/include/rts/Utils.h
new file mode 100644
index 0000000000..4aee9c3a67
--- /dev/null
+++ b/rts/include/rts/Utils.h
@@ -0,0 +1,16 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2009
+ *
+ * RTS external APIs.  This file declares everything that the GHC RTS
+ * exposes externally.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ *   https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+/* Alternate to raise(3) for threaded rts, for BSD-based OSes */
+int genericRaise(int sig);
diff --git a/rts/include/rts/prof/CCS.h b/rts/include/rts/prof/CCS.h
new file mode 100644
index 0000000000..7685f03003
--- /dev/null
+++ b/rts/include/rts/prof/CCS.h
@@ -0,0 +1,226 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2009-2012
+ *
+ * Macros for profiling operations in STG code
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ *   https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+/* -----------------------------------------------------------------------------
+ * Data Structures
+ * ---------------------------------------------------------------------------*/
+/*
+ * Note [struct alignment]
+ * NB. be careful to avoid unwanted padding between fields, by
+ * putting the 8-byte fields on an 8-byte boundary.  Padding can
+ * vary between C compilers, and we don't take into account any
+ * possible padding when generating CCS and CC decls in the code
+ * generator (GHC.StgToCmm.Prof).
+ */
+
+typedef struct CostCentre_ {
+    StgInt ccID;              // Unique Id, allocated by the RTS
+
+    char * label;
+    char * module;
+    char * srcloc;
+
+    // used for accumulating costs at the end of the run...
+    StgWord64 mem_alloc;      // align 8 (Note [struct alignment])
+    StgWord   time_ticks;
+
+    StgBool is_caf;           // true <=> CAF cost centre
+
+    struct CostCentre_ *link;
+} CostCentre;
+
+typedef struct CostCentreStack_ {
+    StgInt ccsID;               // unique ID, allocated by the RTS
+
+    CostCentre *cc;             // Cost centre at the top of the stack
+
+    struct CostCentreStack_ *prevStack;   // parent
+    struct IndexTable_      *indexTable;  // children
+    struct CostCentreStack_ *root;        // root of stack
+    StgWord    depth;           // number of items in the stack
+
+    StgWord64  scc_count;       // Count of times this CCS is entered
+                                // align 8 (Note [struct alignment])
+
+    StgWord    selected;        // is this CCS shown in the heap
+                                // profile? (zero if excluded via -hc
+                                // -hm etc.)
+
+    StgWord    time_ticks;      // number of time ticks accumulated by
+                                // this CCS
+
+    StgWord64  mem_alloc;       // mem allocated by this CCS
+                                // align 8 (Note [struct alignment])
+
+    StgWord64  inherited_alloc; // sum of mem_alloc over all children
+                                // (calculated at the end)
+                                // align 8 (Note [struct alignment])
+
+    StgWord    inherited_ticks; // sum of time_ticks over all children
+                                // (calculated at the end)
+} CostCentreStack;
+
+
+/* -----------------------------------------------------------------------------
+ * Start and stop the profiling timer.  These can be called from
+ * Haskell to restrict the profile to portion(s) of the execution.
+ * See the module GHC.Profiling.
+ * ---------------------------------------------------------------------------*/
+
+void stopProfTimer      ( void );
+void startProfTimer     ( void );
+
+/* -----------------------------------------------------------------------------
+ * The rest is PROFILING only...
+ * ---------------------------------------------------------------------------*/
+
+#if defined(PROFILING)
+
+/* -----------------------------------------------------------------------------
+ * Constants
+ * ---------------------------------------------------------------------------*/
+
+#define EMPTY_STACK NULL
+#define EMPTY_TABLE NULL
+
+/* Constants used to set is_caf flag on CostCentres */
+#define CC_IS_CAF      true
+#define CC_NOT_CAF     false
+/* -----------------------------------------------------------------------------
+ * Data Structures
+ * ---------------------------------------------------------------------------*/
+
+// IndexTable is the list of children of a CCS. (Alternatively it is a
+// cache of the results of pushing onto a CCS, so that the second and
+// subsequent times we push a certain CC on a CCS we get the same
+// result).
+
+typedef struct IndexTable_ {
+    // Just a linked list of (cc, ccs) pairs, where the `ccs` is the result of
+    // pushing `cc` to the owner of the index table (another CostCentreStack).
+    CostCentre *cc;
+    CostCentreStack *ccs;
+    struct IndexTable_ *next;
+    // back_edge is true when `cc` is already in the stack, so pushing it
+    // truncates or drops (see RECURSION_DROPS and RECURSION_TRUNCATES in
+    // Profiling.c).
+    bool back_edge;
+} IndexTable;
+
+
+/* -----------------------------------------------------------------------------
+   Pre-defined cost centres and cost centre stacks
+   -------------------------------------------------------------------------- */
+
+#if IN_STG_CODE
+
+extern StgWord CC_MAIN[];
+extern StgWord CCS_MAIN[];      // Top CCS
+
+extern StgWord CC_SYSTEM[];
+extern StgWord CCS_SYSTEM[];    // RTS costs
+
+extern StgWord CC_GC[];
+extern StgWord CCS_GC[];         // Garbage collector costs
+
+extern StgWord CC_OVERHEAD[];
+extern StgWord CCS_OVERHEAD[];   // Profiling overhead
+
+extern StgWord CC_DONT_CARE[];
+extern StgWord CCS_DONT_CARE[];  // CCS attached to static constructors
+
+#else
+
+extern CostCentre      CC_MAIN[];
+extern CostCentreStack CCS_MAIN[];      // Top CCS
+
+extern CostCentre      CC_SYSTEM[];
+extern CostCentreStack CCS_SYSTEM[];    // RTS costs
+
+extern CostCentre      CC_GC[];
+extern CostCentreStack CCS_GC[];         // Garbage collector costs
+
+extern CostCentre      CC_OVERHEAD[];
+extern CostCentreStack CCS_OVERHEAD[];   // Profiling overhead
+
+extern CostCentre      CC_DONT_CARE[];
+extern CostCentreStack CCS_DONT_CARE[];  // shouldn't ever get set
+
+extern CostCentre      CC_PINNED[];
+extern CostCentreStack CCS_PINNED[];     // pinned memory
+
+extern CostCentre      CC_IDLE[];
+extern CostCentreStack CCS_IDLE[];       // capability is idle
+
+#endif /* IN_STG_CODE */
+
+extern unsigned int RTS_VAR(era);
+
+/* -----------------------------------------------------------------------------
+ * Functions
+ * ---------------------------------------------------------------------------*/
+
+CostCentreStack * pushCostCentre (CostCentreStack *, CostCentre *);
+void              enterFunCCS    (StgRegTable *reg, CostCentreStack *);
+CostCentre *mkCostCentre (char *label, char *module, char *srcloc);
+
+extern CostCentre * RTS_VAR(CC_LIST);               // registered CC list
+
+/* -----------------------------------------------------------------------------
+ * Declaring Cost Centres & Cost Centre Stacks.
+ * -------------------------------------------------------------------------- */
+
+# define CC_DECLARE(cc_ident,name,mod,loc,caf,is_local)  \
+     is_local CostCentre cc_ident[1]                     \
+       = {{ .ccID       = 0,                             \
+            .label      = name,                          \
+            .module     = mod,                           \
+            .srcloc     = loc,                           \
+            .time_ticks = 0,                             \
+            .mem_alloc  = 0,                             \
+            .link       = 0,                             \
+            .is_caf     = caf                            \
+         }};
+
+# define CCS_DECLARE(ccs_ident,cc_ident,is_local)        \
+     is_local CostCentreStack ccs_ident[1]               \
+       = {{ .ccsID               = 0,                    \
+            .cc                  = cc_ident,             \
+            .prevStack           = NULL,                 \
+            .indexTable          = NULL,                 \
+            .root                = NULL,                 \
+            .depth               = 0,                    \
+            .selected            = 0,                    \
+            .scc_count           = 0,                    \
+            .time_ticks          = 0,                    \
+            .mem_alloc           = 0,                    \
+            .inherited_ticks     = 0,                    \
+            .inherited_alloc     = 0                     \
+       }};
+
+/* -----------------------------------------------------------------------------
+ * Time / Allocation Macros
+ * ---------------------------------------------------------------------------*/
+
+/* Charge size minus the profiling header to ccs; copy p's prof.ccs to cap */
+#define CCS_ALLOC(ccs, size) ((ccs)->mem_alloc += ((size)-sizeofW(StgProfHeader)))
+#define ENTER_CCS_THUNK(cap,p) ((cap)->r.rCCCS = (p)->header.prof.ccs)
+
+#else /* !PROFILING */
+
+#define CCS_ALLOC(ccs, amount) doNothing()
+#define ENTER_CCS_THUNK(cap,p) doNothing()
+
+#endif /* PROFILING */
diff --git a/rts/include/rts/prof/Heap.h b/rts/include/rts/prof/Heap.h
new file mode 100644
index 0000000000..90700c809b
--- /dev/null
+++ b/rts/include/rts/prof/Heap.h
@@ -0,0 +1,24 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The University of Glasgow, 2009
+ *
+ * Heap Census Profiling
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ *   https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+/* -----------------------------------------------------------------------------
+ * Fine-grained control over heap census profiling which can be called from
+ * Haskell to restrict the profile to portion(s) of the execution.
+ * See the module GHC.Profiling.
+ * ---------------------------------------------------------------------------*/
+
+void requestHeapCensus ( void );
+void startHeapProfTimer ( void );
+void stopHeapProfTimer ( void );
diff --git a/rts/include/rts/prof/LDV.h b/rts/include/rts/prof/LDV.h
new file mode 100644
index 0000000000..73f7786537
--- /dev/null
+++ b/rts/include/rts/prof/LDV.h
@@ -0,0 +1,44 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The University of Glasgow, 2009
+ *
+ * Lag/Drag/Void profiling.
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ *   https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+#if defined(PROFILING)
+
+/* retrieves the LDV word from closure c */
+#define LDVW(c)                 (((StgClosure *)(c))->header.prof.hp.ldvw)
+
+/*
+ * Stores the creation time for closure c.
+ * This macro is called at the very moment of closure creation.
+ *
+ * NOTE: this initializes LDVW(c) to zero, which ensures that there
+ * is no conflict between retainer profiling and LDV profiling,
+ * because retainer profiling also expects LDVW(c) to be initialised
+ * to zero.
+ */
+
+#if defined(CMINUSMINUS)
+
+#else
+
+#define LDV_RECORD_CREATE(c)   \
+  LDVW((c)) = ((StgWord)RTS_DEREF(era) << LDV_SHIFT) | LDV_STATE_CREATE
+
+#endif
+
+#else  /* !PROFILING */
+
+#define LDV_RECORD_CREATE(c)   /* nothing */
+
+#endif /* PROFILING */
diff --git a/rts/include/rts/storage/Block.h b/rts/include/rts/storage/Block.h
new file mode 100644
index 0000000000..730947e375
--- /dev/null
+++ b/rts/include/rts/storage/Block.h
@@ -0,0 +1,368 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-1999
+ *
+ * Block structure for the storage manager
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+#include "ghcconfig.h"
+
+/* The actual block and megablock-size constants are defined in
+ * rts/include/Constants.h, all constants here are derived from these.
+ */
+
+/* Block related constants (BLOCK_SHIFT is defined in Constants.h) */
+
+#if SIZEOF_LONG == SIZEOF_VOID_P
+#define UNIT 1UL
+#elif SIZEOF_LONG_LONG == SIZEOF_VOID_P
+#define UNIT 1ULL
+#else
+#error "Size of pointer is suspicious."
+#endif
+
+#if defined(CMINUSMINUS)
+#define BLOCK_SIZE   (1<<BLOCK_SHIFT)
+#else
+#define BLOCK_SIZE   (UNIT<<BLOCK_SHIFT)
+// Note [integer overflow]
+#endif
+
+#define BLOCK_SIZE_W (BLOCK_SIZE/sizeof(W_))
+#define BLOCK_MASK   (BLOCK_SIZE-1)
+
+#define BLOCK_ROUND_UP(p)   (((W_)(p)+BLOCK_SIZE-1) & ~BLOCK_MASK)
+#define BLOCK_ROUND_DOWN(p) ((void *) ((W_)(p) & ~BLOCK_MASK))
+
+/* Megablock related constants (MBLOCK_SHIFT is defined in Constants.h) */
+
+#if defined(CMINUSMINUS)
+#define MBLOCK_SIZE    (1<<MBLOCK_SHIFT)
+#else
+#define MBLOCK_SIZE    (UNIT<<MBLOCK_SHIFT)
+// Note [integer overflow]
+#endif
+
+#define MBLOCK_SIZE_W  (MBLOCK_SIZE/sizeof(W_))
+#define MBLOCK_MASK    (MBLOCK_SIZE-1)
+
+#define MBLOCK_ROUND_UP(p)   ((void *)(((W_)(p)+MBLOCK_SIZE-1) & ~MBLOCK_MASK))
+#define MBLOCK_ROUND_DOWN(p) ((void *)((W_)(p) & ~MBLOCK_MASK ))
+
+/* The largest size an object can be before we give it a block of its
+ * own and treat it as an immovable object during GC, expressed as a
+ * fraction of BLOCK_SIZE.
+ */
+#define LARGE_OBJECT_THRESHOLD ((uint32_t)(BLOCK_SIZE * 8 / 10))
+
+/*
+ * Note [integer overflow]
+ *
+ * The UL (or ULL) suffix of UNIT in BLOCK_SIZE and MBLOCK_SIZE gives them
+ * a pointer-sized unsigned type, which means that expressions involving
+ * these will be promoted to that type, which makes integer overflow
+ * less likely.  Historically, integer overflow in expressions like
+ *    (n * BLOCK_SIZE)
+ * where n is int or unsigned int, has caused obscure segfaults in
+ * programs that use large amounts of memory (e.g. #7762, #5086).
+ */
+
+/* -----------------------------------------------------------------------------
+ * Block descriptor.  This structure *must* be the right length, so we
+ * can do pointer arithmetic on pointers to it.
+ */
+
+/* The block descriptor is 64 bytes on a 64-bit machine, and 32 bytes
+ * on a 32-bit machine.
+ */
+
+// Note: fields marked with [READ ONLY] must not be modified by the
+// client of the block allocator API.  All other fields can be
+// freely modified.
+
+#if !defined(CMINUSMINUS)
+
+
+struct NonmovingSegmentInfo {
+  StgWord8 log_block_size;
+  StgWord16 next_free_snap;
+};
+
+typedef struct bdescr_ {
+
+    StgPtr start;              // [READ ONLY] start addr of memory
+
+
+    union {
+        StgPtr free;               // First free byte of memory.
+                                   // allocGroup() sets this to the value of start.
+                                   // NB. during use this value should lie
+                                   // between start and start + blocks *
+                                   // BLOCK_SIZE.  Values outside this
+                                   // range are reserved for use by the
+                                   // block allocator.  In particular, the
+                                   // value (StgPtr)(-1) is used to
+                                   // indicate that a block is unallocated.
+                                   //
+                                   // Unused by the non-moving allocator.
+        struct NonmovingSegmentInfo nonmoving_segment;
+    };
+
+    struct bdescr_ *link;      // used for chaining blocks together
+
+    union {
+        struct bdescr_ *back;  // used (occasionally) for doubly-linked lists
+        StgWord *bitmap;       // bitmap for marking GC
+        StgPtr  scan;          // scan pointer for copying GC
+    } u;
+
+    struct generation_ *gen;   // generation
+
+    StgWord16 gen_no;          // gen->no, cached
+    StgWord16 dest_no;         // number of destination generation
+    StgWord16 node;            // which memory node does this block live on?
+
+    StgWord16 flags;           // block flags, see below
+
+    StgWord32 blocks;          // [READ ONLY] no. of blocks in a group
+                               // (if group head, 0 otherwise)
+
+#if SIZEOF_VOID_P == 8
+    StgWord32 _padding[3];
+#else
+    StgWord32 _padding[0];
+#endif
+} bdescr;
+#endif
+
+#if SIZEOF_VOID_P == 8
+#define BDESCR_SIZE  0x40
+#define BDESCR_MASK  0x3f
+#define BDESCR_SHIFT 6
+#else
+#define BDESCR_SIZE  0x20
+#define BDESCR_MASK  0x1f
+#define BDESCR_SHIFT 5
+#endif
+
+/* Block contains objects evacuated during this GC */
+#define BF_EVACUATED 1
+/* Block is a large object */
+#define BF_LARGE     2
+/* Block is pinned */
+#define BF_PINNED    4
+/* Block is to be marked, not copied. Also used for marked large objects in
+ * non-moving heap. */
+#define BF_MARKED    8
+/* Block is executable */
+#define BF_EXEC      32
+/* Block contains only a small amount of live data */
+#define BF_FRAGMENTED 64
+/* we know about this block (for finding leaks) */
+#define BF_KNOWN     128
+/* Block was swept in the last generation */
+#define BF_SWEPT     256
+/* Block is part of a Compact */
+#define BF_COMPACT   512
+/* A non-moving allocator segment (see NonMoving.c) */
+#define BF_NONMOVING 1024
+/* A large object which has been moved off of oldest_gen->large_objects and
+ * onto nonmoving_large_objects. The mark phase ignores objects which aren't
+ * so-flagged */
+#define BF_NONMOVING_SWEEPING 2048
+/* Maximum flag value (do not define anything higher than this!) */
+#define BF_FLAG_MAX  (1 << 15)
+
+/* Finding the block descriptor for a given block -------------------------- */
+
+#if defined(CMINUSMINUS)
+
+#define Bdescr(p) \
+    ((((p) &  MBLOCK_MASK & ~BLOCK_MASK) >> (BLOCK_SHIFT-BDESCR_SHIFT)) \
+     | ((p) & ~MBLOCK_MASK))
+
+#else
+
+EXTERN_INLINE bdescr *Bdescr(StgPtr p);
+EXTERN_INLINE bdescr *Bdescr(StgPtr p)
+{
+    const W_ addr      = (W_)p;
+    const W_ mblock    = addr & ~MBLOCK_MASK;               // megablock base
+    const W_ block_off = addr &  MBLOCK_MASK & ~BLOCK_MASK; // block offset in it
+    return (bdescr *)(mblock | (block_off >> (BLOCK_SHIFT-BDESCR_SHIFT)));
+}
+
+#endif
+
+/* Useful Macros ------------------------------------------------------------ */
+
+/* Offset of first real data block in a megablock */
+
+#define FIRST_BLOCK_OFF \
+   ((W_)BLOCK_ROUND_UP(BDESCR_SIZE * (MBLOCK_SIZE / BLOCK_SIZE)))
+
+/* First data block in a given megablock */
+
+#define FIRST_BLOCK(m) ((void *)(FIRST_BLOCK_OFF + (W_)(m)))
+
+/* Last data block in a given megablock */
+
+#define LAST_BLOCK(m)  ((void *)(MBLOCK_SIZE-BLOCK_SIZE + (W_)(m)))
+
+/* First real block descriptor in a megablock */
+
+#define FIRST_BDESCR(m) \
+   ((bdescr *)((FIRST_BLOCK_OFF>>(BLOCK_SHIFT-BDESCR_SHIFT)) + (W_)(m)))
+
+/* Last real block descriptor in a megablock */
+
+#define LAST_BDESCR(m) \
+  ((bdescr *)(((MBLOCK_SIZE-BLOCK_SIZE)>>(BLOCK_SHIFT-BDESCR_SHIFT)) + (W_)(m)))
+
+/* Number of usable blocks in a megablock */
+
+#if !defined(CMINUSMINUS) // already defined in DerivedConstants.h
+#define BLOCKS_PER_MBLOCK ((MBLOCK_SIZE - FIRST_BLOCK_OFF) / BLOCK_SIZE)
+#endif
+
+/* How many blocks in a megablock group of n megablocks */
+
+#define MBLOCK_GROUP_BLOCKS(n) \
+   (BLOCKS_PER_MBLOCK + ((n)-1) * (MBLOCK_SIZE / BLOCK_SIZE))
+
+/* Compute the required size (in megablocks) of a group holding n blocks */
+
+#define BLOCKS_TO_MBLOCKS(n) \
+   (1 + (W_)MBLOCK_ROUND_UP(((n)-BLOCKS_PER_MBLOCK) * BLOCK_SIZE) / MBLOCK_SIZE)
+
+
+#if !defined(CMINUSMINUS)
+/* to the end... */
+
+/* Double-linked block lists: --------------------------------------------- */
+
+/* Push bd onto the front of the doubly-linked list *list. */
+INLINE_HEADER void dbl_link_onto(bdescr *bd, bdescr **list)
+{
+    bd->link = *list;
+    bd->u.back = NULL;              /* the new head has no predecessor */
+    if (*list != NULL) {
+        (*list)->u.back = bd;       /* old head now points back at bd */
+    }
+    *list = bd;
+}
+
+/* Unlink bd from the doubly-linked list *list. */
+INLINE_HEADER void dbl_link_remove(bdescr *bd, bdescr **list)
+{
+    if (bd->u.back == NULL) {
+        *list = bd->link;                   /* no predecessor: move the head */
+    } else {
+        bd->u.back->link = bd->link;
+    }
+    if (bd->link != NULL) {
+        bd->link->u.back = bd->u.back;
+    }
+}
+
+/* Insert bd into a doubly-linked list directly after `after`. */
+INLINE_HEADER void dbl_link_insert_after(bdescr *bd, bdescr *after)
+{
+    bd->u.back = after;
+    bd->link = after->link;
+    if (after->link != NULL) {
+        after->link->u.back = bd;
+    }
+    after->link = bd;
+}
+
+/* Put new_ in old's position within the doubly-linked list *list. */
+INLINE_HEADER void dbl_link_replace(bdescr *new_, bdescr *old, bdescr **list)
+{
+    new_->u.back = old->u.back;
+    new_->link = old->link;
+    if (old->link != NULL) {
+        old->link->u.back = new_;
+    }
+    if (old->u.back == NULL) {
+        *list = new_;                       /* no predecessor: move the head */
+    } else {
+        old->u.back->link = new_;
+    }
+}
+
+/* Initialisation ---------------------------------------------------------- */
+
+extern void initBlockAllocator(void);
+
+/* Allocation -------------------------------------------------------------- */
+
+bdescr *allocGroup(W_ n);
+
+EXTERN_INLINE bdescr* allocBlock(void);
+EXTERN_INLINE bdescr* allocBlock(void)
+{
+    return allocGroup(1);   // shorthand for a group of exactly one block
+}
+
+bdescr *allocGroupOnNode(uint32_t node, W_ n);
+
+// Allocate n blocks, aligned at n-block boundary. The returned bdescr will
+// have this invariant
+//
+//     bdescr->start % (BLOCK_SIZE*n) == 0
+//
+bdescr *allocAlignedGroupOnNode(uint32_t node, W_ n);
+
+EXTERN_INLINE bdescr* allocBlockOnNode(uint32_t node);
+EXTERN_INLINE bdescr* allocBlockOnNode(uint32_t node)
+{
+    return allocGroupOnNode(node,1);   // one-block group, node passed through
+}
+
+// versions that take the storage manager lock for you:
+bdescr *allocGroup_lock(W_ n);
+bdescr *allocBlock_lock(void);
+
+bdescr *allocGroupOnNode_lock(uint32_t node, W_ n);
+bdescr *allocBlockOnNode_lock(uint32_t node);
+
+/* De-Allocation ----------------------------------------------------------- */
+
+void freeGroup(bdescr *p);
+void freeChain(bdescr *p);
+
+// versions that take the storage manager lock for you:
+void freeGroup_lock(bdescr *p);
+void freeChain_lock(bdescr *p);
+
+bdescr * splitBlockGroup (bdescr *bd, uint32_t blocks);
+
+/* Round a value to megablocks --------------------------------------------- */
+
+// We want to allocate an object around a given size, round it up or
+// down to the nearest size that will fit in an mblock group.
+INLINE_HEADER StgWord
+round_to_mblocks(StgWord words)
+{
+    // Sizes that fit in the usable blocks of one mblock are returned as is.
+    if (words <= BLOCKS_PER_MBLOCK * BLOCK_SIZE_W) {
+        return words;
+    }
+
+    // Count the gap at the beginning of the first mblock as part of the
+    // request, so the rounding can be done in uniform megablock units;
+    // the gap is taken off again before returning.
+    const StgWord gap_w = FIRST_BLOCK_OFF/sizeof(W_);
+    StgWord padded = words + gap_w;
+    StgWord whole = padded / MBLOCK_SIZE_W;
+    if ((padded % MBLOCK_SIZE_W) >= (MBLOCK_SIZE_W / 2)) {
+        whole += 1;     // at or past the half-way point: round up
+    }
+    return whole * MBLOCK_SIZE_W - gap_w;
+}
+
+#endif /* !CMINUSMINUS */
diff --git a/rts/include/rts/storage/ClosureMacros.h b/rts/include/rts/storage/ClosureMacros.h
new file mode 100644
index 0000000000..b841ef8be0
--- /dev/null
+++ b/rts/include/rts/storage/ClosureMacros.h
@@ -0,0 +1,645 @@
+/* ----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2012
+ *
+ * Macros for building and manipulating closures
+ *
+ * -------------------------------------------------------------------------- */
+
+#pragma once
+
+/* -----------------------------------------------------------------------------
+   Info tables are slammed up against the entry code, and the label
+   for the info table is at the *end* of the table itself.  This
+   inline function adjusts an info pointer to point to the beginning
+   of the table, so we can use standard C structure indexing on it.
+
+   Note: this works for SRT info tables as long as you don't want to
+   access the SRT, since they are laid out the same with the SRT
+   pointer as the first word in the table.
+
+   NOTES ABOUT MANGLED C VS. MINI-INTERPRETER:
+
+   A couple of definitions:
+
+       "info pointer"    The first word of the closure.  Might point
+                         to either the end or the beginning of the
+                         info table, depending on whether we're using
+                         the mini interpreter or not.  GET_INFO(c)
+                         retrieves the info pointer of a closure.
+
+       "info table"      The info table structure associated with a
+                         closure.  This is always a pointer to the
+                         beginning of the structure, so we can
+                         use standard C structure indexing to pull out
+                         the fields.  get_itbl(c) returns a pointer to
+                         the info table for closure c.
+
+   An address of the form xxxx_info points to the end of the info
+   table or the beginning of the info table depending on whether we're
+   mangling or not respectively.  So,
+
+         c->header.info = xxx_info
+
+   makes absolute sense, whether mangling or not.
+
+   -------------------------------------------------------------------------- */
+
+INLINE_HEADER void SET_INFO(StgClosure *c, const StgInfoTable *info) {
+    RELAXED_STORE(&c->header.info, info);
+}
+INLINE_HEADER void SET_INFO_RELEASE(StgClosure *c, const StgInfoTable *info) {
+    RELEASE_STORE(&c->header.info, info);
+}
+INLINE_HEADER const StgInfoTable *GET_INFO(StgClosure *c) {
+    return RELAXED_LOAD(&c->header.info);
+}
+
+#if defined(TABLES_NEXT_TO_CODE)
+EXTERN_INLINE StgInfoTable *INFO_PTR_TO_STRUCT(const StgInfoTable *info);
+EXTERN_INLINE StgInfoTable *INFO_PTR_TO_STRUCT(const StgInfoTable *info) {return (StgInfoTable *)info - 1;}
+EXTERN_INLINE StgRetInfoTable *RET_INFO_PTR_TO_STRUCT(const StgInfoTable *info);
+EXTERN_INLINE StgRetInfoTable *RET_INFO_PTR_TO_STRUCT(const StgInfoTable *info) {return (StgRetInfoTable *)info - 1;}
+INLINE_HEADER StgFunInfoTable *FUN_INFO_PTR_TO_STRUCT(const StgInfoTable *info) {return (StgFunInfoTable *)info - 1;}
+INLINE_HEADER StgThunkInfoTable *THUNK_INFO_PTR_TO_STRUCT(const StgInfoTable *info) {return (StgThunkInfoTable *)info - 1;}
+INLINE_HEADER StgConInfoTable *CON_INFO_PTR_TO_STRUCT(const StgInfoTable *info) {return (StgConInfoTable *)info - 1;}
+INLINE_HEADER StgFunInfoTable *itbl_to_fun_itbl(const StgInfoTable *i) {return (StgFunInfoTable *)(i + 1) - 1;}
+INLINE_HEADER StgRetInfoTable *itbl_to_ret_itbl(const StgInfoTable *i) {return (StgRetInfoTable *)(i + 1) - 1;}
+INLINE_HEADER StgThunkInfoTable *itbl_to_thunk_itbl(const StgInfoTable *i) {return (StgThunkInfoTable *)(i + 1) - 1;}
+INLINE_HEADER StgConInfoTable *itbl_to_con_itbl(const StgInfoTable *i) {return (StgConInfoTable *)(i + 1) - 1;}
+#else
+EXTERN_INLINE StgInfoTable *INFO_PTR_TO_STRUCT(const StgInfoTable *info);
+EXTERN_INLINE StgInfoTable *INFO_PTR_TO_STRUCT(const StgInfoTable *info) {return (StgInfoTable *)info;}
+EXTERN_INLINE StgRetInfoTable *RET_INFO_PTR_TO_STRUCT(const StgInfoTable *info);
+EXTERN_INLINE StgRetInfoTable *RET_INFO_PTR_TO_STRUCT(const StgInfoTable *info) {return (StgRetInfoTable *)info;}
+INLINE_HEADER StgFunInfoTable *FUN_INFO_PTR_TO_STRUCT(const StgInfoTable *info) {return (StgFunInfoTable *)info;}
+INLINE_HEADER StgThunkInfoTable *THUNK_INFO_PTR_TO_STRUCT(const StgInfoTable *info) {return (StgThunkInfoTable *)info;}
+INLINE_HEADER StgConInfoTable *CON_INFO_PTR_TO_STRUCT(const StgInfoTable *info) {return (StgConInfoTable *)info;}
+INLINE_HEADER StgFunInfoTable *itbl_to_fun_itbl(const StgInfoTable *i) {return (StgFunInfoTable *)i;}
+INLINE_HEADER StgRetInfoTable *itbl_to_ret_itbl(const StgInfoTable *i) {return (StgRetInfoTable *)i;}
+INLINE_HEADER StgThunkInfoTable *itbl_to_thunk_itbl(const StgInfoTable *i) {return (StgThunkInfoTable *)i;}
+INLINE_HEADER StgConInfoTable *itbl_to_con_itbl(const StgInfoTable *i) {return (StgConInfoTable *)i;}
+#endif
+
+EXTERN_INLINE const StgInfoTable *get_itbl(const StgClosure *c);
+EXTERN_INLINE const StgInfoTable *get_itbl(const StgClosure *c)
+{
+    return INFO_PTR_TO_STRUCT(RELAXED_LOAD(&c->header.info));
+}
+
+EXTERN_INLINE const StgRetInfoTable *get_ret_itbl(const StgClosure *c);
+EXTERN_INLINE const StgRetInfoTable *get_ret_itbl(const StgClosure *c)
+{
+    return RET_INFO_PTR_TO_STRUCT(RELAXED_LOAD(&c->header.info));
+}
+
+INLINE_HEADER const StgFunInfoTable *get_fun_itbl(const StgClosure *c)
+{
+    return FUN_INFO_PTR_TO_STRUCT(RELAXED_LOAD(&c->header.info));
+}
+
+INLINE_HEADER const StgThunkInfoTable *get_thunk_itbl(const StgClosure *c)
+{
+    return THUNK_INFO_PTR_TO_STRUCT(RELAXED_LOAD(&c->header.info));
+}
+
+INLINE_HEADER const StgConInfoTable *get_con_itbl(const StgClosure *c)
+{
+    return CON_INFO_PTR_TO_STRUCT(RELAXED_LOAD(&c->header.info));
+}
+
+INLINE_HEADER StgHalfWord GET_TAG(const StgClosure *con)
+{
+    return get_itbl(con)->srt;
+}
+
+/* -----------------------------------------------------------------------------
+   Macros for building closures
+   -------------------------------------------------------------------------- */
+
+#if defined(PROFILING)
+/*
+  The following macro works for both retainer profiling and LDV profiling. For
+ retainer profiling, 'era' remains 0, so by setting the 'ldvw' field we also set
+ 'rs' to zero.
+
+ Note that we don't have to bother handling the 'flip' bit properly[1] since the
+ retainer profiling code will just set 'rs' to NULL upon visiting a closure with
+ an invalid 'flip' bit anyways.
+
+ See Note [Profiling heap traversal visited bit] for details.
+
+ [1]: Technically we should set 'rs' to `NULL | flip`.
+ */
+#define SET_PROF_HDR(c,ccs_)            \
+        ((c)->header.prof.ccs = ccs_,   \
+        LDV_RECORD_CREATE((c)))
+#else
+#define SET_PROF_HDR(c,ccs)
+#endif
+
+#define SET_HDR(c,_info,ccs)                            \
+   {                                                    \
+        SET_PROF_HDR((StgClosure *)(c),ccs);            \
+        RELAXED_STORE(&(c)->header.info, _info);        \
+   }
+
+#define SET_HDR_RELEASE(c,_info,ccs)                    \
+   {                                                    \
+        SET_PROF_HDR((StgClosure *)(c),ccs);            \
+        RELEASE_STORE(&(c)->header.info, _info);        \
+   }
+
+#define SET_ARR_HDR(c,info,costCentreStack,n_bytes)     \
+   (c)->bytes = n_bytes;                                \
+   SET_HDR(c,info,costCentreStack);
+
+// Use when changing a closure from one kind to another
+#define OVERWRITE_INFO(c, new_info)                             \
+    OVERWRITING_CLOSURE((StgClosure *)(c));                     \
+    SET_INFO((StgClosure *)(c), (new_info));                    \
+    LDV_RECORD_CREATE(c);
+
+/* -----------------------------------------------------------------------------
+   How to get hold of the static link field for a static closure.
+   -------------------------------------------------------------------------- */
+
+/* These are hard-coded. */
+#define THUNK_STATIC_LINK(p) (&(p)->payload[1])
+#define IND_STATIC_LINK(p)   (&(p)->payload[1])
+
+INLINE_HEADER StgClosure **
+STATIC_LINK(const StgInfoTable *info, StgClosure *p)
+{
+    // THUNK_STATIC and IND_STATIC use the hard-coded link slot macros.
+    if (info->type == THUNK_STATIC) {
+        return THUNK_STATIC_LINK(p);
+    }
+    if (info->type == IND_STATIC) {
+        return IND_STATIC_LINK(p);
+    }
+    // Any other type: the slot just past the ptrs + nptrs payload words.
+    return &p->payload[info->layout.payload.ptrs + info->layout.payload.nptrs];
+}
+
+/* -----------------------------------------------------------------------------
+   INTLIKE and CHARLIKE closures.
+   -------------------------------------------------------------------------- */
+
+INLINE_HEADER P_ CHARLIKE_CLOSURE(int n) {
+    return (P_)&stg_CHARLIKE_closure[(n)-MIN_CHARLIKE];
+}
+INLINE_HEADER P_ INTLIKE_CLOSURE(int n) {
+    return (P_)&stg_INTLIKE_closure[(n)-MIN_INTLIKE];
+}
+
+/* ----------------------------------------------------------------------------
+   Macros for untagging and retagging closure pointers
+   For more information look at the comments in Cmm.h
+   ------------------------------------------------------------------------- */
+
+static inline StgWord
+GET_CLOSURE_TAG(const StgClosure * p)
+{
+    return (StgWord)p & TAG_MASK;
+}
+
+static inline StgClosure *
+UNTAG_CLOSURE(StgClosure * p)
+{
+    return (StgClosure*)((StgWord)p & ~TAG_MASK);
+}
+
+static inline const StgClosure *
+UNTAG_CONST_CLOSURE(const StgClosure * p)
+{
+    return (const StgClosure*)((StgWord)p & ~TAG_MASK);
+}
+
+static inline StgClosure *
+TAG_CLOSURE(StgWord tag,StgClosure * p)
+{
+    return (StgClosure*)((StgWord)p | tag);
+}
+
+/* -----------------------------------------------------------------------------
+   Forwarding pointers
+   -------------------------------------------------------------------------- */
+
+#define IS_FORWARDING_PTR(p) ((((StgWord)(p)) & 1) != 0) /* low bit set? */
+#define MK_FORWARDING_PTR(p) (((StgWord)(p)) | 1)        /* set low bit */
+#define UN_FORWARDING_PTR(p) (((StgWord)(p)) - 1)        /* undo MK_ */
+
+/* -----------------------------------------------------------------------------
+   DEBUGGING predicates for pointers
+
+   LOOKS_LIKE_INFO_PTR(p)    returns False if p is definitely not an info ptr
+   LOOKS_LIKE_CLOSURE_PTR(p) returns False if p is definitely not a closure ptr
+
+   These macros are complete but not sound.  That is, they might
+   return false positives.  Do not rely on them to distinguish info
+   pointers from closure pointers, for example.
+
+   We don't use address-space predicates these days, for portability
+   reasons, and the fact that code/data can be scattered about the
+   address space in a dynamically-linked environment.  Our best option
+   is to look at the alleged info table and see whether it seems to
+   make sense...
+   -------------------------------------------------------------------------- */
+
+INLINE_HEADER bool LOOKS_LIKE_INFO_PTR_NOT_NULL (StgWord p)
+{
+    StgInfoTable *info = INFO_PTR_TO_STRUCT((StgInfoTable *)p);
+    return info->type != INVALID_OBJECT && info->type < N_CLOSURE_TYPES;
+}
+
+INLINE_HEADER bool LOOKS_LIKE_INFO_PTR (StgWord p)
+{
+    return p && (IS_FORWARDING_PTR(p) || LOOKS_LIKE_INFO_PTR_NOT_NULL(p));
+}
+
+INLINE_HEADER bool LOOKS_LIKE_CLOSURE_PTR (const void *p)
+{
+    const StgInfoTable *info = RELAXED_LOAD(&UNTAG_CONST_CLOSURE((const StgClosure *) (p))->header.info);
+    return LOOKS_LIKE_INFO_PTR((StgWord) info);
+}
+
+/* -----------------------------------------------------------------------------
+   Macros for calculating the size of a closure
+   -------------------------------------------------------------------------- */
+
+EXTERN_INLINE StgOffset PAP_sizeW   ( uint32_t n_args );
+EXTERN_INLINE StgOffset PAP_sizeW   ( uint32_t n_args )
+{ return sizeofW(StgPAP) + n_args; }
+
+EXTERN_INLINE StgOffset AP_sizeW   ( uint32_t n_args );
+EXTERN_INLINE StgOffset AP_sizeW   ( uint32_t n_args )
+{ return sizeofW(StgAP) + n_args; }
+
+EXTERN_INLINE StgOffset AP_STACK_sizeW ( uint32_t size );
+EXTERN_INLINE StgOffset AP_STACK_sizeW ( uint32_t size )
+{ return sizeofW(StgAP_STACK) + size; }
+
+EXTERN_INLINE StgOffset CONSTR_sizeW( uint32_t p, uint32_t np );
+EXTERN_INLINE StgOffset CONSTR_sizeW( uint32_t p, uint32_t np )
+{ return sizeofW(StgHeader) + p + np; }
+
+EXTERN_INLINE StgOffset THUNK_SELECTOR_sizeW ( void );
+EXTERN_INLINE StgOffset THUNK_SELECTOR_sizeW ( void )
+{ return sizeofW(StgSelector); }
+
+EXTERN_INLINE StgOffset BLACKHOLE_sizeW ( void );
+EXTERN_INLINE StgOffset BLACKHOLE_sizeW ( void )
+{ return sizeofW(StgInd); } // a BLACKHOLE is a kind of indirection
+
+/* --------------------------------------------------------------------------
+   Sizes of closures
+   ------------------------------------------------------------------------*/
+
+EXTERN_INLINE StgOffset sizeW_fromITBL( const StgInfoTable* itbl );
+EXTERN_INLINE StgOffset sizeW_fromITBL( const StgInfoTable* itbl )
+{ return sizeofW(StgClosure)
+       + sizeofW(StgPtr)  * itbl->layout.payload.ptrs
+       + sizeofW(StgWord) * itbl->layout.payload.nptrs; }
+
+EXTERN_INLINE StgOffset thunk_sizeW_fromITBL( const StgInfoTable* itbl );
+EXTERN_INLINE StgOffset thunk_sizeW_fromITBL( const StgInfoTable* itbl )
+{ return sizeofW(StgThunk)
+       + sizeofW(StgPtr)  * itbl->layout.payload.ptrs
+       + sizeofW(StgWord) * itbl->layout.payload.nptrs; }
+
+EXTERN_INLINE StgOffset ap_stack_sizeW( StgAP_STACK* x );
+EXTERN_INLINE StgOffset ap_stack_sizeW( StgAP_STACK* x )
+{ return AP_STACK_sizeW(x->size); }
+
+EXTERN_INLINE StgOffset ap_sizeW( StgAP* x );
+EXTERN_INLINE StgOffset ap_sizeW( StgAP* x )
+{ return AP_sizeW(x->n_args); }
+
+EXTERN_INLINE StgOffset pap_sizeW( StgPAP* x );
+EXTERN_INLINE StgOffset pap_sizeW( StgPAP* x )
+{ return PAP_sizeW(x->n_args); }
+
+EXTERN_INLINE StgWord arr_words_words( StgArrBytes* x);
+EXTERN_INLINE StgWord arr_words_words( StgArrBytes* x)
+{ return ROUNDUP_BYTES_TO_WDS(x->bytes); }
+
+EXTERN_INLINE StgOffset arr_words_sizeW( StgArrBytes* x );
+EXTERN_INLINE StgOffset arr_words_sizeW( StgArrBytes* x )
+{ return sizeofW(StgArrBytes) + arr_words_words(x); }
+
+EXTERN_INLINE StgOffset mut_arr_ptrs_sizeW( StgMutArrPtrs* x );
+EXTERN_INLINE StgOffset mut_arr_ptrs_sizeW( StgMutArrPtrs* x )
+{ return sizeofW(StgMutArrPtrs) + x->size; }
+
+EXTERN_INLINE StgOffset small_mut_arr_ptrs_sizeW( StgSmallMutArrPtrs* x );
+EXTERN_INLINE StgOffset small_mut_arr_ptrs_sizeW( StgSmallMutArrPtrs* x )
+{ return sizeofW(StgSmallMutArrPtrs) + x->ptrs; }
+
+EXTERN_INLINE StgWord stack_sizeW ( StgStack *stack );
+EXTERN_INLINE StgWord stack_sizeW ( StgStack *stack )
+{ return sizeofW(StgStack) + stack->stack_size; }
+
+EXTERN_INLINE StgWord bco_sizeW ( StgBCO *bco );
+EXTERN_INLINE StgWord bco_sizeW ( StgBCO *bco )
+{ return bco->size; }
+
+EXTERN_INLINE StgWord compact_nfdata_full_sizeW ( StgCompactNFData *str );
+EXTERN_INLINE StgWord compact_nfdata_full_sizeW ( StgCompactNFData *str )
+{ return str->totalW; }
+
+/*
+ * TODO: Consider switching the return type from 'uint32_t' to 'StgWord' (#8742)
+ *
+ * (Also for 'closure_sizeW' below)
+ */
+EXTERN_INLINE uint32_t
+closure_sizeW_ (const StgClosure *p, const StgInfoTable *info);
+EXTERN_INLINE uint32_t
+closure_sizeW_ (const StgClosure *p, const StgInfoTable *info)
+{
+    switch (info->type) {
+    case THUNK_0_1:
+    case THUNK_1_0:
+        return sizeofW(StgThunk) + 1;
+    case FUN_0_1:
+    case CONSTR_0_1:
+    case FUN_1_0:
+    case CONSTR_1_0:
+        return sizeofW(StgHeader) + 1;
+    case THUNK_0_2:
+    case THUNK_1_1:
+    case THUNK_2_0:
+        return sizeofW(StgThunk) + 2;
+    case FUN_0_2:
+    case CONSTR_0_2:
+    case FUN_1_1:
+    case CONSTR_1_1:
+    case FUN_2_0:
+    case CONSTR_2_0:
+        return sizeofW(StgHeader) + 2;
+    case THUNK:
+        return thunk_sizeW_fromITBL(info);
+    case THUNK_SELECTOR:
+        return THUNK_SELECTOR_sizeW();
+    case AP_STACK:
+        return ap_stack_sizeW((StgAP_STACK *)p);
+    case AP:
+        return ap_sizeW((StgAP *)p);
+    case PAP:
+        return pap_sizeW((StgPAP *)p);
+    case IND:
+        return sizeofW(StgInd);
+    case ARR_WORDS:
+        return arr_words_sizeW((StgArrBytes *)p);
+    case MUT_ARR_PTRS_CLEAN:
+    case MUT_ARR_PTRS_DIRTY:
+    case MUT_ARR_PTRS_FROZEN_CLEAN:
+    case MUT_ARR_PTRS_FROZEN_DIRTY:
+        return mut_arr_ptrs_sizeW((StgMutArrPtrs*)p);
+    case SMALL_MUT_ARR_PTRS_CLEAN:
+    case SMALL_MUT_ARR_PTRS_DIRTY:
+    case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
+    case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
+        return small_mut_arr_ptrs_sizeW((StgSmallMutArrPtrs*)p);
+    case TSO:
+        return sizeofW(StgTSO);
+    case STACK:
+        return stack_sizeW((StgStack*)p);
+    case BCO:
+        return bco_sizeW((StgBCO *)p);
+    case TREC_CHUNK:
+        return sizeofW(StgTRecChunk);
+    default:
+        return sizeW_fromITBL(info);
+    }
+}
+
+// The definitive way to find the size, in words, of a heap-allocated closure
+EXTERN_INLINE uint32_t closure_sizeW (const StgClosure *p);
+EXTERN_INLINE uint32_t closure_sizeW (const StgClosure *p)
+{
+    return closure_sizeW_(p, get_itbl(p));
+}
+
+/* -----------------------------------------------------------------------------
+   Sizes of stack frames
+   -------------------------------------------------------------------------- */
+
+EXTERN_INLINE StgWord stack_frame_sizeW( StgClosure *frame );
+EXTERN_INLINE StgWord stack_frame_sizeW( StgClosure *frame )
+{
+    const StgRetInfoTable *info = get_ret_itbl(frame);
+
+    // RET_FUN: fixed StgRetFun part plus the frame's own 'size' field.
+    if (info->i.type == RET_FUN) {
+        return sizeofW(StgRetFun) + ((StgRetFun *)frame)->size;
+    }
+    // RET_BIG: one word plus the size recorded in the large bitmap.
+    if (info->i.type == RET_BIG) {
+        return 1 + GET_LARGE_BITMAP(&info->i)->size;
+    }
+    // RET_BCO: two words plus the bitmap size of the BCO held in the
+    // frame's second word.
+    if (info->i.type == RET_BCO) {
+        return 2 + BCO_BITMAP_SIZE((StgBCO *)((P_)frame)[1]);
+    }
+    // Everything else: one word plus the size in the layout bitmap.
+    return 1 + BITMAP_SIZE(info->i.layout.bitmap);
+}
+
+/* -----------------------------------------------------------------------------
+   StgMutArrPtrs macros
+
+   An StgMutArrPtrs has a card table to indicate which elements are
+   dirty for the generational GC.  The card table is an array of
+   bytes, where each byte covers (1 << MUT_ARR_PTRS_CARD_BITS)
+   elements.  The card table is directly after the array data itself.
+   -------------------------------------------------------------------------- */
+
+// The number of card bytes needed
+INLINE_HEADER W_ mutArrPtrsCards (W_ elems)
+{
+    const W_ per_card = (W_)(1 << MUT_ARR_PTRS_CARD_BITS); // elems per card
+    return (elems + per_card - 1) >> MUT_ARR_PTRS_CARD_BITS; // round up
+}
+
+// The number of words in the card table
+INLINE_HEADER W_ mutArrPtrsCardTableSize (W_ elems)
+{
+    return ROUNDUP_BYTES_TO_WDS(mutArrPtrsCards(elems));
+}
+
+// The address of the card for a particular card number
+INLINE_HEADER StgWord8 *mutArrPtrsCard (StgMutArrPtrs *a, W_ n)
+{
+    return ((StgWord8 *)&(a->payload[a->ptrs]) + n);
+}
+
+/* -----------------------------------------------------------------------------
+   Replacing a closure with a different one.  We must call
+   OVERWRITING_CLOSURE(p) on the old closure that is about to be
+   overwritten.
+
+   Note [zeroing slop when overwriting closures]
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+   When we overwrite a closure in the heap with a smaller one, in some scenarios
+   we need to write zero words into "slop"; the memory that is left
+   unoccupied. See Note [slop on the heap]
+
+   Zeroing slop is required for:
+
+    - full-heap sanity checks (DEBUG, and +RTS -DS), and
+
+    - LDV profiling (PROFILING, and +RTS -hb).
+
+   However we can get into trouble if we're zeroing slop for ordinarily
+   immutable closures when using multiple threads, since there is nothing
+   preventing another thread from still being in the process of reading the
+   memory we're about to zero.
+
+   Thus, with the THREADED RTS and +RTS -N2 or greater we must not zero
+   immutable closure's slop.
+
+   Hence, an immutable closure's slop is zeroed when either:
+
+    - PROFILING && era > 0 (LDV is on) or
+    - !THREADED && DEBUG
+
+   Additionally:
+
+    - LDV profiling and +RTS -N2 are incompatible,
+
+    - full-heap sanity checks are disabled for the THREADED RTS, at least when
+      they don't run right after GC when there is no slop.
+      See Note [heap sanity checking with SMP].
+
+   -------------------------------------------------------------------------- */
+
+#if defined(PROFILING) || defined(DEBUG)
+#define OVERWRITING_CLOSURE(c) \
+    overwritingClosure(c)
+#define OVERWRITING_CLOSURE_SIZE(c, size) \
+    overwritingClosureSize(c, size)
+#define OVERWRITING_CLOSURE_MUTABLE(c, off) \
+    overwritingMutableClosureOfs(c, off)
+#else
+#define OVERWRITING_CLOSURE(c) \
+    do { (void) sizeof(c); } while(0)
+#define OVERWRITING_CLOSURE_SIZE(c, size) \
+    do { (void) sizeof(c); (void) sizeof(size); } while(0)
+#define OVERWRITING_CLOSURE_MUTABLE(c, off) \
+    do { (void) sizeof(c); (void) sizeof(off); } while(0)
+#endif
+
+#if defined(PROFILING)
+void LDV_recordDead (const StgClosure *c, uint32_t size);
+RTS_PRIVATE bool isInherentlyUsed ( StgHalfWord closure_type );
+#endif
+
+EXTERN_INLINE void
+zeroSlop (
+    StgClosure *p,   /*< closure whose words [offset, size) may be zeroed */
+    uint32_t offset, /*< offset to start zeroing at, in words */
+    uint32_t size,   /*< total closure size, in words */
+    bool known_mutable /*< is this a closure whose slop we can always zero? */
+    );
+
+EXTERN_INLINE void
+zeroSlop (StgClosure *p, uint32_t offset, uint32_t size, bool known_mutable)
+{
+    // see Note [zeroing slop when overwriting closures], also #8402
+
+    const bool want_to_zero_immutable_slop = false
+        // Sanity checking (-DS) is enabled
+        || RTS_DEREF(RtsFlags).DebugFlags.sanity
+#if defined(PROFILING)
+        // LDV profiler is enabled
+        || era > 0
+#endif
+        ;
+
+    const bool can_zero_immutable_slop =
+        // Only if we're running single threaded.
+        RTS_DEREF(RtsFlags).ParFlags.nCapabilities <= 1;
+
+    const bool zero_slop_immutable =
+        want_to_zero_immutable_slop && can_zero_immutable_slop;
+
+    const bool zero_slop_mutable =
+#if defined(PROFILING)
+        // Always zero mutable closure slop when profiling. We do this to cover
+        // the case of shrinking mutable arrays in pinned blocks for the heap
+        // profiler, see Note [skipping slop in the heap profiler]
+        //
+        // TODO: We could make this check more specific and only zero if the
+        // object is in a BF_PINNED bdescr here. Update Note [slop on the heap]
+        // and [zeroing slop when overwriting closures] if you change this.
+        true
+#else
+        zero_slop_immutable
+#endif
+        ;
+
+    const bool zero_slop =
+        // If we're not sure this is a mutable closure treat it like an
+        // immutable one.
+        known_mutable ? zero_slop_mutable : zero_slop_immutable;
+
+    if(!zero_slop)
+        return;
+
+    for (uint32_t i = offset; i < size; i++) {
+        ((StgWord *)p)[i] = 0;
+    }
+}
+
+EXTERN_INLINE void overwritingClosure (StgClosure *p);
+EXTERN_INLINE void overwritingClosure (StgClosure *p)
+{
+    W_ size = closure_sizeW(p);
+#if defined(PROFILING)
+    if(era > 0 && !isInherentlyUsed(get_itbl(p)->type))
+        LDV_recordDead(p, size);
+#endif
+    zeroSlop(p, sizeofW(StgThunkHeader), size, /*known_mutable=*/false);
+}
+
+// Version of 'overwritingClosure' which overwrites only a suffix of a
+// closure.  The offset is expressed in words relative to 'p' and shall
+// be less than or equal to closure_sizeW(p), and usually at least as
+// large as the respective thunk header.
+EXTERN_INLINE void
+overwritingMutableClosureOfs (StgClosure *p, uint32_t offset);
+
+EXTERN_INLINE void
+overwritingMutableClosureOfs (StgClosure *p, uint32_t offset)
+{
+    // Since overwritingMutableClosureOfs is only ever called by:
+    //
+    //   - shrinkMutableByteArray# (ARR_WORDS) and
+    //
+    //   - shrinkSmallMutableArray# (SMALL_MUT_ARR_PTRS)
+    //
+    // we can safely omit the LDV_recordDead call. Since these closures are
+    // considered inherently used we don't need to track their destruction.
+#if defined(PROFILING)
+    ASSERT(isInherentlyUsed(get_itbl(p)->type) == true);
+#endif
+    zeroSlop(p, offset, closure_sizeW(p), /*known_mutable=*/true);
+}
+
+// Version of 'overwritingClosure' which takes closure size as argument.
+EXTERN_INLINE void overwritingClosureSize (StgClosure *p, uint32_t size /* in words */);
+EXTERN_INLINE void overwritingClosureSize (StgClosure *p, uint32_t size)
+{
+    // This function is only called from stg_AP_STACK so we can assume it's not
+    // inherently used.
+#if defined(PROFILING)
+    ASSERT(isInherentlyUsed(get_itbl(p)->type) == false);
+    if(era > 0)
+        LDV_recordDead(p, size);
+#endif
+    zeroSlop(p, sizeofW(StgThunkHeader), size, /*known_mutable=*/false);
+}
diff --git a/rts/include/rts/storage/ClosureTypes.h b/rts/include/rts/storage/ClosureTypes.h
new file mode 100644
index 0000000000..85dc1a0ce4
--- /dev/null
+++ b/rts/include/rts/storage/ClosureTypes.h
@@ -0,0 +1,86 @@
+/* ----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2005
+ *
+ * Closure Type Constants: out here because the native code generator
+ * needs to get at them.
+ *
+ * -------------------------------------------------------------------------- */
+
+#pragma once
+
+/*
+ * WARNING WARNING WARNING
+ *
+ * If you add or delete any closure types, don't forget to update the following,
+ *   - the closure flags table in rts/ClosureFlags.c
+ *   - isRetainer in rts/RetainerProfile.c
+ *   - the closure_type_names list in rts/Printer.c
+ */
+
+/* Object tag 0 raises an internal error */
+#define INVALID_OBJECT                0
+#define CONSTR                        1
+#define CONSTR_1_0                    2
+#define CONSTR_0_1                    3
+#define CONSTR_2_0                    4
+#define CONSTR_1_1                    5
+#define CONSTR_0_2                    6
+#define CONSTR_NOCAF                  7
+#define FUN                           8
+#define FUN_1_0                       9
+#define FUN_0_1                       10
+#define FUN_2_0                       11
+#define FUN_1_1                       12
+#define FUN_0_2                       13
+#define FUN_STATIC                    14
+#define THUNK                         15
+#define THUNK_1_0                     16
+#define THUNK_0_1                     17
+#define THUNK_2_0                     18
+#define THUNK_1_1                     19
+#define THUNK_0_2                     20
+#define THUNK_STATIC                  21
+#define THUNK_SELECTOR                22
+#define BCO                           23
+#define AP                            24
+#define PAP                           25
+#define AP_STACK                      26
+#define IND                           27
+#define IND_STATIC                    28
+#define RET_BCO                       29
+#define RET_SMALL                     30
+#define RET_BIG                       31
+#define RET_FUN                       32
+#define UPDATE_FRAME                  33
+#define CATCH_FRAME                   34
+#define UNDERFLOW_FRAME               35
+#define STOP_FRAME                    36
+#define BLOCKING_QUEUE                37
+#define BLACKHOLE                     38
+#define MVAR_CLEAN                    39
+#define MVAR_DIRTY                    40
+#define TVAR                          41
+#define ARR_WORDS                     42
+#define MUT_ARR_PTRS_CLEAN            43
+#define MUT_ARR_PTRS_DIRTY            44
+#define MUT_ARR_PTRS_FROZEN_DIRTY     45
+#define MUT_ARR_PTRS_FROZEN_CLEAN     46
+#define MUT_VAR_CLEAN                 47
+#define MUT_VAR_DIRTY                 48
+#define WEAK                          49
+#define PRIM                          50
+#define MUT_PRIM                      51
+#define TSO                           52
+#define STACK                         53
+#define TREC_CHUNK                    54
+#define ATOMICALLY_FRAME              55
+#define CATCH_RETRY_FRAME             56
+#define CATCH_STM_FRAME               57
+#define WHITEHOLE                     58
+#define SMALL_MUT_ARR_PTRS_CLEAN      59
+#define SMALL_MUT_ARR_PTRS_DIRTY      60
+#define SMALL_MUT_ARR_PTRS_FROZEN_DIRTY 61
+#define SMALL_MUT_ARR_PTRS_FROZEN_CLEAN 62
+#define COMPACT_NFDATA                63
+#define N_CLOSURE_TYPES               64
diff --git a/rts/include/rts/storage/Closures.h b/rts/include/rts/storage/Closures.h
new file mode 100644
index 0000000000..ebb836bca2
--- /dev/null
+++ b/rts/include/rts/storage/Closures.h
@@ -0,0 +1,488 @@
+/* ----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2004
+ *
+ * Closures
+ *
+ * -------------------------------------------------------------------------- */
+
+#pragma once
+
+/*
+ * The Layout of a closure header depends on which kind of system we're
+ * compiling for: profiling, parallel, ticky, etc.
+ */
+
+/* -----------------------------------------------------------------------------
+   The profiling header
+   -------------------------------------------------------------------------- */
+
+typedef struct {
+  CostCentreStack *ccs;
+  union {
+    StgWord trav;             /* Heap traversal */
+    StgWord ldvw;             /* Lag/Drag/Void Word */
+  } hp;
+    // Heap profiling header. This field is shared among the various heap
+    // profiling modes. Currently it is used by ProfHeap.c for Lag/Drag/Void
+    // profiling and by the heap traversal modes using TraverseHeap.c such as
+    // the retainer profiler.
+} StgProfHeader;
+
+/* -----------------------------------------------------------------------------
+   The SMP header
+
+   A thunk has a padding word to take the updated value.  This is so
+   that the update doesn't overwrite the payload, so we can avoid
+   needing to lock the thunk during entry and update.
+
+   Note: this doesn't apply to THUNK_STATICs, which have no payload.
+
+   Note: we leave this padding word in all ways, rather than just SMP,
+   so that we don't have to recompile all our libraries for SMP.
+   -------------------------------------------------------------------------- */
+
+typedef struct {
+    StgWord pad;
+} StgSMPThunkHeader;
+
+/* -----------------------------------------------------------------------------
+   The full fixed-size closure header
+
+   The size of the fixed header is the sum of the optional parts plus a single
+   word for the entry code pointer.
+   -------------------------------------------------------------------------- */
+
+typedef struct {
+    // If TABLES_NEXT_TO_CODE is defined, then `info` is offset by
+    // `sizeof(StgInfoTable)` and so points to the `code` field of the
+    // StgInfoTable! You may want to use `get_itbl` to get the pointer to the
+    // start of the info table. See
+    // https://gitlab.haskell.org/ghc/ghc/-/wikis/commentary/rts/storage/heap-objects#tables_next_to_code.
+    const StgInfoTable* info;
+#if defined(PROFILING)
+    StgProfHeader         prof;
+#endif
+} StgHeader;
+
+typedef struct {
+    const StgInfoTable* info;
+#if defined(PROFILING)
+    StgProfHeader         prof;
+#endif
+    StgSMPThunkHeader     smp;
+} StgThunkHeader;
+
+#define THUNK_EXTRA_HEADER_W (sizeofW(StgThunkHeader)-sizeofW(StgHeader))
+
+/* -----------------------------------------------------------------------------
+   Closure Types
+
+   For any given closure type (defined in InfoTables.h), there is a
+   corresponding structure defined below.  The name of the structure
+   is obtained by concatenating the closure type with '_closure'
+   -------------------------------------------------------------------------- */
+
+/* All closures follow the generic format */
+
+typedef struct StgClosure_ {
+    StgHeader   header;
+    struct StgClosure_ *payload[];
+} *StgClosurePtr; // StgClosure defined in rts/Types.h
+
+typedef struct StgThunk_ {
+    StgThunkHeader  header;
+    struct StgClosure_ *payload[];
+} StgThunk;
+
+typedef struct {
+    StgThunkHeader   header;
+    StgClosure *selectee;
+} StgSelector;
+
+/*
+   PAP payload contains pointers and non-pointers interleaved and we only have
+   one info table for PAPs (stg_PAP_info). To visit pointers in a PAP payload we
+   use the `fun`s bitmap. For a PAP with n_args arguments the first n_args bits
+   in the fun's bitmap tell us which payload locations contain pointers.
+*/
+typedef struct {
+    StgHeader   header;
+    StgHalfWord arity;          /* zero if it is an AP */
+    StgHalfWord n_args;
+    StgClosure *fun;            /* really points to a fun */
+    StgClosure *payload[];
+} StgPAP;
+
+typedef struct {
+    StgThunkHeader   header;
+    StgHalfWord arity;          /* zero if it is an AP */
+    StgHalfWord n_args;
+    StgClosure *fun;            /* really points to a fun */
+    StgClosure *payload[];
+} StgAP;
+
+typedef struct {
+    StgThunkHeader   header;
+    StgWord     size;                    /* number of words in payload */
+    StgClosure *fun;
+    StgClosure *payload[]; /* contains a chunk of *stack* */
+} StgAP_STACK;
+
+typedef struct {
+    StgHeader   header;
+    StgClosure *indirectee;
+} StgInd;
+
+typedef struct {
+    StgHeader     header;
+    StgClosure   *indirectee;
+    StgClosure   *static_link; // See Note [CAF lists]
+    const StgInfoTable *saved_info;
+        // `saved_info` also used for the link field for `debug_caf_list`,
+        // see `newCAF` and Note [CAF lists] in rts/sm/Storage.h.
+} StgIndStatic;
+
+typedef struct StgBlockingQueue_ {
+    StgHeader   header;
+    struct StgBlockingQueue_ *link;
+        // here so it looks like an IND, to be able to skip the queue without
+        // deleting it (done in wakeBlockingQueue())
+    StgClosure *bh;  // the BLACKHOLE
+    StgTSO     *owner;
+    struct MessageBlackHole_ *queue;
+        // holds TSOs blocked on `bh`
+} StgBlockingQueue;
+
+typedef struct {
+    StgHeader  header;
+    StgWord    bytes;
+    StgWord    payload[];
+} StgArrBytes;
+
+typedef struct {
+    StgHeader   header;
+    StgWord     ptrs;
+    StgWord     size; // ptrs plus card table
+    StgClosure *payload[];
+    // see also: StgMutArrPtrs macros in ClosureMacros.h
+} StgMutArrPtrs;
+
+typedef struct {
+    StgHeader   header;
+    StgWord     ptrs;
+    StgClosure *payload[];
+} StgSmallMutArrPtrs;
+
+typedef struct {
+    StgHeader   header;
+    StgClosure *var;
+} StgMutVar;
+
+typedef struct _StgUpdateFrame {
+    StgHeader  header;
+    StgClosure *updatee;
+} StgUpdateFrame;
+
+typedef struct {
+    StgHeader  header;
+    StgWord    exceptions_blocked;
+    StgClosure *handler;
+} StgCatchFrame;
+
+typedef struct {
+    const StgInfoTable* info;
+    struct StgStack_ *next_chunk;
+} StgUnderflowFrame;
+
+typedef struct {
+    StgHeader  header;
+} StgStopFrame;
+
+typedef struct {
+  StgHeader header;
+  StgWord data;
+} StgIntCharlikeClosure;
+
+/* statically allocated */
+typedef struct {
+  StgHeader  header;
+} StgRetry;
+
+typedef struct _StgStableName {
+  StgHeader      header;
+  StgWord        sn;
+} StgStableName;
+
+typedef struct _StgWeak {       /* Weak v */
+  StgHeader header;
+  StgClosure *cfinalizers;
+  StgClosure *key;
+  StgClosure *value;            /* v */
+  StgClosure *finalizer;
+  struct _StgWeak *link;
+} StgWeak;
+
+typedef struct _StgCFinalizerList {
+  StgHeader header;
+  StgClosure *link;
+  void (*fptr)(void);
+  void *ptr;
+  void *eptr;
+  StgWord flag; /* has environment (0 or 1) */
+} StgCFinalizerList;
+
+/* Byte code objects.  These are fixed size objects with pointers to
+ * three arrays, designed so that a BCO can be easily "re-linked" to
+ * other BCOs, to facilitate GHC's intelligent recompilation.  The
+ * array of instructions is static and not re-generated when the BCO
+ * is re-linked, but the other 2 arrays will be regenerated.
+ *
+ * A BCO represents either a function or a stack frame.  In each case,
+ * it needs a bitmap to describe to the garbage collector the
+ * pointerhood of its arguments/free variables respectively, and in
+ * the case of a function it also needs an arity.  These are stored
+ * directly in the BCO, rather than in the instrs array, for two
+ * reasons:
+ * (a) speed: we need to get at the bitmap info quickly when
+ *     the GC is examining APs and PAPs that point to this BCO
+ * (b) a subtle interaction with the compacting GC.  In compacting
+ *     GC, the info that describes the size/layout of a closure
+ *     cannot be in an object more than one level of indirection
+ *     away from the current object, because of the order in
+ *     which pointers are updated to point to their new locations.
+ */
+
+typedef struct {
+    StgHeader      header;
+    StgArrBytes   *instrs;      /* a pointer to an ArrWords */
+    StgArrBytes   *literals;    /* a pointer to an ArrWords */
+    StgMutArrPtrs *ptrs;        /* a pointer to a  MutArrPtrs */
+    StgHalfWord   arity;        /* arity of this BCO */
+    StgHalfWord   size;         /* size of this BCO (in words) */
+    StgWord       bitmap[];  /* an StgLargeBitmap */
+} StgBCO;
+
+#define BCO_BITMAP(bco)      ((StgLargeBitmap *)((StgBCO *)(bco))->bitmap)
+#define BCO_BITMAP_SIZE(bco) (BCO_BITMAP(bco)->size)
+#define BCO_BITMAP_BITS(bco) (BCO_BITMAP(bco)->bitmap)
+#define BCO_BITMAP_SIZEW(bco) ((BCO_BITMAP_SIZE(bco) + BITS_IN(StgWord) - 1) \
+                                / BITS_IN(StgWord))
+
+/* A function return stack frame: used when saving the state for a
+ * garbage collection at a function entry point.  The function
+ * arguments are on the stack, and we also save the function (its
+ * info table describes the pointerhood of the arguments).
+ *
+ * The stack frame size is also cached in the frame for convenience.
+ *
+ * The only RET_FUN is stg_gc_fun, which is created by __stg_gc_fun,
+ * both in HeapStackCheck.cmm.
+ */
+typedef struct {
+    const StgInfoTable* info;
+    StgWord        size;
+    StgClosure *   fun;
+    StgClosure *   payload[];
+} StgRetFun;
+
+/* Concurrent communication objects */
+
+typedef struct StgMVarTSOQueue_ {
+    StgHeader                header;
+    struct StgMVarTSOQueue_ *link;
+    struct StgTSO_          *tso;
+} StgMVarTSOQueue;
+
+typedef struct {
+    StgHeader                header;
+    struct StgMVarTSOQueue_ *head;
+    struct StgMVarTSOQueue_ *tail;
+    StgClosure*              value;
+} StgMVar;
+
+
+/* STM data structures
+ *
+ *  StgTVar defines the only type that can be updated through the STM
+ *  interface.
+ *
+ *  Note that various optimisations may be possible in order to use less
+ *  space for these data structures at the cost of more complexity in the
+ *  implementation:
+ *
+ *   - In StgTVar, current_value and first_watch_queue_entry could be held in
+ *     the same field: if any thread is waiting then its expected_value for
+ *     the tvar is the current value.
+ *
+ *   - In StgTRecHeader, it might be worthwhile having separate chunks
+ *     of read-only and read-write locations.  This would save a
+ *     new_value field in the read-only locations.
+ *
+ *   - In StgAtomicallyFrame, we could combine the waiting bit into
+ *     the header (maybe a different info tbl for a waiting transaction).
+ *     This means we can specialise the code for the atomically frame
+ *     (it immediately switches on frame->waiting anyway).
+ */
+
+typedef struct StgTRecHeader_ StgTRecHeader;
+
+typedef struct StgTVarWatchQueue_ {
+  StgHeader                  header;
+  StgClosure                *closure; // StgTSO
+  struct StgTVarWatchQueue_ *next_queue_entry;
+  struct StgTVarWatchQueue_ *prev_queue_entry;
+} StgTVarWatchQueue;
+
+typedef struct {
+  StgHeader                  header;
+  StgClosure                *current_value; /* accessed via atomics */
+  StgTVarWatchQueue         *first_watch_queue_entry; /* accessed via atomics */
+  StgInt                     num_updates; /* accessed via atomics */
+} StgTVar;
+
+/* new_value == expected_value for read-only accesses */
+/* new_value is a StgTVarWatchQueue entry when trec in state TREC_WAITING */
+typedef struct {
+  StgTVar                   *tvar;
+  StgClosure                *expected_value;
+  StgClosure                *new_value;
+#if defined(THREADED_RTS)
+  StgInt                     num_updates;
+#endif
+} TRecEntry;
+
+#define TREC_CHUNK_NUM_ENTRIES 16
+
+typedef struct StgTRecChunk_ {
+  StgHeader                  header;
+  struct StgTRecChunk_      *prev_chunk;
+  StgWord                    next_entry_idx;
+  TRecEntry                  entries[TREC_CHUNK_NUM_ENTRIES];
+} StgTRecChunk;
+
+typedef enum {
+  TREC_ACTIVE,        /* Transaction in progress, outcome undecided */
+  TREC_CONDEMNED,     /* Transaction in progress, inconsistent / out of date reads */
+  TREC_COMMITTED,     /* Transaction has committed, now updating tvars */
+  TREC_ABORTED,       /* Transaction has aborted, now reverting tvars */
+  TREC_WAITING,       /* Transaction currently waiting */
+} TRecState;
+
+struct StgTRecHeader_ {
+  StgHeader                  header;
+  struct StgTRecHeader_     *enclosing_trec;
+  StgTRecChunk              *current_chunk;
+  TRecState                  state;
+};
+
+typedef struct {
+  StgHeader   header;
+  StgClosure *code;
+  StgClosure *result;
+} StgAtomicallyFrame;
+
+typedef struct {
+  StgHeader   header;
+  StgClosure *code;
+  StgClosure *handler;
+} StgCatchSTMFrame;
+
+typedef struct {
+  StgHeader      header;
+  StgWord        running_alt_code;
+  StgClosure    *first_code;
+  StgClosure    *alt_code;
+} StgCatchRetryFrame;
+
+/* ----------------------------------------------------------------------------
+   Messages
+   ------------------------------------------------------------------------- */
+
+typedef struct Message_ {
+    StgHeader        header;
+    struct Message_ *link;
+} Message;
+
+typedef struct MessageWakeup_ {
+    StgHeader header;
+    Message  *link;
+    StgTSO   *tso;
+} MessageWakeup;
+
+typedef struct MessageThrowTo_ {
+    StgHeader   header;
+    struct MessageThrowTo_ *link;
+    StgTSO     *source;
+    StgTSO     *target;
+    StgClosure *exception;
+} MessageThrowTo;
+
+typedef struct MessageBlackHole_ {
+    StgHeader   header;
+    struct MessageBlackHole_ *link;
+        // here so it looks like an IND, to be able to skip the message without
+        // deleting it (done in throwToMsg())
+    StgTSO     *tso;
+    StgClosure *bh;
+} MessageBlackHole;
+
+/* ----------------------------------------------------------------------------
+   Compact Regions
+   ------------------------------------------------------------------------- */
+
+//
+// A compact region is a list of blocks.  Each block starts with an
+// StgCompactNFDataBlock structure, and the list is chained through the next
+// field of these structs.  (the link field of the bdescr is used to chain
+// together multiple compact regions on the compact_objects field of a
+// generation).
+//
+// See Note [Compact Normal Forms] for details
+//
+typedef struct StgCompactNFDataBlock_ {
+    struct StgCompactNFDataBlock_ *self;
+       // The address of this block.  This is copied over to the
+       // receiving end when serializing a compact, so the receiving
+       // end can allocate the block as best it can, and then
+       // verify whether pointer adjustment is needed by comparing
+       // self with the actual address.  The same data is sent over as
+       // SerializedCompact metadata, but having it here simplifies
+       // the fixup implementation.
+    struct StgCompactNFData_ *owner;
+       // the closure that owns this block (used in objectGetCompact)
+    struct StgCompactNFDataBlock_ *next;
+       // next block in the chain; the chain is used for serialization and freeing
+} StgCompactNFDataBlock;
+
+//
+// This is the Compact# primitive object.
+//
+typedef struct StgCompactNFData_ {
+    StgHeader header;
+      // for sanity and other checks; in practice, nothing should ever
+      // need the compact info pointer (we don't even need fwding
+      // pointers because it's a large object)
+    StgWord totalW;
+      // Total number of words in all blocks in the compact
+    StgWord autoBlockW;
+      // size of automatically appended blocks
+    StgPtr hp, hpLim;
+      // the beginning and end of the free area in the nursery block.  This is
+      // just a convenience so that we can avoid multiple indirections through
+      // the nursery pointer below during compaction.
+    StgCompactNFDataBlock *nursery;
+      // where to (try to) allocate from when appending
+    StgCompactNFDataBlock *last;
+      // the last block of the chain (to know where to append new
+      // blocks for resize)
+    struct hashtable *hash;
+      // the hash table for the current compaction, or NULL if
+      // there's no (sharing-preserved) compaction in progress.
+    StgClosure *result;
+      // Used temporarily to store the result of compaction.  Doesn't need to be
+      // a GC root.
+    struct StgCompactNFData_ *link;
+      // Used by compacting GC for linking CNFs with threaded hash tables. See
+      // Note [CNFs in compacting GC] in Compact.c for details.
+} StgCompactNFData;
diff --git a/rts/include/rts/storage/FunTypes.h b/rts/include/rts/storage/FunTypes.h
new file mode 100644
index 0000000000..3b066ef7dc
--- /dev/null
+++ b/rts/include/rts/storage/FunTypes.h
@@ -0,0 +1,54 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2002
+ *
+ * Things for functions.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+/* generic - function comes with a small bitmap */
+#define ARG_GEN      0
+
+/* generic - function comes with a large bitmap */
+#define ARG_GEN_BIG  1
+
+/* BCO - function is really a BCO */
+#define ARG_BCO      2
+
+/*
+ * Specialised function types: bitmaps and calling sequences
+ * for these functions are pre-generated: see ghc/utils/genapply and
+ * generated code in ghc/rts/AutoApply.cmm.
+ *
+ *  NOTE: other places to change if you change this table:
+ *       - utils/genapply/Main.hs: stackApplyTypes
+ *       - GHC.StgToCmm.Layout: stdPattern
+ */
+#define ARG_NONE     3
+#define ARG_N        4
+#define ARG_P        5
+#define ARG_F        6
+#define ARG_D        7
+#define ARG_L        8
+#define ARG_V16      9
+#define ARG_V32      10
+#define ARG_V64      11
+#define ARG_NN       12
+#define ARG_NP       13
+#define ARG_PN       14
+#define ARG_PP       15
+#define ARG_NNN      16
+#define ARG_NNP      17
+#define ARG_NPN      18
+#define ARG_NPP      19
+#define ARG_PNN      20
+#define ARG_PNP      21
+#define ARG_PPN      22
+#define ARG_PPP      23
+#define ARG_PPPP     24
+#define ARG_PPPPP    25
+#define ARG_PPPPPP   26
+#define ARG_PPPPPPP  27
+#define ARG_PPPPPPPP 28
diff --git a/rts/include/rts/storage/GC.h b/rts/include/rts/storage/GC.h
new file mode 100644
index 0000000000..478503aaee
--- /dev/null
+++ b/rts/include/rts/storage/GC.h
@@ -0,0 +1,261 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2004
+ *
+ * External Storage Manager Interface
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+#include <stddef.h>
+#include "rts/OSThreads.h"
+
+/* -----------------------------------------------------------------------------
+ * Generational GC
+ *
+ * We support an arbitrary number of generations.  Notes (in no particular
+ * order):
+ *
+ *       - Objects "age" in the nursery for one GC cycle before being promoted
+ *         to the next generation.  There is no aging in other generations.
+ *
+ *       - generation 0 is the allocation area.  It is given
+ *         a fixed set of blocks during initialisation, and these blocks
+ *         normally stay in G0S0.  In parallel execution, each
+ *         Capability has its own nursery.
+ *
+ *       - during garbage collection, each generation which is an
+ *         evacuation destination (i.e. all generations except G0) is
+ *         allocated a to-space.  evacuated objects are allocated into
+ *         the generation's to-space until GC is finished, when the
+ *         original generation's contents may be freed and replaced
+ *         by the to-space.
+ *
+ *       - the mutable-list is per-generation.  G0 doesn't have one
+ *         (since every garbage collection collects at least G0).
+ *
+ *       - block descriptors contain a pointer to the generation that
+ *         the block belongs to, for convenience.
+ *
+ *       - static objects are stored in per-generation lists.  See GC.c for
+ *         details of how we collect CAFs in the generational scheme.
+ *
+ *       - large objects are per-generation, and are promoted in the
+ *         same way as small objects.
+ *
+ * ------------------------------------------------------------------------- */
+
+// A count of blocks needs to store anything up to the size of memory
+// divided by the block size.  The safest thing is therefore to use a
+// type that can store the full range of memory addresses,
+// ie. StgWord.  Note that we have had some tricky int overflows in a
+// couple of cases caused by using ints rather than longs (e.g. #5086)
+
+typedef StgWord memcount;
+
+typedef struct nursery_ {
+    bdescr *       blocks;
+    memcount       n_blocks;
+} nursery;
+
+// Nursery invariants:
+//
+//  - cap->r.rNursery points to the nursery for this capability
+//
+//  - cap->r.rCurrentNursery points to the block in the nursery that we are
+//    currently allocating into.  While in Haskell the current heap pointer is
+//    in Hp, outside Haskell it is stored in cap->r.rCurrentNursery->free.
+//
+//  - the blocks *after* cap->r.rCurrentNursery in the chain are empty
+//    (although their bd->free pointers have not been updated to
+//    reflect that)
+//
+//  - the blocks *before* cap->r.rCurrentNursery have been used, except
+//    for cap->r.rCurrentAlloc.
+//
+//  - cap->r.rCurrentAlloc is either NULL, or it points to a block in
+//    the nursery *before* cap->r.rCurrentNursery.
+//
+// See also Note [allocation accounting] to understand how total
+// memory allocation is tracked.
+
+typedef struct generation_ {
+    uint32_t       no;                  // generation number
+
+    bdescr *       blocks;              // blocks in this gen
+    memcount       n_blocks;            // number of blocks
+    memcount       n_words;             // number of used words
+
+    bdescr *       large_objects;       // large objects (doubly linked)
+    memcount       n_large_blocks;      // no. of blocks used by large objs
+    memcount       n_large_words;       // no. of words used by large objs
+    memcount       n_new_large_words;   // words of new large objects
+                                        // (for doYouWantToGC())
+
+    bdescr *       compact_objects;     // compact objects chain
+                                        // the second block in each compact is
+                                        // linked from the closure object, while
+                                        // the second compact object in the
+                                        // chain is linked from bd->link (like
+                                        // large objects)
+    memcount       n_compact_blocks;    // no. of blocks used by all compacts
+    bdescr *       compact_blocks_in_import; // compact objects being imported
+                                             // (not known to the GC because
+                                             // potentially invalid, but we
+                                             // need to keep track of them
+                                             // to avoid assertions in Sanity)
+                                             // this is a list shaped like compact_objects
+    memcount       n_compact_blocks_in_import; // no. of blocks used by compacts
+                                               // being imported
+
+    // Max blocks to allocate in this generation before collecting it. Collect
+    // this generation when
+    //
+    //     n_blocks + n_large_blocks + n_compact_blocks > max_blocks
+    //
+    memcount       max_blocks;
+
+    StgTSO *       threads;             // threads in this gen
+                                        // linked via global_link
+    StgWeak *      weak_ptr_list;       // weak pointers in this gen
+
+    struct generation_ *to;             // destination gen for live objects
+
+    // stats information
+    uint32_t collections;
+    uint32_t par_collections;
+    uint32_t failed_promotions;         // Currently unused
+
+    // ------------------------------------
+    // Fields below are used during GC only
+
+#if defined(THREADED_RTS)
+    char pad[128];                      // make sure the following is
+                                        // on a separate cache line.
+    SpinLock     sync;                  // lock for large_objects
+                                        //    and scavenged_large_objects
+#endif
+
+    int          mark;                  // mark (not copy)? (old gen only)
+    int          compact;               // compact (not sweep)? (old gen only)
+
+    // During GC, if we are collecting this gen, blocks and n_blocks
+    // are copied into the following two fields.  After GC, these blocks
+    // are freed.
+    bdescr *     old_blocks;            // bdescr of first from-space block
+    memcount     n_old_blocks;         // number of blocks in from-space
+    memcount     live_estimate;         // for sweeping: estimate of live data
+
+    bdescr *     scavenged_large_objects;  // live large objs after GC (d-link)
+    memcount     n_scavenged_large_blocks; // size (not count) of above
+
+    bdescr *     live_compact_objects;  // live compact objs after GC (d-link)
+    memcount     n_live_compact_blocks; // size (not count) of above
+
+    bdescr *     bitmap;                // bitmap for compacting collection
+
+    StgTSO *     old_threads;
+    StgWeak *    old_weak_ptr_list;
+} generation;
+
+extern generation * generations;
+extern generation * g0;
+extern generation * oldest_gen;
+
+typedef void(*ListBlocksCb)(void *user, bdescr *);
+void listAllBlocks(ListBlocksCb cb, void *user);
+
+/* -----------------------------------------------------------------------------
+   Generic allocation
+
+   StgPtr allocate(Capability *cap, W_ n)
+                                Allocates memory from the nursery in
+                                the current Capability.
+
+   StgPtr allocatePinned(Capability *cap, W_ n, W_ alignment, W_ align_off)
+                                Allocates a chunk of contiguous store
+                                n words long, which is at a fixed
+                                address (won't be moved by GC). The
+                                word at the byte offset 'align_off'
+                                will be aligned to 'alignment', which
+                                must be a power of two.
+                                Returns a pointer to the first word.
+                                Always succeeds.
+
+                                NOTE: the GC can't in general handle
+                                pinned objects, so allocatePinned()
+                                can only be used for ByteArrays at the
+                                moment.
+
+                                Don't forget to TICK_ALLOC_XXX(...)
+                                after calling allocate or
+                                allocatePinned, for the
+                                benefit of the ticky-ticky profiler.
+
+   -------------------------------------------------------------------------- */
+
+StgPtr  allocate          ( Capability *cap, W_ n );
+StgPtr  allocateMightFail ( Capability *cap, W_ n );
+StgPtr  allocatePinned    ( Capability *cap, W_ n, W_ alignment, W_ align_off);
+
+/* memory allocator for executable memory */
+typedef void* AdjustorWritable;
+typedef void* AdjustorExecutable;
+
+void flushExec(W_ len, AdjustorExecutable exec_addr);
+
+// Used by GC checks in external .cmm code:
+extern W_ large_alloc_lim;
+
+/* -----------------------------------------------------------------------------
+   Performing Garbage Collection
+   -------------------------------------------------------------------------- */
+
+void performGC(void);
+void performMajorGC(void);
+
+/* -----------------------------------------------------------------------------
+   The CAF table - used to let us revert CAFs in GHCi
+   -------------------------------------------------------------------------- */
+
+StgInd *newCAF         (StgRegTable *reg, StgIndStatic *caf);
+StgInd *newRetainedCAF (StgRegTable *reg, StgIndStatic *caf);
+StgInd *newGCdCAF      (StgRegTable *reg, StgIndStatic *caf);
+void revertCAFs (void);
+
+// Request that all CAFs are retained indefinitely.
+// (preferably use RtsConfig.keep_cafs instead)
+void setKeepCAFs (void);
+
+// Let the runtime know that all the CAFs in high mem are not
+// to be retained. Useful in conjunction with loadNativeObj
+void setHighMemDynamic (void);
+
+/* -----------------------------------------------------------------------------
+   This is the write barrier for MUT_VARs, a.k.a. IORefs.  A
+   MUT_VAR_CLEAN object is not on the mutable list; a MUT_VAR_DIRTY
+   is.  When written to, a MUT_VAR_CLEAN turns into a MUT_VAR_DIRTY
+   and is put on the mutable list.
+   -------------------------------------------------------------------------- */
+
+void dirty_MUT_VAR(StgRegTable *reg, StgMutVar *mv, StgClosure *old);
+
+/* set to disable CAF garbage collection in GHCi. */
+/* (needed when dynamic libraries are used). */
+extern bool keepCAFs;
+
+#include "rts/Flags.h"
+
+INLINE_HEADER void initBdescr(bdescr *bd, generation *gen, generation *dest)
+{
+    RELAXED_STORE(&bd->gen, gen);           // generation this block belongs to
+    RELAXED_STORE(&bd->gen_no, gen->no);    // copy of gen's generation number
+    RELAXED_STORE(&bd->dest_no, dest->no);  // generation number of `dest`
+
+#if !IN_STG_CODE
+    /* See Note [RtsFlags is a pointer in STG code] */
+    ASSERT(gen->no < RtsFlags.GcFlags.generations);   // both generation numbers must be
+    ASSERT(dest->no < RtsFlags.GcFlags.generations);  // below RtsFlags.GcFlags.generations
+#endif
+}
diff --git a/rts/include/rts/storage/Heap.h b/rts/include/rts/storage/Heap.h
new file mode 100644
index 0000000000..b950df76e5
--- /dev/null
+++ b/rts/include/rts/storage/Heap.h
@@ -0,0 +1,27 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The University of Glasgow 2006-2017
+ *
+ * Introspection into GHC's heap representation
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+#include "rts/storage/Closures.h"
+
+StgMutArrPtrs *heap_view_closurePtrs(Capability *cap, StgClosure *closure);
+
+void heap_view_closure_ptrs_in_pap_payload(StgClosure *ptrs[], StgWord *nptrs
+                        , StgClosure *fun, StgClosure **payload, StgWord size);
+
+StgWord heap_view_closureSize(StgClosure *closure);
+
+/*
+ * Collect the pointers of a closure into the given array. `size` should be
+ * large enough to hold all collected pointers e.g.
+ * `heap_view_closureSize(closure)`. Returns the number of pointers collected.
+ * The caller must ensure that `closure` is not modified (or moved by the GC)
+ * for the duration of the call to `collect_pointers`.
+ */
+StgWord collect_pointers(StgClosure *closure, StgWord size, StgClosure *ptrs[size]);
diff --git a/rts/include/rts/storage/InfoTables.h b/rts/include/rts/storage/InfoTables.h
new file mode 100644
index 0000000000..b97e12982b
--- /dev/null
+++ b/rts/include/rts/storage/InfoTables.h
@@ -0,0 +1,405 @@
+/* ----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2002
+ *
+ * Info Tables
+ *
+ * -------------------------------------------------------------------------- */
+
+#pragma once
+
+/* ----------------------------------------------------------------------------
+   Relative pointers
+
+   Several pointer fields in info tables are expressed as offsets
+   relative to the info pointer, so that we can generate
+   position-independent code.
+
+   Note [x86-64-relative]
+   There is a complication on the x86_64 platform, where pointers are
+   64 bits, but the tools don't support 64-bit relative relocations.
+   However, the default memory model (small) ensures that all symbols
+   have values in the lower 2Gb of the address space, so offsets all
+   fit in 32 bits.  Hence we can use 32-bit offset fields.
+
+   Somewhere between binutils-2.16.1 and binutils-2.16.91.0.6,
+   support for 64-bit PC-relative relocations was added, so maybe this
+   hackery can go away sometime.
+   ------------------------------------------------------------------------- */
+
+#if defined(x86_64_HOST_ARCH)
+#define OFFSET_FIELD(n) StgHalfInt n; StgHalfWord __pad_##n
+#else
+#define OFFSET_FIELD(n) StgInt n
+#endif
+
+/* -----------------------------------------------------------------------------
+   Profiling info
+   -------------------------------------------------------------------------- */
+
+typedef struct {
+#if !defined(TABLES_NEXT_TO_CODE)
+    char *closure_type;             /* read via GET_PROF_TYPE */
+    char *closure_desc;             /* read via GET_PROF_DESC */
+#else
+    OFFSET_FIELD(closure_type_off); /* offset form; decoded by GET_PROF_TYPE */
+    OFFSET_FIELD(closure_desc_off); /* offset form; decoded by GET_PROF_DESC */
+#endif
+} StgProfInfo;
+
+/* -----------------------------------------------------------------------------
+   Closure flags
+   -------------------------------------------------------------------------- */
+
+/* The type flags provide quick access to certain properties of a closure. */
+
+#define _HNF (1<<0)  /* head normal form?    */
+#define _BTM (1<<1)  /* uses info->layout.bitmap */
+#define _NS  (1<<2)  /* non-sparkable        */
+#define _THU (1<<3)  /* thunk?               */
+#define _MUT (1<<4)  /* mutable?             */
+#define _UPT (1<<5)  /* unpointed?           */
+#define _SRT (1<<6)  /* has an SRT?          */
+#define _IND (1<<7)  /* is an indirection?   */
+
+#define isMUTABLE(flags)   ((flags) &_MUT)
+#define isBITMAP(flags)    ((flags) &_BTM)
+#define isTHUNK(flags)     ((flags) &_THU)
+#define isUNPOINTED(flags) ((flags) &_UPT)
+#define hasSRT(flags)      ((flags) &_SRT)
+
+extern StgWord16 closure_flags[];
+
+#define closureFlags(c)         (closure_flags[get_itbl \
+                                    (UNTAG_CONST_CLOSURE(c))->type])
+
+#define closure_HNF(c)          (  closureFlags(c) & _HNF)
+#define closure_BITMAP(c)       (  closureFlags(c) & _BTM)
+#define closure_NON_SPARK(c)    ( (closureFlags(c) & _NS))
+#define closure_SHOULD_SPARK(c) (!(closureFlags(c) & _NS))
+#define closure_THUNK(c)        (  closureFlags(c) & _THU)
+#define closure_MUTABLE(c)      (  closureFlags(c) & _MUT)
+#define closure_UNPOINTED(c)    (  closureFlags(c) & _UPT)
+#define closure_SRT(c)          (  closureFlags(c) & _SRT)
+#define closure_IND(c)          (  closureFlags(c) & _IND)
+
+/* same as closureFlags, but for an info-table pointer rather than a closure */
+#define ipFlags(ip)             (closure_flags[(ip)->type])
+
+#define ip_HNF(ip)               (  ipFlags(ip) & _HNF)
+#define ip_BITMAP(ip)            (  ipFlags(ip) & _BTM)
+#define ip_SHOULD_SPARK(ip)      (!(ipFlags(ip) & _NS))
+#define ip_THUNK(ip)             (  ipFlags(ip) & _THU)
+#define ip_MUTABLE(ip)           (  ipFlags(ip) & _MUT)
+#define ip_UNPOINTED(ip)         (  ipFlags(ip) & _UPT)
+#define ip_SRT(ip)               (  ipFlags(ip) & _SRT)
+#define ip_IND(ip)               (  ipFlags(ip) & _IND)
+
+/* -----------------------------------------------------------------------------
+   Bitmaps
+
+   These are used to describe the pointerhood of a sequence of words
+   (usually on the stack) to the garbage collector.  The two primary
+   uses are for stack frames, and functions (where we need to describe
+   the layout of a PAP to the GC).
+
+   In these bitmaps: 0 == ptr, 1 == non-ptr.
+   -------------------------------------------------------------------------- */
+
+/*
+ * Small bitmaps:  for a small bitmap, we store the size and bitmap in
+ * the same word, using the following macros.  If the bitmap doesn't
+ * fit in a single word, we use a pointer to an StgLargeBitmap below.
+ */
+#define MK_SMALL_BITMAP(size,bits) (((bits)<<BITMAP_BITS_SHIFT) | (size))
+
+#define BITMAP_SIZE(bitmap) ((bitmap) & BITMAP_SIZE_MASK)
+#define BITMAP_BITS(bitmap) ((bitmap) >> BITMAP_BITS_SHIFT)
+
+/*
+ * A large bitmap: a size field followed by a flexible array of bitmap words.
+ */
+typedef struct {
+  StgWord size;
+  StgWord bitmap[];
+} StgLargeBitmap;
+
+/* ----------------------------------------------------------------------------
+   Info Tables
+   ------------------------------------------------------------------------- */
+
+/*
+ * Stuff describing the closure layout.  Well, actually, it might
+ * contain the selector index for a THUNK_SELECTOR.  This union is one
+ * word long.
+ */
+typedef union {
+    struct {                    /* Heap closure payload layout: */
+        StgHalfWord ptrs;       /* number of pointers */
+        StgHalfWord nptrs;      /* number of non-pointers */
+    } payload;
+
+    StgWord bitmap;               /* word-sized bit pattern describing */
+                                  /*  a stack frame: see below */
+
+#if !defined(TABLES_NEXT_TO_CODE)
+    StgLargeBitmap* large_bitmap; /* pointer to large bitmap structure */
+#else
+    OFFSET_FIELD(large_bitmap_offset);  /* offset from info table to large bitmap structure */
+#endif
+
+    StgWord selector_offset;      /* used in THUNK_SELECTORs */
+
+} StgClosureInfo;
+
+
+#if defined(x86_64_HOST_ARCH) && defined(TABLES_NEXT_TO_CODE)
+// On x86_64 we can fit a pointer offset in half a word, so put the SRT offset
+// in the info->srt field directly.
+//
+// See the section "Referring to an SRT from the info table" in
+// Note [SRTs] in CmmBuildInfoTables.hs
+#define USE_INLINE_SRT_FIELD
+#endif
+
+#if defined(USE_INLINE_SRT_FIELD)
+// offset to the SRT / closure, or zero if there's no SRT
+typedef StgHalfInt StgSRTField;
+#else
+// non-zero if there is an SRT, the offset is in the optional srt field.
+typedef StgHalfWord StgSRTField;
+#endif
+
+
+/*
+ * The "standard" part of an info table.  Every info table has this bit.
+ */
+typedef struct StgInfoTable_ {
+
+#if !defined(TABLES_NEXT_TO_CODE)
+    StgFunPtr       entry;      /* pointer to the entry code */
+#endif
+
+#if defined(PROFILING)
+    StgProfInfo     prof;
+#endif
+
+    StgClosureInfo  layout;     /* closure layout info (one word) */
+
+    StgHalfWord     type;       /* closure type */
+    StgSRTField     srt;
+       /* In a CONSTR:
+            - the zero-based constructor tag
+          In a FUN/THUNK
+            - if USE_INLINE_SRT_FIELD
+              - offset to the SRT (or zero if no SRT)
+            - otherwise
+              - non-zero if there is an SRT, offset is in srt_offset
+       */
+
+#if defined(TABLES_NEXT_TO_CODE)
+    StgCode         code[];
+#endif
+} *StgInfoTablePtr; // StgInfoTable defined in rts/Types.h
+
+
+/* -----------------------------------------------------------------------------
+   Function info tables
+
+   This is the general form of function info tables.  The compiler
+   will omit some of the fields in common cases:
+
+   -  If fun_type is not ARG_GEN or ARG_GEN_BIG, then the slow_apply
+      and bitmap fields may be left out (they are at the end, so omitting
+      them doesn't affect the layout).
+
+   -  If the srt field of the std info table part is zero (no SRT), then
+      the SRT pointer/offset field here needn't be set.  This only applies
+      if the slow_apply and bitmap fields have also been omitted.
+   -------------------------------------------------------------------------- */
+
+/*
+   Note [Encoding static reference tables]
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+   As static reference tables appear frequently in code, we use a special
+   compact encoding for the common case of a module defining only a few CAFs: We
+   produce one table containing a list of CAFs in the module and then include a
+   bitmap in each info table describing which entries of this table the closure
+   references.
+ */
+
+typedef struct StgFunInfoExtraRev_ {
+    OFFSET_FIELD(slow_apply_offset); /* apply to args on the stack */
+    union {
+        StgWord bitmap;
+        OFFSET_FIELD(bitmap_offset);    /* arg ptr/nonptr bitmap */
+    } b;
+#if !defined(USE_INLINE_SRT_FIELD)
+    OFFSET_FIELD(srt_offset);   /* pointer to the SRT closure */
+#endif
+    StgHalfWord    fun_type;    /* function type */
+    StgHalfWord    arity;       /* function arity */
+} StgFunInfoExtraRev;
+
+typedef struct StgFunInfoExtraFwd_ {
+    StgHalfWord    fun_type;    /* function type */
+    StgHalfWord    arity;       /* function arity */
+    StgClosure    *srt;         /* pointer to the SRT closure */
+    union { /* union for compat. with TABLES_NEXT_TO_CODE version */
+        StgWord        bitmap;  /* arg ptr/nonptr bitmap */
+    } b;
+    StgFun         *slow_apply; /* apply to args on the stack */
+} StgFunInfoExtraFwd;
+
+typedef struct {
+#if defined(TABLES_NEXT_TO_CODE)
+    StgFunInfoExtraRev f;
+    StgInfoTable i;
+#else
+    StgInfoTable i;
+    StgFunInfoExtraFwd f;
+#endif
+} StgFunInfoTable;
+
+// canned bitmap for each arg type, indexed by constants in FunTypes.h
+extern const StgWord stg_arg_bitmaps[];
+
+/* -----------------------------------------------------------------------------
+   Return info tables
+   -------------------------------------------------------------------------- */
+
+/*
+ * When info tables are laid out backwards, we can omit the SRT
+ * pointer iff the srt field of the standard info table (i.srt) is zero.
+ */
+
+typedef struct {
+#if defined(TABLES_NEXT_TO_CODE)
+#if !defined(USE_INLINE_SRT_FIELD)
+    OFFSET_FIELD(srt_offset);   /* offset to the SRT closure */
+#endif
+    StgInfoTable i;
+#else
+    StgInfoTable i;
+    StgClosure  *srt;           /* pointer to the SRT closure */
+#endif
+} StgRetInfoTable;
+
+/* -----------------------------------------------------------------------------
+   Thunk info tables
+   -------------------------------------------------------------------------- */
+
+/*
+ * When info tables are laid out backwards, we can omit the SRT
+ * pointer iff the srt field of the standard info table (i.srt) is zero.
+ */
+
+typedef struct StgThunkInfoTable_ {
+#if defined(TABLES_NEXT_TO_CODE)
+#if !defined(USE_INLINE_SRT_FIELD)
+    OFFSET_FIELD(srt_offset);   /* offset to the SRT closure */
+#endif
+    StgInfoTable i;
+#else
+    StgInfoTable i;
+    StgClosure  *srt;           /* pointer to the SRT closure */
+#endif
+} StgThunkInfoTable;
+
+/* -----------------------------------------------------------------------------
+   Constructor info tables
+   -------------------------------------------------------------------------- */
+
+typedef struct StgConInfoTable_ {
+#if !defined(TABLES_NEXT_TO_CODE)
+    StgInfoTable i;
+#endif
+
+#if defined(TABLES_NEXT_TO_CODE)
+    OFFSET_FIELD(con_desc); // the name of the data constructor
+                            // as: Package:Module.Name
+#else
+    char *con_desc;
+#endif
+
+#if defined(TABLES_NEXT_TO_CODE)
+    StgInfoTable i;
+#endif
+} StgConInfoTable;
+
+
+/* -----------------------------------------------------------------------------
+   Accessor macros for fields that might be offsets (C version)
+   -------------------------------------------------------------------------- */
+
+/*
+ * GET_SRT(info): info must be a Stg[Ret|Thunk]InfoTable* (one that has an SRT).
+ * Keyed on USE_INLINE_SRT_FIELD, which also decides whether srt_offset exists.
+ */
+#if defined(TABLES_NEXT_TO_CODE)
+#if defined(USE_INLINE_SRT_FIELD)
+#define GET_SRT(info) \
+  ((StgClosure*) (((StgWord) ((info)+1)) + (info)->i.srt))
+#else
+#define GET_SRT(info) \
+  ((StgClosure*) (((StgWord) ((info)+1)) + (info)->srt_offset))
+#endif
+#else // !TABLES_NEXT_TO_CODE
+#define GET_SRT(info) ((info)->srt)
+#endif
+
+/*
+ * GET_CON_DESC(info)
+ * info must be a StgConInfoTable*.
+ */
+#if defined(TABLES_NEXT_TO_CODE)
+#define GET_CON_DESC(info) \
+            ((const char *)((StgWord)((info)+1) + ((info)->con_desc)))
+#else
+#define GET_CON_DESC(info) ((const char *)(info)->con_desc)
+#endif
+
+/*
+ * GET_FUN_SRT(info): info must be a StgFunInfoTable*.
+ * Keyed on USE_INLINE_SRT_FIELD, which also decides whether f.srt_offset exists.
+ */
+#if defined(TABLES_NEXT_TO_CODE)
+#if defined(USE_INLINE_SRT_FIELD)
+#define GET_FUN_SRT(info) \
+  ((StgClosure*) (((StgWord) ((info)+1)) + (info)->i.srt))
+#else
+#define GET_FUN_SRT(info) \
+  ((StgClosure*) (((StgWord) ((info)+1)) + (info)->f.srt_offset))
+#endif
+#else
+#define GET_FUN_SRT(info) ((info)->f.srt)
+#endif
+
+#if defined(TABLES_NEXT_TO_CODE)
+#define GET_LARGE_BITMAP(info) ((StgLargeBitmap*) (((StgWord) ((info)+1)) \
+                                        + (info)->layout.large_bitmap_offset))
+#else
+#define GET_LARGE_BITMAP(info) ((info)->layout.large_bitmap)
+#endif
+
+#if defined(TABLES_NEXT_TO_CODE)
+#define GET_FUN_LARGE_BITMAP(info) ((StgLargeBitmap*) (((StgWord) ((info)+1)) \
+                                        + (info)->f.b.bitmap_offset))
+#else
+#define GET_FUN_LARGE_BITMAP(info) ((StgLargeBitmap*) ((info)->f.b.bitmap))
+#endif
+
+/*
+ * GET_PROF_TYPE(info), GET_PROF_DESC(info): the prof.closure_type/_desc strings
+ */
+#if defined(TABLES_NEXT_TO_CODE)
+#define GET_PROF_TYPE(info) ((char *)((StgWord)((info)+1) + ((info)->prof.closure_type_off)))
+#else
+#define GET_PROF_TYPE(info) ((info)->prof.closure_type)
+#endif
+#if defined(TABLES_NEXT_TO_CODE)
+#define GET_PROF_DESC(info) ((char *)((StgWord)((info)+1) + ((info)->prof.closure_desc_off)))
+#else
+#define GET_PROF_DESC(info) ((info)->prof.closure_desc)
+#endif
diff --git a/rts/include/rts/storage/MBlock.h b/rts/include/rts/storage/MBlock.h
new file mode 100644
index 0000000000..3acefda9a0
--- /dev/null
+++ b/rts/include/rts/storage/MBlock.h
@@ -0,0 +1,32 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2008
+ *
+ * MegaBlock Allocator interface.
+ *
+ * See wiki commentary at
+ *  https://gitlab.haskell.org/ghc/ghc/wikis/commentary/heap-alloced
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+extern W_ peak_mblocks_allocated;
+extern W_ mblocks_allocated;
+
+extern void initMBlocks(void);
+extern void * getMBlock(void);
+extern void * getMBlocks(uint32_t n);
+extern void * getMBlockOnNode(uint32_t node);
+extern void * getMBlocksOnNode(uint32_t node, uint32_t n);
+extern void freeMBlocks(void *addr, uint32_t n);
+extern void releaseFreeMemory(void);
+extern void freeAllMBlocks(void);
+
+extern void *getFirstMBlock(void **state);
+extern void *getNextMBlock(void **state, void *mblock);
+
+#if defined(THREADED_RTS)
+// needed for HEAP_ALLOCED below
+extern SpinLock gc_alloc_block_sync;
+#endif
diff --git a/rts/include/rts/storage/TSO.h b/rts/include/rts/storage/TSO.h
new file mode 100644
index 0000000000..61215d9f38
--- /dev/null
+++ b/rts/include/rts/storage/TSO.h
@@ -0,0 +1,330 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2009
+ *
+ * The definitions for Thread State Objects.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+/*
+ * PROFILING info in a TSO
+ */
+typedef struct {
+  CostCentreStack *cccs;       /* thread's current CCS */
+} StgTSOProfInfo;
+
+/*
+ * There is no TICKY info in a TSO at this time.
+ */
+
+/*
+ * Thread IDs are 64 bits.
+ */
+typedef StgWord64 StgThreadID;
+
+#define tsoLocked(tso) ((tso)->flags & TSO_LOCKED)
+
+/*
+ * Type returned after running a thread.  Values of this type
+ * include HeapOverflow, StackOverflow etc.  See Constants.h for the
+ * full list.
+ */
+typedef unsigned int StgThreadReturnCode;
+
+#if defined(mingw32_HOST_OS)
+/* results from an async I/O request + its request ID. */
+typedef struct {
+  unsigned int reqID;
+  int          len;
+  int          errCode;
+} StgAsyncIOResult;
+#endif
+
+/* Reason for thread being blocked. See comment above struct StgTSO_. */
+typedef union {
+  StgClosure *closure;
+  StgTSO *prev; // a back-link when the TSO is on the run queue (NotBlocked)
+  struct MessageBlackHole_ *bh;
+  struct MessageThrowTo_ *throwto;
+  struct MessageWakeup_  *wakeup;
+  StgInt fd;    /* StgInt instead of int, so that it's the same size as the ptrs */
+#if defined(mingw32_HOST_OS)
+  StgAsyncIOResult *async_result;
+#endif
+#if !defined(THREADED_RTS)
+  StgWord target;
+    // Only for the non-threaded RTS: the target time for a thread
+    // blocked in threadDelay, in units of 1ms.  This is a
+    // compromise: we don't want to take up much space in the TSO.  If
+    // you want better resolution for threadDelay, use -threaded.
+#endif
+} StgTSOBlockInfo;
+
+
+/*
+ * TSOs live on the heap, and therefore look just like heap objects.
+ * Large TSOs will live in their own "block group" allocated by the
+ * storage manager, and won't be copied during garbage collection.
+ */
+
+/*
+ * Threads may be blocked for several reasons.  A blocked thread will
+ * have the reason in the why_blocked field of the TSO, and some
+ * further info (such as the closure the thread is blocked on, or the
+ * file descriptor if the thread is waiting on I/O) in the block_info
+ * field.
+ */
+
+typedef struct StgTSO_ {
+    StgHeader               header;
+
+    /* The link field, for linking threads together in lists (e.g. the
+       run queue on a Capability).
+    */
+    struct StgTSO_*         _link;
+    /*
+      Currently used for linking TSOs on:
+      * cap->run_queue_{hd,tl}
+      * the blocked_queue (non-THREADED_RTS only)
+      * and for pointing to the next chunk of a ThreadOldStack
+
+       NOTE!!!  do not modify _link directly, it is subject to
+       a write barrier for generational GC.  Instead use the
+       setTSOLink() function.  Exceptions to this rule are:
+
+       * setting the link field to END_TSO_QUEUE
+       * setting the link field of the currently running TSO, as it
+         will already be dirty.
+    */
+
+    struct StgTSO_*         global_link;    // Links threads on the
+                                            // generation->threads lists
+
+    /*
+     * The thread's stack
+     */
+    struct StgStack_       *stackobj;
+
+    /*
+     * The tso->dirty flag indicates that this TSO's stack should be
+     * scanned during garbage collection.  It also indicates that this
+     * TSO is on the mutable list.
+     *
+     * NB. The dirty flag gets a word to itself, so that it can be set
+     * safely by multiple threads simultaneously (the flags field is
+     * not safe for this purpose; see #3429).  It is harmless for the
+     * TSO to be on the mutable list multiple times.
+     *
+     * tso->dirty is set by dirty_TSO(), and unset by the garbage
+     * collector (only).
+     */
+
+    StgWord16               what_next;      // Values defined in Constants.h
+    StgWord16               why_blocked;    // Values defined in Constants.h
+    StgWord32               flags;          // Values defined in Constants.h
+    StgTSOBlockInfo         block_info;
+    StgThreadID             id;
+    StgWord32               saved_errno;
+    StgWord32               dirty;          /* non-zero => dirty */
+    struct InCall_*         bound;
+    struct Capability_*     cap;
+
+    struct StgTRecHeader_ * trec;       /* STM transaction record */
+
+    /*
+     * A list of threads blocked on this TSO waiting to throw exceptions.
+    */
+    struct MessageThrowTo_ * blocked_exceptions;
+
+    /*
+     * A list of StgBlockingQueue objects, representing threads
+     * blocked on thunks that are under evaluation by this thread.
+    */
+    struct StgBlockingQueue_ *bq;
+
+    /*
+     * The allocation limit for this thread, which is updated as the
+     * thread allocates.  If the value drops below zero, and
+     * TSO_ALLOC_LIMIT is set in flags, we raise an exception in the
+     * thread, and give the thread a little more space to handle the
+     * exception before we raise the exception again.
+     *
+     * This is an integer, because we might update it in a place where
+     * it isn't convenient to raise the exception, so we want it to
+     * stay negative until we get around to checking it.
+     *
+     * In C code, use only the PK_Int64/ASSIGN_Int64 macros to get/set the
+     * value of alloc_limit; otherwise you will cause alignment issues on SPARC.
+     */
+    StgInt64  alloc_limit;     /* in bytes */
+
+    /*
+     * sum of the sizes of all stack chunks (in words), used to decide
+     * whether to throw the StackOverflow exception when the stack
+     * overflows, or whether to just chain on another stack chunk.
+     *
+     * Note that this overestimates the real stack size, because each
+     * chunk will have a gap at the end, of +RTS -kb<size> words.
+     * This means stack overflows are not entirely accurate, because
+     * the more gaps there are, the sooner the stack will run into the
+     * hard +RTS -K<size> limit.
+     */
+    StgWord32  tot_stack_size;
+
+#if defined(TICKY_TICKY)
+    /* TICKY-specific stuff would go here. */
+#endif
+#if defined(PROFILING)
+    StgTSOProfInfo prof;
+#endif
+#if defined(mingw32_HOST_OS)
+    StgWord32 saved_winerror;
+#endif
+
+} *StgTSOPtr; // StgTSO defined in rts/Types.h
+
+/* Note [StgStack dirtiness flags and concurrent marking]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * Without concurrent collection by the nonmoving collector the stack dirtiness story
+ * is quite simple: The stack is either STACK_DIRTY (meaning it has been added to mut_list)
+ * or not.
+ *
+ * However, things are considerably more complicated with concurrent collection
+ * (namely, when nonmoving_write_barrier_enabled is set): In addition to adding
+ * the stack to mut_list and flagging it as STACK_DIRTY, we also must ensure
+ * that stacks are marked in accordance with the nonmoving collector's snapshot
+ * invariant. This is: every stack alive at the time the snapshot is taken must
+ * be marked at some point after the moment the snapshot is taken and before it
+ * is mutated or the commencement of the sweep phase.
+ *
+ * This marking may be done by the concurrent mark phase (in the case of a
+ * thread that never runs during the concurrent mark) or by the mutator when
+ * dirtying the stack. However, it is unsafe for the concurrent collector to
+ * traverse the stack while it is under mutation. Consequently, the following
+ * handshake is obeyed by the mutator's write barrier and the concurrent mark to
+ * ensure this doesn't happen:
+ *
+ * 1. The entity seeking to mark first checks that the stack lives in the nonmoving
+ *    generation; if not then the stack was not alive at the time the snapshot
+ *    was taken and therefore we need not mark it.
+ *
+ * 2. The entity seeking to mark checks the stack's mark bit. If it is set then
+ *    no mark is necessary.
+ *
+ * 3. The entity seeking to mark tries to lock the stack for marking by
+ *    atomically setting its `marking` field to the current non-moving mark
+ *    epoch:
+ *
+ *    a. If the mutator finds the concurrent collector has already locked the
+ *       stack then it waits until it is finished (indicated by the mark bit
+ *       being set) before proceeding with execution.
+ *
+ *    b. If the concurrent collector finds that the mutator has locked the stack
+ *       then it moves on, leaving the mutator to mark it. There is no need to wait;
+ *       the mark is guaranteed to finish before sweep due to the post-mark
+ *       synchronization with mutators.
+ *
+ *    c. Whoever succeeds in locking the stack is responsible for marking it and
+ *       setting the stack's mark bit (either the BF_MARKED bit for large objects
+ *       or otherwise its bit in its segment's mark bitmap).
+ *
+ * To ensure that mutation does not proceed until the stack is fully marked the
+ * mark phase must not set the mark bit until it has finished tracing.
+ *
+ */
+
+#define STACK_DIRTY 1
+// used by sanity checker to verify that all dirty stacks are on the mutable list
+#define STACK_SANE 64
+
+typedef struct StgStack_ {
+    StgHeader  header;
+
+    /* Size of the `stack` field in *words*. This is not affected by how much of
+     * the stack space is used, nor if more stack space is linked to by an
+     * UNDERFLOW_FRAME.
+     */
+    StgWord32  stack_size;
+
+    StgWord8   dirty;          // non-zero => dirty
+    StgWord8   marking;        // non-zero => someone is currently marking the stack
+
+    /* Pointer to the "top" of the stack i.e. the most recently written address.
+     * The stack is filled downwards, so the "top" of the stack starts with `sp
+     * = stack + stack_size` and is decremented as the stack fills with data.
+     * See comment on "Invariants" below.
+     */
+    StgPtr     sp;
+    StgWord    stack[];
+} StgStack;
+
+// Compute the stack limit (SpLim) for a TSO.  Only tso->stackobj is read;
+// this is pure address arithmetic, no field of the stack object is loaded.
+INLINE_HEADER StgPtr tso_SpLim (StgTSO* tso)
+{
+    return &tso->stackobj->stack[RESERVED_STACK_WORDS];
+}
+
+/* -----------------------------------------------------------------------------
+   functions
+   -------------------------------------------------------------------------- */
+
+void dirty_TSO  (Capability *cap, StgTSO *tso);
+void setTSOLink (Capability *cap, StgTSO *tso, StgTSO *target);
+void setTSOPrev (Capability *cap, StgTSO *tso, StgTSO *target);
+
+void dirty_STACK (Capability *cap, StgStack *stack);
+
+/* -----------------------------------------------------------------------------
+   Invariants:
+
+   An active thread has the following properties:
+
+      tso->stack < tso->sp < tso->stack+tso->stack_size
+      tso->stack_size <= tso->max_stack_size
+
+      RESERVED_STACK_WORDS is large enough for any heap-check or
+      stack-check failure.
+
+      The size of the TSO struct plus the stack is either
+        (a) smaller than a block, or
+        (b) a multiple of BLOCK_SIZE
+
+        tso->why_blocked       tso->block_info      location
+        ----------------------------------------------------------------------
+        NotBlocked             END_TSO_QUEUE        runnable_queue, or running
+
+        BlockedOnBlackHole     MessageBlackHole *   TSO->bq
+
+        BlockedOnMVar          the MVAR             the MVAR's queue
+        BlockedOnIOCompletion  the PortEvent        the IOCP's queue
+
+        BlockedOnSTM           END_TSO_QUEUE        STM wait queue(s)
+        BlockedOnSTM           STM_AWOKEN           run queue
+
+        BlockedOnMsgThrowTo    MessageThrowTo *     TSO->blocked_exceptions
+
+        BlockedOnRead          NULL                 blocked_queue
+        BlockedOnWrite         NULL                 blocked_queue
+        BlockedOnDelay         NULL                 blocked_queue
+
+      tso->link == END_TSO_QUEUE, if the thread is currently running.
+
+   A zombie thread has the following properties:
+
+      tso->what_next == ThreadComplete or ThreadKilled
+      tso->link     ==  (could be on some queue somewhere)
+      tso->sp       ==  tso->stack + tso->stack_size - 1 (i.e. top stack word)
+      tso->sp[0]    ==  return value of thread, if what_next == ThreadComplete,
+                        exception             , if what_next == ThreadKilled
+
+      (tso->sp is left pointing at the top word on the stack so that
+      the return value or exception will be retained by a GC).
+
+ ---------------------------------------------------------------------------- */
+
+/* this is the NIL ptr for a TSO queue (e.g. runnable queue) */
+#define END_TSO_QUEUE  ((StgTSO *)(void*)&stg_END_TSO_QUEUE_closure)