Diffstat (limited to 'includes/stg')
 includes/stg/DLL.h          |  48 +
 includes/stg/MachRegs.h     | 768 +
 includes/stg/MiscClosures.h | 642 +
 includes/stg/Regs.h         | 667 +
 includes/stg/SMP.h          | 313 +
 includes/stg/TailCalls.h    | 304 +
 includes/stg/Ticky.h        | 188 +
 includes/stg/Types.h        | 135 +
 8 files changed, 3065 insertions(+), 0 deletions(-)
diff --git a/includes/stg/DLL.h b/includes/stg/DLL.h
new file mode 100644
index 0000000000..5e824271bf
--- /dev/null
+++ b/includes/stg/DLL.h
@@ -0,0 +1,48 @@
+#ifndef __STGDLL_H__
+#define __STGDLL_H__ 1
+
+#if defined(__PIC__) && defined(mingw32_TARGET_OS)
+# define DLL_IMPORT_DATA_REF(x) (_imp__##x)
+# define DLL_IMPORT_DATA_VARNAME(x) *_imp__##x
+# if __GNUC__ && !defined(__declspec)
+# define DLLIMPORT
+# else
+# define DLLIMPORT __declspec(dllimport)
+# define DLLIMPORT_DATA(x) _imp__##x
+# endif
+#else
+# define DLL_IMPORT_DATA_REF(x) (&(x))
+# define DLL_IMPORT_DATA_VARNAME(x) x
+# define DLLIMPORT
+#endif
+
+/* The view of the ghc/includes/ header files differs ever so
+   slightly depending on whether the RTS is being compiled
+   or not - so we're forced to distinguish between the two.
+ [oh, you want details :) : Data symbols defined by the RTS
+ have to be accessed through an extra level of indirection
+ when compiling generated .hc code compared to when the RTS
+ sources are being processed. This is only the case when
+ using Win32 DLLs. ]
+*/
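+
+/* Illustrative example (a sketch only; 'rts_symbol' is a hypothetical data
+   symbol, not one the RTS actually defines): code compiled outside the RTS
+   on a Win32 DLL build would refer to an RTS-owned variable as
+
+       extern DLL_IMPORT_RTS StgInt rts_symbol;
+       ... DLL_IMPORT_DATA_VAR(rts_symbol) ...      => _imp__rts_symbol
+
+   whereas with COMPILING_RTS defined the same macro leaves the name
+   untouched, because the RTS reaches its own data directly.
+*/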
+#ifdef COMPILING_RTS
+#define DLL_IMPORT DLLIMPORT
+#define DLL_IMPORT_RTS
+#define DLL_IMPORT_DATA_VAR(x) x
+#else
+#define DLL_IMPORT
+#define DLL_IMPORT_RTS DLLIMPORT
+# if defined(__PIC__) && defined(mingw32_TARGET_OS)
+# define DLL_IMPORT_DATA_VAR(x) _imp__##x
+# else
+# define DLL_IMPORT_DATA_VAR(x) x
+# endif
+#endif
+
+#ifdef COMPILING_STDLIB
+#define DLL_IMPORT_STDLIB
+#else
+#define DLL_IMPORT_STDLIB DLLIMPORT
+#endif
+
+#endif /* __STGDLL_H__ */
diff --git a/includes/stg/MachRegs.h b/includes/stg/MachRegs.h
new file mode 100644
index 0000000000..d6075326db
--- /dev/null
+++ b/includes/stg/MachRegs.h
@@ -0,0 +1,768 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-1999
+ *
+ * Registers used in STG code. Might or might not correspond to
+ * actual machine registers.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifndef MACHREGS_H
+#define MACHREGS_H
+
+/* This file is #included into Haskell code in the compiler: #defines
+ * only in here please.
+ */
+
+/*
+ * Defining NO_REGS causes no global registers to be used. NO_REGS is
+ * typically defined by GHC, via a command-line option passed to gcc,
+ * when the -funregisterised flag is given.
+ *
+ * NB. When NO_REGS is on, calling & return conventions may be
+ * different. For example, all function arguments will be passed on
+ * the stack, and components of an unboxed tuple will be returned on
+ * the stack rather than in registers.
+ */
+#ifndef NO_REGS
+
+/* NOTE: when testing the platform in this file we must test either
+ * *_HOST_ARCH or *_TARGET_ARCH, depending on whether COMPILING_GHC
+ * is set. This is because when we're compiling the RTS and HC code,
+ * the platform we're running on is the HOST, but when compiling GHC
+ * we want to know about the register mapping on the TARGET platform.
+ */
+#ifdef COMPILING_GHC
+#define alpha_REGS alpha_TARGET_ARCH
+#define hppa1_1_REGS hppa1_1_TARGET_ARCH
+#define i386_REGS i386_TARGET_ARCH
+#define x86_64_REGS x86_64_TARGET_ARCH
+#define m68k_REGS m68k_TARGET_ARCH
+#define mips_REGS (mipsel_TARGET_ARCH || mipseb_TARGET_ARCH)
+#define powerpc_REGS (powerpc_TARGET_ARCH || powerpc64_TARGET_ARCH || rs6000_TARGET_ARCH)
+#define ia64_REGS ia64_TARGET_ARCH
+#define sparc_REGS sparc_TARGET_ARCH
+#define darwin_REGS darwin_TARGET_OS
+#else
+#define alpha_REGS alpha_HOST_ARCH
+#define hppa1_1_REGS hppa1_1_HOST_ARCH
+#define i386_REGS i386_HOST_ARCH
+#define x86_64_REGS x86_64_HOST_ARCH
+#define m68k_REGS m68k_HOST_ARCH
+#define mips_REGS (mipsel_HOST_ARCH || mipseb_HOST_ARCH)
+#define powerpc_REGS (powerpc_HOST_ARCH || powerpc64_HOST_ARCH || rs6000_HOST_ARCH)
+#define ia64_REGS ia64_HOST_ARCH
+#define sparc_REGS sparc_HOST_ARCH
+#define darwin_REGS darwin_HOST_OS
+#endif
+
+/* ----------------------------------------------------------------------------
+ Caller saves and callee-saves regs.
+
+ Caller-saves regs have to be saved around C-calls made from STG
+ land, so this file defines CALLER_SAVES_<reg> for each <reg> that
+ is designated caller-saves in that machine's C calling convention.
+ -------------------------------------------------------------------------- */
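+
+/* For instance, in the x86-64 mapping below R3 is placed in %rsi, which is
+   caller-saves under the C calling convention, so CALLER_SAVES_R3 is defined
+   there and Regs.h will emit code to save and restore R3 around C calls. */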
+
+/* -----------------------------------------------------------------------------
+ The DEC Alpha register mapping
+
+ Alpha registers
+ \tr{$9}--\tr{$14} are our ``prize'' callee-save registers.
+ \tr{$15} is the frame pointer, and \tr{$16}--\tr{$21} are argument
+ registers. (These are off-limits.) We can steal some of the \tr{$22}-and-up
+ caller-save registers provided we do the appropriate save/restore stuff.
+
+ \tr{$f2}--\tr{$f9} are some callee-save floating-point registers.
+
+ We cannot use \tr{$23} (aka t9), \tr{$24} (aka t10), \tr{$25} (aka
+ t11), \tr{$27} (aka pv), or \tr{$28} (aka at), because they are
+ occasionally required by the assembler to handle non-primitive
+ instructions (e.g. ldb, remq). Sigh!
+
+ Cheat sheet for GDB:
+
+ GDB here Main map
+ === ==== ========
+ s5 $14 R1
+ t1 $2 R2
+ t2 $3 R3
+ t3 $4 R4
+ t4 $5 R5
+ t5 $6 R6
+ t6 $7 R7
+ t7 $8 R8
+ s0 $9 Sp
+ s2 $11 SpLim
+ s3 $12 Hp
+ t8 $22 NCG_reserved
+ t12 $27 NCG_reserved
+ -------------------------------------------------------------------------- */
+
+#if alpha_REGS
+# define REG(x) __asm__("$" #x)
+
+# define CALLER_SAVES_R2
+# define CALLER_SAVES_R3
+# define CALLER_SAVES_R4
+# define CALLER_SAVES_R5
+# define CALLER_SAVES_R6
+# define CALLER_SAVES_R7
+# define CALLER_SAVES_R8
+
+# define CALLER_SAVES_USER
+
+# define REG_R1 14
+# define REG_R2 2
+# define REG_R3 3
+# define REG_R4 4
+# define REG_R5 5
+# define REG_R6 6
+# define REG_R7 7
+# define REG_R8 8
+
+# define REG_F1 f2
+# define REG_F2 f3
+# define REG_F3 f4
+# define REG_F4 f5
+
+# define REG_D1 f6
+# define REG_D2 f7
+
+# define REG_Sp 9
+# define REG_SpLim 11
+
+# define REG_Hp 12
+
+# define NCG_Reserved_I1 22
+# define NCG_Reserved_I2 27
+# define NCG_Reserved_F1 f29
+# define NCG_Reserved_F2 f30
+
+#endif /* alpha_REGS */
+
+/* -----------------------------------------------------------------------------
+ The HP-PA register mapping
+
+ We cater for HP-PA 1.1.
+
+ \tr{%r0}--\tr{%r1} are special.
+ \tr{%r2} is the return pointer.
+ \tr{%r3} is the frame pointer.
+ \tr{%r4}--\tr{%r18} are callee-save registers.
+ \tr{%r19} is a linkage table register for HPUX 8.0 shared libraries.
+ \tr{%r20}--\tr{%r22} are caller-save registers.
+ \tr{%r23}--\tr{%r26} are parameter registers.
+ \tr{%r27} is a global data pointer.
+ \tr{%r28}--\tr{%r29} are temporaries.
+ \tr{%r30} is the stack pointer.
+ \tr{%r31} is a temporary.
+
+ \tr{%fr12}--\tr{%fr15} are some callee-save floating-point registers.
+ \tr{%fr8}--\tr{%fr11} are some available caller-save fl-pt registers.
+ -------------------------------------------------------------------------- */
+
+#if hppa1_1_REGS
+
+#define REG(x) __asm__("%" #x)
+
+#define REG_R1 r11
+#define REG_R2 r12
+#define REG_R3 r13
+#define REG_R4 r14
+#define REG_R5 r15
+#define REG_R6 r16
+#define REG_R7 r17
+#define REG_R8 r18
+
+#define REG_F1 fr12
+#define REG_F2 fr12R
+#define REG_F3 fr13
+#define REG_F4 fr13R
+
+#define REG_D1 fr20 /* L & R */
+#define REG_D2 fr21 /* L & R */
+
+#define REG_Sp r4
+#define REG_SpLim r6
+
+#define REG_Hp r7
+
+#define NCG_Reserved_I1 r28
+#define NCG_Reserved_I2 r29
+#define NCG_Reserved_F1 fr8
+#define NCG_Reserved_F2 fr8R
+#define NCG_Reserved_D1 fr10
+#define NCG_Reserved_D2 fr11
+
+#endif /* hppa */
+
+/* -----------------------------------------------------------------------------
+ The x86 register mapping
+
+ Ok, we've only got 6 general purpose registers, a frame pointer and a
+ stack pointer. \tr{%eax} and \tr{%edx} are return values from C functions,
+ hence they get trashed across ccalls and are caller saves. \tr{%ebx},
+ \tr{%esi}, \tr{%edi}, \tr{%ebp} are all callee-saves.
+
+ Reg STG-Reg
+ ---------------
+ ebx Base
+ ebp Sp
+ esi R1
+ edi Hp
+
+ Leaving SpLim out of the picture.
+ -------------------------------------------------------------------------- */
+
+
+#if i386_REGS
+
+#define REG(x) __asm__("%" #x)
+
+#ifndef not_doing_dynamic_linking
+#define REG_Base ebx
+#endif
+#define REG_Sp ebp
+
+#ifndef STOLEN_X86_REGS
+#define STOLEN_X86_REGS 4
+#endif
+
+#if STOLEN_X86_REGS >= 3
+# define REG_R1 esi
+#endif
+
+#if STOLEN_X86_REGS >= 4
+# define REG_Hp edi
+#endif
+
+#define MAX_REAL_VANILLA_REG 1 /* always, since it defines the entry conv */
+#define MAX_REAL_FLOAT_REG 0
+#define MAX_REAL_DOUBLE_REG 0
+#define MAX_REAL_LONG_REG 0
+
+#endif /* iX86 */
+
+/* -----------------------------------------------------------------------------
+ The x86-64 register mapping
+
+ %rax caller-saves, don't steal this one
+ %rbx YES
+ %rcx arg reg, caller-saves
+ %rdx arg reg, caller-saves
+ %rsi arg reg, caller-saves
+ %rdi arg reg, caller-saves
+ %rbp YES (our *prime* register)
+ %rsp (unavailable - stack pointer)
+ %r8 arg reg, caller-saves
+ %r9 arg reg, caller-saves
+ %r10 caller-saves
+ %r11 caller-saves
+ %r12 YES
+ %r13 YES
+ %r14 YES
+ %r15 YES
+
+ %xmm0-7 arg regs, caller-saves
+ %xmm8-15 caller-saves
+
+ Use the caller-saves regs for Rn, because we don't always have to
+ save those (as opposed to Sp/Hp/SpLim etc. which always have to be
+ saved).
+
+ --------------------------------------------------------------------------- */
+
+#if x86_64_REGS
+
+#define REG(x) __asm__("%" #x)
+
+#define REG_Base r13
+#define REG_Sp rbp
+#define REG_Hp r12
+#define REG_R1 rbx
+#define REG_R2 r14
+#define REG_R3 rsi
+#define REG_R4 rdi
+#define REG_R5 r8
+#define REG_R6 r9
+#define REG_SpLim r15
+
+#define REG_F1 xmm1
+#define REG_F2 xmm2
+#define REG_F3 xmm3
+#define REG_F4 xmm4
+
+#define REG_D1 xmm5
+#define REG_D2 xmm6
+
+#define CALLER_SAVES_R3
+#define CALLER_SAVES_R4
+#define CALLER_SAVES_R5
+#define CALLER_SAVES_R6
+
+#define CALLER_SAVES_F1
+#define CALLER_SAVES_F2
+#define CALLER_SAVES_F3
+#define CALLER_SAVES_F4
+
+#define CALLER_SAVES_D1
+#define CALLER_SAVES_D2
+
+#define MAX_REAL_VANILLA_REG 6
+#define MAX_REAL_FLOAT_REG 4
+#define MAX_REAL_DOUBLE_REG 2
+#define MAX_REAL_LONG_REG 0
+
+#endif /* x86_64 */
+
+/* -----------------------------------------------------------------------------
+ The Motorola 680x0 register mapping
+
+ A Sun3 (mc680x0) has eight address registers, \tr{a0} to \tr{a7}, and
+ eight data registers, \tr{d0} to \tr{d7}. Address operations have to
+ be done through address registers; data registers are used for
+ comparison values and data.
+
+ Here's the register-usage picture for m68k boxes with GCC.
+
+ \begin{tabular}{ll}
+ a0 & used directly by GCC \\
+ a1 & used directly by GCC \\
+ \\
+ a2..a5 & callee-saved: available for STG registers \\
+ & (a5 may be special, ``global'' register for PIC?) \\
+ \\
+ a6 & C-stack frame pointer \\
+ a7 & C-stack pointer \\
+ \\
+ d0 & used directly by GCC \\
+ d1 & used directly by GCC \\
+ d2 & really needed for local optimisation by GCC \\
+ \\
+ d3..d7 & callee-saved: available for STG registers
+ \\
+ fp0 & call-clobbered \\
+ fp1 & call-clobbered \\
+ fp2..fp7 & callee-saved: available for STG registers
+ \end{tabular}
+ -------------------------------------------------------------------------- */
+
+#if m68k_REGS
+
+#define REG(x) __asm__(#x)
+
+#define REG_Base a2
+
+#define REG_Sp a3
+#define REG_SpLim d3
+
+#define REG_Hp d4
+
+#define REG_R1 a5
+#define REG_R2 d6
+#define MAX_REAL_VANILLA_REG 2
+
+#define REG_Ret d7
+
+#define REG_F1 fp2
+#define REG_F2 fp3
+#define REG_F3 fp4
+#define REG_F4 fp5
+
+#define REG_D1 fp6
+#define REG_D2 fp7
+
+#endif /* m68k */
+
+/* -----------------------------------------------------------------------------
+ The DECstation (MIPS) register mapping
+
+ Here's at least some simple stuff about registers on a MIPS.
+
+ \tr{s0}--\tr{s7} are callee-save integer registers; they are our
+ ``prize'' stolen registers. There is also a wad of callee-save
+ floating-point registers, \tr{$f20}--\tr{$f31}; we'll use some of
+ those.
+
+ \tr{t0}--\tr{t9} are caller-save (``temporary?'') integer registers.
+ We can steal some, but we might have to save/restore around ccalls.
+ -------------------------------------------------------------------------- */
+
+#if mips_REGS
+
+#define REG(x) __asm__("$" #x)
+
+#define CALLER_SAVES_R5
+#define CALLER_SAVES_R6
+#define CALLER_SAVES_R7
+#define CALLER_SAVES_R8
+
+#define CALLER_SAVES_USER
+
+#define REG_R1 16
+#define REG_R2 17
+#define REG_R3 18
+#define REG_R4 19
+#define REG_R5 12
+#define REG_R6 13
+#define REG_R7 14
+#define REG_R8 15
+
+#define REG_F1 f20
+#define REG_F2 f22
+#define REG_F3 f24
+#define REG_F4 f26
+
+#define REG_D1 f28
+#define REG_D2 f30
+
+#define REG_Sp 20
+#define REG_SpLim 21
+
+#define REG_Hp 22
+
+#define REG_Base 30
+
+#endif /* mipse[lb] */
+
+/* -----------------------------------------------------------------------------
+ The PowerPC register mapping
+
+ 0 system glue? (caller-save, volatile)
+ 1 SP (callee-save, non-volatile)
+ 2 AIX, powerpc64-linux:
+ RTOC (a strange special case)
+ darwin:
+ (caller-save, volatile)
+ powerpc32-linux:
+ reserved for use by system
+
+ 3-10 args/return (caller-save, volatile)
+ 11,12 system glue? (caller-save, volatile)
+ 13 on 64-bit: reserved for thread state pointer
+ on 32-bit: (callee-save, non-volatile)
+ 14-31 (callee-save, non-volatile)
+
+ f0 (caller-save, volatile)
+ f1-f13 args/return (caller-save, volatile)
+ f14-f31 (callee-save, non-volatile)
+
+ \tr{14}--\tr{31} are wonderful callee-save registers on all ppc OSes.
+ \tr{0}--\tr{12} are caller-save registers.
+
+ \tr{%f14}--\tr{%f31} are callee-save floating-point registers.
+
+ We can do the Whole Business with callee-save registers only!
+ -------------------------------------------------------------------------- */
+
+#if powerpc_REGS
+
+#define REG(x) __asm__(#x)
+
+#define REG_R1 r14
+#define REG_R2 r15
+#define REG_R3 r16
+#define REG_R4 r17
+#define REG_R5 r18
+#define REG_R6 r19
+#define REG_R7 r20
+#define REG_R8 r21
+
+#if darwin_REGS
+
+#define REG_F1 f14
+#define REG_F2 f15
+#define REG_F3 f16
+#define REG_F4 f17
+
+#define REG_D1 f18
+#define REG_D2 f19
+
+#else
+
+#define REG_F1 fr14
+#define REG_F2 fr15
+#define REG_F3 fr16
+#define REG_F4 fr17
+
+#define REG_D1 fr18
+#define REG_D2 fr19
+
+#endif
+
+#define REG_Sp r22
+#define REG_SpLim r24
+
+#define REG_Hp r25
+
+#define REG_Base r27
+
+#endif /* powerpc */
+
+/* -----------------------------------------------------------------------------
+ The IA64 register mapping
+
+ We place the general registers in the locals area of the register stack,
+ so that the call mechanism takes care of saving them for us. We reserve
+ the first 16 for gcc's use - since gcc uses the highest used register to
+ determine the register stack frame size, this gives us a constant size
+ register stack frame.
+
+ \tr{f16-f32} are the callee-saved floating point registers.
+ -------------------------------------------------------------------------- */
+
+#if ia64_REGS
+
+#define REG(x) __asm__(#x)
+
+#define REG_R1 loc16
+#define REG_R2 loc17
+#define REG_R3 loc18
+#define REG_R4 loc19
+#define REG_R5 loc20
+#define REG_R6 loc21
+#define REG_R7 loc22
+#define REG_R8 loc23
+
+#define REG_F1 f16
+#define REG_F2 f17
+#define REG_F3 f18
+#define REG_F4 f19
+
+#define REG_D1 f20
+#define REG_D2 f21
+
+#define REG_Sp loc24
+#define REG_SpLim loc26
+
+#define REG_Hp loc27
+
+#endif /* ia64 */
+
+/* -----------------------------------------------------------------------------
+ The Sun SPARC register mapping
+
+ !! IMPORTANT: if you change this register mapping you must also update
+ compiler/nativeGen/SPARC/Regs.hs. That file handles the
+ mapping for the NCG. This one only affects via-c code.
+
+ The SPARC register (window) story: Remember, within the Haskell
+ Threaded World, we essentially ``shut down'' the register-window
+ mechanism---the window doesn't move at all while in this World. It
+ *does* move, of course, if we call out to arbitrary~C...
+
+ The %i, %l, and %o registers (8 each) are the input, local, and
+ output registers visible in one register window. The 8 %g (global)
+ registers are visible all the time.
+
+ zero: always zero
+ scratch: volatile across C-fn calls. used by linker.
+ app: usable by application
+ system: reserved for system
+
+ alloc: allocated to in the register allocator, intra-closure only
+
+ GHC usage v8 ABI v9 ABI
+ Global
+ %g0 zero zero zero
+    %g1  alloc        scratch scratch
+ %g2 alloc app app
+ %g3 alloc app app
+ %g4 alloc app scratch
+ %g5 system scratch
+ %g6 system system
+ %g7 system system
+
+ Output: can be zapped by callee
+ %o0-o5 alloc caller saves
+ %o6 C stack ptr
+ %o7 C ret addr
+
+ Local: maintained by register windowing mechanism
+ %l0 alloc
+ %l1 R1
+ %l2 R2
+ %l3 R3
+ %l4 R4
+ %l5 R5
+ %l6 alloc
+ %l7 alloc
+
+ Input
+ %i0 Sp
+ %i1 Base
+ %i2 SpLim
+ %i3 Hp
+ %i4 alloc
+ %i5 R6
+ %i6 C frame ptr
+ %i7 C ret addr
+
+ The paired nature of the floating point registers causes complications for
+ the native code generator. For convenience, we pretend that the first 22
+ fp regs %f0 .. %f21 are actually 11 double regs, and the remaining 10 are
+ float (single) regs. The NCG acts accordingly. That means that the
+ following FP assignment is rather fragile, and should only be changed
+ with extreme care. The current scheme is:
+
+ %f0 /%f1 FP return from C
+ %f2 /%f3 D1
+ %f4 /%f5 D2
+ %f6 /%f7 ncg double spill tmp #1
+ %f8 /%f9 ncg double spill tmp #2
+ %f10/%f11 allocatable
+ %f12/%f13 allocatable
+ %f14/%f15 allocatable
+ %f16/%f17 allocatable
+ %f18/%f19 allocatable
+ %f20/%f21 allocatable
+
+ %f22 F1
+ %f23 F2
+ %f24 F3
+ %f25 F4
+ %f26 ncg single spill tmp #1
+ %f27 ncg single spill tmp #2
+ %f28 allocatable
+ %f29 allocatable
+ %f30 allocatable
+ %f31 allocatable
+
+ -------------------------------------------------------------------------- */
+
+#if sparc_REGS
+
+#define REG(x) __asm__("%" #x)
+
+#define CALLER_SAVES_USER
+
+#define CALLER_SAVES_F1
+#define CALLER_SAVES_F2
+#define CALLER_SAVES_F3
+#define CALLER_SAVES_F4
+#define CALLER_SAVES_D1
+#define CALLER_SAVES_D2
+
+#define REG_R1 l1
+#define REG_R2 l2
+#define REG_R3 l3
+#define REG_R4 l4
+#define REG_R5 l5
+#define REG_R6 i5
+
+#define REG_F1 f22
+#define REG_F2 f23
+#define REG_F3 f24
+#define REG_F4 f25
+
+/* for each of the double arg regs,
+ Dn_2 is the high half. */
+
+#define REG_D1 f2
+#define REG_D1_2 f3
+
+#define REG_D2 f4
+#define REG_D2_2 f5
+
+#define REG_Sp i0
+#define REG_SpLim i2
+
+#define REG_Hp i3
+
+#define REG_Base i1
+
+/*
+#define NCG_SpillTmp_I1 g1
+#define NCG_SpillTmp_I2 g2
+#define NCG_SpillTmp_F1 f26
+#define NCG_SpillTmp_F2 f27
+#define NCG_SpillTmp_D1 f6
+#define NCG_SpillTmp_D2 f8
+*/
+
+#define NCG_FirstFloatReg f22
+
+#endif /* sparc */
+
+#endif /* NO_REGS */
+
+/* -----------------------------------------------------------------------------
+ * These constants define how many stg registers will be used for
+ * passing arguments (and results, in the case of an unboxed-tuple
+ * return).
+ *
+ * We usually set MAX_REAL_VANILLA_REG and co. to be the number of the
+ * highest STG register to occupy a real machine register, otherwise
+ * the calling conventions will needlessly shuffle data between the
+ * stack and memory-resident STG registers. We might occasionally
+ * set these macros to other values for testing, though.
+ *
+ * Registers above these values might still be used, for instance to
+ * communicate with PrimOps and RTS functions.
+ */
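+
+/* A worked example (illustrative): on registerised x86, only R1 is pinned to
+ * a real machine register (%esi), so MachRegs.h sets MAX_REAL_VANILLA_REG to
+ * 1 explicitly above.  Since that is below 2, NO_ARG_REGS is defined at the
+ * bottom of this file, recording that no vanilla registers are available for
+ * argument passing.
+ */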
+
+#ifndef MAX_REAL_VANILLA_REG
+# if defined(REG_R8)
+# define MAX_REAL_VANILLA_REG 8
+# elif defined(REG_R7)
+# define MAX_REAL_VANILLA_REG 7
+# elif defined(REG_R6)
+# define MAX_REAL_VANILLA_REG 6
+# elif defined(REG_R5)
+# define MAX_REAL_VANILLA_REG 5
+# elif defined(REG_R4)
+# define MAX_REAL_VANILLA_REG 4
+# elif defined(REG_R3)
+# define MAX_REAL_VANILLA_REG 3
+# elif defined(REG_R2)
+# define MAX_REAL_VANILLA_REG 2
+# elif defined(REG_R1)
+# define MAX_REAL_VANILLA_REG 1
+# else
+# define MAX_REAL_VANILLA_REG 0
+# endif
+#endif
+
+#ifndef MAX_REAL_FLOAT_REG
+# if defined(REG_F4)
+# define MAX_REAL_FLOAT_REG 4
+# elif defined(REG_F3)
+# define MAX_REAL_FLOAT_REG 3
+# elif defined(REG_F2)
+# define MAX_REAL_FLOAT_REG 2
+# elif defined(REG_F1)
+# define MAX_REAL_FLOAT_REG 1
+# else
+# define MAX_REAL_FLOAT_REG 0
+# endif
+#endif
+
+#ifndef MAX_REAL_DOUBLE_REG
+# if defined(REG_D2)
+# define MAX_REAL_DOUBLE_REG 2
+# elif defined(REG_D1)
+# define MAX_REAL_DOUBLE_REG 1
+# else
+# define MAX_REAL_DOUBLE_REG 0
+# endif
+#endif
+
+#ifndef MAX_REAL_LONG_REG
+# if defined(REG_L1)
+# define MAX_REAL_LONG_REG 1
+# else
+# define MAX_REAL_LONG_REG 0
+# endif
+#endif
+
+/* define NO_ARG_REGS if we have no argument registers at all (we can
+ * optimise certain code paths using this predicate).
+ */
+#if MAX_REAL_VANILLA_REG < 2
+#define NO_ARG_REGS
+#else
+#undef NO_ARG_REGS
+#endif
+
+#endif /* MACHREGS_H */
diff --git a/includes/stg/MiscClosures.h b/includes/stg/MiscClosures.h
new file mode 100644
index 0000000000..1591570780
--- /dev/null
+++ b/includes/stg/MiscClosures.h
@@ -0,0 +1,642 @@
+/* ----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2004
+ *
+ * Declarations for various symbols exported by the RTS.
+ *
+ * ToDo: many of the symbols in here don't need to be exported, but
+ * our Cmm code generator doesn't know how to generate local symbols
+ * for the RTS bits (it assumes all RTS symbols are external).
+ *
+ * See wiki:Commentary/Compiler/Backends/PprC#Prototypes
+ *
+ * --------------------------------------------------------------------------*/
+
+#ifndef STGMISCCLOSURES_H
+#define STGMISCCLOSURES_H
+
+#if IN_STG_CODE
+# define RTS_RET_INFO(i) extern W_(i)[]
+# define RTS_FUN_INFO(i) extern W_(i)[]
+# define RTS_THUNK_INFO(i) extern W_(i)[]
+# define RTS_INFO(i) extern W_(i)[]
+# define RTS_CLOSURE(i) extern W_(i)[]
+# define RTS_FUN(f) extern DLL_IMPORT_RTS StgFunPtr f(void)
+#else
+# define RTS_RET_INFO(i) extern DLL_IMPORT_RTS const StgRetInfoTable i
+# define RTS_FUN_INFO(i) extern DLL_IMPORT_RTS const StgFunInfoTable i
+# define RTS_THUNK_INFO(i) extern DLL_IMPORT_RTS const StgThunkInfoTable i
+# define RTS_INFO(i) extern DLL_IMPORT_RTS const StgInfoTable i
+# define RTS_CLOSURE(i) extern DLL_IMPORT_RTS StgClosure i
+# define RTS_FUN(f) extern DLL_IMPORT_RTS StgFunPtr f(void)
+#endif
+
+#ifdef TABLES_NEXT_TO_CODE
+# define RTS_ENTRY(f) /* nothing */
+#else
+# define RTS_ENTRY(f) RTS_FUN(f)
+#endif
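+
+/* Illustrative expansions (a sketch, assuming a non-Windows build where
+ * DLL_IMPORT_RTS expands to nothing): outside STG code,
+ * RTS_INFO(stg_IND_info) declares
+ *
+ *     extern const StgInfoTable stg_IND_info;
+ *
+ * while under IN_STG_CODE the same macro declares the symbol merely as an
+ * array of words (effectively "extern W_ stg_IND_info[];"), which is all the
+ * generated .hc code needs in order to take its address.
+ */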
+
+/* Stack frames */
+RTS_RET_INFO(stg_upd_frame_info);
+RTS_RET_INFO(stg_marked_upd_frame_info);
+RTS_RET_INFO(stg_noupd_frame_info);
+RTS_RET_INFO(stg_catch_frame_info);
+RTS_RET_INFO(stg_catch_retry_frame_info);
+RTS_RET_INFO(stg_atomically_frame_info);
+RTS_RET_INFO(stg_atomically_waiting_frame_info);
+RTS_RET_INFO(stg_catch_stm_frame_info);
+RTS_RET_INFO(stg_unblockAsyncExceptionszh_ret_info);
+
+RTS_ENTRY(stg_upd_frame_ret);
+RTS_ENTRY(stg_marked_upd_frame_ret);
+
+// RTS_FUN(stg_interp_constr_entry);
+//
+// This is referenced using the FFI in the compiler (ByteCodeItbls),
+// so we can't give it the correct type here because the prototypes
+// would clash (FFI references are always declared with type StgWord[]
+// in the generated C code).
+
+/* Magic glue code for when compiled code returns a value in R1/F1/D1
+ or a VoidRep to the interpreter. */
+RTS_RET_INFO(stg_ctoi_R1p_info);
+RTS_RET_INFO(stg_ctoi_R1unpt_info);
+RTS_RET_INFO(stg_ctoi_R1n_info);
+RTS_RET_INFO(stg_ctoi_F1_info);
+RTS_RET_INFO(stg_ctoi_D1_info);
+RTS_RET_INFO(stg_ctoi_L1_info);
+RTS_RET_INFO(stg_ctoi_V_info);
+
+RTS_ENTRY(stg_ctoi_R1p_ret);
+RTS_ENTRY(stg_ctoi_R1unpt_ret);
+RTS_ENTRY(stg_ctoi_R1n_ret);
+RTS_ENTRY(stg_ctoi_F1_ret);
+RTS_ENTRY(stg_ctoi_D1_ret);
+RTS_ENTRY(stg_ctoi_L1_ret);
+RTS_ENTRY(stg_ctoi_V_ret);
+
+RTS_RET_INFO(stg_apply_interp_info);
+RTS_ENTRY(stg_apply_interp_ret);
+
+RTS_INFO(stg_IND_info);
+RTS_INFO(stg_IND_direct_info);
+RTS_INFO(stg_IND_STATIC_info);
+RTS_INFO(stg_IND_PERM_info);
+RTS_INFO(stg_IND_OLDGEN_info);
+RTS_INFO(stg_IND_OLDGEN_PERM_info);
+RTS_INFO(stg_CAF_UNENTERED_info);
+RTS_INFO(stg_CAF_ENTERED_info);
+RTS_INFO(stg_WHITEHOLE_info);
+RTS_INFO(stg_BLACKHOLE_info);
+RTS_INFO(__stg_EAGER_BLACKHOLE_info);
+RTS_INFO(stg_CAF_BLACKHOLE_info);
+
+RTS_FUN_INFO(stg_BCO_info);
+RTS_INFO(stg_EVACUATED_info);
+RTS_INFO(stg_WEAK_info);
+RTS_INFO(stg_DEAD_WEAK_info);
+RTS_INFO(stg_STABLE_NAME_info);
+RTS_INFO(stg_MVAR_CLEAN_info);
+RTS_INFO(stg_MVAR_DIRTY_info);
+RTS_INFO(stg_TSO_info);
+RTS_INFO(stg_ARR_WORDS_info);
+RTS_INFO(stg_MUT_ARR_WORDS_info);
+RTS_INFO(stg_MUT_ARR_PTRS_CLEAN_info);
+RTS_INFO(stg_MUT_ARR_PTRS_DIRTY_info);
+RTS_INFO(stg_MUT_ARR_PTRS_FROZEN_info);
+RTS_INFO(stg_MUT_ARR_PTRS_FROZEN0_info);
+RTS_INFO(stg_MUT_VAR_CLEAN_info);
+RTS_INFO(stg_MUT_VAR_DIRTY_info);
+RTS_INFO(stg_END_TSO_QUEUE_info);
+RTS_INFO(stg_MUT_CONS_info);
+RTS_INFO(stg_catch_info);
+RTS_INFO(stg_PAP_info);
+RTS_INFO(stg_AP_info);
+RTS_INFO(stg_AP_NOUPD_info);
+RTS_INFO(stg_AP_STACK_info);
+RTS_INFO(stg_dummy_ret_info);
+RTS_INFO(stg_raise_info);
+RTS_INFO(stg_raise_ret_info);
+RTS_INFO(stg_TVAR_WATCH_QUEUE_info);
+RTS_INFO(stg_INVARIANT_CHECK_QUEUE_info);
+RTS_INFO(stg_ATOMIC_INVARIANT_info);
+RTS_INFO(stg_TVAR_info);
+RTS_INFO(stg_TREC_CHUNK_info);
+RTS_INFO(stg_TREC_HEADER_info);
+RTS_INFO(stg_END_STM_WATCH_QUEUE_info);
+RTS_INFO(stg_END_INVARIANT_CHECK_QUEUE_info);
+RTS_INFO(stg_END_STM_CHUNK_LIST_info);
+RTS_INFO(stg_NO_TREC_info);
+
+RTS_ENTRY(stg_IND_entry);
+RTS_ENTRY(stg_IND_direct_entry);
+RTS_ENTRY(stg_IND_STATIC_entry);
+RTS_ENTRY(stg_IND_PERM_entry);
+RTS_ENTRY(stg_IND_OLDGEN_entry);
+RTS_ENTRY(stg_IND_OLDGEN_PERM_entry);
+RTS_ENTRY(stg_CAF_UNENTERED_entry);
+RTS_ENTRY(stg_CAF_ENTERED_entry);
+RTS_ENTRY(stg_WHITEHOLE_entry);
+RTS_ENTRY(stg_BLACKHOLE_entry);
+RTS_ENTRY(__stg_EAGER_BLACKHOLE_entry);
+RTS_ENTRY(stg_CAF_BLACKHOLE_entry);
+RTS_ENTRY(stg_BCO_entry);
+RTS_ENTRY(stg_EVACUATED_entry);
+RTS_ENTRY(stg_WEAK_entry);
+RTS_ENTRY(stg_DEAD_WEAK_entry);
+RTS_ENTRY(stg_STABLE_NAME_entry);
+RTS_ENTRY(stg_FULL_MVAR_entry);
+RTS_ENTRY(stg_EMPTY_MVAR_entry);
+RTS_ENTRY(stg_TSO_entry);
+RTS_ENTRY(stg_ARR_WORDS_entry);
+RTS_ENTRY(stg_MUT_ARR_WORDS_entry);
+RTS_ENTRY(stg_MUT_ARR_PTRS_CLEAN_entry);
+RTS_ENTRY(stg_MUT_ARR_PTRS_DIRTY_entry);
+RTS_ENTRY(stg_MUT_ARR_PTRS_FROZEN_entry);
+RTS_ENTRY(stg_MUT_ARR_PTRS_FROZEN0_entry);
+RTS_ENTRY(stg_MUT_VAR_CLEAN_entry);
+RTS_ENTRY(stg_MUT_VAR_DIRTY_entry);
+RTS_ENTRY(stg_END_TSO_QUEUE_entry);
+RTS_ENTRY(stg_MUT_CONS_entry);
+RTS_ENTRY(stg_catch_entry);
+RTS_ENTRY(stg_PAP_entry);
+RTS_ENTRY(stg_AP_entry);
+RTS_ENTRY(stg_AP_NOUPD_entry);
+RTS_ENTRY(stg_AP_STACK_entry);
+RTS_ENTRY(stg_dummy_ret_entry);
+RTS_ENTRY(stg_raise_entry);
+RTS_ENTRY(stg_raise_ret_ret);
+RTS_ENTRY(stg_END_STM_WATCH_QUEUE_entry);
+RTS_ENTRY(stg_END_INVARIANT_CHECK_QUEUE_entry);
+RTS_ENTRY(stg_END_STM_CHUNK_LIST_entry);
+RTS_ENTRY(stg_NO_TREC_entry);
+RTS_ENTRY(stg_TVAR_entry);
+RTS_ENTRY(stg_TVAR_WATCH_QUEUE_entry);
+RTS_ENTRY(stg_INVARIANT_CHECK_QUEUE_entry);
+RTS_ENTRY(stg_ATOMIC_INVARIANT_entry);
+RTS_ENTRY(stg_TREC_CHUNK_entry);
+RTS_ENTRY(stg_TREC_HEADER_entry);
+
+
+RTS_ENTRY(stg_unblockAsyncExceptionszh_ret_ret);
+RTS_ENTRY(stg_blockAsyncExceptionszh_ret_ret);
+RTS_ENTRY(stg_catch_frame_ret);
+RTS_ENTRY(stg_catch_retry_frame_ret);
+RTS_ENTRY(stg_atomically_frame_ret);
+RTS_ENTRY(stg_atomically_waiting_frame_ret);
+RTS_ENTRY(stg_catch_stm_frame_ret);
+RTS_ENTRY(stg_catch_frame_ret);
+RTS_ENTRY(stg_catch_entry);
+RTS_ENTRY(stg_raise_entry);
+
+/* closures */
+
+RTS_CLOSURE(stg_END_TSO_QUEUE_closure);
+RTS_CLOSURE(stg_NO_FINALIZER_closure);
+RTS_CLOSURE(stg_dummy_ret_closure);
+RTS_CLOSURE(stg_forceIO_closure);
+
+RTS_CLOSURE(stg_END_STM_WATCH_QUEUE_closure);
+RTS_CLOSURE(stg_END_INVARIANT_CHECK_QUEUE_closure);
+RTS_CLOSURE(stg_END_STM_CHUNK_LIST_closure);
+RTS_CLOSURE(stg_NO_TREC_closure);
+
+RTS_ENTRY(stg_NO_FINALIZER_entry);
+RTS_ENTRY(stg_END_EXCEPTION_LIST_entry);
+RTS_ENTRY(stg_EXCEPTION_CONS_entry);
+
+#if IN_STG_CODE
+extern DLL_IMPORT_RTS StgWordArray stg_CHARLIKE_closure;
+extern DLL_IMPORT_RTS StgWordArray stg_INTLIKE_closure;
+#else
+extern DLL_IMPORT_RTS StgIntCharlikeClosure stg_CHARLIKE_closure[];
+extern DLL_IMPORT_RTS StgIntCharlikeClosure stg_INTLIKE_closure[];
+#endif
+
+/* StgStartup */
+
+RTS_RET_INFO(stg_forceIO_info);
+RTS_ENTRY(stg_forceIO_ret);
+
+RTS_RET_INFO(stg_noforceIO_info);
+RTS_ENTRY(stg_noforceIO_ret);
+
+/* standard entry points */
+
+/* standard selector thunks */
+
+RTS_ENTRY(stg_sel_ret_0_upd_ret);
+RTS_ENTRY(stg_sel_ret_1_upd_ret);
+RTS_ENTRY(stg_sel_ret_2_upd_ret);
+RTS_ENTRY(stg_sel_ret_3_upd_ret);
+RTS_ENTRY(stg_sel_ret_4_upd_ret);
+RTS_ENTRY(stg_sel_ret_5_upd_ret);
+RTS_ENTRY(stg_sel_ret_6_upd_ret);
+RTS_ENTRY(stg_sel_ret_7_upd_ret);
+RTS_ENTRY(stg_sel_ret_8_upd_ret);
+RTS_ENTRY(stg_sel_ret_8_upd_ret);
+RTS_ENTRY(stg_sel_ret_9_upd_ret);
+RTS_ENTRY(stg_sel_ret_10_upd_ret);
+RTS_ENTRY(stg_sel_ret_11_upd_ret);
+RTS_ENTRY(stg_sel_ret_12_upd_ret);
+RTS_ENTRY(stg_sel_ret_13_upd_ret);
+RTS_ENTRY(stg_sel_ret_14_upd_ret);
+RTS_ENTRY(stg_sel_ret_15_upd_ret);
+
+RTS_INFO(stg_sel_0_upd_info);
+RTS_INFO(stg_sel_1_upd_info);
+RTS_INFO(stg_sel_2_upd_info);
+RTS_INFO(stg_sel_3_upd_info);
+RTS_INFO(stg_sel_4_upd_info);
+RTS_INFO(stg_sel_5_upd_info);
+RTS_INFO(stg_sel_6_upd_info);
+RTS_INFO(stg_sel_7_upd_info);
+RTS_INFO(stg_sel_8_upd_info);
+RTS_INFO(stg_sel_9_upd_info);
+RTS_INFO(stg_sel_10_upd_info);
+RTS_INFO(stg_sel_11_upd_info);
+RTS_INFO(stg_sel_12_upd_info);
+RTS_INFO(stg_sel_13_upd_info);
+RTS_INFO(stg_sel_14_upd_info);
+RTS_INFO(stg_sel_15_upd_info);
+
+RTS_ENTRY(stg_sel_0_upd_entry);
+RTS_ENTRY(stg_sel_1_upd_entry);
+RTS_ENTRY(stg_sel_2_upd_entry);
+RTS_ENTRY(stg_sel_3_upd_entry);
+RTS_ENTRY(stg_sel_4_upd_entry);
+RTS_ENTRY(stg_sel_5_upd_entry);
+RTS_ENTRY(stg_sel_6_upd_entry);
+RTS_ENTRY(stg_sel_7_upd_entry);
+RTS_ENTRY(stg_sel_8_upd_entry);
+RTS_ENTRY(stg_sel_9_upd_entry);
+RTS_ENTRY(stg_sel_10_upd_entry);
+RTS_ENTRY(stg_sel_11_upd_entry);
+RTS_ENTRY(stg_sel_12_upd_entry);
+RTS_ENTRY(stg_sel_13_upd_entry);
+RTS_ENTRY(stg_sel_14_upd_entry);
+RTS_ENTRY(stg_sel_15_upd_entry);
+
+RTS_ENTRY(stg_sel_ret_0_noupd_ret);
+RTS_ENTRY(stg_sel_ret_1_noupd_ret);
+RTS_ENTRY(stg_sel_ret_2_noupd_ret);
+RTS_ENTRY(stg_sel_ret_3_noupd_ret);
+RTS_ENTRY(stg_sel_ret_4_noupd_ret);
+RTS_ENTRY(stg_sel_ret_5_noupd_ret);
+RTS_ENTRY(stg_sel_ret_6_noupd_ret);
+RTS_ENTRY(stg_sel_ret_7_noupd_ret);
+RTS_ENTRY(stg_sel_ret_8_noupd_ret);
+RTS_ENTRY(stg_sel_ret_8_noupd_ret);
+RTS_ENTRY(stg_sel_ret_9_noupd_ret);
+RTS_ENTRY(stg_sel_ret_10_noupd_ret);
+RTS_ENTRY(stg_sel_ret_11_noupd_ret);
+RTS_ENTRY(stg_sel_ret_12_noupd_ret);
+RTS_ENTRY(stg_sel_ret_13_noupd_ret);
+RTS_ENTRY(stg_sel_ret_14_noupd_ret);
+RTS_ENTRY(stg_sel_ret_15_noupd_ret);
+
+RTS_INFO(stg_sel_0_noupd_info);
+RTS_INFO(stg_sel_1_noupd_info);
+RTS_INFO(stg_sel_2_noupd_info);
+RTS_INFO(stg_sel_3_noupd_info);
+RTS_INFO(stg_sel_4_noupd_info);
+RTS_INFO(stg_sel_5_noupd_info);
+RTS_INFO(stg_sel_6_noupd_info);
+RTS_INFO(stg_sel_7_noupd_info);
+RTS_INFO(stg_sel_8_noupd_info);
+RTS_INFO(stg_sel_9_noupd_info);
+RTS_INFO(stg_sel_10_noupd_info);
+RTS_INFO(stg_sel_11_noupd_info);
+RTS_INFO(stg_sel_12_noupd_info);
+RTS_INFO(stg_sel_13_noupd_info);
+RTS_INFO(stg_sel_14_noupd_info);
+RTS_INFO(stg_sel_15_noupd_info);
+
+RTS_ENTRY(stg_sel_0_noupd_entry);
+RTS_ENTRY(stg_sel_1_noupd_entry);
+RTS_ENTRY(stg_sel_2_noupd_entry);
+RTS_ENTRY(stg_sel_3_noupd_entry);
+RTS_ENTRY(stg_sel_4_noupd_entry);
+RTS_ENTRY(stg_sel_5_noupd_entry);
+RTS_ENTRY(stg_sel_6_noupd_entry);
+RTS_ENTRY(stg_sel_7_noupd_entry);
+RTS_ENTRY(stg_sel_8_noupd_entry);
+RTS_ENTRY(stg_sel_9_noupd_entry);
+RTS_ENTRY(stg_sel_10_noupd_entry);
+RTS_ENTRY(stg_sel_11_noupd_entry);
+RTS_ENTRY(stg_sel_12_noupd_entry);
+RTS_ENTRY(stg_sel_13_noupd_entry);
+RTS_ENTRY(stg_sel_14_noupd_entry);
+RTS_ENTRY(stg_sel_15_noupd_entry);
+
+/* standard ap thunks */
+
+RTS_THUNK_INFO(stg_ap_1_upd_info);
+RTS_THUNK_INFO(stg_ap_2_upd_info);
+RTS_THUNK_INFO(stg_ap_3_upd_info);
+RTS_THUNK_INFO(stg_ap_4_upd_info);
+RTS_THUNK_INFO(stg_ap_5_upd_info);
+RTS_THUNK_INFO(stg_ap_6_upd_info);
+RTS_THUNK_INFO(stg_ap_7_upd_info);
+
+RTS_ENTRY(stg_ap_1_upd_entry);
+RTS_ENTRY(stg_ap_2_upd_entry);
+RTS_ENTRY(stg_ap_3_upd_entry);
+RTS_ENTRY(stg_ap_4_upd_entry);
+RTS_ENTRY(stg_ap_5_upd_entry);
+RTS_ENTRY(stg_ap_6_upd_entry);
+RTS_ENTRY(stg_ap_7_upd_entry);
+
+/* standard application routines (see also rts/gen_apply.py,
+ * and compiler/codeGen/CgStackery.lhs).
+ */
+RTS_RET_INFO(stg_ap_v_info);
+RTS_RET_INFO(stg_ap_f_info);
+RTS_RET_INFO(stg_ap_d_info);
+RTS_RET_INFO(stg_ap_l_info);
+RTS_RET_INFO(stg_ap_n_info);
+RTS_RET_INFO(stg_ap_p_info);
+RTS_RET_INFO(stg_ap_pv_info);
+RTS_RET_INFO(stg_ap_pp_info);
+RTS_RET_INFO(stg_ap_ppv_info);
+RTS_RET_INFO(stg_ap_ppp_info);
+RTS_RET_INFO(stg_ap_pppv_info);
+RTS_RET_INFO(stg_ap_pppp_info);
+RTS_RET_INFO(stg_ap_ppppp_info);
+RTS_RET_INFO(stg_ap_pppppp_info);
+
+RTS_ENTRY(stg_ap_v_ret);
+RTS_ENTRY(stg_ap_f_ret);
+RTS_ENTRY(stg_ap_d_ret);
+RTS_ENTRY(stg_ap_l_ret);
+RTS_ENTRY(stg_ap_n_ret);
+RTS_ENTRY(stg_ap_p_ret);
+RTS_ENTRY(stg_ap_pv_ret);
+RTS_ENTRY(stg_ap_pp_ret);
+RTS_ENTRY(stg_ap_ppv_ret);
+RTS_ENTRY(stg_ap_ppp_ret);
+RTS_ENTRY(stg_ap_pppv_ret);
+RTS_ENTRY(stg_ap_pppp_ret);
+RTS_ENTRY(stg_ap_ppppp_ret);
+RTS_ENTRY(stg_ap_pppppp_ret);
+
+RTS_FUN(stg_ap_0_fast);
+RTS_FUN(stg_ap_v_fast);
+RTS_FUN(stg_ap_f_fast);
+RTS_FUN(stg_ap_d_fast);
+RTS_FUN(stg_ap_l_fast);
+RTS_FUN(stg_ap_n_fast);
+RTS_FUN(stg_ap_p_fast);
+RTS_FUN(stg_ap_pv_fast);
+RTS_FUN(stg_ap_pp_fast);
+RTS_FUN(stg_ap_ppv_fast);
+RTS_FUN(stg_ap_ppp_fast);
+RTS_FUN(stg_ap_pppv_fast);
+RTS_FUN(stg_ap_pppp_fast);
+RTS_FUN(stg_ap_ppppp_fast);
+RTS_FUN(stg_ap_pppppp_fast);
+RTS_FUN(stg_PAP_apply);
+
+/* standard GC & stack check entry points, all defined in HeapStackCheck.hc */
+
+RTS_RET_INFO(stg_enter_info);
+RTS_ENTRY(stg_enter_ret);
+
+RTS_RET_INFO(stg_gc_void_info);
+RTS_ENTRY(stg_gc_void_ret);
+
+RTS_FUN(__stg_gc_enter_1);
+
+RTS_FUN(stg_gc_noregs);
+
+RTS_RET_INFO(stg_gc_unpt_r1_info);
+RTS_ENTRY(stg_gc_unpt_r1_ret);
+RTS_FUN(stg_gc_unpt_r1);
+
+RTS_RET_INFO(stg_gc_unbx_r1_info);
+RTS_ENTRY(stg_gc_unbx_r1_ret);
+RTS_FUN(stg_gc_unbx_r1);
+
+RTS_RET_INFO(stg_gc_f1_info);
+RTS_ENTRY(stg_gc_f1_ret);
+RTS_FUN(stg_gc_f1);
+
+RTS_RET_INFO(stg_gc_d1_info);
+RTS_ENTRY(stg_gc_d1_ret);
+RTS_FUN(stg_gc_d1);
+
+RTS_RET_INFO(stg_gc_l1_info);
+RTS_ENTRY(stg_gc_l1_ret);
+RTS_FUN(stg_gc_l1);
+
+RTS_FUN(__stg_gc_fun);
+RTS_RET_INFO(stg_gc_fun_info);
+RTS_ENTRY(stg_gc_fun_ret);
+
+RTS_RET_INFO(stg_gc_gen_info);
+RTS_ENTRY(stg_gc_gen_ret);
+RTS_FUN(stg_gc_gen);
+
+RTS_ENTRY(stg_ut_1_0_unreg_ret);
+RTS_RET_INFO(stg_ut_1_0_unreg_info);
+
+RTS_FUN(stg_gc_gen_hp);
+RTS_FUN(stg_gc_ut);
+RTS_FUN(stg_gen_yield);
+RTS_FUN(stg_yield_noregs);
+RTS_FUN(stg_yield_to_interpreter);
+RTS_FUN(stg_gen_block);
+RTS_FUN(stg_block_noregs);
+RTS_FUN(stg_block_1);
+RTS_FUN(stg_block_blackhole);
+RTS_FUN(stg_block_blackhole_finally);
+RTS_FUN(stg_block_takemvar);
+RTS_ENTRY(stg_block_takemvar_ret);
+RTS_FUN(stg_block_putmvar);
+RTS_ENTRY(stg_block_putmvar_ret);
+#ifdef mingw32_HOST_OS
+RTS_FUN(stg_block_async);
+RTS_ENTRY(stg_block_async_ret);
+RTS_FUN(stg_block_async_void);
+RTS_ENTRY(stg_block_async_void_ret);
+#endif
+RTS_FUN(stg_block_stmwait);
+RTS_FUN(stg_block_throwto);
+RTS_ENTRY(stg_block_throwto_ret);
+RTS_RET_INFO(stg_block_throwto_info);
+
+/* Entry/exit points from StgStartup.cmm */
+
+RTS_RET_INFO(stg_stop_thread_info);
+RTS_ENTRY(stg_stop_thread_ret);
+
+RTS_FUN(stg_returnToStackTop);
+RTS_FUN(stg_returnToSched);
+RTS_FUN(stg_returnToSchedNotPaused);
+RTS_FUN(stg_returnToSchedButFirst);
+RTS_FUN(stg_threadFinished);
+
+RTS_FUN(stg_init_finish);
+RTS_FUN(stg_init);
+
+RTS_FUN(StgReturn);
+
+/* -----------------------------------------------------------------------------
+ PrimOps
+ -------------------------------------------------------------------------- */
+
+RTS_FUN(plusIntegerzh_fast);
+RTS_FUN(minusIntegerzh_fast);
+RTS_FUN(timesIntegerzh_fast);
+RTS_FUN(gcdIntegerzh_fast);
+RTS_FUN(quotRemIntegerzh_fast);
+RTS_FUN(quotIntegerzh_fast);
+RTS_FUN(remIntegerzh_fast);
+RTS_FUN(divExactIntegerzh_fast);
+RTS_FUN(divModIntegerzh_fast);
+
+RTS_FUN(cmpIntegerIntzh_fast);
+RTS_FUN(cmpIntegerzh_fast);
+RTS_FUN(integer2Intzh_fast);
+RTS_FUN(integer2Wordzh_fast);
+RTS_FUN(gcdIntegerIntzh_fast);
+RTS_FUN(gcdIntzh_fast);
+
+RTS_FUN(int2Integerzh_fast);
+RTS_FUN(word2Integerzh_fast);
+
+RTS_FUN(decodeFloatzuIntzh_fast);
+RTS_FUN(decodeDoublezh_fast);
+RTS_FUN(decodeDoublezu2Intzh_fast);
+
+RTS_FUN(andIntegerzh_fast);
+RTS_FUN(orIntegerzh_fast);
+RTS_FUN(xorIntegerzh_fast);
+RTS_FUN(complementIntegerzh_fast);
+
+#if SIZEOF_HSINT == 4
+
+RTS_FUN(int64ToIntegerzh_fast);
+RTS_FUN(word64ToIntegerzh_fast);
+
+#endif
+
+RTS_FUN(unsafeThawArrayzh_fast);
+RTS_FUN(newByteArrayzh_fast);
+RTS_FUN(newPinnedByteArrayzh_fast);
+RTS_FUN(newAlignedPinnedByteArrayzh_fast);
+RTS_FUN(newArrayzh_fast);
+
+RTS_FUN(newMutVarzh_fast);
+RTS_FUN(atomicModifyMutVarzh_fast);
+
+RTS_FUN(isEmptyMVarzh_fast);
+RTS_FUN(newMVarzh_fast);
+RTS_FUN(takeMVarzh_fast);
+RTS_FUN(putMVarzh_fast);
+RTS_FUN(tryTakeMVarzh_fast);
+RTS_FUN(tryPutMVarzh_fast);
+
+RTS_FUN(waitReadzh_fast);
+RTS_FUN(waitWritezh_fast);
+RTS_FUN(delayzh_fast);
+#ifdef mingw32_HOST_OS
+RTS_FUN(asyncReadzh_fast);
+RTS_FUN(asyncWritezh_fast);
+RTS_FUN(asyncDoProczh_fast);
+#endif
+
+RTS_FUN(catchzh_fast);
+RTS_FUN(raisezh_fast);
+RTS_FUN(raiseIOzh_fast);
+
+RTS_FUN(makeStableNamezh_fast);
+RTS_FUN(makeStablePtrzh_fast);
+RTS_FUN(deRefStablePtrzh_fast);
+
+RTS_FUN(forkzh_fast);
+RTS_FUN(forkOnzh_fast);
+RTS_FUN(yieldzh_fast);
+RTS_FUN(killThreadzh_fast);
+RTS_FUN(asyncExceptionsBlockedzh_fast);
+RTS_FUN(blockAsyncExceptionszh_fast);
+RTS_FUN(unblockAsyncExceptionszh_fast);
+RTS_FUN(myThreadIdzh_fast);
+RTS_FUN(labelThreadzh_fast);
+RTS_FUN(isCurrentThreadBoundzh_fast);
+RTS_FUN(threadStatuszh_fast);
+
+RTS_FUN(mkWeakzh_fast);
+RTS_FUN(mkWeakForeignzh_fast);
+RTS_FUN(mkWeakForeignEnvzh_fast);
+RTS_FUN(finalizzeWeakzh_fast);
+RTS_FUN(deRefWeakzh_fast);
+
+RTS_FUN(newBCOzh_fast);
+RTS_FUN(mkApUpd0zh_fast);
+
+RTS_FUN(retryzh_fast);
+RTS_FUN(catchRetryzh_fast);
+RTS_FUN(catchSTMzh_fast);
+RTS_FUN(atomicallyzh_fast);
+RTS_FUN(newTVarzh_fast);
+RTS_FUN(readTVarzh_fast);
+RTS_FUN(readTVarIOzh_fast);
+RTS_FUN(writeTVarzh_fast);
+RTS_FUN(checkzh_fast);
+
+RTS_FUN(unpackClosurezh_fast);
+RTS_FUN(getApStackValzh_fast);
+RTS_FUN(getSparkzh_fast);
+
+RTS_FUN(noDuplicatezh_fast);
+
+RTS_FUN(traceCcszh_fast);
+
+/* Other misc stuff */
+// See wiki:Commentary/Compiler/Backends/PprC#Prototypes
+
+#if IN_STG_CODE && !IN_STGCRUN
+
+// Interpreter.c
+extern StgWord rts_stop_next_breakpoint[];
+extern StgWord rts_stop_on_exception[];
+extern StgWord rts_breakpoint_io_action[];
+
+// Schedule.c
+extern StgWord RTS_VAR(blocked_queue_hd), RTS_VAR(blocked_queue_tl);
+extern StgWord RTS_VAR(sleeping_queue);
+extern StgWord RTS_VAR(blackhole_queue);
+extern StgWord RTS_VAR(sched_mutex);
+
+// Apply.cmm
+// canned bitmap for each arg type
+extern StgWord stg_arg_bitmaps[];
+extern StgWord stg_ap_stack_entries[];
+extern StgWord stg_stack_save_entries[];
+
+// Storage.c
+extern unsigned int RTS_VAR(alloc_blocks);
+extern unsigned int RTS_VAR(alloc_blocks_lim);
+extern StgWord RTS_VAR(weak_ptr_list);
+extern StgWord RTS_VAR(atomic_modify_mutvar_mutex);
+
+// RtsFlags
+extern StgWord RTS_VAR(RtsFlags); // bogus type
+
+// Stable.c
+extern StgWord RTS_VAR(stable_ptr_table);
+
+// Profiling.c
+extern unsigned int RTS_VAR(era);
+extern StgWord RTS_VAR(CCCS); /* current CCS */
+extern unsigned int RTS_VAR(entering_PAP);
+extern StgWord RTS_VAR(CC_LIST); /* registered CC list */
+extern StgWord RTS_VAR(CCS_LIST); /* registered CCS list */
+extern unsigned int RTS_VAR(CC_ID); /* global ids */
+extern unsigned int RTS_VAR(CCS_ID);
+
+#endif
+
+#endif /* STGMISCCLOSURES_H */
diff --git a/includes/stg/Regs.h b/includes/stg/Regs.h
new file mode 100644
index 0000000000..fb26254d5a
--- /dev/null
+++ b/includes/stg/Regs.h
@@ -0,0 +1,667 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2004
+ *
+ * Registers in the STG machine.
+ *
+ * The STG machine has a collection of "registers", each one of which
+ * may or may not correspond to an actual machine register when
+ * running code.
+ *
+ * The register set is backed by a table in memory (struct
+ * StgRegTable). If a particular STG register is not mapped to a
+ * machine register, then the appropriate slot in this table is used
+ * instead.
+ *
+ * This table is itself pointed to by another register, BaseReg. If
+ * BaseReg is not in a machine register, then the register table is
+ * used from an absolute location (MainCapability).
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifndef REGS_H
+#define REGS_H
+
+typedef struct {
+ StgWord stgEagerBlackholeInfo;
+ StgFunPtr stgGCEnter1;
+ StgFunPtr stgGCFun;
+} StgFunTable;
+
+/*
+ * Vanilla registers are given this union type, which is purely so
+ * that we can cast the vanilla reg to a variety of types with the
+ * minimum of syntax. eg. R1.w instead of (StgWord)R1.
+ */
+typedef union {
+ StgWord w;
+ StgAddr a;
+ StgChar c;
+ StgFloat f;
+ StgInt i;
+ StgPtr p;
+} StgUnion;
+
+/*
+ * This is the table that holds shadow-locations for all the STG
+ * registers. The shadow locations are used when:
+ *
+ * 1) the particular register isn't mapped to a real machine
+ * register, probably because there's a shortage of real registers.
+ * 2) caller-saves registers are saved across a CCall
+ */
+typedef struct StgRegTable_ {
+ StgUnion rR1;
+ StgUnion rR2;
+ StgUnion rR3;
+ StgUnion rR4;
+ StgUnion rR5;
+ StgUnion rR6;
+ StgUnion rR7;
+ StgUnion rR8;
+ StgUnion rR9; /* used occasionally by heap/stack checks */
+ StgUnion rR10; /* used occasionally by heap/stack checks */
+ StgFloat rF1;
+ StgFloat rF2;
+ StgFloat rF3;
+ StgFloat rF4;
+ StgDouble rD1;
+ StgDouble rD2;
+ StgWord64 rL1;
+ StgPtr rSp;
+ StgPtr rSpLim;
+ StgPtr rHp;
+ StgPtr rHpLim;
+ struct StgTSO_ *rCurrentTSO;
+ struct step_ *rNursery;
+ struct bdescr_ *rCurrentNursery; /* Hp/HpLim point into this block */
+ struct bdescr_ *rCurrentAlloc; /* for allocation using allocate() */
+ StgWord rHpAlloc; /* number of *bytes* being allocated in heap */
+ StgWord rRet; // holds the return code of the thread
+} StgRegTable;
+
+#if IN_STG_CODE
+
+/*
+ * Registers Hp and HpLim are global across the entire system, and are
+ * copied into the RegTable before executing a thread.
+ *
+ * Registers Sp and SpLim are saved in the TSO for the
+ * thread, but are copied into the RegTable before executing a thread.
+ *
+ * All other registers are "general purpose", and are used for passing
+ * arguments to functions, and returning values. The code generator
+ * knows how many of these are in real registers, and avoids
+ * generating code that uses non-real registers. General purpose
+ * registers are never saved when returning to the scheduler, instead
+ * we save whatever is live at the time on the stack, and restore it
+ * later. This should reduce the context switch time, amongst other
+ * things.
+ *
+ * For argument passing, the stack will be used in preference to
+ * pseudo-registers if the architecture has too few general purpose
+ * registers.
+ *
+ * Some special RTS functions like newArray and the Integer primitives
+ * expect their arguments to be in registers R1-Rn, so we use these
+ * (pseudo-)registers in those cases.
+ */
+
+/*
+ * Locations for saving per-thread registers.
+ */
+
+#define SAVE_Sp (CurrentTSO->sp)
+#define SAVE_SpLim (CurrentTSO->splim)
+
+#define SAVE_Hp (BaseReg->rHp)
+
+#define SAVE_CurrentTSO (BaseReg->rCurrentTSO)
+#define SAVE_CurrentNursery (BaseReg->rCurrentNursery)
+#define SAVE_HpAlloc (BaseReg->rHpAlloc)
+
+/* We sometimes need to save registers across a C-call, eg. if they
+ * are clobbered in the standard calling convention. We define the
+ * save locations for all registers in the register table.
+ */
+
+#define SAVE_R1 (BaseReg->rR1)
+#define SAVE_R2 (BaseReg->rR2)
+#define SAVE_R3 (BaseReg->rR3)
+#define SAVE_R4 (BaseReg->rR4)
+#define SAVE_R5 (BaseReg->rR5)
+#define SAVE_R6 (BaseReg->rR6)
+#define SAVE_R7 (BaseReg->rR7)
+#define SAVE_R8 (BaseReg->rR8)
+
+#define SAVE_F1 (BaseReg->rF1)
+#define SAVE_F2 (BaseReg->rF2)
+#define SAVE_F3 (BaseReg->rF3)
+#define SAVE_F4 (BaseReg->rF4)
+
+#define SAVE_D1 (BaseReg->rD1)
+#define SAVE_D2 (BaseReg->rD2)
+
+#define SAVE_L1 (BaseReg->rL1)
+
+/* -----------------------------------------------------------------------------
+ * Emit the GCC-specific register declarations for each machine
+ * register being used. If any STG register isn't mapped to a machine
+ * register, then map it to an offset from BaseReg.
+ *
+ * First, the general purpose registers. The idea is, if a particular
+ * general-purpose STG register can't be mapped to a real machine
+ * register, it won't be used at all. Instead, we'll use the stack.
+ *
+ * This is an improvement on the way things used to be done, when all
+ * registers were mapped to locations in the register table, and stuff
+ * was being shifted from the stack to the register table and back
+ * again for no good reason (on register-poor architectures).
+ */
+
+/* define NO_REGS to omit register declarations - used in RTS C code
+ * that needs all the STG definitions but not the global register
+ * settings.
+ */
+#define GLOBAL_REG_DECL(type,name,reg) register type name REG(reg);
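+
+/* For example, on x86-64 - where MachRegs.h defines REG(x) as __asm__("%" #x)
+ * and maps REG_R1 to rbx - GLOBAL_REG_DECL(StgUnion,R1,REG_R1) expands to
+ *
+ *     register StgUnion R1 __asm__("%rbx");
+ *
+ * pinning the STG register R1 to the machine register %rbx in every module
+ * compiled against these headers.
+ */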
+
+#if defined(REG_R1) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgUnion,R1,REG_R1)
+#else
+# define R1 (BaseReg->rR1)
+#endif
+
+#if defined(REG_R2) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgUnion,R2,REG_R2)
+#else
+# define R2 (BaseReg->rR2)
+#endif
+
+#if defined(REG_R3) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgUnion,R3,REG_R3)
+#else
+# define R3 (BaseReg->rR3)
+#endif
+
+#if defined(REG_R4) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgUnion,R4,REG_R4)
+#else
+# define R4 (BaseReg->rR4)
+#endif
+
+#if defined(REG_R5) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgUnion,R5,REG_R5)
+#else
+# define R5 (BaseReg->rR5)
+#endif
+
+#if defined(REG_R6) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgUnion,R6,REG_R6)
+#else
+# define R6 (BaseReg->rR6)
+#endif
+
+#if defined(REG_R7) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgUnion,R7,REG_R7)
+#else
+# define R7 (BaseReg->rR7)
+#endif
+
+#if defined(REG_R8) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgUnion,R8,REG_R8)
+#else
+# define R8 (BaseReg->rR8)
+#endif
+
+#if defined(REG_R9) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgUnion,R9,REG_R9)
+#else
+# define R9 (BaseReg->rR9)
+#endif
+
+#if defined(REG_R10) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgUnion,R10,REG_R10)
+#else
+# define R10 (BaseReg->rR10)
+#endif
+
+#if defined(REG_F1) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgFloat,F1,REG_F1)
+#else
+#define F1 (BaseReg->rF1)
+#endif
+
+#if defined(REG_F2) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgFloat,F2,REG_F2)
+#else
+#define F2 (BaseReg->rF2)
+#endif
+
+#if defined(REG_F3) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgFloat,F3,REG_F3)
+#else
+#define F3 (BaseReg->rF3)
+#endif
+
+#if defined(REG_F4) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgFloat,F4,REG_F4)
+#else
+#define F4 (BaseReg->rF4)
+#endif
+
+#if defined(REG_D1) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgDouble,D1,REG_D1)
+#else
+#define D1 (BaseReg->rD1)
+#endif
+
+#if defined(REG_D2) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgDouble,D2,REG_D2)
+#else
+#define D2 (BaseReg->rD2)
+#endif
+
+#if defined(REG_L1) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgWord64,L1,REG_L1)
+#else
+#define L1 (BaseReg->rL1)
+#endif
+
+/*
+ * If BaseReg isn't mapped to a machine register, just use the global
+ * address of the current register table (CurrentRegTable in
+ * concurrent Haskell, MainRegTable otherwise).
+ */
+
+/* A capability is a combination of a FunTable and a RegTable. In STG
+ * code, BaseReg normally points to the RegTable portion of this
+ * structure, so that we can index both forwards and backwards to take
+ * advantage of shorter instruction forms on some archs (eg. x86).
+ * This is a cut-down version of the Capability structure; the full
+ * version is defined in Capability.h.
+ */
+struct PartCapability_ {
+ StgFunTable f;
+ StgRegTable r;
+};
+
+/* No such thing as a MainCapability under THREADED_RTS - each thread must have
+ * its own Capability.
+ */
+#if IN_STG_CODE && !(defined(THREADED_RTS) && !defined(NOSMP))
+extern W_ MainCapability[];
+#endif
+
+/*
+ * Assigning to BaseReg (the ASSIGN_BaseReg macro): this happens on
+ * return from a "safe" foreign call, when the thread might be running
+ * on a new Capability. Obviously if BaseReg is not a register, then
+ * we are restricted to a single Capability (this invariant is enforced
+ * in Capability.c:initCapabilities), and assigning to BaseReg can be omitted.
+ */
+
+#if defined(REG_Base) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgRegTable *,BaseReg,REG_Base)
+#define ASSIGN_BaseReg(e) (BaseReg = (e))
+#else
+#if defined(THREADED_RTS) && !defined(NOSMP)
+#error BaseReg must be in a register for THREADED_RTS
+#endif
+#define BaseReg (&((struct PartCapability_ *)MainCapability)->r)
+#define ASSIGN_BaseReg(e) (e)
+#endif
+
+#if defined(REG_Sp) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(P_,Sp,REG_Sp)
+#else
+#define Sp (BaseReg->rSp)
+#endif
+
+#if defined(REG_SpLim) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(P_,SpLim,REG_SpLim)
+#else
+#define SpLim (BaseReg->rSpLim)
+#endif
+
+#if defined(REG_Hp) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(P_,Hp,REG_Hp)
+#else
+#define Hp (BaseReg->rHp)
+#endif
+
+#if defined(REG_HpLim) && !defined(NO_GLOBAL_REG_DECLS)
+#error HpLim cannot be in a register
+#else
+#define HpLim (BaseReg->rHpLim)
+#endif
+
+#if defined(REG_CurrentTSO) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(struct _StgTSO *,CurrentTSO,REG_CurrentTSO)
+#else
+#define CurrentTSO (BaseReg->rCurrentTSO)
+#endif
+
+#if defined(REG_CurrentNursery) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(bdescr *,CurrentNursery,REG_CurrentNursery)
+#else
+#define CurrentNursery (BaseReg->rCurrentNursery)
+#endif
+
+#if defined(REG_HpAlloc) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(bdescr *,HpAlloc,REG_HpAlloc)
+#else
+#define HpAlloc (BaseReg->rHpAlloc)
+#endif
+
+/* -----------------------------------------------------------------------------
+ Get absolute function pointers from the register table, to save
+ code space. On x86,
+
+ jmp *-12(%ebx)
+
+ is shorter than
+
+ jmp absolute_address
+
+ as long as the offset is within the range of a signed byte
+ (-128..+127). So we pick some common absolute_addresses and put
+ them in the register table. As a bonus, linking time should also
+ be reduced.
+
+ Other possible candidates in order of importance:
+
+ stg_upd_frame_info
+ stg_CAF_BLACKHOLE_info
+ stg_IND_STATIC_info
+
+ anything else probably isn't worth the effort.
+
+ -------------------------------------------------------------------------- */
+
+
+#define FunReg ((StgFunTable *)((void *)BaseReg - STG_FIELD_OFFSET(struct PartCapability_, r)))
+
+#define stg_EAGER_BLACKHOLE_info (FunReg->stgEagerBlackholeInfo)
+#define stg_gc_enter_1 (FunReg->stgGCEnter1)
+#define stg_gc_fun (FunReg->stgGCFun)
+
+/* -----------------------------------------------------------------------------
+ For any registers which are denoted "caller-saves" by the C calling
+ convention, we have to emit code to save and restore them across C
+ calls.
+ -------------------------------------------------------------------------- */
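+
+/* An illustrative sketch of how these macros are used (some_c_function is a
+ * hypothetical C function, not an RTS entry point): a C call made from STG
+ * land is bracketed with the bunched forms defined at the end of this file,
+ *
+ *     CALLER_SAVE_ALL
+ *     result = some_c_function(arg);
+ *     CALLER_RESTORE_ALL
+ *
+ * and each individual CALLER_SAVE_<reg> / CALLER_RESTORE_<reg> pair expands
+ * to nothing for registers that are not caller-saves on this platform.
+ */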
+
+#ifdef CALLER_SAVES_R1
+#define CALLER_SAVE_R1 SAVE_R1 = R1;
+#define CALLER_RESTORE_R1 R1 = SAVE_R1;
+#else
+#define CALLER_SAVE_R1 /* nothing */
+#define CALLER_RESTORE_R1 /* nothing */
+#endif
+
+#ifdef CALLER_SAVES_R2
+#define CALLER_SAVE_R2 SAVE_R2 = R2;
+#define CALLER_RESTORE_R2 R2 = SAVE_R2;
+#else
+#define CALLER_SAVE_R2 /* nothing */
+#define CALLER_RESTORE_R2 /* nothing */
+#endif
+
+#ifdef CALLER_SAVES_R3
+#define CALLER_SAVE_R3 SAVE_R3 = R3;
+#define CALLER_RESTORE_R3 R3 = SAVE_R3;
+#else
+#define CALLER_SAVE_R3 /* nothing */
+#define CALLER_RESTORE_R3 /* nothing */
+#endif
+
+#ifdef CALLER_SAVES_R4
+#define CALLER_SAVE_R4 SAVE_R4 = R4;
+#define CALLER_RESTORE_R4 R4 = SAVE_R4;
+#else
+#define CALLER_SAVE_R4 /* nothing */
+#define CALLER_RESTORE_R4 /* nothing */
+#endif
+
+#ifdef CALLER_SAVES_R5
+#define CALLER_SAVE_R5 SAVE_R5 = R5;
+#define CALLER_RESTORE_R5 R5 = SAVE_R5;
+#else
+#define CALLER_SAVE_R5 /* nothing */
+#define CALLER_RESTORE_R5 /* nothing */
+#endif
+
+#ifdef CALLER_SAVES_R6
+#define CALLER_SAVE_R6 SAVE_R6 = R6;
+#define CALLER_RESTORE_R6 R6 = SAVE_R6;
+#else
+#define CALLER_SAVE_R6 /* nothing */
+#define CALLER_RESTORE_R6 /* nothing */
+#endif
+
+#ifdef CALLER_SAVES_R7
+#define CALLER_SAVE_R7 SAVE_R7 = R7;
+#define CALLER_RESTORE_R7 R7 = SAVE_R7;
+#else
+#define CALLER_SAVE_R7 /* nothing */
+#define CALLER_RESTORE_R7 /* nothing */
+#endif
+
+#ifdef CALLER_SAVES_R8
+#define CALLER_SAVE_R8 SAVE_R8 = R8;
+#define CALLER_RESTORE_R8 R8 = SAVE_R8;
+#else
+#define CALLER_SAVE_R8 /* nothing */
+#define CALLER_RESTORE_R8 /* nothing */
+#endif
+
+#ifdef CALLER_SAVES_R9
+#define CALLER_SAVE_R9 SAVE_R9 = R9;
+#define CALLER_RESTORE_R9 R9 = SAVE_R9;
+#else
+#define CALLER_SAVE_R9 /* nothing */
+#define CALLER_RESTORE_R9 /* nothing */
+#endif
+
+#ifdef CALLER_SAVES_R10
+#define CALLER_SAVE_R10 SAVE_R10 = R10;
+#define CALLER_RESTORE_R10 R10 = SAVE_R10;
+#else
+#define CALLER_SAVE_R10 /* nothing */
+#define CALLER_RESTORE_R10 /* nothing */
+#endif
+
+#ifdef CALLER_SAVES_F1
+#define CALLER_SAVE_F1 SAVE_F1 = F1;
+#define CALLER_RESTORE_F1 F1 = SAVE_F1;
+#else
+#define CALLER_SAVE_F1 /* nothing */
+#define CALLER_RESTORE_F1 /* nothing */
+#endif
+
+#ifdef CALLER_SAVES_F2
+#define CALLER_SAVE_F2 SAVE_F2 = F2;
+#define CALLER_RESTORE_F2 F2 = SAVE_F2;
+#else
+#define CALLER_SAVE_F2 /* nothing */
+#define CALLER_RESTORE_F2 /* nothing */
+#endif
+
+#ifdef CALLER_SAVES_F3
+#define CALLER_SAVE_F3 SAVE_F3 = F3;
+#define CALLER_RESTORE_F3 F3 = SAVE_F3;
+#else
+#define CALLER_SAVE_F3 /* nothing */
+#define CALLER_RESTORE_F3 /* nothing */
+#endif
+
+#ifdef CALLER_SAVES_F4
+#define CALLER_SAVE_F4 SAVE_F4 = F4;
+#define CALLER_RESTORE_F4 F4 = SAVE_F4;
+#else
+#define CALLER_SAVE_F4 /* nothing */
+#define CALLER_RESTORE_F4 /* nothing */
+#endif
+
+#ifdef CALLER_SAVES_D1
+#define CALLER_SAVE_D1 SAVE_D1 = D1;
+#define CALLER_RESTORE_D1 D1 = SAVE_D1;
+#else
+#define CALLER_SAVE_D1 /* nothing */
+#define CALLER_RESTORE_D1 /* nothing */
+#endif
+
+#ifdef CALLER_SAVES_D2
+#define CALLER_SAVE_D2 SAVE_D2 = D2;
+#define CALLER_RESTORE_D2 D2 = SAVE_D2;
+#else
+#define CALLER_SAVE_D2 /* nothing */
+#define CALLER_RESTORE_D2 /* nothing */
+#endif
+
+#ifdef CALLER_SAVES_L1
+#define CALLER_SAVE_L1 SAVE_L1 = L1;
+#define CALLER_RESTORE_L1 L1 = SAVE_L1;
+#else
+#define CALLER_SAVE_L1 /* nothing */
+#define CALLER_RESTORE_L1 /* nothing */
+#endif
+
+#ifdef CALLER_SAVES_Sp
+#define CALLER_SAVE_Sp SAVE_Sp = Sp;
+#define CALLER_RESTORE_Sp Sp = SAVE_Sp;
+#else
+#define CALLER_SAVE_Sp /* nothing */
+#define CALLER_RESTORE_Sp /* nothing */
+#endif
+
+#ifdef CALLER_SAVES_SpLim
+#define CALLER_SAVE_SpLim SAVE_SpLim = SpLim;
+#define CALLER_RESTORE_SpLim SpLim = SAVE_SpLim;
+#else
+#define CALLER_SAVE_SpLim /* nothing */
+#define CALLER_RESTORE_SpLim /* nothing */
+#endif
+
+#ifdef CALLER_SAVES_Hp
+#define CALLER_SAVE_Hp SAVE_Hp = Hp;
+#define CALLER_RESTORE_Hp Hp = SAVE_Hp;
+#else
+#define CALLER_SAVE_Hp /* nothing */
+#define CALLER_RESTORE_Hp /* nothing */
+#endif
+
+#ifdef CALLER_SAVES_Base
+#ifdef THREADED_RTS
+#error "Can't have caller-saved BaseReg with THREADED_RTS"
+#endif
+#define CALLER_SAVE_Base /* nothing */
+#define CALLER_RESTORE_Base BaseReg = &MainRegTable;
+#else
+#define CALLER_SAVE_Base /* nothing */
+#define CALLER_RESTORE_Base /* nothing */
+#endif
+
+#ifdef CALLER_SAVES_CurrentTSO
+#define CALLER_SAVE_CurrentTSO SAVE_CurrentTSO = CurrentTSO;
+#define CALLER_RESTORE_CurrentTSO CurrentTSO = SAVE_CurrentTSO;
+#else
+#define CALLER_SAVE_CurrentTSO /* nothing */
+#define CALLER_RESTORE_CurrentTSO /* nothing */
+#endif
+
+#ifdef CALLER_SAVES_CurrentNursery
+#define CALLER_SAVE_CurrentNursery SAVE_CurrentNursery = CurrentNursery;
+#define CALLER_RESTORE_CurrentNursery CurrentNursery = SAVE_CurrentNursery;
+#else
+#define CALLER_SAVE_CurrentNursery /* nothing */
+#define CALLER_RESTORE_CurrentNursery /* nothing */
+#endif
+
+#ifdef CALLER_SAVES_HpAlloc
+#define CALLER_SAVE_HpAlloc SAVE_HpAlloc = HpAlloc;
+#define CALLER_RESTORE_HpAlloc HpAlloc = SAVE_HpAlloc;
+#else
+#define CALLER_SAVE_HpAlloc /* nothing */
+#define CALLER_RESTORE_HpAlloc /* nothing */
+#endif
+
+#endif /* IN_STG_CODE */
+
+/* ----------------------------------------------------------------------------
+ Handy bunches of saves/restores
+ ------------------------------------------------------------------------ */
+
+#if IN_STG_CODE
+
+#define CALLER_SAVE_USER \
+ CALLER_SAVE_R1 \
+ CALLER_SAVE_R2 \
+ CALLER_SAVE_R3 \
+ CALLER_SAVE_R4 \
+ CALLER_SAVE_R5 \
+ CALLER_SAVE_R6 \
+ CALLER_SAVE_R7 \
+ CALLER_SAVE_R8 \
+ CALLER_SAVE_F1 \
+ CALLER_SAVE_F2 \
+ CALLER_SAVE_F3 \
+ CALLER_SAVE_F4 \
+ CALLER_SAVE_D1 \
+ CALLER_SAVE_D2 \
+ CALLER_SAVE_L1
+
+ /* Save Base last, since the others may
+ be addressed relative to it */
+#define CALLER_SAVE_SYSTEM \
+ CALLER_SAVE_Sp \
+ CALLER_SAVE_SpLim \
+ CALLER_SAVE_Hp \
+ CALLER_SAVE_CurrentTSO \
+ CALLER_SAVE_CurrentNursery \
+ CALLER_SAVE_Base
+
+#define CALLER_RESTORE_USER \
+ CALLER_RESTORE_R1 \
+ CALLER_RESTORE_R2 \
+ CALLER_RESTORE_R3 \
+ CALLER_RESTORE_R4 \
+ CALLER_RESTORE_R5 \
+ CALLER_RESTORE_R6 \
+ CALLER_RESTORE_R7 \
+ CALLER_RESTORE_R8 \
+ CALLER_RESTORE_F1 \
+ CALLER_RESTORE_F2 \
+ CALLER_RESTORE_F3 \
+ CALLER_RESTORE_F4 \
+ CALLER_RESTORE_D1 \
+ CALLER_RESTORE_D2 \
+ CALLER_RESTORE_L1
+
+ /* Restore Base first, since the others may
+ be addressed relative to it */
+#define CALLER_RESTORE_SYSTEM \
+ CALLER_RESTORE_Base \
+ CALLER_RESTORE_Sp \
+ CALLER_RESTORE_SpLim \
+ CALLER_RESTORE_Hp \
+ CALLER_RESTORE_CurrentTSO \
+ CALLER_RESTORE_CurrentNursery
+
+#else /* not IN_STG_CODE */
+
+#define CALLER_SAVE_USER /* nothing */
+#define CALLER_SAVE_SYSTEM /* nothing */
+#define CALLER_RESTORE_USER /* nothing */
+#define CALLER_RESTORE_SYSTEM /* nothing */
+
+#endif /* IN_STG_CODE */
+
+#define CALLER_SAVE_ALL \
+ CALLER_SAVE_SYSTEM \
+ CALLER_SAVE_USER
+
+#define CALLER_RESTORE_ALL \
+ CALLER_RESTORE_SYSTEM \
+ CALLER_RESTORE_USER
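+
+/* A typical use (illustrative sketch only, not a real RTS call site):
+   wrap a call out to arbitrary C code made from STG code, so that any
+   STG registers living in caller-saved machine registers survive the
+   call.  some_c_function, arg and result are hypothetical names:
+
+     CALLER_SAVE_ALL
+     result = some_c_function(arg);
+     CALLER_RESTORE_ALL
+*/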
+
+#endif /* REGS_H */
diff --git a/includes/stg/SMP.h b/includes/stg/SMP.h
new file mode 100644
index 0000000000..5d9d80169b
--- /dev/null
+++ b/includes/stg/SMP.h
@@ -0,0 +1,313 @@
+/* ----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2005-2008
+ *
+ * Macros for multi-CPU support
+ *
+ * -------------------------------------------------------------------------- */
+
+#ifndef SMP_H
+#define SMP_H
+
+#if defined(THREADED_RTS)
+
+/* ----------------------------------------------------------------------------
+ Atomic operations
+ ------------------------------------------------------------------------- */
+
+#if !IN_STG_CODE || IN_STGCRUN
+// We only want the barriers, e.g. write_barrier(), declared in .hc
+// files. Defining the other inline functions here causes type
+// mismatch errors from gcc, because the generated C code is assuming
+// that there are no prototypes in scope.
+
+/*
+ * The atomic exchange operation: xchg(p,w) exchanges the value
+ * pointed to by p with the value w, returning the old value.
+ *
+ * Used for locking closures during updates (see lockClosure())
+ * and the MVar primops.
+ */
+EXTERN_INLINE StgWord xchg(StgPtr p, StgWord w);
+
+/*
+ * Compare-and-swap. Atomically does this:
+ *
+ * cas(p,o,n) {
+ * r = *p;
+ * if (r == o) { *p = n };
+ * return r;
+ * }
+ */
+EXTERN_INLINE StgWord cas(StgVolatilePtr p, StgWord o, StgWord n);
+
+/*
+ * Atomic increment
+ *
+ * atomic_inc(p) {
+ * return ++(*p);
+ * }
+ */
+EXTERN_INLINE StgWord atomic_inc(StgVolatilePtr p);
+
+/*
+ * Atomic decrement
+ *
+ * atomic_dec(p) {
+ * return --(*p);
+ * }
+ */
+EXTERN_INLINE StgWord atomic_dec(StgVolatilePtr p);
+
+#endif // !IN_STG_CODE
+
+/*
+ * Various kinds of memory barrier.
+ * write_barrier: prevents future stores occurring before preceding stores.
+ * store_load_barrier: prevents future loads occurring before preceding stores.
+ * load_load_barrier: prevents future loads occurring before earlier loads.
+ *
+ * Reference for these: "The JSR-133 Cookbook for Compiler Writers"
+ * http://gee.cs.oswego.edu/dl/jmm/cookbook.html
+ *
+ * To check whether you got these right, try the test in
+ * testsuite/tests/ghc-regress/rts/testwsdeque.c
+ * This tests the work-stealing deque implementation, which relies on
+ * properly working store_load and load_load memory barriers.
+ */
+EXTERN_INLINE void write_barrier(void);
+EXTERN_INLINE void store_load_barrier(void);
+EXTERN_INLINE void load_load_barrier(void);
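+
+/* A small illustrative sketch (not part of the RTS API): publishing a
+   newly built object to another CPU.  The writer initialises the
+   payload and issues a write_barrier() before making it reachable; a
+   reader that picks up the pointer issues a load_load_barrier() before
+   reading the payload.  payload, shared_ptr, q, x and y are
+   hypothetical names:
+
+     payload[0] = x;          writer: fill in the object
+     write_barrier();         make sure the store above is visible first
+     shared_ptr = payload;    then publish it
+
+     q = shared_ptr;          reader: pick up the pointer
+     load_load_barrier();     order the following loads after it
+     y = q[0];
+*/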
+
+/* ----------------------------------------------------------------------------
+ Implementations
+ ------------------------------------------------------------------------- */
+
+#if !IN_STG_CODE || IN_STGCRUN
+
+EXTERN_INLINE StgWord
+xchg(StgPtr p, StgWord w)
+{
+ StgWord result;
+#if i386_HOST_ARCH || x86_64_HOST_ARCH
+ result = w;
+ __asm__ __volatile__ (
+ // NB: the xchg instruction is implicitly locked, so we do not
+ // need a lock prefix here.
+ "xchg %1,%0"
+ :"+r" (result), "+m" (*p)
+ : /* no input-only operands */
+ );
+#elif powerpc_HOST_ARCH
+ __asm__ __volatile__ (
+ "1: lwarx %0, 0, %2\n"
+ " stwcx. %1, 0, %2\n"
+ " bne- 1b"
+ :"=&r" (result)
+ :"r" (w), "r" (p)
+ );
+#elif sparc_HOST_ARCH
+ result = w;
+ __asm__ __volatile__ (
+ "swap %1,%0"
+ : "+r" (result), "+m" (*p)
+ : /* no input-only operands */
+ );
+#elif !defined(WITHSMP)
+ result = *p;
+ *p = w;
+#else
+#error xchg() unimplemented on this architecture
+#endif
+ return result;
+}
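+
+/* Illustrative sketch only (not part of the RTS API): a minimal
+   spin-lock built from xchg(), assuming a word-sized cell lock_word
+   where 0 means unlocked and 1 means locked:
+
+     while (xchg(&lock_word, 1) != 0) { }     spin until we saw 0
+     ... critical section ...
+     write_barrier();
+     lock_word = 0;                           release the lock
+*/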
+
+/*
+ * CMPXCHG - the single-word atomic compare-and-exchange instruction. Used
+ * in the STM implementation.
+ */
+EXTERN_INLINE StgWord
+cas(StgVolatilePtr p, StgWord o, StgWord n)
+{
+#if i386_HOST_ARCH || x86_64_HOST_ARCH
+ __asm__ __volatile__ (
+ "lock\ncmpxchg %3,%1"
+ :"=a"(o), "=m" (*(volatile unsigned int *)p)
+ :"0" (o), "r" (n));
+ return o;
+#elif powerpc_HOST_ARCH
+ StgWord result;
+ __asm__ __volatile__ (
+ "1: lwarx %0, 0, %3\n"
+ " cmpw %0, %1\n"
+ " bne 2f\n"
+ " stwcx. %2, 0, %3\n"
+ " bne- 1b\n"
+ "2:"
+ :"=&r" (result)
+ :"r" (o), "r" (n), "r" (p)
+ :"cc", "memory"
+ );
+ return result;
+#elif sparc_HOST_ARCH
+ __asm__ __volatile__ (
+ "cas [%1], %2, %0"
+ : "+r" (n)
+ : "r" (p), "r" (o)
+ : "memory"
+ );
+ return n;
+#elif !defined(WITHSMP)
+ StgWord result;
+ result = *p;
+ if (result == o) {
+ *p = n;
+ }
+ return result;
+#else
+#error cas() unimplemented on this architecture
+#endif
+}
+
+EXTERN_INLINE StgWord
+atomic_inc(StgVolatilePtr p)
+{
+#if defined(i386_HOST_ARCH) || defined(x86_64_HOST_ARCH)
+ StgWord r;
+ r = 1;
+ __asm__ __volatile__ (
+ "lock\nxadd %0,%1":
+ "+r" (r), "+m" (*p):
+ );
+ return r+1;
+#else
+ StgWord old, new;
+ do {
+ old = *p;
+ new = old + 1;
+ } while (cas(p, old, new) != old);
+ return new;
+#endif
+}
+
+EXTERN_INLINE StgWord
+atomic_dec(StgVolatilePtr p)
+{
+#if defined(i386_HOST_ARCH) || defined(x86_64_HOST_ARCH)
+ StgWord r;
+ r = (StgWord)-1;
+ __asm__ __volatile__ (
+ "lock\nxadd %0,%1":
+ "+r" (r), "+m" (*p):
+ );
+ return r-1;
+#else
+ StgWord old, new;
+ do {
+ old = *p;
+ new = old - 1;
+ } while (cas(p, old, new) != old);
+ return new;
+#endif
+}
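+
+/* Illustrative sketch only: these are typically used for simple shared
+   counters, e.g. a reference count (refs is a hypothetical
+   StgVolatilePtr pointing at the count):
+
+     atomic_inc(refs);                     take a reference
+     if (atomic_dec(refs) == 0) {          drop it again
+         ... last reference gone ...
+     }
+*/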
+
+#endif // !IN_STG_CODE
+
+/*
+ * We need to tell both the compiler AND the CPU about the barriers.
+ * It's no good preventing the CPU from reordering the operations if
+ * the compiler has already done so - hence the "memory" restriction
+ * on each of the barriers below.
+ */
+EXTERN_INLINE void
+write_barrier(void) {
+#if i386_HOST_ARCH || x86_64_HOST_ARCH
+ __asm__ __volatile__ ("" : : : "memory");
+#elif powerpc_HOST_ARCH
+ __asm__ __volatile__ ("lwsync" : : : "memory");
+#elif sparc_HOST_ARCH
+ /* Sparc in TSO mode does not require store/store barriers. */
+ __asm__ __volatile__ ("" : : : "memory");
+#elif !defined(WITHSMP)
+ return;
+#else
+#error memory barriers unimplemented on this architecture
+#endif
+}
+
+EXTERN_INLINE void
+store_load_barrier(void) {
+#if i386_HOST_ARCH
+ __asm__ __volatile__ ("lock; addl $0,0(%%esp)" : : : "memory");
+#elif x86_64_HOST_ARCH
+ __asm__ __volatile__ ("lock; addq $0,0(%%rsp)" : : : "memory");
+#elif powerpc_HOST_ARCH
+ __asm__ __volatile__ ("sync" : : : "memory");
+#elif sparc_HOST_ARCH
+ __asm__ __volatile__ ("membar #StoreLoad" : : : "memory");
+#elif !defined(WITHSMP)
+ return;
+#else
+#error memory barriers unimplemented on this architecture
+#endif
+}
+
+EXTERN_INLINE void
+load_load_barrier(void) {
+#if i386_HOST_ARCH
+ __asm__ __volatile__ ("" : : : "memory");
+#elif x86_64_HOST_ARCH
+ __asm__ __volatile__ ("" : : : "memory");
+#elif powerpc_HOST_ARCH
+ __asm__ __volatile__ ("lwsync" : : : "memory");
+#elif sparc_HOST_ARCH
+ /* Sparc in TSO mode does not require load/load barriers. */
+ __asm__ __volatile__ ("" : : : "memory");
+#elif !defined(WITHSMP)
+ return;
+#else
+#error memory barriers unimplemented on this architecture
+#endif
+}
+
+/* ---------------------------------------------------------------------- */
+#else /* !THREADED_RTS */
+
+#define write_barrier() /* nothing */
+#define store_load_barrier() /* nothing */
+#define load_load_barrier() /* nothing */
+
+INLINE_HEADER StgWord
+xchg(StgPtr p, StgWord w)
+{
+ StgWord old = *p;
+ *p = w;
+ return old;
+}
+
+STATIC_INLINE StgWord
+cas(StgVolatilePtr p, StgWord o, StgWord n)
+{
+ StgWord result;
+ result = *p;
+ if (result == o) {
+ *p = n;
+ }
+ return result;
+}
+
+INLINE_HEADER StgWord
+atomic_inc(StgVolatilePtr p)
+{
+ return ++(*p);
+}
+
+INLINE_HEADER StgWord
+atomic_dec(StgVolatilePtr p)
+{
+ return --(*p);
+}
+
+#endif /* !THREADED_RTS */
+
+#endif /* SMP_H */
diff --git a/includes/stg/TailCalls.h b/includes/stg/TailCalls.h
new file mode 100644
index 0000000000..854c7b4b18
--- /dev/null
+++ b/includes/stg/TailCalls.h
@@ -0,0 +1,304 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-1999
+ *
+ * Stuff for implementing proper tail jumps.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifndef TAILCALLS_H
+#define TAILCALLS_H
+
+/* -----------------------------------------------------------------------------
+   Unmangled tail-jumping: use the mini interpreter.
+ -------------------------------------------------------------------------- */
+
+#ifdef USE_MINIINTERPRETER
+
+#define JMP_(cont) return((StgFunPtr)(cont))
+#define FB_
+#define FE_
+
+#else
+
+extern void __DISCARD__(void);
+
+/* -----------------------------------------------------------------------------
+ Tail calling on x86
+ -------------------------------------------------------------------------- */
+
+#if i386_HOST_ARCH
+
+/* Note about discard: possibly there to fool GCC into clearing up
+   before we do the jump, e.g. if there are some arguments left on the C
+ stack that GCC hasn't popped yet. Also possibly to fool any
+ optimisations (a function call often acts as a barrier). Not sure
+ if any of this is necessary now -- SDM
+
+ Comment to above note: I don't think the __DISCARD__() in JMP_ is
+ necessary. Arguments should be popped from the C stack immediately
+ after returning from a function, as long as we pass -fno-defer-pop
+ to gcc. Moreover, a goto to a first-class label acts as a barrier
+ for optimisations in the same way a function call does.
+ -= chak
+ */
+
+/* The goto here seems to cause gcc -O2 to delete all the code after
+ it - including the FE_ marker and the epilogue code - exactly what
+ we want! -- SDM
+ */
+
+#define JMP_(cont) \
+ { \
+ void *__target; \
+ __DISCARD__(); \
+ __target = (void *)(cont); \
+ goto *__target; \
+ }
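+
+/* Illustrative use only: generated .hc code jumps to a code pointer it
+   has computed earlier (target is a hypothetical variable holding such
+   a pointer):
+
+     JMP_(target);
+*/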
+
+#endif /* i386_HOST_ARCH */
+
+/* -----------------------------------------------------------------------------
+ Tail calling on x86_64
+ -------------------------------------------------------------------------- */
+
+#if x86_64_HOST_ARCH
+
+/*
+ NOTE about __DISCARD__():
+
+ On x86_64 this is necessary to work around bugs in the register
+ variable support in gcc. Without the __DISCARD__() call, gcc will
+  silently throw away assignments to global register variables that
+ happen before the jump.
+
+ Here's the example:
+
+ extern void g(void);
+ static void f(void) {
+ R1 = g;
+        __DISCARD__();
+ goto *R1;
+ }
+
+  Without the dummy function call, gcc (observed with gcc 3.4.3) throws
+  away the assignment to R1; see gcc bug #20359.
+*/
+
+#define JMP_(cont) \
+ { \
+ __DISCARD__(); \
+ goto *(void *)(cont); \
+ }
+
+#endif /* x86_64_HOST_ARCH */
+
+/* -----------------------------------------------------------------------------
+ Tail calling on Sparc
+ -------------------------------------------------------------------------- */
+
+#ifdef sparc_HOST_ARCH
+
+#define JMP_(cont) ((F_) (cont))()
+ /* Oh so happily, the above turns into a "call" instruction,
+ which, on a SPARC, is nothing but a "jmpl" with the
+ return address in %o7 [which we don't care about].
+ */
+
+/* Don't need these for sparc mangling */
+#define FB_
+#define FE_
+
+#endif /* sparc_HOST_ARCH */
+
+/* -----------------------------------------------------------------------------
+ Tail calling on Alpha
+ -------------------------------------------------------------------------- */
+
+#ifdef alpha_HOST_ARCH
+
+#if IN_STG_CODE
+register void *_procedure __asm__("$27");
+#endif
+
+#define JMP_(cont) \
+ do { _procedure = (void *)(cont); \
+ __DISCARD__(); \
+ goto *_procedure; \
+ } while(0)
+
+/* Don't need these for alpha mangling */
+#define FB_
+#define FE_
+
+#endif /* alpha_HOST_ARCH */
+
+/* -----------------------------------------------------------------------------
+ Tail calling on HP
+
+Description of HP's weird procedure linkage, many thanks to Andy Bennet
+<andy_bennett@hp.com>:
+
+I've been digging a little further into the problem of how HP-UX does
+dynamic procedure calls. My solution in the last e-mail, inserting an extra
+'if' statement into the JMP_, is I think probably the best general solution
+I can come up with. There are still a few problems with it, however: it
+won't work if JMP_ ever has to call anything in a shared library; if that
+is likely to be required it'll need something more elaborate. It also won't
+work with PA-RISC 2.0 wide mode (64-bit), which uses a different format PLT.
+
+I had some feedback from someone in HP's compiler lab and the problem
+relates to the linker on HP-UX, not gcc as I first suspected. The reason the
+'hsc' executable works is most likely due to a change in 'ld's behaviour for
+performance reasons between your revision and mine.
+
+The major issue relating to this is shared libraries and how they are
+implemented under HP-UX. The whole point of the Procedure Label Table (PLT) is
+to allow a function pointer to hold the address of the function and a
+pointer to the library's global data lookup table (DLT) used by position
+independent code (PIC). This makes the PLT absolutely essential for shared
+library calls. HP has two linker introduced assembly functions for dealing
+with dynamic calls, $$dyncall and $$dyncall_external. The former does a
+check to see if the address is a PLT pointer and dereferences if necessary
+or just calls the address otherwise; the latter skips the check and just
+does the indirect jump no matter what.
+
+Since $$dyncall_external runs faster due to its not having the test, the
+linker nowadays prefers to generate calls to that, rather than $$dyncall. It
+makes this decision based on the presence of any shared library. If it even
+smells an sl's existence at link time, it rigs the runtime system to
+generate PLT references for everything on the assumption that the result
+will be slightly more efficient. This is what is crashing GHC since the
+calls it is generating have no understanding of the procedure label proper.
+The only way to get real addresses is to link everything archive, including
+system libraries, at which point it assumes you probably are going to be
+using calls similar to GHC's (it's rigged for HP's +ESfic compiler option)
+but uses $$dyncall if necessary to cope, just in case you aren't.
+
+ -------------------------------------------------------------------------- */
+
+#ifdef hppa1_1_hp_hpux_TARGET
+
+#define JMP_(cont) \
+ do { void *_procedure = (void *)(cont); \
+ if (((int) _procedure) & 2) \
+ _procedure = (void *)(*((int *) (_procedure - 2))); \
+ goto *_procedure; \
+ } while(0)
+
+#endif /* hppa1_1_hp_hpux_TARGET */
+
+/* -----------------------------------------------------------------------------
+ Tail calling on PowerPC
+ -------------------------------------------------------------------------- */
+
+#ifdef powerpc_HOST_ARCH
+
+#define JMP_(cont) \
+ { \
+ void *target; \
+ target = (void *)(cont); \
+ __DISCARD__(); \
+ goto *target; \
+ }
+
+/*
+ The __DISCARD__ is there because Apple's April 2002 Beta of GCC 3.1
+ sometimes generates incorrect code otherwise.
+ It tends to "forget" to update global register variables in the presence
+ of decrement/increment operators:
+ JMP_(*(--Sp)) is wrongly compiled as JMP_(Sp[-1]).
+ Calling __DISCARD__ in between works around this problem.
+*/
+
+/*
+ I would _love_ to use the following instead,
+ but some versions of Apple's GCC fail to generate code for it
+ if it is called for a casted data pointer - which is exactly what
+ we are going to do...
+
+ #define JMP_(cont) ((F_) (cont))()
+*/
+
+#endif /* powerpc_HOST_ARCH */
+
+#ifdef powerpc64_HOST_ARCH
+#define JMP_(cont) ((F_) (cont))()
+#endif
+
+/* -----------------------------------------------------------------------------
+ Tail calling on IA64
+ -------------------------------------------------------------------------- */
+
+#ifdef ia64_HOST_ARCH
+
+/* The compiler can more intelligently decide how to do this. We therefore
+ * implement it as a call and optimise to a jump at mangle time.
+ *
+ * Sometimes GCC likes to move instructions between the function call and
+ * the "--- TAILCALL ---". To stop it from finding instructions to put
+ * there, we insert a jump to the end of the function after the TAILCALL. */
+#define JMP_(cont) \
+ ((F_) (cont))(); \
+ __asm__ volatile ("--- TAILCALL ---"); \
+ goto _function_end;
+
+#define FE_ _function_end: __asm__ volatile ("--- END ---");
+
+/* Don't emit calls to __DISCARD__ as this causes hassles */
+#define __DISCARD__()
+
+#endif
+
+/* -----------------------------------------------------------------------------
+ Tail calling on MIPS
+ -------------------------------------------------------------------------- */
+
+#ifdef mips_HOST_ARCH
+
+#if IN_STG_CODE
+register void *_procedure __asm__("$25");
+#endif
+
+#define JMP_(cont) \
+ { \
+ _procedure = (void *)(cont); \
+ __DISCARD__(); \
+ goto *_procedure; \
+ }
+
+/* Don't need these for MIPS mangling */
+#define FB_
+#define FE_
+
+#endif /* mips_HOST_ARCH */
+
+/* -----------------------------------------------------------------------------
+ FUNBEGIN and FUNEND.
+
+ These are markers indicating the start and end of Real Code in a
+ function. All instructions between the actual start and end of the
+   function and these markers are shredded by the mangler.
+ -------------------------------------------------------------------------- */
+
+/* The following __DISCARD__() has become necessary with gcc 2.96 on x86.
+ * It prevents gcc from moving stack manipulation code from the function
+ * body (aka the Real Code) into the function prologue, ie, from moving it
+ * over the --- BEGIN --- marker. It should be noted that (like some
+ * other black magic in GHC's code), there is no essential reason why gcc
+ * could not move some stack manipulation code across the __DISCARD__() -
+ * it just doesn't choose to do it at the moment.
+ * -= chak
+ */
+
+#ifndef FB_
+#define FB_ __asm__ volatile ("--- BEGIN ---"); __DISCARD__ ();
+#endif
+
+#ifndef FE_
+#define FE_ __asm__ volatile ("--- END ---");
+#endif
+
+#endif /* !USE_MINIINTERPRETER */
+
+#endif /* TAILCALLS_H */
diff --git a/includes/stg/Ticky.h b/includes/stg/Ticky.h
new file mode 100644
index 0000000000..fd7edf85c5
--- /dev/null
+++ b/includes/stg/Ticky.h
@@ -0,0 +1,188 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2007
+ *
+ * Declarations for counters used by ticky-ticky profiling.
+ *
+ * -------------------------------------------------------------------------- */
+
+
+#ifndef TICKYCOUNTERS_H
+#define TICKYCOUNTERS_H
+
+/* These should probably be automatically generated in order to
+ keep them consistent with the macros that use them (which are
+   defined in Cmm.h). */
+
+#ifdef TICKY_TICKY
+/* same trick as in the former StgTicky.h: recycle the same declarations
+ for both extern decls (which are included everywhere)
+ and initializations (which only happen once) */
+#ifdef TICKY_C
+#define INIT(ializer) = ializer
+#define EXTERN
+#else
+#define INIT(ializer)
+#define EXTERN extern
+#endif
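+
+/* For example, when TICKY_C is defined the declaration
+
+     EXTERN StgInt ENT_VIA_NODE_ctr INIT(0);
+
+   below expands to the definition
+
+     StgInt ENT_VIA_NODE_ctr = 0;
+
+   and in every other compilation unit it expands to
+
+     extern StgInt ENT_VIA_NODE_ctr;
+*/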
+
+/* Here are all the counter declarations: */
+
+EXTERN StgInt ENT_VIA_NODE_ctr INIT(0);
+EXTERN StgInt ENT_STATIC_THK_ctr INIT(0);
+EXTERN StgInt ENT_DYN_THK_ctr INIT(0);
+EXTERN StgInt ENT_STATIC_FUN_DIRECT_ctr INIT(0);
+EXTERN StgInt ENT_DYN_FUN_DIRECT_ctr INIT(0);
+EXTERN StgInt ENT_STATIC_CON_ctr INIT(0);
+EXTERN StgInt ENT_DYN_CON_ctr INIT(0);
+EXTERN StgInt ENT_STATIC_IND_ctr INIT(0);
+EXTERN StgInt ENT_DYN_IND_ctr INIT(0);
+EXTERN StgInt ENT_PERM_IND_ctr INIT(0);
+EXTERN StgInt ENT_PAP_ctr INIT(0);
+EXTERN StgInt ENT_AP_ctr INIT(0);
+EXTERN StgInt ENT_AP_STACK_ctr INIT(0);
+EXTERN StgInt ENT_BH_ctr INIT(0);
+
+EXTERN StgInt UNKNOWN_CALL_ctr INIT(0);
+
+EXTERN StgInt SLOW_CALL_v_ctr INIT(0);
+EXTERN StgInt SLOW_CALL_f_ctr INIT(0);
+EXTERN StgInt SLOW_CALL_d_ctr INIT(0);
+EXTERN StgInt SLOW_CALL_l_ctr INIT(0);
+EXTERN StgInt SLOW_CALL_n_ctr INIT(0);
+EXTERN StgInt SLOW_CALL_p_ctr INIT(0);
+EXTERN StgInt SLOW_CALL_pv_ctr INIT(0);
+EXTERN StgInt SLOW_CALL_pp_ctr INIT(0);
+EXTERN StgInt SLOW_CALL_ppv_ctr INIT(0);
+EXTERN StgInt SLOW_CALL_ppp_ctr INIT(0);
+EXTERN StgInt SLOW_CALL_pppv_ctr INIT(0);
+EXTERN StgInt SLOW_CALL_pppp_ctr INIT(0);
+EXTERN StgInt SLOW_CALL_ppppp_ctr INIT(0);
+EXTERN StgInt SLOW_CALL_pppppp_ctr INIT(0);
+EXTERN StgInt SLOW_CALL_OTHER_ctr INIT(0);
+
+EXTERN StgInt ticky_slow_call_unevald;
+EXTERN StgInt SLOW_CALL_ctr INIT(0);
+EXTERN StgInt MULTI_CHUNK_SLOW_CALL_ctr INIT(0);
+EXTERN StgInt MULTI_CHUNK_SLOW_CALL_CHUNKS_ctr INIT(0);
+EXTERN StgInt KNOWN_CALL_ctr INIT(0);
+EXTERN StgInt KNOWN_CALL_TOO_FEW_ARGS_ctr INIT(0);
+EXTERN StgInt KNOWN_CALL_EXTRA_ARGS_ctr INIT(0);
+EXTERN StgInt SLOW_CALL_FUN_TOO_FEW_ctr INIT(0);
+EXTERN StgInt SLOW_CALL_FUN_CORRECT_ctr INIT(0);
+EXTERN StgInt SLOW_CALL_FUN_TOO_MANY_ctr INIT(0);
+EXTERN StgInt SLOW_CALL_PAP_TOO_FEW_ctr INIT(0);
+EXTERN StgInt SLOW_CALL_PAP_CORRECT_ctr INIT(0);
+EXTERN StgInt SLOW_CALL_PAP_TOO_MANY_ctr INIT(0);
+EXTERN StgInt SLOW_CALL_UNEVALD_ctr INIT(0);
+
+
+EXTERN StgInt UPDF_OMITTED_ctr INIT(0);
+EXTERN StgInt UPDF_PUSHED_ctr INIT(0);
+EXTERN StgInt CATCHF_PUSHED_ctr INIT(0);
+EXTERN StgInt UPDF_RCC_PUSHED_ctr INIT(0);
+EXTERN StgInt UPDF_RCC_OMITTED_ctr INIT(0);
+
+EXTERN StgInt UPD_SQUEEZED_ctr INIT(0);
+EXTERN StgInt UPD_CON_IN_NEW_ctr INIT(0);
+EXTERN StgInt UPD_CON_IN_PLACE_ctr INIT(0);
+EXTERN StgInt UPD_PAP_IN_NEW_ctr INIT(0);
+EXTERN StgInt UPD_PAP_IN_PLACE_ctr INIT(0);
+
+EXTERN StgInt ALLOC_HEAP_ctr INIT(0);
+EXTERN StgInt ALLOC_HEAP_tot;
+
+EXTERN StgInt ALLOC_FUN_ctr INIT(0);
+EXTERN StgInt ALLOC_FUN_adm;
+EXTERN StgInt ALLOC_FUN_gds;
+EXTERN StgInt ALLOC_FUN_slp;
+
+EXTERN StgInt UPD_NEW_IND_ctr INIT(0);
+EXTERN StgInt UPD_NEW_PERM_IND_ctr INIT(0);
+EXTERN StgInt UPD_OLD_IND_ctr INIT(0);
+EXTERN StgInt UPD_OLD_PERM_IND_ctr INIT(0);
+
+EXTERN StgInt UPD_BH_UPDATABLE_ctr INIT(0);
+EXTERN StgInt UPD_BH_SINGLE_ENTRY_ctr INIT(0);
+EXTERN StgInt UPD_CAF_BH_UPDATABLE_ctr INIT(0);
+EXTERN StgInt UPD_CAF_BH_SINGLE_ENTRY_ctr INIT(0);
+
+EXTERN StgInt GC_SEL_ABANDONED_ctr INIT(0);
+EXTERN StgInt GC_SEL_MINOR_ctr INIT(0);
+EXTERN StgInt GC_SEL_MAJOR_ctr INIT(0);
+
+EXTERN StgInt GC_FAILED_PROMOTION_ctr INIT(0);
+
+EXTERN StgInt GC_WORDS_COPIED_ctr INIT(0);
+
+EXTERN StgInt ALLOC_UP_THK_ctr INIT(0);
+EXTERN StgInt ALLOC_SE_THK_ctr INIT(0);
+EXTERN StgInt ALLOC_THK_adm INIT(0);
+EXTERN StgInt ALLOC_THK_gds INIT(0);
+EXTERN StgInt ALLOC_THK_slp INIT(0);
+
+EXTERN StgInt ALLOC_CON_ctr INIT(0);
+EXTERN StgInt ALLOC_CON_adm INIT(0);
+EXTERN StgInt ALLOC_CON_gds INIT(0);
+EXTERN StgInt ALLOC_CON_slp INIT(0);
+
+EXTERN StgInt ALLOC_TUP_ctr INIT(0);
+EXTERN StgInt ALLOC_TUP_adm INIT(0);
+EXTERN StgInt ALLOC_TUP_gds INIT(0);
+EXTERN StgInt ALLOC_TUP_slp INIT(0);
+
+EXTERN StgInt ALLOC_BH_ctr INIT(0);
+EXTERN StgInt ALLOC_BH_adm INIT(0);
+EXTERN StgInt ALLOC_BH_gds INIT(0);
+EXTERN StgInt ALLOC_BH_slp INIT(0);
+
+EXTERN StgInt ALLOC_PRIM_ctr INIT(0);
+EXTERN StgInt ALLOC_PRIM_adm INIT(0);
+EXTERN StgInt ALLOC_PRIM_gds INIT(0);
+EXTERN StgInt ALLOC_PRIM_slp INIT(0);
+
+EXTERN StgInt ALLOC_PAP_ctr INIT(0);
+EXTERN StgInt ALLOC_PAP_adm INIT(0);
+EXTERN StgInt ALLOC_PAP_gds INIT(0);
+EXTERN StgInt ALLOC_PAP_slp INIT(0);
+
+EXTERN StgInt ALLOC_TSO_ctr INIT(0);
+EXTERN StgInt ALLOC_TSO_adm INIT(0);
+EXTERN StgInt ALLOC_TSO_gds INIT(0);
+EXTERN StgInt ALLOC_TSO_slp INIT(0);
+
+EXTERN StgInt RET_NEW_ctr INIT(0);
+EXTERN StgInt RET_OLD_ctr INIT(0);
+EXTERN StgInt RET_UNBOXED_TUP_ctr INIT(0);
+
+EXTERN StgInt RET_SEMI_loads_avoided INIT(0);
+
+/* End of counter declarations. */
+
+#endif /* TICKY_TICKY */
+
+/* This is ugly, but the story is:
+   we got rid of StgTicky.h, which previously defined these macros
+   for the benefit of C code, so we define them here instead (as
+   no-ops), since the real macros are only defined in Cmm.h.
+
+   Note that these macros must be defined whether
+   TICKY_TICKY is defined or not. */
+
+#ifndef CMINUSMINUS
+#define TICK_ALLOC_PRIM(x,y,z)
+#define TICK_UPD_OLD_IND()
+#define TICK_UPD_NEW_IND()
+#define TICK_UPD_SQUEEZED()
+#define TICK_ALLOC_HEAP_NOCTR(x)
+#define TICK_GC_WORDS_COPIED(x)
+#define TICK_GC_FAILED_PROMOTION()
+#define TICK_ALLOC_TSO(g,s)
+#define TICK_ALLOC_UP_THK(g,s)
+#define TICK_ALLOC_SE_THK(g,s)
+
+#endif
+
+
+#endif /* TICKYCOUNTERS_H */
diff --git a/includes/stg/Types.h b/includes/stg/Types.h
new file mode 100644
index 0000000000..227356c9ea
--- /dev/null
+++ b/includes/stg/Types.h
@@ -0,0 +1,135 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2004
+ *
+ * Various C datatypes used in the run-time system. This is the
+ * lowest-level include file, after ghcconfig.h and RtsConfig.h.
+ *
+ * This module should define types *only*, all beginning with "Stg".
+ *
+ * Specifically:
+
+ StgInt8, 16, 32, 64
+ StgWord8, 16, 32, 64
+ StgChar, StgFloat, StgDouble
+
+ ***** All the same size (i.e. sizeof(void *)): *****
+ StgPtr Basic pointer type
+ StgWord Unit of heap allocation
+ StgInt Signed version of StgWord
+ StgAddr Generic address type
+
+ StgBool, StgVoid, StgPtr, StgOffset,
+ StgCode, StgStablePtr, StgFunPtr,
+ StgUnion.
+
+ * WARNING: Keep this file, MachDeps.h, and HsFFI.h in synch!
+ *
+ * NOTE: assumes #include "ghcconfig.h"
+ *
+ * Works with or without _POSIX_SOURCE.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifndef STGTYPES_H
+#define STGTYPES_H
+
+/*
+ * First, platform-dependent definitions of size-specific integers.
+ * Assume for now that the int type is 32 bits.
+ * NOTE: Synch the following definitions with MachDeps.h!
+ * ToDo: move these into a platform-dependent file.
+ */
+
+typedef signed char StgInt8;
+typedef unsigned char StgWord8;
+
+typedef signed short StgInt16;
+typedef unsigned short StgWord16;
+
+#if SIZEOF_LONG == 4
+typedef signed long StgInt32;
+typedef unsigned long StgWord32;
+#elif SIZEOF_INT == 4
+typedef signed int StgInt32;
+typedef unsigned int StgWord32;
+#else
+#error GHC untested on this architecture: sizeof(int) != 4
+#endif
+
+#if SIZEOF_LONG == 8
+typedef signed long StgInt64;
+typedef unsigned long StgWord64;
+#elif defined(__MSVC__)
+typedef __int64 StgInt64;
+typedef unsigned __int64 StgWord64;
+#elif SIZEOF_LONG_LONG == 8
+typedef signed long long int StgInt64;
+typedef unsigned long long int StgWord64;
+#else
+#error cannot find a way to define StgInt64
+#endif
+
+/*
+ * Define the standard word size we'll use on this machine: make it
+ * big enough to hold a pointer.
+ *
+ * It's useful if StgInt/StgWord are always the same as long, so that
+ * we can use a consistent printf format specifier without warnings on
+ * any platform.  Fortunately this works at the moment; if it breaks
+ * in the future we'll have to start using macros for format
+ * specifiers (c.f. FMT_StgWord64 in Rts.h).
+ */
+
+#if SIZEOF_VOID_P == 8
+typedef StgInt64 StgInt;
+typedef StgWord64 StgWord;
+typedef StgInt32 StgHalfInt;
+typedef StgWord32 StgHalfWord;
+#else
+#if SIZEOF_VOID_P == 4
+typedef StgInt32 StgInt;
+typedef StgWord32 StgWord;
+typedef StgInt16 StgHalfInt;
+typedef StgWord16 StgHalfWord;
+#else
+#error GHC untested on this architecture: sizeof(void *) != 4 or 8
+#endif
+#endif
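+
+/* For example (illustrative only): because StgWord is long-sized, a
+   word-sized counter can be printed with the same format string on
+   32- and 64-bit platforms:
+
+     StgWord n = 42;
+     printf("allocated %lu words\n", n);
+*/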
+
+#define W_MASK (sizeof(W_)-1)
+
+/*
+ * Other commonly-used STG datatypes.
+ */
+
+typedef void* StgAddr;
+typedef StgWord32 StgChar;
+typedef int StgBool;
+typedef float StgFloat;
+typedef double StgDouble;
+typedef StgWord* StgPtr; /* heap or stack pointer */
+typedef StgWord volatile* StgVolatilePtr; /* pointer to volatile word */
+typedef StgWord StgOffset; /* byte offset within closure */
+typedef StgWord8 StgCode; /* close enough */
+typedef void* StgStablePtr;
+typedef StgWord8* StgByteArray;
+
+/*
+ Types for the generated C functions
+ take no arguments
+ return a pointer to the next function to be called
+ use: Ptr to Fun that returns a Ptr to Fun which returns Ptr to void
+
+  Note: Neither StgFunPtr nor StgFun is quite right (that is,
+ StgFunPtr != StgFun*). So, the functions we define all have type
+ StgFun but we always have to cast them to StgFunPtr when we assign
+ them to something.
+ The only way round this would be to write a recursive type but
+ C only allows that if you're defining a struct or union.
+*/
+
+typedef void *(*(*StgFunPtr)(void))(void);
+typedef StgFunPtr StgFun(void);
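+
+/* For example (illustrative only; f is a hypothetical generated
+   function):
+
+     StgFun f;                      declared with type StgFun
+     StgFunPtr next = (StgFunPtr)f; cast on assignment, as noted above
+*/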
+
+#endif /* STGTYPES_H */