From 6e2d9ee25bce06ae51d2f1cf8df4f7422106a383 Mon Sep 17 00:00:00 2001 From: Sylvain Henry Date: Tue, 7 Jan 2020 02:44:39 +0100 Subject: Module hierarchy: Cmm (cf #13009) --- aclocal.m4 | 2 +- compiler/GHC/Cmm.hs | 231 +++ compiler/GHC/Cmm/BlockId.hs | 46 + compiler/GHC/Cmm/BlockId.hs-boot | 8 + compiler/GHC/Cmm/CLabel.hs | 1571 ++++++++++++++++++++ compiler/GHC/Cmm/CallConv.hs | 212 +++ compiler/GHC/Cmm/CommonBlockElim.hs | 320 ++++ compiler/GHC/Cmm/ContFlowOpt.hs | 451 ++++++ compiler/GHC/Cmm/Dataflow.hs | 441 ++++++ compiler/GHC/Cmm/Dataflow/Block.hs | 329 ++++ compiler/GHC/Cmm/Dataflow/Collections.hs | 177 +++ compiler/GHC/Cmm/Dataflow/Graph.hs | 186 +++ compiler/GHC/Cmm/Dataflow/Label.hs | 142 ++ compiler/GHC/Cmm/DebugBlock.hs | 546 +++++++ compiler/GHC/Cmm/Expr.hs | 619 ++++++++ compiler/GHC/Cmm/Graph.hs | 484 ++++++ compiler/GHC/Cmm/Info.hs | 593 ++++++++ compiler/GHC/Cmm/Info/Build.hs | 892 +++++++++++ compiler/GHC/Cmm/LayoutStack.hs | 1236 +++++++++++++++ compiler/GHC/Cmm/Lexer.x | 368 +++++ compiler/GHC/Cmm/Lint.hs | 261 ++++ compiler/GHC/Cmm/Liveness.hs | 93 ++ compiler/GHC/Cmm/MachOp.hs | 664 +++++++++ compiler/GHC/Cmm/Monad.hs | 59 + compiler/GHC/Cmm/Node.hs | 724 +++++++++ compiler/GHC/Cmm/Opt.hs | 423 ++++++ compiler/GHC/Cmm/Parser.y | 1442 ++++++++++++++++++ compiler/GHC/Cmm/Pipeline.hs | 367 +++++ compiler/GHC/Cmm/Ppr.hs | 309 ++++ compiler/GHC/Cmm/Ppr/Decl.hs | 169 +++ compiler/GHC/Cmm/Ppr/Expr.hs | 286 ++++ compiler/GHC/Cmm/ProcPoint.hs | 496 ++++++ compiler/GHC/Cmm/Sink.hs | 854 +++++++++++ compiler/GHC/Cmm/Switch.hs | 502 +++++++ compiler/GHC/Cmm/Switch/Implement.hs | 116 ++ compiler/GHC/Cmm/Type.hs | 432 ++++++ compiler/GHC/Cmm/Utils.hs | 607 ++++++++ compiler/GHC/Cmm/cmm-notes | 184 +++ compiler/GHC/CmmToC.hs | 1380 +++++++++++++++++ compiler/GHC/Data/Bitmap.hs | 134 ++ compiler/GHC/Platform/Regs.hs | 2 +- compiler/GHC/Runtime/Layout.hs | 563 +++++++ compiler/GHC/Stg/Lift/Analysis.hs | 2 +- compiler/GHC/StgToCmm.hs | 8 +- compiler/GHC/StgToCmm/ArgRep.hs | 2 +- compiler/GHC/StgToCmm/Bind.hs | 18 +- compiler/GHC/StgToCmm/CgUtils.hs | 10 +- compiler/GHC/StgToCmm/Closure.hs | 10 +- compiler/GHC/StgToCmm/DataCon.hs | 10 +- compiler/GHC/StgToCmm/Env.hs | 10 +- compiler/GHC/StgToCmm/Expr.hs | 8 +- compiler/GHC/StgToCmm/ExtCode.hs | 8 +- compiler/GHC/StgToCmm/Foreign.hs | 20 +- compiler/GHC/StgToCmm/Heap.hs | 16 +- compiler/GHC/StgToCmm/Hpc.hs | 8 +- compiler/GHC/StgToCmm/Layout.hs | 14 +- compiler/GHC/StgToCmm/Monad.hs | 18 +- compiler/GHC/StgToCmm/Prim.hs | 14 +- compiler/GHC/StgToCmm/Prof.hs | 10 +- compiler/GHC/StgToCmm/Ticky.hs | 16 +- compiler/GHC/StgToCmm/Utils.hs | 20 +- compiler/basicTypes/Unique.hs | 2 +- compiler/cmm/Bitmap.hs | 134 -- compiler/cmm/BlockId.hs | 46 - compiler/cmm/BlockId.hs-boot | 8 - compiler/cmm/CLabel.hs | 1571 -------------------- compiler/cmm/Cmm.hs | 231 --- compiler/cmm/CmmBuildInfoTables.hs | 892 ----------- compiler/cmm/CmmCallConv.hs | 212 --- compiler/cmm/CmmCommonBlockElim.hs | 320 ---- compiler/cmm/CmmContFlowOpt.hs | 451 ------ compiler/cmm/CmmExpr.hs | 619 -------- compiler/cmm/CmmImplementSwitchPlans.hs | 116 -- compiler/cmm/CmmInfo.hs | 593 -------- compiler/cmm/CmmLayoutStack.hs | 1236 --------------- compiler/cmm/CmmLex.x | 368 ----- compiler/cmm/CmmLint.hs | 261 ---- compiler/cmm/CmmLive.hs | 93 -- compiler/cmm/CmmMachOp.hs | 664 --------- compiler/cmm/CmmMonad.hs | 59 - compiler/cmm/CmmNode.hs | 724 --------- compiler/cmm/CmmOpt.hs | 423 ------ compiler/cmm/CmmParse.y | 1442 ------------------ compiler/cmm/CmmPipeline.hs | 367 
----- compiler/cmm/CmmProcPoint.hs | 496 ------ compiler/cmm/CmmSink.hs | 854 ----------- compiler/cmm/CmmSwitch.hs | 500 ------- compiler/cmm/CmmType.hs | 432 ------ compiler/cmm/CmmUtils.hs | 607 -------- compiler/cmm/Debug.hs | 546 ------- compiler/cmm/Hoopl/Block.hs | 329 ---- compiler/cmm/Hoopl/Collections.hs | 177 --- compiler/cmm/Hoopl/Dataflow.hs | 441 ------ compiler/cmm/Hoopl/Graph.hs | 186 --- compiler/cmm/Hoopl/Label.hs | 142 -- compiler/cmm/MkGraph.hs | 484 ------ compiler/cmm/PprC.hs | 1380 ----------------- compiler/cmm/PprCmm.hs | 309 ---- compiler/cmm/PprCmmDecl.hs | 169 --- compiler/cmm/PprCmmExpr.hs | 286 ---- compiler/cmm/SMRep.hs | 563 ------- compiler/cmm/cmm-notes | 184 --- compiler/deSugar/Coverage.hs | 2 +- compiler/deSugar/DsForeign.hs | 4 +- compiler/ghc.cabal.in | 76 +- compiler/ghci/ByteCodeAsm.hs | 2 +- compiler/ghci/ByteCodeGen.hs | 4 +- compiler/ghci/ByteCodeInstr.hs | 2 +- compiler/ghci/RtClosureInspect.hs | 2 +- compiler/llvmGen/LlvmCodeGen.hs | 6 +- compiler/llvmGen/LlvmCodeGen/Base.hs | 6 +- compiler/llvmGen/LlvmCodeGen/CodeGen.hs | 18 +- compiler/llvmGen/LlvmCodeGen/Data.hs | 6 +- compiler/llvmGen/LlvmCodeGen/Ppr.hs | 4 +- compiler/llvmGen/LlvmCodeGen/Regs.hs | 2 +- compiler/main/CodeOutput.hs | 6 +- compiler/main/Hooks.hs | 2 +- compiler/main/HscMain.hs | 10 +- compiler/main/StaticPtrTable.hs | 2 +- compiler/nativeGen/AsmCodeGen.hs | 22 +- compiler/nativeGen/BlockLayout.hs | 10 +- compiler/nativeGen/CFG.hs | 25 +- compiler/nativeGen/CPrim.hs | 4 +- compiler/nativeGen/Dwarf.hs | 10 +- compiler/nativeGen/Dwarf/Types.hs | 6 +- compiler/nativeGen/Format.hs | 2 +- compiler/nativeGen/Instruction.hs | 8 +- compiler/nativeGen/NCGMonad.hs | 12 +- compiler/nativeGen/PIC.hs | 8 +- compiler/nativeGen/PPC/CodeGen.hs | 16 +- compiler/nativeGen/PPC/Instr.hs | 12 +- compiler/nativeGen/PPC/Ppr.hs | 12 +- compiler/nativeGen/PPC/RegInfo.hs | 6 +- compiler/nativeGen/PPC/Regs.hs | 4 +- compiler/nativeGen/PprBase.hs | 4 +- compiler/nativeGen/RegAlloc/Graph/Coalesce.hs | 2 +- compiler/nativeGen/RegAlloc/Graph/Spill.hs | 6 +- compiler/nativeGen/RegAlloc/Graph/SpillClean.hs | 6 +- compiler/nativeGen/RegAlloc/Graph/SpillCost.hs | 6 +- compiler/nativeGen/RegAlloc/Linear/Base.hs | 2 +- .../nativeGen/RegAlloc/Linear/JoinToTargets.hs | 4 +- compiler/nativeGen/RegAlloc/Linear/Main.hs | 8 +- compiler/nativeGen/RegAlloc/Linear/State.hs | 2 +- compiler/nativeGen/RegAlloc/Liveness.hs | 8 +- compiler/nativeGen/SPARC/CodeGen.hs | 14 +- compiler/nativeGen/SPARC/CodeGen/Amode.hs | 2 +- compiler/nativeGen/SPARC/CodeGen/Base.hs | 4 +- compiler/nativeGen/SPARC/CodeGen/CondCode.hs | 2 +- compiler/nativeGen/SPARC/CodeGen/Expand.hs | 2 +- compiler/nativeGen/SPARC/CodeGen/Gen32.hs | 2 +- compiler/nativeGen/SPARC/CodeGen/Gen32.hs-boot | 2 +- compiler/nativeGen/SPARC/CodeGen/Gen64.hs | 2 +- compiler/nativeGen/SPARC/CodeGen/Sanity.hs | 2 +- compiler/nativeGen/SPARC/Imm.hs | 4 +- compiler/nativeGen/SPARC/Instr.hs | 6 +- compiler/nativeGen/SPARC/Ppr.hs | 12 +- compiler/nativeGen/SPARC/ShortcutJump.hs | 6 +- compiler/nativeGen/X86/CodeGen.hs | 22 +- compiler/nativeGen/X86/Instr.hs | 12 +- compiler/nativeGen/X86/Ppr.hs | 10 +- compiler/nativeGen/X86/Regs.hs | 4 +- compiler/prelude/PrimOp.hs | 2 +- compiler/profiling/ProfInit.hs | 2 +- ghc.mk | 4 +- hadrian/src/Rules.hs | 4 +- hadrian/src/Rules/SourceDist.hs | 4 +- includes/Cmm.h | 4 +- includes/CodeGen.Platform.hs | 2 +- rts/Apply.cmm | 2 +- rts/Exception.cmm | 2 +- rts/HeapStackCheck.cmm | 2 +- rts/PrimOps.cmm | 2 +- rts/StgMiscClosures.cmm | 2 +- 
rts/StgStartup.cmm | 2 +- rts/StgStdThunks.cmm | 2 +- rts/Updates.cmm | 2 +- testsuite/tests/cmm/should_run/HooplPostorder.hs | 8 +- testsuite/tests/codeGen/should_run/T13825-unit.hs | 2 +- testsuite/tests/regalloc/regalloc_unit_tests.hs | 12 +- 179 files changed, 19294 insertions(+), 19293 deletions(-) create mode 100644 compiler/GHC/Cmm.hs create mode 100644 compiler/GHC/Cmm/BlockId.hs create mode 100644 compiler/GHC/Cmm/BlockId.hs-boot create mode 100644 compiler/GHC/Cmm/CLabel.hs create mode 100644 compiler/GHC/Cmm/CallConv.hs create mode 100644 compiler/GHC/Cmm/CommonBlockElim.hs create mode 100644 compiler/GHC/Cmm/ContFlowOpt.hs create mode 100644 compiler/GHC/Cmm/Dataflow.hs create mode 100644 compiler/GHC/Cmm/Dataflow/Block.hs create mode 100644 compiler/GHC/Cmm/Dataflow/Collections.hs create mode 100644 compiler/GHC/Cmm/Dataflow/Graph.hs create mode 100644 compiler/GHC/Cmm/Dataflow/Label.hs create mode 100644 compiler/GHC/Cmm/DebugBlock.hs create mode 100644 compiler/GHC/Cmm/Expr.hs create mode 100644 compiler/GHC/Cmm/Graph.hs create mode 100644 compiler/GHC/Cmm/Info.hs create mode 100644 compiler/GHC/Cmm/Info/Build.hs create mode 100644 compiler/GHC/Cmm/LayoutStack.hs create mode 100644 compiler/GHC/Cmm/Lexer.x create mode 100644 compiler/GHC/Cmm/Lint.hs create mode 100644 compiler/GHC/Cmm/Liveness.hs create mode 100644 compiler/GHC/Cmm/MachOp.hs create mode 100644 compiler/GHC/Cmm/Monad.hs create mode 100644 compiler/GHC/Cmm/Node.hs create mode 100644 compiler/GHC/Cmm/Opt.hs create mode 100644 compiler/GHC/Cmm/Parser.y create mode 100644 compiler/GHC/Cmm/Pipeline.hs create mode 100644 compiler/GHC/Cmm/Ppr.hs create mode 100644 compiler/GHC/Cmm/Ppr/Decl.hs create mode 100644 compiler/GHC/Cmm/Ppr/Expr.hs create mode 100644 compiler/GHC/Cmm/ProcPoint.hs create mode 100644 compiler/GHC/Cmm/Sink.hs create mode 100644 compiler/GHC/Cmm/Switch.hs create mode 100644 compiler/GHC/Cmm/Switch/Implement.hs create mode 100644 compiler/GHC/Cmm/Type.hs create mode 100644 compiler/GHC/Cmm/Utils.hs create mode 100644 compiler/GHC/Cmm/cmm-notes create mode 100644 compiler/GHC/CmmToC.hs create mode 100644 compiler/GHC/Data/Bitmap.hs create mode 100644 compiler/GHC/Runtime/Layout.hs delete mode 100644 compiler/cmm/Bitmap.hs delete mode 100644 compiler/cmm/BlockId.hs delete mode 100644 compiler/cmm/BlockId.hs-boot delete mode 100644 compiler/cmm/CLabel.hs delete mode 100644 compiler/cmm/Cmm.hs delete mode 100644 compiler/cmm/CmmBuildInfoTables.hs delete mode 100644 compiler/cmm/CmmCallConv.hs delete mode 100644 compiler/cmm/CmmCommonBlockElim.hs delete mode 100644 compiler/cmm/CmmContFlowOpt.hs delete mode 100644 compiler/cmm/CmmExpr.hs delete mode 100644 compiler/cmm/CmmImplementSwitchPlans.hs delete mode 100644 compiler/cmm/CmmInfo.hs delete mode 100644 compiler/cmm/CmmLayoutStack.hs delete mode 100644 compiler/cmm/CmmLex.x delete mode 100644 compiler/cmm/CmmLint.hs delete mode 100644 compiler/cmm/CmmLive.hs delete mode 100644 compiler/cmm/CmmMachOp.hs delete mode 100644 compiler/cmm/CmmMonad.hs delete mode 100644 compiler/cmm/CmmNode.hs delete mode 100644 compiler/cmm/CmmOpt.hs delete mode 100644 compiler/cmm/CmmParse.y delete mode 100644 compiler/cmm/CmmPipeline.hs delete mode 100644 compiler/cmm/CmmProcPoint.hs delete mode 100644 compiler/cmm/CmmSink.hs delete mode 100644 compiler/cmm/CmmSwitch.hs delete mode 100644 compiler/cmm/CmmType.hs delete mode 100644 compiler/cmm/CmmUtils.hs delete mode 100644 compiler/cmm/Debug.hs delete mode 100644 compiler/cmm/Hoopl/Block.hs delete mode 100644 
compiler/cmm/Hoopl/Collections.hs delete mode 100644 compiler/cmm/Hoopl/Dataflow.hs delete mode 100644 compiler/cmm/Hoopl/Graph.hs delete mode 100644 compiler/cmm/Hoopl/Label.hs delete mode 100644 compiler/cmm/MkGraph.hs delete mode 100644 compiler/cmm/PprC.hs delete mode 100644 compiler/cmm/PprCmm.hs delete mode 100644 compiler/cmm/PprCmmDecl.hs delete mode 100644 compiler/cmm/PprCmmExpr.hs delete mode 100644 compiler/cmm/SMRep.hs delete mode 100644 compiler/cmm/cmm-notes diff --git a/aclocal.m4 b/aclocal.m4 index 4a037a46fd..3dc30eb7d9 100644 --- a/aclocal.m4 +++ b/aclocal.m4 @@ -999,7 +999,7 @@ else fi; changequote([, ])dnl ]) -if test ! -f compiler/parser/Parser.hs || test ! -f compiler/cmm/CmmParse.hs +if test ! -f compiler/parser/Parser.hs || test ! -f compiler/GHC/Cmm/Parser.hs then FP_COMPARE_VERSIONS([$fptools_cv_happy_version],[-lt],[1.19.10], [AC_MSG_ERROR([Happy version 1.19.10 or later is required to compile GHC.])])[] diff --git a/compiler/GHC/Cmm.hs b/compiler/GHC/Cmm.hs new file mode 100644 index 0000000000..5efecdc534 --- /dev/null +++ b/compiler/GHC/Cmm.hs @@ -0,0 +1,231 @@ +-- Cmm representations using Hoopl's Graph CmmNode e x. +{-# LANGUAGE GADTs #-} + +module GHC.Cmm ( + -- * Cmm top-level datatypes + CmmProgram, CmmGroup, GenCmmGroup, + CmmDecl, GenCmmDecl(..), + CmmGraph, GenCmmGraph(..), + CmmBlock, + RawCmmDecl, RawCmmGroup, + Section(..), SectionType(..), CmmStatics(..), CmmStatic(..), + isSecConstant, + + -- ** Blocks containing lists + GenBasicBlock(..), blockId, + ListGraph(..), pprBBlock, + + -- * Info Tables + CmmTopInfo(..), CmmStackInfo(..), CmmInfoTable(..), topInfoTable, + ClosureTypeInfo(..), + ProfilingInfo(..), ConstrDescription, + + -- * Statements, expressions and types + module GHC.Cmm.Node, + module GHC.Cmm.Expr, + ) where + +import GhcPrelude + +import Id +import CostCentre +import GHC.Cmm.CLabel +import GHC.Cmm.BlockId +import GHC.Cmm.Node +import GHC.Runtime.Layout +import GHC.Cmm.Expr +import GHC.Cmm.Dataflow.Block +import GHC.Cmm.Dataflow.Collections +import GHC.Cmm.Dataflow.Graph +import GHC.Cmm.Dataflow.Label +import Outputable +import Data.ByteString (ByteString) + +----------------------------------------------------------------------------- +-- Cmm, GenCmm +----------------------------------------------------------------------------- + +-- A CmmProgram is a list of CmmGroups +-- A CmmGroup is a list of top-level declarations + +-- When object-splitting is on, each group is compiled into a separate +-- .o file. So typically we put closely related stuff in a CmmGroup. +-- Section-splitting follows suit and makes one .text subsection for each +-- CmmGroup. + +type CmmProgram = [CmmGroup] + +type GenCmmGroup d h g = [GenCmmDecl d h g] +type CmmGroup = GenCmmGroup CmmStatics CmmTopInfo CmmGraph +type RawCmmGroup = GenCmmGroup CmmStatics (LabelMap CmmStatics) CmmGraph + +----------------------------------------------------------------------------- +-- CmmDecl, GenCmmDecl +----------------------------------------------------------------------------- + +-- GenCmmDecl is abstracted over +-- d, the type of static data elements in CmmData +-- h, the static info preceding the code of a CmmProc +-- g, the control-flow graph of a CmmProc +-- +-- We expect there to be two main instances of this type: +-- (a) C--, i.e. populated with various C-- constructs +-- (b) Native code, populated with data/instructions + +-- | A top-level chunk, abstracted over the type of the contents of +-- the basic blocks (Cmm or instructions are the likely instantiations). 
+data GenCmmDecl d h g + = CmmProc -- A procedure + h -- Extra header such as the info table + CLabel -- Entry label + [GlobalReg] -- Registers live on entry. Note that the set of live + -- registers will be correct in generated C-- code, but + -- not in hand-written C-- code. However, + -- splitAtProcPoints calculates correct liveness + -- information for CmmProcs. + g -- Control-flow graph for the procedure's code + + | CmmData -- Static data + Section + d + +type CmmDecl = GenCmmDecl CmmStatics CmmTopInfo CmmGraph + +type RawCmmDecl + = GenCmmDecl + CmmStatics + (LabelMap CmmStatics) + CmmGraph + +----------------------------------------------------------------------------- +-- Graphs +----------------------------------------------------------------------------- + +type CmmGraph = GenCmmGraph CmmNode +data GenCmmGraph n = CmmGraph { g_entry :: BlockId, g_graph :: Graph n C C } +type CmmBlock = Block CmmNode C C + +----------------------------------------------------------------------------- +-- Info Tables +----------------------------------------------------------------------------- + +-- | CmmTopInfo is attached to each CmmDecl (see defn of CmmGroup), and contains +-- the extra info (beyond the executable code) that belongs to that CmmDecl. +data CmmTopInfo = TopInfo { info_tbls :: LabelMap CmmInfoTable + , stack_info :: CmmStackInfo } + +topInfoTable :: GenCmmDecl a CmmTopInfo (GenCmmGraph n) -> Maybe CmmInfoTable +topInfoTable (CmmProc infos _ _ g) = mapLookup (g_entry g) (info_tbls infos) +topInfoTable _ = Nothing + +data CmmStackInfo + = StackInfo { + arg_space :: ByteOff, + -- number of bytes of arguments on the stack on entry to + -- the proc. This is filled in by GHC.StgToCmm.codeGen, and + -- used by the stack allocator later. + updfr_space :: Maybe ByteOff, + -- XXX: this never contains anything useful, but it should. + -- See comment in GHC.Cmm.LayoutStack. + do_layout :: Bool + -- Do automatic stack layout for this proc. This is + -- True for all code generated by the code generator, + -- but is occasionally False for hand-written Cmm where + -- we want to do the stack manipulation manually. + } + +-- | Info table as a Haskell data type +data CmmInfoTable + = CmmInfoTable { + cit_lbl :: CLabel, -- Info table label + cit_rep :: SMRep, + cit_prof :: ProfilingInfo, + cit_srt :: Maybe CLabel, -- empty, or a closure address + cit_clo :: Maybe (Id, CostCentreStack) + -- Just (id,ccs) <=> build a static closure later + -- Nothing <=> don't build a static closure + -- + -- Static closures for FUNs and THUNKs are *not* generated by + -- the code generator, because we might want to add SRT + -- entries to them later (for FUNs at least; THUNKs are + -- treated the same for consistency). See Note [SRTs] in + -- GHC.Cmm.Info.Build, in particular the [FUN] optimisation. + -- + -- This is strictly speaking not a part of the info table that + -- will be finally generated, but it's the only convenient + -- place to convey this information from the code generator to + -- where we build the static closures in + -- GHC.Cmm.Info.Build.doSRTs. 
+ } + +data ProfilingInfo + = NoProfilingInfo + | ProfilingInfo ByteString ByteString -- closure_type, closure_desc + +----------------------------------------------------------------------------- +-- Static Data +----------------------------------------------------------------------------- + +data SectionType + = Text + | Data + | ReadOnlyData + | RelocatableReadOnlyData + | UninitialisedData + | ReadOnlyData16 -- .rodata.cst16 on x86_64, 16-byte aligned + | CString + | OtherSection String + deriving (Show) + +-- | Should data in this section be considered constant? +isSecConstant :: Section -> Bool +isSecConstant (Section t _) = case t of + Text -> True + ReadOnlyData -> True + RelocatableReadOnlyData -> True + ReadOnlyData16 -> True + CString -> True + Data -> False + UninitialisedData -> False + (OtherSection _) -> False + +data Section = Section SectionType CLabel + +data CmmStatic + = CmmStaticLit CmmLit + -- a literal value, size given by cmmLitRep of the literal. + | CmmUninitialised Int + -- uninitialised data, N bytes long + | CmmString ByteString + -- string of 8-bit values only, not zero terminated. + +data CmmStatics + = Statics + CLabel -- Label of statics + [CmmStatic] -- The static data itself + +-- ----------------------------------------------------------------------------- +-- Basic blocks consisting of lists + +-- These are used by the LLVM and NCG backends, when populating Cmm +-- with lists of instructions. + +data GenBasicBlock i = BasicBlock BlockId [i] + +-- | The branch block id is that of the first block in +-- the branch, which is that branch's entry point +blockId :: GenBasicBlock i -> BlockId +blockId (BasicBlock blk_id _ ) = blk_id + +newtype ListGraph i = ListGraph [GenBasicBlock i] + +instance Outputable instr => Outputable (ListGraph instr) where + ppr (ListGraph blocks) = vcat (map ppr blocks) + +instance Outputable instr => Outputable (GenBasicBlock instr) where + ppr = pprBBlock + +pprBBlock :: Outputable stmt => GenBasicBlock stmt -> SDoc +pprBBlock (BasicBlock ident stmts) = + hang (ppr ident <> colon) 4 (vcat (map ppr stmts)) + diff --git a/compiler/GHC/Cmm/BlockId.hs b/compiler/GHC/Cmm/BlockId.hs new file mode 100644 index 0000000000..f7f369551b --- /dev/null +++ b/compiler/GHC/Cmm/BlockId.hs @@ -0,0 +1,46 @@ +{-# LANGUAGE TypeSynonymInstances #-} +{-# OPTIONS_GHC -fno-warn-orphans #-} + +{- BlockId module should probably go away completely, being superseded by Label -} +module GHC.Cmm.BlockId + ( BlockId, mkBlockId -- ToDo: BlockId should be abstract, but it isn't yet + , newBlockId + , blockLbl, infoTblLbl + ) where + +import GhcPrelude + +import GHC.Cmm.CLabel +import IdInfo +import Name +import Unique +import UniqSupply + +import GHC.Cmm.Dataflow.Label (Label, mkHooplLabel) + +---------------------------------------------------------------- +--- Block Ids, their environments, and their sets + +{- Note [Unique BlockId] +~~~~~~~~~~~~~~~~~~~~~~~~ +Although a 'BlockId' is a local label, for reasons of implementation, +'BlockId's must be unique within an entire compilation unit. The reason +is that each local label is mapped to an assembly-language label, and +most assembly languages make a label visible throughout the entire +compilation unit in which it appears. 
+-} + +type BlockId = Label + +mkBlockId :: Unique -> BlockId +mkBlockId unique = mkHooplLabel $ getKey unique + +newBlockId :: MonadUnique m => m BlockId +newBlockId = mkBlockId <$> getUniqueM + +blockLbl :: BlockId -> CLabel +blockLbl label = mkLocalBlockLabel (getUnique label) + +infoTblLbl :: BlockId -> CLabel +infoTblLbl label + = mkBlockInfoTableLabel (mkFCallName (getUnique label) "block") NoCafRefs diff --git a/compiler/GHC/Cmm/BlockId.hs-boot b/compiler/GHC/Cmm/BlockId.hs-boot new file mode 100644 index 0000000000..76fd6180a9 --- /dev/null +++ b/compiler/GHC/Cmm/BlockId.hs-boot @@ -0,0 +1,8 @@ +module GHC.Cmm.BlockId (BlockId, mkBlockId) where + +import GHC.Cmm.Dataflow.Label (Label) +import Unique (Unique) + +type BlockId = Label + +mkBlockId :: Unique -> BlockId diff --git a/compiler/GHC/Cmm/CLabel.hs b/compiler/GHC/Cmm/CLabel.hs new file mode 100644 index 0000000000..e84278bf65 --- /dev/null +++ b/compiler/GHC/Cmm/CLabel.hs @@ -0,0 +1,1571 @@ +----------------------------------------------------------------------------- +-- +-- Object-file symbols (called CLabel for histerical raisins). +-- +-- (c) The University of Glasgow 2004-2006 +-- +----------------------------------------------------------------------------- + +{-# LANGUAGE CPP #-} + +module GHC.Cmm.CLabel ( + CLabel, -- abstract type + ForeignLabelSource(..), + pprDebugCLabel, + + mkClosureLabel, + mkSRTLabel, + mkInfoTableLabel, + mkEntryLabel, + mkRednCountsLabel, + mkConInfoTableLabel, + mkApEntryLabel, + mkApInfoTableLabel, + mkClosureTableLabel, + mkBytesLabel, + + mkLocalBlockLabel, + mkLocalClosureLabel, + mkLocalInfoTableLabel, + mkLocalClosureTableLabel, + + mkBlockInfoTableLabel, + + mkBitmapLabel, + mkStringLitLabel, + + mkAsmTempLabel, + mkAsmTempDerivedLabel, + mkAsmTempEndLabel, + mkAsmTempDieLabel, + + mkDirty_MUT_VAR_Label, + mkNonmovingWriteBarrierEnabledLabel, + mkUpdInfoLabel, + mkBHUpdInfoLabel, + mkIndStaticInfoLabel, + mkMainCapabilityLabel, + mkMAP_FROZEN_CLEAN_infoLabel, + mkMAP_FROZEN_DIRTY_infoLabel, + mkMAP_DIRTY_infoLabel, + mkSMAP_FROZEN_CLEAN_infoLabel, + mkSMAP_FROZEN_DIRTY_infoLabel, + mkSMAP_DIRTY_infoLabel, + mkBadAlignmentLabel, + mkArrWords_infoLabel, + mkSRTInfoLabel, + + mkTopTickyCtrLabel, + mkCAFBlackHoleInfoTableLabel, + mkRtsPrimOpLabel, + mkRtsSlowFastTickyCtrLabel, + + mkSelectorInfoLabel, + mkSelectorEntryLabel, + + mkCmmInfoLabel, + mkCmmEntryLabel, + mkCmmRetInfoLabel, + mkCmmRetLabel, + mkCmmCodeLabel, + mkCmmDataLabel, + mkCmmClosureLabel, + + mkRtsApFastLabel, + + mkPrimCallLabel, + + mkForeignLabel, + addLabelSize, + + foreignLabelStdcallInfo, + isBytesLabel, + isForeignLabel, + isSomeRODataLabel, + isStaticClosureLabel, + mkCCLabel, mkCCSLabel, + + DynamicLinkerLabelInfo(..), + mkDynamicLinkerLabel, + dynamicLinkerLabelInfo, + + mkPicBaseLabel, + mkDeadStripPreventer, + + mkHpcTicksLabel, + + -- * Predicates + hasCAF, + needsCDecl, maybeLocalBlockLabel, externallyVisibleCLabel, + isMathFun, + isCFunctionLabel, isGcPtrLabel, labelDynamic, + isLocalCLabel, mayRedirectTo, + + -- * Conversions + toClosureLbl, toSlowEntryLbl, toEntryLbl, toInfoLbl, hasHaskellName, + + pprCLabel, + isInfoTableLabel, + isConInfoTableLabel + ) where + +#include "HsVersions.h" + +import GhcPrelude + +import IdInfo +import BasicTypes +import {-# SOURCE #-} GHC.Cmm.BlockId (BlockId, mkBlockId) +import Packages +import Module +import Name +import Unique +import PrimOp +import CostCentre +import Outputable +import FastString +import DynFlags +import GHC.Platform +import UniqSet +import Util +import 
PprCore ( {- instances -} ) + +-- ----------------------------------------------------------------------------- +-- The CLabel type + +{- | + 'CLabel' is an abstract type that supports the following operations: + + - Pretty printing + + - In a C file, does it need to be declared before use? (i.e. is it + guaranteed to be already in scope in the places we need to refer to it?) + + - If it needs to be declared, what type (code or data) should it be + declared to have? + + - Is it visible outside this object file or not? + + - Is it "dynamic" (see details below) + + - Eq and Ord, so that we can make sets of CLabels (currently only + used in outputting C as far as I can tell, to avoid generating + more than one declaration for any given label). + + - Converting an info table label into an entry label. + + CLabel usage is a bit messy in GHC as they are used in a number of different + contexts: + + - By the C-- AST to identify labels + + - By the unregisterised C code generator ("PprC") for naming functions (hence + the name 'CLabel') + + - By the native and LLVM code generators to identify labels + + For extra fun, each of these uses a slightly different subset of constructors + (e.g. 'AsmTempLabel' and 'AsmTempDerivedLabel' are used only in the NCG and + LLVM backends). + + In general, we use 'IdLabel' to represent Haskell things early in the + pipeline. However, later optimization passes will often represent blocks they + create with 'LocalBlockLabel' where there is no obvious 'Name' to hang off the + label. +-} + +data CLabel + = -- | A label related to the definition of a particular Id or Con in a .hs file. + IdLabel + Name + CafInfo + IdLabelInfo -- encodes the suffix of the label + + -- | A label from a .cmm file that is not associated with a .hs level Id. + | CmmLabel + UnitId -- what package the label belongs to. + FastString -- identifier giving the prefix of the label + CmmLabelInfo -- encodes the suffix of the label + + -- | A label with a baked-in \/ algorithmically generated name that definitely + -- comes from the RTS. The code for it must compile into libHSrts.a \/ libHSrts.so + -- If it doesn't have an algorithmically generated name then use a CmmLabel + -- instead and give it an appropriate UnitId argument. + | RtsLabel + RtsLabelInfo + + -- | A label associated with a block. These aren't visible outside of the + -- compilation unit in which they are defined. These are generally used to + -- name blocks produced by Cmm-to-Cmm passes and the native code generator, + -- where we don't have a 'Name' to associate the label to and therefore can't + -- use 'IdLabel'. + | LocalBlockLabel + {-# UNPACK #-} !Unique + + -- | A 'C' (or otherwise foreign) label. + -- + | ForeignLabel + FastString -- name of the imported label. + + (Maybe Int) -- possible '@n' suffix for stdcall functions + -- When generating C, the '@n' suffix is omitted, but when + -- generating assembler we must add it to the label. + + ForeignLabelSource -- what package the foreign label is in. + + FunctionOrData + + -- | Local temporary label used for native (or LLVM) code generation; must not + -- appear outside of these contexts. Use primarily for debug information + | AsmTempLabel + {-# UNPACK #-} !Unique + + -- | A label \"derived\" from another 'CLabel' by the addition of a suffix. + -- Must not occur outside of the NCG or LLVM code generators. 
+ | AsmTempDerivedLabel + CLabel + FastString -- suffix + + | StringLitLabel + {-# UNPACK #-} !Unique + + | CC_Label CostCentre + | CCS_Label CostCentreStack + + + -- | These labels are generated and used inside the NCG only. + -- They are special variants of a label used for dynamic linking + -- see module PositionIndependentCode for details. + | DynamicLinkerLabel DynamicLinkerLabelInfo CLabel + + -- | This label is generated and used inside the NCG only. + -- It is used as a base for PIC calculations on some platforms. + -- It takes the form of a local numeric assembler label '1'; and + -- is pretty-printed as 1b, referring to the previous definition + -- of 1: in the assembler source file. + | PicBaseLabel + + -- | A label before an info table to prevent excessive dead-stripping on darwin + | DeadStripPreventer CLabel + + + -- | Per-module table of tick locations + | HpcTicksLabel Module + + -- | Static reference table + | SRTLabel + {-# UNPACK #-} !Unique + + -- | A bitmap (function or case return) + | LargeBitmapLabel + {-# UNPACK #-} !Unique + + deriving Eq + +-- This is laborious, but necessary. We can't derive Ord because +-- Unique doesn't have an Ord instance. Note nonDetCmpUnique in the +-- implementation. See Note [No Ord for Unique] +-- This is non-deterministic but we do not currently support deterministic +-- code-generation. See Note [Unique Determinism and code generation] +instance Ord CLabel where + compare (IdLabel a1 b1 c1) (IdLabel a2 b2 c2) = + compare a1 a2 `thenCmp` + compare b1 b2 `thenCmp` + compare c1 c2 + compare (CmmLabel a1 b1 c1) (CmmLabel a2 b2 c2) = + compare a1 a2 `thenCmp` + compare b1 b2 `thenCmp` + compare c1 c2 + compare (RtsLabel a1) (RtsLabel a2) = compare a1 a2 + compare (LocalBlockLabel u1) (LocalBlockLabel u2) = nonDetCmpUnique u1 u2 + compare (ForeignLabel a1 b1 c1 d1) (ForeignLabel a2 b2 c2 d2) = + compare a1 a2 `thenCmp` + compare b1 b2 `thenCmp` + compare c1 c2 `thenCmp` + compare d1 d2 + compare (AsmTempLabel u1) (AsmTempLabel u2) = nonDetCmpUnique u1 u2 + compare (AsmTempDerivedLabel a1 b1) (AsmTempDerivedLabel a2 b2) = + compare a1 a2 `thenCmp` + compare b1 b2 + compare (StringLitLabel u1) (StringLitLabel u2) = + nonDetCmpUnique u1 u2 + compare (CC_Label a1) (CC_Label a2) = + compare a1 a2 + compare (CCS_Label a1) (CCS_Label a2) = + compare a1 a2 + compare (DynamicLinkerLabel a1 b1) (DynamicLinkerLabel a2 b2) = + compare a1 a2 `thenCmp` + compare b1 b2 + compare PicBaseLabel PicBaseLabel = EQ + compare (DeadStripPreventer a1) (DeadStripPreventer a2) = + compare a1 a2 + compare (HpcTicksLabel a1) (HpcTicksLabel a2) = + compare a1 a2 + compare (SRTLabel u1) (SRTLabel u2) = + nonDetCmpUnique u1 u2 + compare (LargeBitmapLabel u1) (LargeBitmapLabel u2) = + nonDetCmpUnique u1 u2 + compare IdLabel{} _ = LT + compare _ IdLabel{} = GT + compare CmmLabel{} _ = LT + compare _ CmmLabel{} = GT + compare RtsLabel{} _ = LT + compare _ RtsLabel{} = GT + compare LocalBlockLabel{} _ = LT + compare _ LocalBlockLabel{} = GT + compare ForeignLabel{} _ = LT + compare _ ForeignLabel{} = GT + compare AsmTempLabel{} _ = LT + compare _ AsmTempLabel{} = GT + compare AsmTempDerivedLabel{} _ = LT + compare _ AsmTempDerivedLabel{} = GT + compare StringLitLabel{} _ = LT + compare _ StringLitLabel{} = GT + compare CC_Label{} _ = LT + compare _ CC_Label{} = GT + compare CCS_Label{} _ = LT + compare _ CCS_Label{} = GT + compare DynamicLinkerLabel{} _ = LT + compare _ DynamicLinkerLabel{} = GT + compare PicBaseLabel{} _ = LT + compare _ PicBaseLabel{} = GT + compare 
DeadStripPreventer{} _ = LT + compare _ DeadStripPreventer{} = GT + compare HpcTicksLabel{} _ = LT + compare _ HpcTicksLabel{} = GT + compare SRTLabel{} _ = LT + compare _ SRTLabel{} = GT + +-- | Record where a foreign label is stored. +data ForeignLabelSource + + -- | Label is in a named package + = ForeignLabelInPackage UnitId + + -- | Label is in some external, system package that doesn't also + -- contain compiled Haskell code, and is not associated with any .hi files. + -- We don't have to worry about Haskell code being inlined from + -- external packages. It is safe to treat the RTS package as "external". + | ForeignLabelInExternalPackage + + -- | Label is in the package currently being compiled. + -- This is only used for creating hacky tmp labels during code generation. + -- Don't use it in any code that might be inlined across a package boundary + -- (ie, core code) else the information will be wrong relative to the + -- destination module. + | ForeignLabelInThisPackage + + deriving (Eq, Ord) + + +-- | For debugging problems with the CLabel representation. +-- We can't make a Show instance for CLabel because lots of its components don't have instances. +-- The regular Outputable instance only shows the label name, and not its other info. +-- +pprDebugCLabel :: CLabel -> SDoc +pprDebugCLabel lbl + = case lbl of + IdLabel _ _ info-> ppr lbl <> (parens $ text "IdLabel" + <> whenPprDebug (text ":" <> text (show info))) + CmmLabel pkg _name _info + -> ppr lbl <> (parens $ text "CmmLabel" <+> ppr pkg) + + RtsLabel{} -> ppr lbl <> (parens $ text "RtsLabel") + + ForeignLabel _name mSuffix src funOrData + -> ppr lbl <> (parens $ text "ForeignLabel" + <+> ppr mSuffix + <+> ppr src + <+> ppr funOrData) + + _ -> ppr lbl <> (parens $ text "other CLabel") + + +data IdLabelInfo + = Closure -- ^ Label for closure + | InfoTable -- ^ Info tables for closures; always read-only + | Entry -- ^ Entry point + | Slow -- ^ Slow entry point + + | LocalInfoTable -- ^ Like InfoTable but not externally visible + | LocalEntry -- ^ Like Entry but not externally visible + + | RednCounts -- ^ Label of place to keep Ticky-ticky info for this Id + + | ConEntry -- ^ Constructor entry point + | ConInfoTable -- ^ Corresponding info table + + | ClosureTable -- ^ Table of closures for Enum tycons + + | Bytes -- ^ Content of a string literal. See + -- Note [Bytes label]. + | BlockInfoTable -- ^ Like LocalInfoTable but for a proc-point block + -- instead of a closure entry-point. + -- See Note [Proc-point local block entry-point]. + + deriving (Eq, Ord, Show) + + +data RtsLabelInfo + = RtsSelectorInfoTable Bool{-updatable-} Int{-offset-} -- ^ Selector thunks + | RtsSelectorEntry Bool{-updatable-} Int{-offset-} + + | RtsApInfoTable Bool{-updatable-} Int{-arity-} -- ^ AP thunks + | RtsApEntry Bool{-updatable-} Int{-arity-} + + | RtsPrimOp PrimOp + | RtsApFast FastString -- ^ _fast versions of generic apply + | RtsSlowFastTickyCtr String + + deriving (Eq, Ord) + -- NOTE: Eq on PtrString compares the pointer only, so this isn't + -- a real equality. + + +-- | What type of Cmm label we're dealing with. +-- Determines the suffix appended to the name when a CLabel.CmmLabel +-- is pretty printed. 
+data CmmLabelInfo + = CmmInfo -- ^ misc rts info tables, suffix _info + | CmmEntry -- ^ misc rts entry points, suffix _entry + | CmmRetInfo -- ^ misc rts ret info tables, suffix _info + | CmmRet -- ^ misc rts return points, suffix _ret + | CmmData -- ^ misc rts data bits, eg CHARLIKE_closure + | CmmCode -- ^ misc rts code + | CmmClosure -- ^ closures eg CHARLIKE_closure + | CmmPrimCall -- ^ a prim call to some hand written Cmm code + deriving (Eq, Ord) + +data DynamicLinkerLabelInfo + = CodeStub -- MachO: Lfoo$stub, ELF: foo@plt + | SymbolPtr -- MachO: Lfoo$non_lazy_ptr, Windows: __imp_foo + | GotSymbolPtr -- ELF: foo@got + | GotSymbolOffset -- ELF: foo@gotoff + + deriving (Eq, Ord) + + +-- ----------------------------------------------------------------------------- +-- Constructing CLabels +-- ----------------------------------------------------------------------------- + +-- Constructing IdLabels +-- These are always local: + +mkSRTLabel :: Unique -> CLabel +mkSRTLabel u = SRTLabel u + +mkRednCountsLabel :: Name -> CLabel +mkRednCountsLabel name = + IdLabel name NoCafRefs RednCounts -- Note [ticky for LNE] + +-- These have local & (possibly) external variants: +mkLocalClosureLabel :: Name -> CafInfo -> CLabel +mkLocalInfoTableLabel :: Name -> CafInfo -> CLabel +mkLocalClosureTableLabel :: Name -> CafInfo -> CLabel +mkLocalClosureLabel name c = IdLabel name c Closure +mkLocalInfoTableLabel name c = IdLabel name c LocalInfoTable +mkLocalClosureTableLabel name c = IdLabel name c ClosureTable + +mkClosureLabel :: Name -> CafInfo -> CLabel +mkInfoTableLabel :: Name -> CafInfo -> CLabel +mkEntryLabel :: Name -> CafInfo -> CLabel +mkClosureTableLabel :: Name -> CafInfo -> CLabel +mkConInfoTableLabel :: Name -> CafInfo -> CLabel +mkBytesLabel :: Name -> CLabel +mkClosureLabel name c = IdLabel name c Closure +mkInfoTableLabel name c = IdLabel name c InfoTable +mkEntryLabel name c = IdLabel name c Entry +mkClosureTableLabel name c = IdLabel name c ClosureTable +mkConInfoTableLabel name c = IdLabel name c ConInfoTable +mkBytesLabel name = IdLabel name NoCafRefs Bytes + +mkBlockInfoTableLabel :: Name -> CafInfo -> CLabel +mkBlockInfoTableLabel name c = IdLabel name c BlockInfoTable + -- See Note [Proc-point local block entry-point]. 
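-- [Editorial aside, not part of this patch] A minimal sketch of how the
-- IdLabel smart constructors above derive a whole family of labels from a
-- single Haskell 'Name'.  The module name 'CLabelExample' and the helper
-- 'labelsFor' are hypothetical and exist only for illustration; the sketch
-- assumes it is built inside a GHC tree with the module layout introduced
-- by this patch.

module CLabelExample (labelsFor) where

import GHC.Cmm.CLabel
import IdInfo (CafInfo)
import Name (Name)

-- One binding gives rise to several related symbols, distinguished only by
-- their suffix (see the naming convention documented later in this file).
labelsFor :: Name -> CafInfo -> [CLabel]
labelsFor n caf =
  [ mkClosureLabel   n caf   -- <name>_closure: static closure (if any)
  , mkInfoTableLabel n caf   -- <name>_info:    info table
  , mkEntryLabel     n caf   -- <name>_entry:   entry code
  , mkBytesLabel     n       -- <name>_bytes:   string-literal payload, see Note [Bytes label]
  ]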
+ +-- Constructing Cmm Labels +mkDirty_MUT_VAR_Label, + mkNonmovingWriteBarrierEnabledLabel, + mkUpdInfoLabel, + mkBHUpdInfoLabel, mkIndStaticInfoLabel, mkMainCapabilityLabel, + mkMAP_FROZEN_CLEAN_infoLabel, mkMAP_FROZEN_DIRTY_infoLabel, + mkMAP_DIRTY_infoLabel, + mkArrWords_infoLabel, + mkTopTickyCtrLabel, + mkCAFBlackHoleInfoTableLabel, + mkSMAP_FROZEN_CLEAN_infoLabel, mkSMAP_FROZEN_DIRTY_infoLabel, + mkSMAP_DIRTY_infoLabel, mkBadAlignmentLabel :: CLabel +mkDirty_MUT_VAR_Label = mkForeignLabel (fsLit "dirty_MUT_VAR") Nothing ForeignLabelInExternalPackage IsFunction +mkNonmovingWriteBarrierEnabledLabel + = CmmLabel rtsUnitId (fsLit "nonmoving_write_barrier_enabled") CmmData +mkUpdInfoLabel = CmmLabel rtsUnitId (fsLit "stg_upd_frame") CmmInfo +mkBHUpdInfoLabel = CmmLabel rtsUnitId (fsLit "stg_bh_upd_frame" ) CmmInfo +mkIndStaticInfoLabel = CmmLabel rtsUnitId (fsLit "stg_IND_STATIC") CmmInfo +mkMainCapabilityLabel = CmmLabel rtsUnitId (fsLit "MainCapability") CmmData +mkMAP_FROZEN_CLEAN_infoLabel = CmmLabel rtsUnitId (fsLit "stg_MUT_ARR_PTRS_FROZEN_CLEAN") CmmInfo +mkMAP_FROZEN_DIRTY_infoLabel = CmmLabel rtsUnitId (fsLit "stg_MUT_ARR_PTRS_FROZEN_DIRTY") CmmInfo +mkMAP_DIRTY_infoLabel = CmmLabel rtsUnitId (fsLit "stg_MUT_ARR_PTRS_DIRTY") CmmInfo +mkTopTickyCtrLabel = CmmLabel rtsUnitId (fsLit "top_ct") CmmData +mkCAFBlackHoleInfoTableLabel = CmmLabel rtsUnitId (fsLit "stg_CAF_BLACKHOLE") CmmInfo +mkArrWords_infoLabel = CmmLabel rtsUnitId (fsLit "stg_ARR_WORDS") CmmInfo +mkSMAP_FROZEN_CLEAN_infoLabel = CmmLabel rtsUnitId (fsLit "stg_SMALL_MUT_ARR_PTRS_FROZEN_CLEAN") CmmInfo +mkSMAP_FROZEN_DIRTY_infoLabel = CmmLabel rtsUnitId (fsLit "stg_SMALL_MUT_ARR_PTRS_FROZEN_DIRTY") CmmInfo +mkSMAP_DIRTY_infoLabel = CmmLabel rtsUnitId (fsLit "stg_SMALL_MUT_ARR_PTRS_DIRTY") CmmInfo +mkBadAlignmentLabel = CmmLabel rtsUnitId (fsLit "stg_badAlignment") CmmEntry + +mkSRTInfoLabel :: Int -> CLabel +mkSRTInfoLabel n = CmmLabel rtsUnitId lbl CmmInfo + where + lbl = + case n of + 1 -> fsLit "stg_SRT_1" + 2 -> fsLit "stg_SRT_2" + 3 -> fsLit "stg_SRT_3" + 4 -> fsLit "stg_SRT_4" + 5 -> fsLit "stg_SRT_5" + 6 -> fsLit "stg_SRT_6" + 7 -> fsLit "stg_SRT_7" + 8 -> fsLit "stg_SRT_8" + 9 -> fsLit "stg_SRT_9" + 10 -> fsLit "stg_SRT_10" + 11 -> fsLit "stg_SRT_11" + 12 -> fsLit "stg_SRT_12" + 13 -> fsLit "stg_SRT_13" + 14 -> fsLit "stg_SRT_14" + 15 -> fsLit "stg_SRT_15" + 16 -> fsLit "stg_SRT_16" + _ -> panic "mkSRTInfoLabel" + +----- +mkCmmInfoLabel, mkCmmEntryLabel, mkCmmRetInfoLabel, mkCmmRetLabel, + mkCmmCodeLabel, mkCmmDataLabel, mkCmmClosureLabel + :: UnitId -> FastString -> CLabel + +mkCmmInfoLabel pkg str = CmmLabel pkg str CmmInfo +mkCmmEntryLabel pkg str = CmmLabel pkg str CmmEntry +mkCmmRetInfoLabel pkg str = CmmLabel pkg str CmmRetInfo +mkCmmRetLabel pkg str = CmmLabel pkg str CmmRet +mkCmmCodeLabel pkg str = CmmLabel pkg str CmmCode +mkCmmDataLabel pkg str = CmmLabel pkg str CmmData +mkCmmClosureLabel pkg str = CmmLabel pkg str CmmClosure + +mkLocalBlockLabel :: Unique -> CLabel +mkLocalBlockLabel u = LocalBlockLabel u + +-- Constructing RtsLabels +mkRtsPrimOpLabel :: PrimOp -> CLabel +mkRtsPrimOpLabel primop = RtsLabel (RtsPrimOp primop) + +mkSelectorInfoLabel :: Bool -> Int -> CLabel +mkSelectorEntryLabel :: Bool -> Int -> CLabel +mkSelectorInfoLabel upd off = RtsLabel (RtsSelectorInfoTable upd off) +mkSelectorEntryLabel upd off = RtsLabel (RtsSelectorEntry upd off) + +mkApInfoTableLabel :: Bool -> Int -> CLabel +mkApEntryLabel :: Bool -> Int -> CLabel +mkApInfoTableLabel upd off = RtsLabel (RtsApInfoTable upd 
off) +mkApEntryLabel upd off = RtsLabel (RtsApEntry upd off) + + +-- A call to some primitive hand-written Cmm code +mkPrimCallLabel :: PrimCall -> CLabel +mkPrimCallLabel (PrimCall str pkg) + = CmmLabel pkg str CmmPrimCall + + +-- Constructing ForeignLabels + +-- | Make a foreign label +mkForeignLabel + :: FastString -- name + -> Maybe Int -- size prefix + -> ForeignLabelSource -- what package it's in + -> FunctionOrData + -> CLabel + +mkForeignLabel = ForeignLabel + + +-- | Update the label size field in a ForeignLabel +addLabelSize :: CLabel -> Int -> CLabel +addLabelSize (ForeignLabel str _ src fod) sz + = ForeignLabel str (Just sz) src fod +addLabelSize label _ + = label + +-- | Whether label is a top-level string literal +isBytesLabel :: CLabel -> Bool +isBytesLabel (IdLabel _ _ Bytes) = True +isBytesLabel _lbl = False + +-- | Whether label is a non-haskell label (defined in C code) +isForeignLabel :: CLabel -> Bool +isForeignLabel (ForeignLabel _ _ _ _) = True +isForeignLabel _lbl = False + +-- | Whether label is a static closure label (can come from haskell or cmm) +isStaticClosureLabel :: CLabel -> Bool +-- Closure defined in haskell (.hs) +isStaticClosureLabel (IdLabel _ _ Closure) = True +-- Closure defined in cmm +isStaticClosureLabel (CmmLabel _ _ CmmClosure) = True +isStaticClosureLabel _lbl = False + +-- | Whether label is a .rodata label +isSomeRODataLabel :: CLabel -> Bool +-- info table defined in haskell (.hs) +isSomeRODataLabel (IdLabel _ _ ClosureTable) = True +isSomeRODataLabel (IdLabel _ _ ConInfoTable) = True +isSomeRODataLabel (IdLabel _ _ InfoTable) = True +isSomeRODataLabel (IdLabel _ _ LocalInfoTable) = True +isSomeRODataLabel (IdLabel _ _ BlockInfoTable) = True +-- info table defined in cmm (.cmm) +isSomeRODataLabel (CmmLabel _ _ CmmInfo) = True +isSomeRODataLabel _lbl = False + +-- | Whether the label points to some kind of info table +isInfoTableLabel :: CLabel -> Bool +isInfoTableLabel (IdLabel _ _ InfoTable) = True +isInfoTableLabel (IdLabel _ _ LocalInfoTable) = True +isInfoTableLabel (IdLabel _ _ ConInfoTable) = True +isInfoTableLabel (IdLabel _ _ BlockInfoTable) = True +isInfoTableLabel _ = False + +-- | Whether the label points to a constructor info table +isConInfoTableLabel :: CLabel -> Bool +isConInfoTableLabel (IdLabel _ _ ConInfoTable) = True +isConInfoTableLabel _ = False + +-- | Get the label size field from a ForeignLabel +foreignLabelStdcallInfo :: CLabel -> Maybe Int +foreignLabelStdcallInfo (ForeignLabel _ info _ _) = info +foreignLabelStdcallInfo _lbl = Nothing + + +-- Constructing Large*Labels +mkBitmapLabel :: Unique -> CLabel +mkBitmapLabel uniq = LargeBitmapLabel uniq + +-- Constructing Cost Center Labels +mkCCLabel :: CostCentre -> CLabel +mkCCSLabel :: CostCentreStack -> CLabel +mkCCLabel cc = CC_Label cc +mkCCSLabel ccs = CCS_Label ccs + +mkRtsApFastLabel :: FastString -> CLabel +mkRtsApFastLabel str = RtsLabel (RtsApFast str) + +mkRtsSlowFastTickyCtrLabel :: String -> CLabel +mkRtsSlowFastTickyCtrLabel pat = RtsLabel (RtsSlowFastTickyCtr pat) + + +-- Constructing Code Coverage Labels +mkHpcTicksLabel :: Module -> CLabel +mkHpcTicksLabel = HpcTicksLabel + + +-- Constructing labels used for dynamic linking +mkDynamicLinkerLabel :: DynamicLinkerLabelInfo -> CLabel -> CLabel +mkDynamicLinkerLabel = DynamicLinkerLabel + +dynamicLinkerLabelInfo :: CLabel -> Maybe (DynamicLinkerLabelInfo, CLabel) +dynamicLinkerLabelInfo (DynamicLinkerLabel info lbl) = Just (info, lbl) +dynamicLinkerLabelInfo _ = Nothing + +mkPicBaseLabel :: CLabel 
+mkPicBaseLabel = PicBaseLabel + + +-- Constructing miscellaneous other labels +mkDeadStripPreventer :: CLabel -> CLabel +mkDeadStripPreventer lbl = DeadStripPreventer lbl + +mkStringLitLabel :: Unique -> CLabel +mkStringLitLabel = StringLitLabel + +mkAsmTempLabel :: Uniquable a => a -> CLabel +mkAsmTempLabel a = AsmTempLabel (getUnique a) + +mkAsmTempDerivedLabel :: CLabel -> FastString -> CLabel +mkAsmTempDerivedLabel = AsmTempDerivedLabel + +mkAsmTempEndLabel :: CLabel -> CLabel +mkAsmTempEndLabel l = mkAsmTempDerivedLabel l (fsLit "_end") + +-- | Construct a label for a DWARF Debug Information Entity (DIE) +-- describing another symbol. +mkAsmTempDieLabel :: CLabel -> CLabel +mkAsmTempDieLabel l = mkAsmTempDerivedLabel l (fsLit "_die") + +-- ----------------------------------------------------------------------------- +-- Convert between different kinds of label + +toClosureLbl :: CLabel -> CLabel +toClosureLbl (IdLabel n c _) = IdLabel n c Closure +toClosureLbl (CmmLabel m str _) = CmmLabel m str CmmClosure +toClosureLbl l = pprPanic "toClosureLbl" (ppr l) + +toSlowEntryLbl :: CLabel -> CLabel +toSlowEntryLbl (IdLabel n _ BlockInfoTable) + = pprPanic "toSlowEntryLbl" (ppr n) +toSlowEntryLbl (IdLabel n c _) = IdLabel n c Slow +toSlowEntryLbl l = pprPanic "toSlowEntryLbl" (ppr l) + +toEntryLbl :: CLabel -> CLabel +toEntryLbl (IdLabel n c LocalInfoTable) = IdLabel n c LocalEntry +toEntryLbl (IdLabel n c ConInfoTable) = IdLabel n c ConEntry +toEntryLbl (IdLabel n _ BlockInfoTable) = mkLocalBlockLabel (nameUnique n) + -- See Note [Proc-point local block entry-point]. +toEntryLbl (IdLabel n c _) = IdLabel n c Entry +toEntryLbl (CmmLabel m str CmmInfo) = CmmLabel m str CmmEntry +toEntryLbl (CmmLabel m str CmmRetInfo) = CmmLabel m str CmmRet +toEntryLbl l = pprPanic "toEntryLbl" (ppr l) + +toInfoLbl :: CLabel -> CLabel +toInfoLbl (IdLabel n c LocalEntry) = IdLabel n c LocalInfoTable +toInfoLbl (IdLabel n c ConEntry) = IdLabel n c ConInfoTable +toInfoLbl (IdLabel n c _) = IdLabel n c InfoTable +toInfoLbl (CmmLabel m str CmmEntry) = CmmLabel m str CmmInfo +toInfoLbl (CmmLabel m str CmmRet) = CmmLabel m str CmmRetInfo +toInfoLbl l = pprPanic "CLabel.toInfoLbl" (ppr l) + +hasHaskellName :: CLabel -> Maybe Name +hasHaskellName (IdLabel n _ _) = Just n +hasHaskellName _ = Nothing + +-- ----------------------------------------------------------------------------- +-- Does a CLabel's referent itself refer to a CAF? +hasCAF :: CLabel -> Bool +hasCAF (IdLabel _ _ RednCounts) = False -- Note [ticky for LNE] +hasCAF (IdLabel _ MayHaveCafRefs _) = True +hasCAF _ = False + +-- Note [ticky for LNE] +-- ~~~~~~~~~~~~~~~~~~~~~ + +-- Until 14 Feb 2013, every ticky counter was associated with a +-- closure. Thus, ticky labels used IdLabel. It is odd that +-- GHC.Cmm.Info.Build.cafTransfers would consider such a ticky label +-- reason to add the name to the CAFEnv (and thus eventually the SRT), +-- but it was harmless because the ticky was only used if the closure +-- was also. +-- +-- Since we now have ticky counters for LNEs, it is no longer the case +-- that every ticky counter has an actual closure. So I changed the +-- generation of ticky counters' CLabels to not result in their +-- associated id ending up in the SRT. +-- +-- NB IdLabel is still appropriate for ticky ids (as opposed to +-- CmmLabel) because the LNE's counter is still related to an .hs Id, +-- that Id just isn't for a proper closure. 
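-- [Editorial aside, not part of this patch] A small illustration of the point
-- made in Note [ticky for LNE]: hasCAF answers False for a ticky RednCounts
-- label even though other labels of the same Id may count as CAF references.
-- 'HasCafExample' and 'cafExamples' are hypothetical names used only here;
-- the sketch assumes a GHC build tree with the modules introduced by this patch.

module HasCafExample (cafExamples) where

import GHC.Cmm.CLabel
import IdInfo (CafInfo (MayHaveCafRefs))
import Name (Name)

cafExamples :: Name -> (Bool, Bool)
cafExamples n =
  ( hasCAF (mkClosureLabel n MayHaveCafRefs)  -- True: the closure may refer to CAFs
  , hasCAF (mkRednCountsLabel n)              -- False: ticky counters never end up in an SRT
  )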
+ +-- ----------------------------------------------------------------------------- +-- Does a CLabel need declaring before use or not? +-- +-- See wiki:commentary/compiler/backends/ppr-c#prototypes + +needsCDecl :: CLabel -> Bool + -- False <=> it's pre-declared; don't bother + -- don't bother declaring Bitmap labels, we always make sure + -- they are defined before use. +needsCDecl (SRTLabel _) = True +needsCDecl (LargeBitmapLabel _) = False +needsCDecl (IdLabel _ _ _) = True +needsCDecl (LocalBlockLabel _) = True + +needsCDecl (StringLitLabel _) = False +needsCDecl (AsmTempLabel _) = False +needsCDecl (AsmTempDerivedLabel _ _) = False +needsCDecl (RtsLabel _) = False + +needsCDecl (CmmLabel pkgId _ _) + -- Prototypes for labels defined in the runtime system are imported + -- into HC files via includes/Stg.h. + | pkgId == rtsUnitId = False + + -- For other labels we inline one into the HC file directly. + | otherwise = True + +needsCDecl l@(ForeignLabel{}) = not (isMathFun l) +needsCDecl (CC_Label _) = True +needsCDecl (CCS_Label _) = True +needsCDecl (HpcTicksLabel _) = True +needsCDecl (DynamicLinkerLabel {}) = panic "needsCDecl DynamicLinkerLabel" +needsCDecl PicBaseLabel = panic "needsCDecl PicBaseLabel" +needsCDecl (DeadStripPreventer {}) = panic "needsCDecl DeadStripPreventer" + +-- | If a label is a local block label then return just its 'BlockId', otherwise +-- 'Nothing'. +maybeLocalBlockLabel :: CLabel -> Maybe BlockId +maybeLocalBlockLabel (LocalBlockLabel uq) = Just $ mkBlockId uq +maybeLocalBlockLabel _ = Nothing + + +-- | Check whether a label corresponds to a C function that has +-- a prototype in a system header somewhere, or is built-in +-- to the C compiler. For these labels we avoid generating our +-- own C prototypes. +isMathFun :: CLabel -> Bool +isMathFun (ForeignLabel fs _ _ _) = fs `elementOfUniqSet` math_funs +isMathFun _ = False + +math_funs :: UniqSet FastString +math_funs = mkUniqSet [ + -- _ISOC99_SOURCE + (fsLit "acos"), (fsLit "acosf"), (fsLit "acosh"), + (fsLit "acoshf"), (fsLit "acoshl"), (fsLit "acosl"), + (fsLit "asin"), (fsLit "asinf"), (fsLit "asinl"), + (fsLit "asinh"), (fsLit "asinhf"), (fsLit "asinhl"), + (fsLit "atan"), (fsLit "atanf"), (fsLit "atanl"), + (fsLit "atan2"), (fsLit "atan2f"), (fsLit "atan2l"), + (fsLit "atanh"), (fsLit "atanhf"), (fsLit "atanhl"), + (fsLit "cbrt"), (fsLit "cbrtf"), (fsLit "cbrtl"), + (fsLit "ceil"), (fsLit "ceilf"), (fsLit "ceill"), + (fsLit "copysign"), (fsLit "copysignf"), (fsLit "copysignl"), + (fsLit "cos"), (fsLit "cosf"), (fsLit "cosl"), + (fsLit "cosh"), (fsLit "coshf"), (fsLit "coshl"), + (fsLit "erf"), (fsLit "erff"), (fsLit "erfl"), + (fsLit "erfc"), (fsLit "erfcf"), (fsLit "erfcl"), + (fsLit "exp"), (fsLit "expf"), (fsLit "expl"), + (fsLit "exp2"), (fsLit "exp2f"), (fsLit "exp2l"), + (fsLit "expm1"), (fsLit "expm1f"), (fsLit "expm1l"), + (fsLit "fabs"), (fsLit "fabsf"), (fsLit "fabsl"), + (fsLit "fdim"), (fsLit "fdimf"), (fsLit "fdiml"), + (fsLit "floor"), (fsLit "floorf"), (fsLit "floorl"), + (fsLit "fma"), (fsLit "fmaf"), (fsLit "fmal"), + (fsLit "fmax"), (fsLit "fmaxf"), (fsLit "fmaxl"), + (fsLit "fmin"), (fsLit "fminf"), (fsLit "fminl"), + (fsLit "fmod"), (fsLit "fmodf"), (fsLit "fmodl"), + (fsLit "frexp"), (fsLit "frexpf"), (fsLit "frexpl"), + (fsLit "hypot"), (fsLit "hypotf"), (fsLit "hypotl"), + (fsLit "ilogb"), (fsLit "ilogbf"), (fsLit "ilogbl"), + (fsLit "ldexp"), (fsLit "ldexpf"), (fsLit "ldexpl"), + (fsLit "lgamma"), (fsLit "lgammaf"), (fsLit "lgammal"), + (fsLit "llrint"), (fsLit "llrintf"), 
(fsLit "llrintl"), + (fsLit "llround"), (fsLit "llroundf"), (fsLit "llroundl"), + (fsLit "log"), (fsLit "logf"), (fsLit "logl"), + (fsLit "log10l"), (fsLit "log10"), (fsLit "log10f"), + (fsLit "log1pl"), (fsLit "log1p"), (fsLit "log1pf"), + (fsLit "log2"), (fsLit "log2f"), (fsLit "log2l"), + (fsLit "logb"), (fsLit "logbf"), (fsLit "logbl"), + (fsLit "lrint"), (fsLit "lrintf"), (fsLit "lrintl"), + (fsLit "lround"), (fsLit "lroundf"), (fsLit "lroundl"), + (fsLit "modf"), (fsLit "modff"), (fsLit "modfl"), + (fsLit "nan"), (fsLit "nanf"), (fsLit "nanl"), + (fsLit "nearbyint"), (fsLit "nearbyintf"), (fsLit "nearbyintl"), + (fsLit "nextafter"), (fsLit "nextafterf"), (fsLit "nextafterl"), + (fsLit "nexttoward"), (fsLit "nexttowardf"), (fsLit "nexttowardl"), + (fsLit "pow"), (fsLit "powf"), (fsLit "powl"), + (fsLit "remainder"), (fsLit "remainderf"), (fsLit "remainderl"), + (fsLit "remquo"), (fsLit "remquof"), (fsLit "remquol"), + (fsLit "rint"), (fsLit "rintf"), (fsLit "rintl"), + (fsLit "round"), (fsLit "roundf"), (fsLit "roundl"), + (fsLit "scalbln"), (fsLit "scalblnf"), (fsLit "scalblnl"), + (fsLit "scalbn"), (fsLit "scalbnf"), (fsLit "scalbnl"), + (fsLit "sin"), (fsLit "sinf"), (fsLit "sinl"), + (fsLit "sinh"), (fsLit "sinhf"), (fsLit "sinhl"), + (fsLit "sqrt"), (fsLit "sqrtf"), (fsLit "sqrtl"), + (fsLit "tan"), (fsLit "tanf"), (fsLit "tanl"), + (fsLit "tanh"), (fsLit "tanhf"), (fsLit "tanhl"), + (fsLit "tgamma"), (fsLit "tgammaf"), (fsLit "tgammal"), + (fsLit "trunc"), (fsLit "truncf"), (fsLit "truncl"), + -- ISO C 99 also defines these function-like macros in math.h: + -- fpclassify, isfinite, isinf, isnormal, signbit, isgreater, + -- isgreaterequal, isless, islessequal, islessgreater, isunordered + + -- additional symbols from _BSD_SOURCE + (fsLit "drem"), (fsLit "dremf"), (fsLit "dreml"), + (fsLit "finite"), (fsLit "finitef"), (fsLit "finitel"), + (fsLit "gamma"), (fsLit "gammaf"), (fsLit "gammal"), + (fsLit "isinf"), (fsLit "isinff"), (fsLit "isinfl"), + (fsLit "isnan"), (fsLit "isnanf"), (fsLit "isnanl"), + (fsLit "j0"), (fsLit "j0f"), (fsLit "j0l"), + (fsLit "j1"), (fsLit "j1f"), (fsLit "j1l"), + (fsLit "jn"), (fsLit "jnf"), (fsLit "jnl"), + (fsLit "lgamma_r"), (fsLit "lgammaf_r"), (fsLit "lgammal_r"), + (fsLit "scalb"), (fsLit "scalbf"), (fsLit "scalbl"), + (fsLit "significand"), (fsLit "significandf"), (fsLit "significandl"), + (fsLit "y0"), (fsLit "y0f"), (fsLit "y0l"), + (fsLit "y1"), (fsLit "y1f"), (fsLit "y1l"), + (fsLit "yn"), (fsLit "ynf"), (fsLit "ynl"), + + -- These functions are described in IEEE Std 754-2008 - + -- Standard for Floating-Point Arithmetic and ISO/IEC TS 18661 + (fsLit "nextup"), (fsLit "nextupf"), (fsLit "nextupl"), + (fsLit "nextdown"), (fsLit "nextdownf"), (fsLit "nextdownl") + ] + +-- ----------------------------------------------------------------------------- +-- | Is a CLabel visible outside this object file or not? +-- From the point of view of the code generator, a name is +-- externally visible if it has to be declared as exported +-- in the .o file's symbol table; that is, made non-static. 
+externallyVisibleCLabel :: CLabel -> Bool -- not C "static" +externallyVisibleCLabel (StringLitLabel _) = False +externallyVisibleCLabel (AsmTempLabel _) = False +externallyVisibleCLabel (AsmTempDerivedLabel _ _)= False +externallyVisibleCLabel (RtsLabel _) = True +externallyVisibleCLabel (LocalBlockLabel _) = False +externallyVisibleCLabel (CmmLabel _ _ _) = True +externallyVisibleCLabel (ForeignLabel{}) = True +externallyVisibleCLabel (IdLabel name _ info) = isExternalName name && externallyVisibleIdLabel info +externallyVisibleCLabel (CC_Label _) = True +externallyVisibleCLabel (CCS_Label _) = True +externallyVisibleCLabel (DynamicLinkerLabel _ _) = False +externallyVisibleCLabel (HpcTicksLabel _) = True +externallyVisibleCLabel (LargeBitmapLabel _) = False +externallyVisibleCLabel (SRTLabel _) = False +externallyVisibleCLabel (PicBaseLabel {}) = panic "externallyVisibleCLabel PicBaseLabel" +externallyVisibleCLabel (DeadStripPreventer {}) = panic "externallyVisibleCLabel DeadStripPreventer" + +externallyVisibleIdLabel :: IdLabelInfo -> Bool +externallyVisibleIdLabel LocalInfoTable = False +externallyVisibleIdLabel LocalEntry = False +externallyVisibleIdLabel BlockInfoTable = False +externallyVisibleIdLabel _ = True + +-- ----------------------------------------------------------------------------- +-- Finding the "type" of a CLabel + +-- For generating correct types in label declarations: + +data CLabelType + = CodeLabel -- Address of some executable instructions + | DataLabel -- Address of data, not a GC ptr + | GcPtrLabel -- Address of a (presumably static) GC object + +isCFunctionLabel :: CLabel -> Bool +isCFunctionLabel lbl = case labelType lbl of + CodeLabel -> True + _other -> False + +isGcPtrLabel :: CLabel -> Bool +isGcPtrLabel lbl = case labelType lbl of + GcPtrLabel -> True + _other -> False + + +-- | Work out the general type of data at the address of this label +-- whether it be code, data, or static GC object. +labelType :: CLabel -> CLabelType +labelType (IdLabel _ _ info) = idInfoLabelType info +labelType (CmmLabel _ _ CmmData) = DataLabel +labelType (CmmLabel _ _ CmmClosure) = GcPtrLabel +labelType (CmmLabel _ _ CmmCode) = CodeLabel +labelType (CmmLabel _ _ CmmInfo) = DataLabel +labelType (CmmLabel _ _ CmmEntry) = CodeLabel +labelType (CmmLabel _ _ CmmPrimCall) = CodeLabel +labelType (CmmLabel _ _ CmmRetInfo) = DataLabel +labelType (CmmLabel _ _ CmmRet) = CodeLabel +labelType (RtsLabel (RtsSelectorInfoTable _ _)) = DataLabel +labelType (RtsLabel (RtsApInfoTable _ _)) = DataLabel +labelType (RtsLabel (RtsApFast _)) = CodeLabel +labelType (RtsLabel _) = DataLabel +labelType (LocalBlockLabel _) = CodeLabel +labelType (SRTLabel _) = DataLabel +labelType (ForeignLabel _ _ _ IsFunction) = CodeLabel +labelType (ForeignLabel _ _ _ IsData) = DataLabel +labelType (AsmTempLabel _) = panic "labelType(AsmTempLabel)" +labelType (AsmTempDerivedLabel _ _) = panic "labelType(AsmTempDerivedLabel)" +labelType (StringLitLabel _) = DataLabel +labelType (CC_Label _) = DataLabel +labelType (CCS_Label _) = DataLabel +labelType (DynamicLinkerLabel _ _) = DataLabel -- Is this right? 
+labelType PicBaseLabel = DataLabel +labelType (DeadStripPreventer _) = DataLabel +labelType (HpcTicksLabel _) = DataLabel +labelType (LargeBitmapLabel _) = DataLabel + +idInfoLabelType :: IdLabelInfo -> CLabelType +idInfoLabelType info = + case info of + InfoTable -> DataLabel + LocalInfoTable -> DataLabel + BlockInfoTable -> DataLabel + Closure -> GcPtrLabel + ConInfoTable -> DataLabel + ClosureTable -> DataLabel + RednCounts -> DataLabel + Bytes -> DataLabel + _ -> CodeLabel + + +-- ----------------------------------------------------------------------------- + +-- | Is a 'CLabel' defined in the current module being compiled? +-- +-- Sometimes we can optimise references within a compilation unit in ways that +-- we couldn't for inter-module references. This provides a conservative +-- estimate of whether a 'CLabel' lives in the current module. +isLocalCLabel :: Module -> CLabel -> Bool +isLocalCLabel this_mod lbl = + case lbl of + IdLabel name _ _ + | isInternalName name -> True + | otherwise -> nameModule name == this_mod + LocalBlockLabel _ -> True + _ -> False + +-- ----------------------------------------------------------------------------- + +-- | Does a 'CLabel' need dynamic linkage? +-- +-- When referring to data in code, we need to know whether +-- that data resides in a DLL or not. [Win32 only.] +-- @labelDynamic@ returns @True@ if the label is located +-- in a DLL, be it a data reference or not. +labelDynamic :: DynFlags -> Module -> CLabel -> Bool +labelDynamic dflags this_mod lbl = + case lbl of + -- is the RTS in a DLL or not? + RtsLabel _ -> + externalDynamicRefs && (this_pkg /= rtsUnitId) + + IdLabel n _ _ -> + isDllName dflags this_mod n + + -- When compiling in the "dyn" way, each package is to be linked into + -- its own shared library. + CmmLabel pkg _ _ + | os == OSMinGW32 -> + externalDynamicRefs && (this_pkg /= pkg) + | otherwise -> + gopt Opt_ExternalDynamicRefs dflags + + LocalBlockLabel _ -> False + + ForeignLabel _ _ source _ -> + if os == OSMinGW32 + then case source of + -- Foreign label is in some un-named foreign package (or DLL). + ForeignLabelInExternalPackage -> True + + -- Foreign label is linked into the same package as the + -- source file currently being compiled. + ForeignLabelInThisPackage -> False + + -- Foreign label is in some named package. + -- When compiling in the "dyn" way, each package is to be + -- linked into its own DLL. + ForeignLabelInPackage pkgId -> + externalDynamicRefs && (this_pkg /= pkgId) + + else -- On Mac OS X and on ELF platforms, false positives are OK, + -- so we claim that all foreign imports come from dynamic + -- libraries + True + + CC_Label cc -> + externalDynamicRefs && not (ccFromThisModule cc this_mod) + + -- CCS_Label always contains a CostCentre defined in the current module + CCS_Label _ -> False + + HpcTicksLabel m -> + externalDynamicRefs && this_mod /= m + + -- Note that DynamicLinkerLabels do NOT require dynamic linking themselves. + _ -> False + where + externalDynamicRefs = gopt Opt_ExternalDynamicRefs dflags + os = platformOS (targetPlatform dflags) + this_pkg = moduleUnitId this_mod + + +----------------------------------------------------------------------------- +-- Printing out CLabels. + +{- +Convention: + + <name>_<type> + +where <name> is <Module>_<name> for external names and <unique> for +internal names. 
<type> is one of the following:
+
+         info                   Info table
+         srt                    Static reference table
+         entry                  Entry code (function, closure)
+         slow                   Slow entry code (if any)
+         ret                    Direct return address
+         vtbl                   Vector table
+         <n>_alt                Case alternative (tag n)
+         dflt                   Default case alternative
+         btm                    Large bitmap vector
+         closure                Static closure
+         con_entry              Dynamic Constructor entry code
+         con_info               Dynamic Constructor info table
+         static_entry           Static Constructor entry code
+         static_info            Static Constructor info table
+         sel_info               Selector info table
+         sel_entry              Selector entry code
+         cc                     Cost centre
+         ccs                    Cost centre stack
+
+Many of these distinctions are only for documentation reasons.  For
+example, _ret is only distinguished from _entry to make it easy to
+tell whether a code fragment is a return point or a closure/function
+entry.
+
+Note [Closure and info labels]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+For a function 'foo', we have:
+   foo_info    : Points to the info table describing foo's closure
+                 (and entry code for foo with tables next to code)
+   foo_closure : Static (no-free-var) closure only:
+                 points to the statically-allocated closure
+
+For a data constructor (such as Just or Nothing), we have:
+    Just_con_info: Info table for the data constructor itself,
+                   the first word of a heap-allocated Just
+    Just_info:     Info table for the *worker function*, an
+                   ordinary Haskell function of arity 1 that
+                   allocates a (Just x) box:
+                      Just = \x -> Just x
+    Just_closure:  The closure for this worker
+
+    Nothing_closure: a statically allocated closure for Nothing
+    Nothing_static_info: info table for Nothing_closure
+
+All these must be exported symbols, EXCEPT Just_info.  We don't need to
+export this because in other modules we either have
+       * A reference to 'Just'; use Just_closure
+       * A saturated call 'Just x'; allocate using Just_con_info
+Not exporting these Just_info labels reduces the number of symbols
+somewhat.
+
+Note [Bytes label]
+~~~~~~~~~~~~~~~~~~
+For a top-level string literal 'foo', we have just one symbol 'foo_bytes', which
+points to a static data block containing the content of the literal.
+
+Note [Proc-point local block entry-points]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+A label for a proc-point local block entry-point has no "_entry" suffix. With
+`infoTblLbl` we derive an info table label from a proc-point block ID. If
+we convert such an info table label into an entry label we must produce
+the label without an "_entry" suffix. So an info table label records
+the fact that it was derived from a block ID in `IdLabelInfo` as
+`BlockInfoTable`.
+
+The info table label and the local block label are both local labels
+and are not externally visible.
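+
+To make the convention above concrete (schematically; the printed form is
+further subject to z-encoding and to any platform-specific leading
+underscore), a top-level binding 'foo' gives rise to labels such as:
+
+   foo_closure    static closure        (IdLabel foo _ Closure)
+   foo_info       info table            (IdLabel foo _ InfoTable)
+   foo_entry      entry code            (IdLabel foo _ Entry)
+   foo_bytes      string literal data   (IdLabel foo _ Bytes)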
+-} + +instance Outputable CLabel where + ppr c = sdocWithDynFlags $ \dynFlags -> pprCLabel dynFlags c + +pprCLabel :: DynFlags -> CLabel -> SDoc + +pprCLabel _ (LocalBlockLabel u) + = tempLabelPrefixOrUnderscore <> pprUniqueAlways u + +pprCLabel dynFlags (AsmTempLabel u) + | not (platformUnregisterised $ targetPlatform dynFlags) + = tempLabelPrefixOrUnderscore <> pprUniqueAlways u + +pprCLabel dynFlags (AsmTempDerivedLabel l suf) + | platformMisc_ghcWithNativeCodeGen $ platformMisc dynFlags + = ptext (asmTempLabelPrefix $ targetPlatform dynFlags) + <> case l of AsmTempLabel u -> pprUniqueAlways u + LocalBlockLabel u -> pprUniqueAlways u + _other -> pprCLabel dynFlags l + <> ftext suf + +pprCLabel dynFlags (DynamicLinkerLabel info lbl) + | platformMisc_ghcWithNativeCodeGen $ platformMisc dynFlags + = pprDynamicLinkerAsmLabel (targetPlatform dynFlags) info lbl + +pprCLabel dynFlags PicBaseLabel + | platformMisc_ghcWithNativeCodeGen $ platformMisc dynFlags + = text "1b" + +pprCLabel dynFlags (DeadStripPreventer lbl) + | platformMisc_ghcWithNativeCodeGen $ platformMisc dynFlags + = + {- + `lbl` can be temp one but we need to ensure that dsp label will stay + in the final binary so we prepend non-temp prefix ("dsp_") and + optional `_` (underscore) because this is how you mark non-temp symbols + on some platforms (Darwin) + -} + maybe_underscore dynFlags $ text "dsp_" + <> pprCLabel dynFlags lbl <> text "_dsp" + +pprCLabel dynFlags (StringLitLabel u) + | platformMisc_ghcWithNativeCodeGen $ platformMisc dynFlags + = pprUniqueAlways u <> ptext (sLit "_str") + +pprCLabel dynFlags lbl + = getPprStyle $ \ sty -> + if platformMisc_ghcWithNativeCodeGen (platformMisc dynFlags) && asmStyle sty + then maybe_underscore dynFlags $ pprAsmCLbl (targetPlatform dynFlags) lbl + else pprCLbl lbl + +maybe_underscore :: DynFlags -> SDoc -> SDoc +maybe_underscore dynFlags doc = + if platformMisc_leadingUnderscore $ platformMisc dynFlags + then pp_cSEP <> doc + else doc + +pprAsmCLbl :: Platform -> CLabel -> SDoc +pprAsmCLbl platform (ForeignLabel fs (Just sz) _ _) + | platformOS platform == OSMinGW32 + -- In asm mode, we need to put the suffix on a stdcall ForeignLabel. + -- (The C compiler does this itself). + = ftext fs <> char '@' <> int sz +pprAsmCLbl _ lbl + = pprCLbl lbl + +pprCLbl :: CLabel -> SDoc +pprCLbl (StringLitLabel u) + = pprUniqueAlways u <> text "_str" + +pprCLbl (SRTLabel u) + = tempLabelPrefixOrUnderscore <> pprUniqueAlways u <> pp_cSEP <> text "srt" + +pprCLbl (LargeBitmapLabel u) = + tempLabelPrefixOrUnderscore + <> char 'b' <> pprUniqueAlways u <> pp_cSEP <> text "btm" +-- Some bitsmaps for tuple constructors have a numeric tag (e.g. '7') +-- until that gets resolved we'll just force them to start +-- with a letter so the label will be legal assembly code. 
+ + +pprCLbl (CmmLabel _ str CmmCode) = ftext str +pprCLbl (CmmLabel _ str CmmData) = ftext str +pprCLbl (CmmLabel _ str CmmPrimCall) = ftext str + +pprCLbl (LocalBlockLabel u) = + tempLabelPrefixOrUnderscore <> text "blk_" <> pprUniqueAlways u + +pprCLbl (RtsLabel (RtsApFast str)) = ftext str <> text "_fast" + +pprCLbl (RtsLabel (RtsSelectorInfoTable upd_reqd offset)) + = sdocWithDynFlags $ \dflags -> + ASSERT(offset >= 0 && offset <= mAX_SPEC_SELECTEE_SIZE dflags) + hcat [text "stg_sel_", text (show offset), + ptext (if upd_reqd + then (sLit "_upd_info") + else (sLit "_noupd_info")) + ] + +pprCLbl (RtsLabel (RtsSelectorEntry upd_reqd offset)) + = sdocWithDynFlags $ \dflags -> + ASSERT(offset >= 0 && offset <= mAX_SPEC_SELECTEE_SIZE dflags) + hcat [text "stg_sel_", text (show offset), + ptext (if upd_reqd + then (sLit "_upd_entry") + else (sLit "_noupd_entry")) + ] + +pprCLbl (RtsLabel (RtsApInfoTable upd_reqd arity)) + = sdocWithDynFlags $ \dflags -> + ASSERT(arity > 0 && arity <= mAX_SPEC_AP_SIZE dflags) + hcat [text "stg_ap_", text (show arity), + ptext (if upd_reqd + then (sLit "_upd_info") + else (sLit "_noupd_info")) + ] + +pprCLbl (RtsLabel (RtsApEntry upd_reqd arity)) + = sdocWithDynFlags $ \dflags -> + ASSERT(arity > 0 && arity <= mAX_SPEC_AP_SIZE dflags) + hcat [text "stg_ap_", text (show arity), + ptext (if upd_reqd + then (sLit "_upd_entry") + else (sLit "_noupd_entry")) + ] + +pprCLbl (CmmLabel _ fs CmmInfo) + = ftext fs <> text "_info" + +pprCLbl (CmmLabel _ fs CmmEntry) + = ftext fs <> text "_entry" + +pprCLbl (CmmLabel _ fs CmmRetInfo) + = ftext fs <> text "_info" + +pprCLbl (CmmLabel _ fs CmmRet) + = ftext fs <> text "_ret" + +pprCLbl (CmmLabel _ fs CmmClosure) + = ftext fs <> text "_closure" + +pprCLbl (RtsLabel (RtsPrimOp primop)) + = text "stg_" <> ppr primop + +pprCLbl (RtsLabel (RtsSlowFastTickyCtr pat)) + = text "SLOW_CALL_fast_" <> text pat <> ptext (sLit "_ctr") + +pprCLbl (ForeignLabel str _ _ _) + = ftext str + +pprCLbl (IdLabel name _cafs flavor) = + internalNamePrefix name <> ppr name <> ppIdFlavor flavor + +pprCLbl (CC_Label cc) = ppr cc +pprCLbl (CCS_Label ccs) = ppr ccs + +pprCLbl (HpcTicksLabel mod) + = text "_hpc_tickboxes_" <> ppr mod <> ptext (sLit "_hpc") + +pprCLbl (AsmTempLabel {}) = panic "pprCLbl AsmTempLabel" +pprCLbl (AsmTempDerivedLabel {})= panic "pprCLbl AsmTempDerivedLabel" +pprCLbl (DynamicLinkerLabel {}) = panic "pprCLbl DynamicLinkerLabel" +pprCLbl (PicBaseLabel {}) = panic "pprCLbl PicBaseLabel" +pprCLbl (DeadStripPreventer {}) = panic "pprCLbl DeadStripPreventer" + +ppIdFlavor :: IdLabelInfo -> SDoc +ppIdFlavor x = pp_cSEP <> text + (case x of + Closure -> "closure" + InfoTable -> "info" + LocalInfoTable -> "info" + Entry -> "entry" + LocalEntry -> "entry" + Slow -> "slow" + RednCounts -> "ct" + ConEntry -> "con_entry" + ConInfoTable -> "con_info" + ClosureTable -> "closure_tbl" + Bytes -> "bytes" + BlockInfoTable -> "info" + ) + + +pp_cSEP :: SDoc +pp_cSEP = char '_' + + +instance Outputable ForeignLabelSource where + ppr fs + = case fs of + ForeignLabelInPackage pkgId -> parens $ text "package: " <> ppr pkgId + ForeignLabelInThisPackage -> parens $ text "this package" + ForeignLabelInExternalPackage -> parens $ text "external package" + +internalNamePrefix :: Name -> SDoc +internalNamePrefix name = getPprStyle $ \ sty -> + if asmStyle sty && isRandomGenerated then + sdocWithPlatform $ \platform -> + ptext (asmTempLabelPrefix platform) + else + empty + where + isRandomGenerated = not $ isExternalName name + +tempLabelPrefixOrUnderscore 
:: SDoc +tempLabelPrefixOrUnderscore = sdocWithPlatform $ \platform -> + getPprStyle $ \ sty -> + if asmStyle sty then + ptext (asmTempLabelPrefix platform) + else + char '_' + +-- ----------------------------------------------------------------------------- +-- Machine-dependent knowledge about labels. + +asmTempLabelPrefix :: Platform -> PtrString -- for formatting labels +asmTempLabelPrefix platform = case platformOS platform of + OSDarwin -> sLit "L" + OSAIX -> sLit "__L" -- follow IBM XL C's convention + _ -> sLit ".L" + +pprDynamicLinkerAsmLabel :: Platform -> DynamicLinkerLabelInfo -> CLabel -> SDoc +pprDynamicLinkerAsmLabel platform dllInfo lbl = + case platformOS platform of + OSDarwin + | platformArch platform == ArchX86_64 -> + case dllInfo of + CodeStub -> char 'L' <> ppr lbl <> text "$stub" + SymbolPtr -> char 'L' <> ppr lbl <> text "$non_lazy_ptr" + GotSymbolPtr -> ppr lbl <> text "@GOTPCREL" + GotSymbolOffset -> ppr lbl + | otherwise -> + case dllInfo of + CodeStub -> char 'L' <> ppr lbl <> text "$stub" + SymbolPtr -> char 'L' <> ppr lbl <> text "$non_lazy_ptr" + _ -> panic "pprDynamicLinkerAsmLabel" + + OSAIX -> + case dllInfo of + SymbolPtr -> text "LC.." <> ppr lbl -- GCC's naming convention + _ -> panic "pprDynamicLinkerAsmLabel" + + _ | osElfTarget (platformOS platform) -> elfLabel + + OSMinGW32 -> + case dllInfo of + SymbolPtr -> text "__imp_" <> ppr lbl + _ -> panic "pprDynamicLinkerAsmLabel" + + _ -> panic "pprDynamicLinkerAsmLabel" + where + elfLabel + | platformArch platform == ArchPPC + = case dllInfo of + CodeStub -> -- See Note [.LCTOC1 in PPC PIC code] + ppr lbl <> text "+32768@plt" + SymbolPtr -> text ".LC_" <> ppr lbl + _ -> panic "pprDynamicLinkerAsmLabel" + + | platformArch platform == ArchX86_64 + = case dllInfo of + CodeStub -> ppr lbl <> text "@plt" + GotSymbolPtr -> ppr lbl <> text "@gotpcrel" + GotSymbolOffset -> ppr lbl + SymbolPtr -> text ".LC_" <> ppr lbl + + | platformArch platform == ArchPPC_64 ELF_V1 + || platformArch platform == ArchPPC_64 ELF_V2 + = case dllInfo of + GotSymbolPtr -> text ".LC_" <> ppr lbl + <> text "@toc" + GotSymbolOffset -> ppr lbl + SymbolPtr -> text ".LC_" <> ppr lbl + _ -> panic "pprDynamicLinkerAsmLabel" + + | otherwise + = case dllInfo of + CodeStub -> ppr lbl <> text "@plt" + SymbolPtr -> text ".LC_" <> ppr lbl + GotSymbolPtr -> ppr lbl <> text "@got" + GotSymbolOffset -> ppr lbl <> text "@gotoff" + +-- Figure out whether `symbol` may serve as an alias +-- to `target` within one compilation unit. +-- +-- This is true if any of these holds: +-- * `target` is a module-internal haskell name. +-- * `target` is an exported name, but comes from the same +-- module as `symbol` +-- +-- These are sufficient conditions for establishing e.g. a +-- GNU assembly alias ('.equiv' directive). Sadly, there is +-- no such thing as an alias to an imported symbol (conf. +-- http://blog.omega-prime.co.uk/2011/07/06/the-sad-state-of-symbol-aliases/) +-- See note [emit-time elimination of static indirections]. +-- +-- Precondition is that both labels represent the +-- same semantic value. 
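+--
+-- For example (using the labels from
+-- Note [emit-time elimination of static indirections] below):
+--
+--   T15155.a_closure `mayRedirectTo` a1_rXq_closure  ==  True
+--
+-- because the indirectee a1_rXq_closure is a module-local static closure.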
+ +mayRedirectTo :: CLabel -> CLabel -> Bool +mayRedirectTo symbol target + | Just nam <- haskellName + , staticClosureLabel + , isExternalName nam + , Just mod <- nameModule_maybe nam + , Just anam <- hasHaskellName symbol + , Just amod <- nameModule_maybe anam + = amod == mod + + | Just nam <- haskellName + , staticClosureLabel + , isInternalName nam + = True + + | otherwise = False + where staticClosureLabel = isStaticClosureLabel target + haskellName = hasHaskellName target + + +{- +Note [emit-time elimination of static indirections] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +As described in #15155, certain static values are representationally +equivalent, e.g. 'cast'ed values (when created by 'newtype' wrappers). + + newtype A = A Int + {-# NOINLINE a #-} + a = A 42 + +a1_rYB :: Int +[GblId, Caf=NoCafRefs, Unf=OtherCon []] +a1_rYB = GHC.Types.I# 42# + +a [InlPrag=NOINLINE] :: A +[GblId, Unf=OtherCon []] +a = a1_rYB `cast` (Sym (T15155.N:A[0]) :: Int ~R# A) + +Formerly we created static indirections for these (IND_STATIC), which +consist of a statically allocated forwarding closure that contains +the (possibly tagged) indirectee. (See CMM/assembly below.) +This approach is suboptimal for two reasons: + (a) they occupy extra space, + (b) they need to be entered in order to obtain the indirectee, + thus they cannot be tagged. + +Fortunately there is a common case where static indirections can be +eliminated while emitting assembly (native or LLVM), viz. when the +indirectee is in the same module (object file) as the symbol that +points to it. In this case an assembly-level identification can +be created ('.equiv' directive), and as such the same object will +be assigned two names in the symbol table. Any of the identified +symbols can be referenced by a tagged pointer. + +Currently the 'mayRedirectTo' predicate will +give a clue whether a label can be equated with another, already +emitted, label (which can in turn be an alias). The general mechanics +is that we identify data (IND_STATIC closures) that are amenable +to aliasing while pretty-printing of assembly output, and emit the +'.equiv' directive instead of static data in such a case. + +Here is a sketch how the output is massaged: + + Consider +newtype A = A Int +{-# NOINLINE a #-} +a = A 42 -- I# 42# is the indirectee + -- 'a' is exported + + results in STG + +a1_rXq :: GHC.Types.Int +[GblId, Caf=NoCafRefs, Unf=OtherCon []] = + CCS_DONT_CARE GHC.Types.I#! [42#]; + +T15155.a [InlPrag=NOINLINE] :: T15155.A +[GblId, Unf=OtherCon []] = + CAF_ccs \ u [] a1_rXq; + + and CMM + +[section ""data" . a1_rXq_closure" { + a1_rXq_closure: + const GHC.Types.I#_con_info; + const 42; + }] + +[section ""data" . T15155.a_closure" { + T15155.a_closure: + const stg_IND_STATIC_info; + const a1_rXq_closure+1; + const 0; + const 0; + }] + +The emitted assembly is + +#### INDIRECTEE +a1_rXq_closure: -- module local haskell value + .quad GHC.Types.I#_con_info -- an Int + .quad 42 + +#### BEFORE +.globl T15155.a_closure -- exported newtype wrapped value +T15155.a_closure: + .quad stg_IND_STATIC_info -- the closure info + .quad a1_rXq_closure+1 -- indirectee ('+1' being the tag) + .quad 0 + .quad 0 + +#### AFTER +.globl T15155.a_closure -- exported newtype wrapped value +.equiv a1_rXq_closure,T15155.a_closure -- both are shared + +The transformation is performed because + T15155.a_closure `mayRedirectTo` a1_rXq_closure+1 +returns True. 
+-} diff --git a/compiler/GHC/Cmm/CallConv.hs b/compiler/GHC/Cmm/CallConv.hs new file mode 100644 index 0000000000..9200daec57 --- /dev/null +++ b/compiler/GHC/Cmm/CallConv.hs @@ -0,0 +1,212 @@ +module GHC.Cmm.CallConv ( + ParamLocation(..), + assignArgumentsPos, + assignStack, + realArgRegsCover +) where + +import GhcPrelude + +import GHC.Cmm.Expr +import GHC.Runtime.Layout +import GHC.Cmm (Convention(..)) +import GHC.Cmm.Ppr () -- For Outputable instances + +import DynFlags +import GHC.Platform +import Outputable + +-- Calculate the 'GlobalReg' or stack locations for function call +-- parameters as used by the Cmm calling convention. + +data ParamLocation + = RegisterParam GlobalReg + | StackParam ByteOff + +instance Outputable ParamLocation where + ppr (RegisterParam g) = ppr g + ppr (StackParam p) = ppr p + +-- | +-- Given a list of arguments, and a function that tells their types, +-- return a list showing where each argument is passed +-- +assignArgumentsPos :: DynFlags + -> ByteOff -- stack offset to start with + -> Convention + -> (a -> CmmType) -- how to get a type from an arg + -> [a] -- args + -> ( + ByteOff -- bytes of stack args + , [(a, ParamLocation)] -- args and locations + ) + +assignArgumentsPos dflags off conv arg_ty reps = (stk_off, assignments) + where + regs = case (reps, conv) of + (_, NativeNodeCall) -> getRegsWithNode dflags + (_, NativeDirectCall) -> getRegsWithoutNode dflags + ([_], NativeReturn) -> allRegs dflags + (_, NativeReturn) -> getRegsWithNode dflags + -- GC calling convention *must* put values in registers + (_, GC) -> allRegs dflags + (_, Slow) -> nodeOnly + -- The calling conventions first assign arguments to registers, + -- then switch to the stack when we first run out of registers + -- (even if there are still available registers for args of a + -- different type). When returning an unboxed tuple, we also + -- separate the stack arguments by pointerhood. 
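+      --
+      -- Schematically (which registers and offsets are free depends on the
+      -- platform and on 'off'): with two free vanilla registers r1, r2 and
+      -- three word-sized arguments a1, a2, a3, we end up with
+      --
+      --    a1 -> RegisterParam r1
+      --    a2 -> RegisterParam r2
+      --    a3 -> StackParam (off + 8)    -- first free stack slot (64-bit)
+      --
+      -- and the returned ByteOff is the total stack space used, off + 8.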
+ (reg_assts, stk_args) = assign_regs [] reps regs + (stk_off, stk_assts) = assignStack dflags off arg_ty stk_args + assignments = reg_assts ++ stk_assts + + assign_regs assts [] _ = (assts, []) + assign_regs assts (r:rs) regs | isVecType ty = vec + | isFloatType ty = float + | otherwise = int + where vec = case (w, regs) of + (W128, (vs, fs, ds, ls, s:ss)) + | passVectorInReg W128 dflags -> k (RegisterParam (XmmReg s), (vs, fs, ds, ls, ss)) + (W256, (vs, fs, ds, ls, s:ss)) + | passVectorInReg W256 dflags -> k (RegisterParam (YmmReg s), (vs, fs, ds, ls, ss)) + (W512, (vs, fs, ds, ls, s:ss)) + | passVectorInReg W512 dflags -> k (RegisterParam (ZmmReg s), (vs, fs, ds, ls, ss)) + _ -> (assts, (r:rs)) + float = case (w, regs) of + (W32, (vs, fs, ds, ls, s:ss)) + | passFloatInXmm -> k (RegisterParam (FloatReg s), (vs, fs, ds, ls, ss)) + (W32, (vs, f:fs, ds, ls, ss)) + | not passFloatInXmm -> k (RegisterParam f, (vs, fs, ds, ls, ss)) + (W64, (vs, fs, ds, ls, s:ss)) + | passFloatInXmm -> k (RegisterParam (DoubleReg s), (vs, fs, ds, ls, ss)) + (W64, (vs, fs, d:ds, ls, ss)) + | not passFloatInXmm -> k (RegisterParam d, (vs, fs, ds, ls, ss)) + _ -> (assts, (r:rs)) + int = case (w, regs) of + (W128, _) -> panic "W128 unsupported register type" + (_, (v:vs, fs, ds, ls, ss)) | widthInBits w <= widthInBits (wordWidth dflags) + -> k (RegisterParam (v gcp), (vs, fs, ds, ls, ss)) + (_, (vs, fs, ds, l:ls, ss)) | widthInBits w > widthInBits (wordWidth dflags) + -> k (RegisterParam l, (vs, fs, ds, ls, ss)) + _ -> (assts, (r:rs)) + k (asst, regs') = assign_regs ((r, asst) : assts) rs regs' + ty = arg_ty r + w = typeWidth ty + gcp | isGcPtrType ty = VGcPtr + | otherwise = VNonGcPtr + passFloatInXmm = passFloatArgsInXmm dflags + +passFloatArgsInXmm :: DynFlags -> Bool +passFloatArgsInXmm dflags = case platformArch (targetPlatform dflags) of + ArchX86_64 -> True + ArchX86 -> False + _ -> False + +-- We used to spill vector registers to the stack since the LLVM backend didn't +-- support vector registers in its calling convention. However, this has now +-- been fixed. This function remains only as a convenient way to re-enable +-- spilling when debugging code generation. +passVectorInReg :: Width -> DynFlags -> Bool +passVectorInReg _ _ = True + +assignStack :: DynFlags -> ByteOff -> (a -> CmmType) -> [a] + -> ( + ByteOff -- bytes of stack args + , [(a, ParamLocation)] -- args and locations + ) +assignStack dflags offset arg_ty args = assign_stk offset [] (reverse args) + where + assign_stk offset assts [] = (offset, assts) + assign_stk offset assts (r:rs) + = assign_stk off' ((r, StackParam off') : assts) rs + where w = typeWidth (arg_ty r) + off' = offset + size + -- Stack arguments always take a whole number of words, we never + -- pack them unlike constructor fields. + size = roundUpToWords dflags (widthInBytes w) + +----------------------------------------------------------------------------- +-- Local information about the registers available + +type AvailRegs = ( [VGcPtr -> GlobalReg] -- available vanilla regs. + , [GlobalReg] -- floats + , [GlobalReg] -- doubles + , [GlobalReg] -- longs (int64 and word64) + , [Int] -- XMM (floats and doubles) + ) + +-- Vanilla registers can contain pointers, Ints, Chars. +-- Floats and doubles have separate register supplies. +-- +-- We take these register supplies from the *real* registers, i.e. those +-- that are guaranteed to map to machine registers. 
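+--
+-- Schematically, an AvailRegs value looks like
+--
+--   ( [VanillaReg 1, VanillaReg 2, ...]   -- each still awaiting its VGcPtr flag
+--   , [FloatReg 1, ...]
+--   , [DoubleReg 1, ...]
+--   , [LongReg 1, ...]
+--   , [1, 2, ...] )                       -- XMM register *numbers*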
+ +getRegsWithoutNode, getRegsWithNode :: DynFlags -> AvailRegs +getRegsWithoutNode dflags = + ( filter (\r -> r VGcPtr /= node) (realVanillaRegs dflags) + , realFloatRegs dflags + , realDoubleRegs dflags + , realLongRegs dflags + , realXmmRegNos dflags) + +-- getRegsWithNode uses R1/node even if it isn't a register +getRegsWithNode dflags = + ( if null (realVanillaRegs dflags) + then [VanillaReg 1] + else realVanillaRegs dflags + , realFloatRegs dflags + , realDoubleRegs dflags + , realLongRegs dflags + , realXmmRegNos dflags) + +allFloatRegs, allDoubleRegs, allLongRegs :: DynFlags -> [GlobalReg] +allVanillaRegs :: DynFlags -> [VGcPtr -> GlobalReg] +allXmmRegs :: DynFlags -> [Int] + +allVanillaRegs dflags = map VanillaReg $ regList (mAX_Vanilla_REG dflags) +allFloatRegs dflags = map FloatReg $ regList (mAX_Float_REG dflags) +allDoubleRegs dflags = map DoubleReg $ regList (mAX_Double_REG dflags) +allLongRegs dflags = map LongReg $ regList (mAX_Long_REG dflags) +allXmmRegs dflags = regList (mAX_XMM_REG dflags) + +realFloatRegs, realDoubleRegs, realLongRegs :: DynFlags -> [GlobalReg] +realVanillaRegs :: DynFlags -> [VGcPtr -> GlobalReg] +realXmmRegNos :: DynFlags -> [Int] + +realVanillaRegs dflags = map VanillaReg $ regList (mAX_Real_Vanilla_REG dflags) +realFloatRegs dflags = map FloatReg $ regList (mAX_Real_Float_REG dflags) +realDoubleRegs dflags = map DoubleReg $ regList (mAX_Real_Double_REG dflags) +realLongRegs dflags = map LongReg $ regList (mAX_Real_Long_REG dflags) + +realXmmRegNos dflags + | isSse2Enabled dflags = regList (mAX_Real_XMM_REG dflags) + | otherwise = [] + +regList :: Int -> [Int] +regList n = [1 .. n] + +allRegs :: DynFlags -> AvailRegs +allRegs dflags = (allVanillaRegs dflags, + allFloatRegs dflags, + allDoubleRegs dflags, + allLongRegs dflags, + allXmmRegs dflags) + +nodeOnly :: AvailRegs +nodeOnly = ([VanillaReg 1], [], [], [], []) + +-- This returns the set of global registers that *cover* the machine registers +-- used for argument passing. On platforms where registers can overlap---right +-- now just x86-64, where Float and Double registers overlap---passing this set +-- of registers is guaranteed to preserve the contents of all live registers. We +-- only use this functionality in hand-written C-- code in the RTS. 
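+--
+-- Schematically, the cover set is
+--
+--   vanillas ++ longs ++ [XmmReg 1, XmmReg 2, ...]
+--
+-- on x86-64 (floats and doubles travel in the XMM registers anyway), and
+--
+--   vanillas ++ floats ++ doubles ++ longs ++ xmms
+--
+-- elsewhere.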
+realArgRegsCover :: DynFlags -> [GlobalReg] +realArgRegsCover dflags + | passFloatArgsInXmm dflags = map ($VGcPtr) (realVanillaRegs dflags) ++ + realLongRegs dflags ++ + map XmmReg (realXmmRegNos dflags) + | otherwise = map ($VGcPtr) (realVanillaRegs dflags) ++ + realFloatRegs dflags ++ + realDoubleRegs dflags ++ + realLongRegs dflags ++ + map XmmReg (realXmmRegNos dflags) diff --git a/compiler/GHC/Cmm/CommonBlockElim.hs b/compiler/GHC/Cmm/CommonBlockElim.hs new file mode 100644 index 0000000000..86ea0e94e2 --- /dev/null +++ b/compiler/GHC/Cmm/CommonBlockElim.hs @@ -0,0 +1,320 @@ +{-# LANGUAGE GADTs, BangPatterns, ScopedTypeVariables #-} + +module GHC.Cmm.CommonBlockElim + ( elimCommonBlocks + ) +where + + +import GhcPrelude hiding (iterate, succ, unzip, zip) + +import GHC.Cmm.BlockId +import GHC.Cmm +import GHC.Cmm.Utils +import GHC.Cmm.Switch (eqSwitchTargetWith) +import GHC.Cmm.ContFlowOpt + +import GHC.Cmm.Dataflow.Block +import GHC.Cmm.Dataflow.Graph +import GHC.Cmm.Dataflow.Label +import GHC.Cmm.Dataflow.Collections +import Data.Bits +import Data.Maybe (mapMaybe) +import qualified Data.List as List +import Data.Word +import qualified Data.Map as M +import Outputable +import qualified TrieMap as TM +import UniqFM +import Unique +import Control.Arrow (first, second) + +-- ----------------------------------------------------------------------------- +-- Eliminate common blocks + +-- If two blocks are identical except for the label on the first node, +-- then we can eliminate one of the blocks. To ensure that the semantics +-- of the program are preserved, we have to rewrite each predecessor of the +-- eliminated block to proceed with the block we keep. + +-- The algorithm iterates over the blocks in the graph, +-- checking whether it has seen another block that is equal modulo labels. +-- If so, then it adds an entry in a map indicating that the new block +-- is made redundant by the old block. +-- Otherwise, it is added to the useful blocks. + +-- To avoid comparing every block with every other block repeatedly, we group +-- them by +-- * a hash of the block, ignoring labels (explained below) +-- * the list of outgoing labels +-- The hash is invariant under relabeling, so we only ever compare within +-- the same group of blocks. +-- +-- The list of outgoing labels is updated as we merge blocks (that is why they +-- are not included in the hash, which we want to calculate only once). +-- +-- All in all, two blocks should never be compared if they have different +-- hashes, and at most once otherwise. Previously, we were slower, and people +-- rightfully complained: #10397 + +-- TODO: Use optimization fuel +elimCommonBlocks :: CmmGraph -> CmmGraph +elimCommonBlocks g = replaceLabels env $ copyTicks env g + where + env = iterate mapEmpty blocks_with_key + -- The order of blocks doesn't matter here. While we could use + -- revPostorder which drops unreachable blocks this is done in + -- ContFlowOpt already which runs before this pass. So we use + -- toBlockList since it is faster. + groups = groupByInt hash_block (toBlockList g) :: [[CmmBlock]] + blocks_with_key = [ [ (successors b, [b]) | b <- bs] | bs <- groups] + +-- Invariant: The blocks in the list are pairwise distinct +-- (so avoid comparing them again) +type DistinctBlocks = [CmmBlock] +type Key = [Label] +type Subst = LabelMap BlockId + +-- The outer list groups by hash. We retain this grouping throughout. 
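+--
+-- For instance (schematic): if blocks B1, B2 and B3 all hash to the same
+-- value, and B1 and B2 end in "goto L" while B3 ends in "goto M", then
+-- 'iterate' starts from
+--
+--   [ [ ([L], [B1]), ([L], [B2]), ([M], [B3]) ] ]
+--
+-- and only B1 and B2 (same hash, same outgoing labels) are ever compared.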
+iterate :: Subst -> [[(Key, DistinctBlocks)]] -> Subst +iterate subst blocks + | mapNull new_substs = subst + | otherwise = iterate subst' updated_blocks + where + grouped_blocks :: [[(Key, [DistinctBlocks])]] + grouped_blocks = map groupByLabel blocks + + merged_blocks :: [[(Key, DistinctBlocks)]] + (new_substs, merged_blocks) = List.mapAccumL (List.mapAccumL go) mapEmpty grouped_blocks + where + go !new_subst1 (k,dbs) = (new_subst1 `mapUnion` new_subst2, (k,db)) + where + (new_subst2, db) = mergeBlockList subst dbs + + subst' = subst `mapUnion` new_substs + updated_blocks = map (map (first (map (lookupBid subst')))) merged_blocks + +-- Combine two lists of blocks. +-- While they are internally distinct they can still share common blocks. +mergeBlocks :: Subst -> DistinctBlocks -> DistinctBlocks -> (Subst, DistinctBlocks) +mergeBlocks subst existing new = go new + where + go [] = (mapEmpty, existing) + go (b:bs) = case List.find (eqBlockBodyWith (eqBid subst) b) existing of + -- This block is a duplicate. Drop it, and add it to the substitution + Just b' -> first (mapInsert (entryLabel b) (entryLabel b')) $ go bs + -- This block is not a duplicate, keep it. + Nothing -> second (b:) $ go bs + +mergeBlockList :: Subst -> [DistinctBlocks] -> (Subst, DistinctBlocks) +mergeBlockList _ [] = pprPanic "mergeBlockList" empty +mergeBlockList subst (b:bs) = go mapEmpty b bs + where + go !new_subst1 b [] = (new_subst1, b) + go !new_subst1 b1 (b2:bs) = go new_subst b bs + where + (new_subst2, b) = mergeBlocks subst b1 b2 + new_subst = new_subst1 `mapUnion` new_subst2 + + +-- ----------------------------------------------------------------------------- +-- Hashing and equality on blocks + +-- Below here is mostly boilerplate: hashing blocks ignoring labels, +-- and comparing blocks modulo a label mapping. + +-- To speed up comparisons, we hash each basic block modulo jump labels. +-- The hashing is a bit arbitrary (the numbers are completely arbitrary), +-- but it should be fast and good enough. + +-- We want to get as many small buckets as possible, as comparing blocks is +-- expensive. So include as much as possible in the hash. Ideally everything +-- that is compared with (==) in eqBlockBodyWith. + +type HashCode = Int + +hash_block :: CmmBlock -> HashCode +hash_block block = + fromIntegral (foldBlockNodesB3 (hash_fst, hash_mid, hash_lst) block (0 :: Word32) .&. (0x7fffffff :: Word32)) + -- UniqFM doesn't like negative Ints + where hash_fst _ h = h + hash_mid m h = hash_node m + h `shiftL` 1 + hash_lst m h = hash_node m + h `shiftL` 1 + + hash_node :: CmmNode O x -> Word32 + hash_node n | dont_care n = 0 -- don't care + hash_node (CmmAssign r e) = hash_reg r + hash_e e + hash_node (CmmStore e e') = hash_e e + hash_e e' + hash_node (CmmUnsafeForeignCall t _ as) = hash_tgt t + hash_list hash_e as + hash_node (CmmBranch _) = 23 -- NB. ignore the label + hash_node (CmmCondBranch p _ _ _) = hash_e p + hash_node (CmmCall e _ _ _ _ _) = hash_e e + hash_node (CmmForeignCall t _ _ _ _ _ _) = hash_tgt t + hash_node (CmmSwitch e _) = hash_e e + hash_node _ = error "hash_node: unknown Cmm node!" 
+ + hash_reg :: CmmReg -> Word32 + hash_reg (CmmLocal localReg) = hash_unique localReg -- important for performance, see #10397 + hash_reg (CmmGlobal _) = 19 + + hash_e :: CmmExpr -> Word32 + hash_e (CmmLit l) = hash_lit l + hash_e (CmmLoad e _) = 67 + hash_e e + hash_e (CmmReg r) = hash_reg r + hash_e (CmmMachOp _ es) = hash_list hash_e es -- pessimal - no operator check + hash_e (CmmRegOff r i) = hash_reg r + cvt i + hash_e (CmmStackSlot _ _) = 13 + + hash_lit :: CmmLit -> Word32 + hash_lit (CmmInt i _) = fromInteger i + hash_lit (CmmFloat r _) = truncate r + hash_lit (CmmVec ls) = hash_list hash_lit ls + hash_lit (CmmLabel _) = 119 -- ugh + hash_lit (CmmLabelOff _ i) = cvt $ 199 + i + hash_lit (CmmLabelDiffOff _ _ i _) = cvt $ 299 + i + hash_lit (CmmBlock _) = 191 -- ugh + hash_lit (CmmHighStackMark) = cvt 313 + + hash_tgt (ForeignTarget e _) = hash_e e + hash_tgt (PrimTarget _) = 31 -- lots of these + + hash_list f = foldl' (\z x -> f x + z) (0::Word32) + + cvt = fromInteger . toInteger + + hash_unique :: Uniquable a => a -> Word32 + hash_unique = cvt . getKey . getUnique + +-- | Ignore these node types for equality +dont_care :: CmmNode O x -> Bool +dont_care CmmComment {} = True +dont_care CmmTick {} = True +dont_care CmmUnwind {} = True +dont_care _other = False + +-- Utilities: equality and substitution on the graph. + +-- Given a map ``subst'' from BlockID -> BlockID, we define equality. +eqBid :: LabelMap BlockId -> BlockId -> BlockId -> Bool +eqBid subst bid bid' = lookupBid subst bid == lookupBid subst bid' +lookupBid :: LabelMap BlockId -> BlockId -> BlockId +lookupBid subst bid = case mapLookup bid subst of + Just bid -> lookupBid subst bid + Nothing -> bid + +-- Middle nodes and expressions can contain BlockIds, in particular in +-- CmmStackSlot and CmmBlock, so we have to use a special equality for +-- these. +-- +eqMiddleWith :: (BlockId -> BlockId -> Bool) + -> CmmNode O O -> CmmNode O O -> Bool +eqMiddleWith eqBid (CmmAssign r1 e1) (CmmAssign r2 e2) + = r1 == r2 && eqExprWith eqBid e1 e2 +eqMiddleWith eqBid (CmmStore l1 r1) (CmmStore l2 r2) + = eqExprWith eqBid l1 l2 && eqExprWith eqBid r1 r2 +eqMiddleWith eqBid (CmmUnsafeForeignCall t1 r1 a1) + (CmmUnsafeForeignCall t2 r2 a2) + = t1 == t2 && r1 == r2 && eqListWith (eqExprWith eqBid) a1 a2 +eqMiddleWith _ _ _ = False + +eqExprWith :: (BlockId -> BlockId -> Bool) + -> CmmExpr -> CmmExpr -> Bool +eqExprWith eqBid = eq + where + CmmLit l1 `eq` CmmLit l2 = eqLit l1 l2 + CmmLoad e1 _ `eq` CmmLoad e2 _ = e1 `eq` e2 + CmmReg r1 `eq` CmmReg r2 = r1==r2 + CmmRegOff r1 i1 `eq` CmmRegOff r2 i2 = r1==r2 && i1==i2 + CmmMachOp op1 es1 `eq` CmmMachOp op2 es2 = op1==op2 && es1 `eqs` es2 + CmmStackSlot a1 i1 `eq` CmmStackSlot a2 i2 = eqArea a1 a2 && i1==i2 + _e1 `eq` _e2 = False + + xs `eqs` ys = eqListWith eq xs ys + + eqLit (CmmBlock id1) (CmmBlock id2) = eqBid id1 id2 + eqLit l1 l2 = l1 == l2 + + eqArea Old Old = True + eqArea (Young id1) (Young id2) = eqBid id1 id2 + eqArea _ _ = False + +-- Equality on the body of a block, modulo a function mapping block +-- IDs to block IDs. +eqBlockBodyWith :: (BlockId -> BlockId -> Bool) -> CmmBlock -> CmmBlock -> Bool +eqBlockBodyWith eqBid block block' + {- + | equal = pprTrace "equal" (vcat [ppr block, ppr block']) True + | otherwise = pprTrace "not equal" (vcat [ppr block, ppr block']) False + -} + = equal + where (_,m,l) = blockSplit block + nodes = filter (not . dont_care) (blockToList m) + (_,m',l') = blockSplit block' + nodes' = filter (not . 
dont_care) (blockToList m') + + equal = eqListWith (eqMiddleWith eqBid) nodes nodes' && + eqLastWith eqBid l l' + + +eqLastWith :: (BlockId -> BlockId -> Bool) -> CmmNode O C -> CmmNode O C -> Bool +eqLastWith eqBid (CmmBranch bid1) (CmmBranch bid2) = eqBid bid1 bid2 +eqLastWith eqBid (CmmCondBranch c1 t1 f1 l1) (CmmCondBranch c2 t2 f2 l2) = + c1 == c2 && l1 == l2 && eqBid t1 t2 && eqBid f1 f2 +eqLastWith eqBid (CmmCall t1 c1 g1 a1 r1 u1) (CmmCall t2 c2 g2 a2 r2 u2) = + t1 == t2 && eqMaybeWith eqBid c1 c2 && a1 == a2 && r1 == r2 && u1 == u2 && g1 == g2 +eqLastWith eqBid (CmmSwitch e1 ids1) (CmmSwitch e2 ids2) = + e1 == e2 && eqSwitchTargetWith eqBid ids1 ids2 +eqLastWith _ _ _ = False + +eqMaybeWith :: (a -> b -> Bool) -> Maybe a -> Maybe b -> Bool +eqMaybeWith eltEq (Just e) (Just e') = eltEq e e' +eqMaybeWith _ Nothing Nothing = True +eqMaybeWith _ _ _ = False + +eqListWith :: (a -> b -> Bool) -> [a] -> [b] -> Bool +eqListWith f (a : as) (b : bs) = f a b && eqListWith f as bs +eqListWith _ [] [] = True +eqListWith _ _ _ = False + +-- | Given a block map, ensure that all "target" blocks are covered by +-- the same ticks as the respective "source" blocks. This not only +-- means copying ticks, but also adjusting tick scopes where +-- necessary. +copyTicks :: LabelMap BlockId -> CmmGraph -> CmmGraph +copyTicks env g + | mapNull env = g + | otherwise = ofBlockMap (g_entry g) $ mapMap copyTo blockMap + where -- Reverse block merge map + blockMap = toBlockMap g + revEnv = mapFoldlWithKey insertRev M.empty env + insertRev m k x = M.insertWith (const (k:)) x [k] m + -- Copy ticks and scopes into the given block + copyTo block = case M.lookup (entryLabel block) revEnv of + Nothing -> block + Just ls -> foldr copy block $ mapMaybe (flip mapLookup blockMap) ls + copy from to = + let ticks = blockTicks from + CmmEntry _ scp0 = firstNode from + (CmmEntry lbl scp1, code) = blockSplitHead to + in CmmEntry lbl (combineTickScopes scp0 scp1) `blockJoinHead` + foldr blockCons code (map CmmTick ticks) + +-- Group by [Label] +-- See Note [Compressed TrieMap] in coreSyn/TrieMap about the usage of GenMap. +groupByLabel :: [(Key, DistinctBlocks)] -> [(Key, [DistinctBlocks])] +groupByLabel = + go (TM.emptyTM :: TM.ListMap (TM.GenMap LabelMap) (Key, [DistinctBlocks])) + where + go !m [] = TM.foldTM (:) m [] + go !m ((k,v) : entries) = go (TM.alterTM k adjust m) entries + where --k' = map (getKey . getUnique) k + adjust Nothing = Just (k,[v]) + adjust (Just (_,vs)) = Just (k,v:vs) + +groupByInt :: (a -> Int) -> [a] -> [[a]] +groupByInt f xs = nonDetEltsUFM $ List.foldl' go emptyUFM xs + -- See Note [Unique Determinism and code generation] + where + go m x = alterUFM addEntry m (f x) + where + addEntry xs = Just $! 
maybe [x] (x:) xs diff --git a/compiler/GHC/Cmm/ContFlowOpt.hs b/compiler/GHC/Cmm/ContFlowOpt.hs new file mode 100644 index 0000000000..7765972d02 --- /dev/null +++ b/compiler/GHC/Cmm/ContFlowOpt.hs @@ -0,0 +1,451 @@ +{-# LANGUAGE GADTs #-} +{-# LANGUAGE BangPatterns #-} +{-# OPTIONS_GHC -fno-warn-incomplete-patterns #-} +module GHC.Cmm.ContFlowOpt + ( cmmCfgOpts + , cmmCfgOptsProc + , removeUnreachableBlocksProc + , replaceLabels + ) +where + +import GhcPrelude hiding (succ, unzip, zip) + +import GHC.Cmm.Dataflow.Block +import GHC.Cmm.Dataflow.Collections +import GHC.Cmm.Dataflow.Graph +import GHC.Cmm.Dataflow.Label +import GHC.Cmm.BlockId +import GHC.Cmm +import GHC.Cmm.Utils +import GHC.Cmm.Switch (mapSwitchTargets, switchTargetsToList) +import Maybes +import Panic +import Util + +import Control.Monad + + +-- Note [What is shortcutting] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- +-- Consider this Cmm code: +-- +-- L1: ... +-- goto L2; +-- L2: goto L3; +-- L3: ... +-- +-- Here L2 is an empty block and contains only an unconditional branch +-- to L3. In this situation any block that jumps to L2 can jump +-- directly to L3: +-- +-- L1: ... +-- goto L3; +-- L2: goto L3; +-- L3: ... +-- +-- In this situation we say that we shortcut L2 to L3. One of +-- consequences of shortcutting is that some blocks of code may become +-- unreachable (in the example above this is true for L2). + + +-- Note [Control-flow optimisations] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- +-- This optimisation does three things: +-- +-- - If a block finishes in an unconditional branch to another block +-- and that is the only jump to that block we concatenate the +-- destination block at the end of the current one. +-- +-- - If a block finishes in a call whose continuation block is a +-- goto, then we can shortcut the destination, making the +-- continuation block the destination of the goto - but see Note +-- [Shortcut call returns]. +-- +-- - For any block that is not a call we try to shortcut the +-- destination(s). Additionally, if a block ends with a +-- conditional branch we try to invert the condition. +-- +-- Blocks are processed using postorder DFS traversal. A side effect +-- of determining traversal order with a graph search is elimination +-- of any blocks that are unreachable. +-- +-- Transformations are improved by working from the end of the graph +-- towards the beginning, because we may be able to perform many +-- shortcuts in one go. + + +-- Note [Shortcut call returns] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- +-- We are going to maintain the "current" graph (LabelMap CmmBlock) as +-- we go, and also a mapping from BlockId to BlockId, representing +-- continuation labels that we have renamed. This latter mapping is +-- important because we might shortcut a CmmCall continuation. For +-- example: +-- +-- Sp[0] = L +-- call g returns to L +-- L: goto M +-- M: ... +-- +-- So when we shortcut the L block, we need to replace not only +-- the continuation of the call, but also references to L in the +-- code (e.g. the assignment Sp[0] = L): +-- +-- Sp[0] = M +-- call g returns to M +-- M: ... +-- +-- So we keep track of which labels we have renamed and apply the mapping +-- at the end with replaceLabels. 
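+--
+-- Concretely, for the example above the pass records
+--
+--   shortcut_map = mapFromList [(L, M)]
+--
+-- and the final 'replaceLabels shortcut_map' rewrites every remaining
+-- occurrence of L (branch targets, CmmBlock literals, Young stack slots)
+-- to M.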
+ + +-- Note [Shortcut call returns and proc-points] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- +-- Consider this code that you might get from a recursive +-- let-no-escape: +-- +-- goto L1 +-- L1: +-- if (Hp > HpLim) then L2 else L3 +-- L2: +-- call stg_gc_noregs returns to L4 +-- L4: +-- goto L1 +-- L3: +-- ... +-- goto L1 +-- +-- Then the control-flow optimiser shortcuts L4. But that turns L1 +-- into the call-return proc point, and every iteration of the loop +-- has to shuffle variables to and from the stack. So we must *not* +-- shortcut L4. +-- +-- Moreover not shortcutting call returns is probably fine. If L4 can +-- concat with its branch target then it will still do so. And we +-- save some compile time because we don't have to traverse all the +-- code in replaceLabels. +-- +-- However, we probably do want to do this if we are splitting proc +-- points, because L1 will be a proc-point anyway, so merging it with +-- L4 reduces the number of proc points. Unfortunately recursive +-- let-no-escapes won't generate very good code with proc-point +-- splitting on - we should probably compile them to explicitly use +-- the native calling convention instead. + +cmmCfgOpts :: Bool -> CmmGraph -> CmmGraph +cmmCfgOpts split g = fst (blockConcat split g) + +cmmCfgOptsProc :: Bool -> CmmDecl -> CmmDecl +cmmCfgOptsProc split (CmmProc info lbl live g) = CmmProc info' lbl live g' + where (g', env) = blockConcat split g + info' = info{ info_tbls = new_info_tbls } + new_info_tbls = mapFromList (map upd_info (mapToList (info_tbls info))) + + -- If we changed any labels, then we have to update the info tables + -- too, except for the top-level info table because that might be + -- referred to by other procs. + upd_info (k,info) + | Just k' <- mapLookup k env + = (k', if k' == g_entry g' + then info + else info{ cit_lbl = infoTblLbl k' }) + | otherwise + = (k,info) +cmmCfgOptsProc _ top = top + + +blockConcat :: Bool -> CmmGraph -> (CmmGraph, LabelMap BlockId) +blockConcat splitting_procs g@CmmGraph { g_entry = entry_id } + = (replaceLabels shortcut_map $ ofBlockMap new_entry new_blocks, shortcut_map') + where + -- We might be able to shortcut the entry BlockId itself. + -- Remember to update the shortcut_map, since we also have to + -- update the info_tbls mapping now. + (new_entry, shortcut_map') + | Just entry_blk <- mapLookup entry_id new_blocks + , Just dest <- canShortcut entry_blk + = (dest, mapInsert entry_id dest shortcut_map) + | otherwise + = (entry_id, shortcut_map) + + -- blocks are sorted in reverse postorder, but we want to go from the exit + -- towards beginning, so we use foldr below. + blocks = revPostorder g + blockmap = foldl' (flip addBlock) emptyBody blocks + + -- Accumulator contains three components: + -- * map of blocks in a graph + -- * map of shortcut labels. See Note [Shortcut call returns] + -- * map containing number of predecessors for each block. We discard + -- it after we process all blocks. + (new_blocks, shortcut_map, _) = + foldr maybe_concat (blockmap, mapEmpty, initialBackEdges) blocks + + -- Map of predecessors for initial graph. We increase number of + -- predecessors for entry block by one to denote that it is + -- target of a jump, even if no block in the current graph jumps + -- to it. 
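+    --
+    -- For example, with entry E and blocks "E: goto A" and "A: goto A",
+    -- the initial map is { E -> 1, A -> 2 }: A has two real predecessors,
+    -- while E has none but still gets the extra one.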
+ initialBackEdges = incPreds entry_id (predMap blocks) + + maybe_concat :: CmmBlock + -> (LabelMap CmmBlock, LabelMap BlockId, LabelMap Int) + -> (LabelMap CmmBlock, LabelMap BlockId, LabelMap Int) + maybe_concat block (!blocks, !shortcut_map, !backEdges) + -- If: + -- (1) current block ends with unconditional branch to b' and + -- (2) it has exactly one predecessor (namely, current block) + -- + -- Then: + -- (1) append b' block at the end of current block + -- (2) remove b' from the map of blocks + -- (3) remove information about b' from predecessors map + -- + -- Since we know that the block has only one predecessor we call + -- mapDelete directly instead of calling decPreds. + -- + -- Note that we always maintain an up-to-date list of predecessors, so + -- we can ignore the contents of shortcut_map + | CmmBranch b' <- last + , hasOnePredecessor b' + , Just blk' <- mapLookup b' blocks + = let bid' = entryLabel blk' + in ( mapDelete bid' $ mapInsert bid (splice head blk') blocks + , shortcut_map + , mapDelete b' backEdges ) + + -- If: + -- (1) we are splitting proc points (see Note + -- [Shortcut call returns and proc-points]) and + -- (2) current block is a CmmCall or CmmForeignCall with + -- continuation b' and + -- (3) we can shortcut that continuation to dest + -- Then: + -- (1) we change continuation to point to b' + -- (2) create mapping from b' to dest + -- (3) increase number of predecessors of dest by 1 + -- (4) decrease number of predecessors of b' by 1 + -- + -- Later we will use replaceLabels to substitute all occurrences of b' + -- with dest. + | splitting_procs + , Just b' <- callContinuation_maybe last + , Just blk' <- mapLookup b' blocks + , Just dest <- canShortcut blk' + = ( mapInsert bid (blockJoinTail head (update_cont dest)) blocks + , mapInsert b' dest shortcut_map + , decPreds b' $ incPreds dest backEdges ) + + -- If: + -- (1) a block does not end with a call + -- Then: + -- (1) if it ends with a conditional attempt to invert the + -- conditional + -- (2) attempt to shortcut all destination blocks + -- (3) if new successors of a block are different from the old ones + -- update the of predecessors accordingly + -- + -- A special case of this is a situation when a block ends with an + -- unconditional jump to a block that can be shortcut. + | Nothing <- callContinuation_maybe last + = let oldSuccs = successors last + newSuccs = successors rewrite_last + in ( mapInsert bid (blockJoinTail head rewrite_last) blocks + , shortcut_map + , if oldSuccs == newSuccs + then backEdges + else foldr incPreds (foldr decPreds backEdges oldSuccs) newSuccs ) + + -- Otherwise don't do anything + | otherwise + = ( blocks, shortcut_map, backEdges ) + where + (head, last) = blockSplitTail block + bid = entryLabel block + + -- Changes continuation of a call to a specified label + update_cont dest = + case last of + CmmCall{} -> last { cml_cont = Just dest } + CmmForeignCall{} -> last { succ = dest } + _ -> panic "Can't shortcut continuation." + + -- Attempts to shortcut successors of last node + shortcut_last = mapSuccessors shortcut last + where + shortcut l = + case mapLookup l blocks of + Just b | Just dest <- canShortcut b -> dest + _otherwise -> l + + rewrite_last + -- Sometimes we can get rid of the conditional completely. 
+ | CmmCondBranch _cond t f _l <- shortcut_last + , t == f + = CmmBranch t + + -- See Note [Invert Cmm conditionals] + | CmmCondBranch cond t f l <- shortcut_last + , hasOnePredecessor t -- inverting will make t a fallthrough + , likelyTrue l || (numPreds f > 1) + , Just cond' <- maybeInvertCmmExpr cond + = CmmCondBranch cond' f t (invertLikeliness l) + + -- If all jump destinations of a switch go to the + -- same target eliminate the switch. + | CmmSwitch _expr targets <- shortcut_last + , (t:ts) <- switchTargetsToList targets + , all (== t) ts + = CmmBranch t + + | otherwise + = shortcut_last + + likelyTrue (Just True) = True + likelyTrue _ = False + + invertLikeliness :: Maybe Bool -> Maybe Bool + invertLikeliness = fmap not + + -- Number of predecessors for a block + numPreds bid = mapLookup bid backEdges `orElse` 0 + + hasOnePredecessor b = numPreds b == 1 + +{- + Note [Invert Cmm conditionals] + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + The native code generator always produces jumps to the true branch. + Falling through to the false branch is however faster. So we try to + arrange for that to happen. + This means we invert the condition if: + * The likely path will become a fallthrough. + * We can't guarantee a fallthrough for the false branch but for the + true branch. + + In some cases it's faster to avoid inverting when the false branch is likely. + However determining when that is the case is neither easy nor cheap so for + now we always invert as this produces smaller binaries and code that is + equally fast on average. (On an i7-6700K) + + TODO: + There is also the edge case when both branches have multiple predecessors. + In this case we could assume that we will end up with a jump for BOTH + branches. In this case it might be best to put the likely path in the true + branch especially if there are large numbers of predecessors as this saves + us the jump thats not taken. However I haven't tested this and as of early + 2018 we almost never generate cmm where this would apply. +-} + +-- Functions for incrementing and decrementing number of predecessors. If +-- decrementing would set the predecessor count to 0, we remove entry from the +-- map. +-- Invariant: if a block has no predecessors it should be dropped from the +-- graph because it is unreachable. maybe_concat is constructed to maintain +-- that invariant, but calling replaceLabels may introduce unreachable blocks. +-- We rely on subsequent passes in the Cmm pipeline to remove unreachable +-- blocks. +incPreds, decPreds :: BlockId -> LabelMap Int -> LabelMap Int +incPreds bid edges = mapInsertWith (+) bid 1 edges +decPreds bid edges = case mapLookup bid edges of + Just preds | preds > 1 -> mapInsert bid (preds - 1) edges + Just _ -> mapDelete bid edges + _ -> edges + + +-- Checks if a block consists only of "goto dest". If it does than we return +-- "Just dest" label. See Note [What is shortcutting] +canShortcut :: CmmBlock -> Maybe BlockId +canShortcut block + | (_, middle, CmmBranch dest) <- blockSplit block + , all dont_care $ blockToList middle + = Just dest + | otherwise + = Nothing + where dont_care CmmComment{} = True + dont_care CmmTick{} = True + dont_care _other = False + +-- Concatenates two blocks. First one is assumed to be open on exit, the second +-- is assumed to be closed on entry (i.e. it has a label attached to it, which +-- the splice function removes by calling snd on result of blockSplitHead). 
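+--
+-- Schematically:
+--
+--   splice [L1: x = 1]  [L2: y = 2; goto L3]
+--     ==>  [L1: x = 1; y = 2; goto L3]
+--
+-- (L2's entry label disappears and its tick scope is merged into L1's).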
+splice :: Block CmmNode C O -> CmmBlock -> CmmBlock +splice head rest = entry `blockJoinHead` code0 `blockAppend` code1 + where (CmmEntry lbl sc0, code0) = blockSplitHead head + (CmmEntry _ sc1, code1) = blockSplitHead rest + entry = CmmEntry lbl (combineTickScopes sc0 sc1) + +-- If node is a call with continuation call return Just label of that +-- continuation. Otherwise return Nothing. +callContinuation_maybe :: CmmNode O C -> Maybe BlockId +callContinuation_maybe (CmmCall { cml_cont = Just b }) = Just b +callContinuation_maybe (CmmForeignCall { succ = b }) = Just b +callContinuation_maybe _ = Nothing + + +-- Map over the CmmGraph, replacing each label with its mapping in the +-- supplied LabelMap. +replaceLabels :: LabelMap BlockId -> CmmGraph -> CmmGraph +replaceLabels env g + | mapNull env = g + | otherwise = replace_eid $ mapGraphNodes1 txnode g + where + replace_eid g = g {g_entry = lookup (g_entry g)} + lookup id = mapLookup id env `orElse` id + + txnode :: CmmNode e x -> CmmNode e x + txnode (CmmBranch bid) = CmmBranch (lookup bid) + txnode (CmmCondBranch p t f l) = + mkCmmCondBranch (exp p) (lookup t) (lookup f) l + txnode (CmmSwitch e ids) = + CmmSwitch (exp e) (mapSwitchTargets lookup ids) + txnode (CmmCall t k rg a res r) = + CmmCall (exp t) (liftM lookup k) rg a res r + txnode fc@CmmForeignCall{} = + fc{ args = map exp (args fc), succ = lookup (succ fc) } + txnode other = mapExpDeep exp other + + exp :: CmmExpr -> CmmExpr + exp (CmmLit (CmmBlock bid)) = CmmLit (CmmBlock (lookup bid)) + exp (CmmStackSlot (Young id) i) = CmmStackSlot (Young (lookup id)) i + exp e = e + +mkCmmCondBranch :: CmmExpr -> Label -> Label -> Maybe Bool -> CmmNode O C +mkCmmCondBranch p t f l = + if t == f then CmmBranch t else CmmCondBranch p t f l + +-- Build a map from a block to its set of predecessors. +predMap :: [CmmBlock] -> LabelMap Int +predMap blocks = foldr add_preds mapEmpty blocks + where + add_preds block env = foldr add env (successors block) + where add lbl env = mapInsertWith (+) lbl 1 env + +-- Removing unreachable blocks +removeUnreachableBlocksProc :: CmmDecl -> CmmDecl +removeUnreachableBlocksProc proc@(CmmProc info lbl live g) + | used_blocks `lengthLessThan` mapSize (toBlockMap g) + = CmmProc info' lbl live g' + | otherwise + = proc + where + g' = ofBlockList (g_entry g) used_blocks + info' = info { info_tbls = keep_used (info_tbls info) } + -- Remove any info_tbls for unreachable + + keep_used :: LabelMap CmmInfoTable -> LabelMap CmmInfoTable + keep_used bs = mapFoldlWithKey keep mapEmpty bs + + keep :: LabelMap CmmInfoTable -> Label -> CmmInfoTable -> LabelMap CmmInfoTable + keep env l i | l `setMember` used_lbls = mapInsert l i env + | otherwise = env + + used_blocks :: [CmmBlock] + used_blocks = revPostorder g + + used_lbls :: LabelSet + used_lbls = setFromList $ map entryLabel used_blocks diff --git a/compiler/GHC/Cmm/Dataflow.hs b/compiler/GHC/Cmm/Dataflow.hs new file mode 100644 index 0000000000..fcabb1df0f --- /dev/null +++ b/compiler/GHC/Cmm/Dataflow.hs @@ -0,0 +1,441 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TypeFamilies #-} + +-- +-- Copyright (c) 2010, João Dias, Simon Marlow, Simon Peyton Jones, +-- and Norman Ramsey +-- +-- Modifications copyright (c) The University of Glasgow 2012 +-- +-- This module is a specialised and optimised version of +-- Compiler.Hoopl.Dataflow in the hoopl package. 
In particular it is +-- specialised to the UniqSM monad. +-- + +module GHC.Cmm.Dataflow + ( C, O, Block + , lastNode, entryLabel + , foldNodesBwdOO + , foldRewriteNodesBwdOO + , DataflowLattice(..), OldFact(..), NewFact(..), JoinedFact(..) + , TransferFun, RewriteFun + , Fact, FactBase + , getFact, mkFactBase + , analyzeCmmFwd, analyzeCmmBwd + , rewriteCmmBwd + , changedIf + , joinOutFacts + , joinFacts + ) +where + +import GhcPrelude + +import GHC.Cmm +import UniqSupply + +import Data.Array +import Data.Maybe +import Data.IntSet (IntSet) +import qualified Data.IntSet as IntSet + +import GHC.Cmm.Dataflow.Block +import GHC.Cmm.Dataflow.Graph +import GHC.Cmm.Dataflow.Collections +import GHC.Cmm.Dataflow.Label + +type family Fact (x :: Extensibility) f :: * +type instance Fact C f = FactBase f +type instance Fact O f = f + +newtype OldFact a = OldFact a + +newtype NewFact a = NewFact a + +-- | The result of joining OldFact and NewFact. +data JoinedFact a + = Changed !a -- ^ Result is different than OldFact. + | NotChanged !a -- ^ Result is the same as OldFact. + +getJoined :: JoinedFact a -> a +getJoined (Changed a) = a +getJoined (NotChanged a) = a + +changedIf :: Bool -> a -> JoinedFact a +changedIf True = Changed +changedIf False = NotChanged + +type JoinFun a = OldFact a -> NewFact a -> JoinedFact a + +data DataflowLattice a = DataflowLattice + { fact_bot :: a + , fact_join :: JoinFun a + } + +data Direction = Fwd | Bwd + +type TransferFun f = CmmBlock -> FactBase f -> FactBase f + +-- | Function for rewrtiting and analysis combined. To be used with +-- @rewriteCmm@. +-- +-- Currently set to work with @UniqSM@ monad, but we could probably abstract +-- that away (if we do that, we might want to specialize the fixpoint algorithms +-- to the particular monads through SPECIALIZE). +type RewriteFun f = CmmBlock -> FactBase f -> UniqSM (CmmBlock, FactBase f) + +analyzeCmmBwd, analyzeCmmFwd + :: DataflowLattice f + -> TransferFun f + -> CmmGraph + -> FactBase f + -> FactBase f +analyzeCmmBwd = analyzeCmm Bwd +analyzeCmmFwd = analyzeCmm Fwd + +analyzeCmm + :: Direction + -> DataflowLattice f + -> TransferFun f + -> CmmGraph + -> FactBase f + -> FactBase f +analyzeCmm dir lattice transfer cmmGraph initFact = + {-# SCC analyzeCmm #-} + let entry = g_entry cmmGraph + hooplGraph = g_graph cmmGraph + blockMap = + case hooplGraph of + GMany NothingO bm NothingO -> bm + in fixpointAnalysis dir lattice transfer entry blockMap initFact + +-- Fixpoint algorithm. +fixpointAnalysis + :: forall f. + Direction + -> DataflowLattice f + -> TransferFun f + -> Label + -> LabelMap CmmBlock + -> FactBase f + -> FactBase f +fixpointAnalysis direction lattice do_block entry blockmap = loop start + where + -- Sorting the blocks helps to minimize the number of times we need to + -- process blocks. For instance, for forward analysis we want to look at + -- blocks in reverse postorder. Also, see comments for sortBlocks. + blocks = sortBlocks direction entry blockmap + num_blocks = length blocks + block_arr = {-# SCC "block_arr" #-} listArray (0, num_blocks - 1) blocks + start = {-# SCC "start" #-} IntSet.fromDistinctAscList + [0 .. num_blocks - 1] + dep_blocks = {-# SCC "dep_blocks" #-} mkDepBlocks direction blocks + join = fact_join lattice + + loop + :: IntHeap -- ^ Worklist, i.e., blocks to process + -> FactBase f -- ^ Current result (increases monotonically) + -> FactBase f + loop todo !fbase1 | Just (index, todo1) <- IntSet.minView todo = + let block = block_arr ! 
index + out_facts = {-# SCC "do_block" #-} do_block block fbase1 + -- For each of the outgoing edges, we join it with the current + -- information in fbase1 and (if something changed) we update it + -- and add the affected blocks to the worklist. + (todo2, fbase2) = {-# SCC "mapFoldWithKey" #-} + mapFoldlWithKey + (updateFact join dep_blocks) (todo1, fbase1) out_facts + in loop todo2 fbase2 + loop _ !fbase1 = fbase1 + +rewriteCmmBwd + :: DataflowLattice f + -> RewriteFun f + -> CmmGraph + -> FactBase f + -> UniqSM (CmmGraph, FactBase f) +rewriteCmmBwd = rewriteCmm Bwd + +rewriteCmm + :: Direction + -> DataflowLattice f + -> RewriteFun f + -> CmmGraph + -> FactBase f + -> UniqSM (CmmGraph, FactBase f) +rewriteCmm dir lattice rwFun cmmGraph initFact = {-# SCC rewriteCmm #-} do + let entry = g_entry cmmGraph + hooplGraph = g_graph cmmGraph + blockMap1 = + case hooplGraph of + GMany NothingO bm NothingO -> bm + (blockMap2, facts) <- + fixpointRewrite dir lattice rwFun entry blockMap1 initFact + return (cmmGraph {g_graph = GMany NothingO blockMap2 NothingO}, facts) + +fixpointRewrite + :: forall f. + Direction + -> DataflowLattice f + -> RewriteFun f + -> Label + -> LabelMap CmmBlock + -> FactBase f + -> UniqSM (LabelMap CmmBlock, FactBase f) +fixpointRewrite dir lattice do_block entry blockmap = loop start blockmap + where + -- Sorting the blocks helps to minimize the number of times we need to + -- process blocks. For instance, for forward analysis we want to look at + -- blocks in reverse postorder. Also, see comments for sortBlocks. + blocks = sortBlocks dir entry blockmap + num_blocks = length blocks + block_arr = {-# SCC "block_arr_rewrite" #-} + listArray (0, num_blocks - 1) blocks + start = {-# SCC "start_rewrite" #-} + IntSet.fromDistinctAscList [0 .. num_blocks - 1] + dep_blocks = {-# SCC "dep_blocks_rewrite" #-} mkDepBlocks dir blocks + join = fact_join lattice + + loop + :: IntHeap -- ^ Worklist, i.e., blocks to process + -> LabelMap CmmBlock -- ^ Rewritten blocks. + -> FactBase f -- ^ Current facts. + -> UniqSM (LabelMap CmmBlock, FactBase f) + loop todo !blocks1 !fbase1 + | Just (index, todo1) <- IntSet.minView todo = do + -- Note that we use the *original* block here. This is important. + -- We're optimistically rewriting blocks even before reaching the fixed + -- point, which means that the rewrite might be incorrect. So if the + -- facts change, we need to rewrite the original block again (taking + -- into account the new facts). + let block = block_arr ! index + (new_block, out_facts) <- {-# SCC "do_block_rewrite" #-} + do_block block fbase1 + let blocks2 = mapInsert (entryLabel new_block) new_block blocks1 + (todo2, fbase2) = {-# SCC "mapFoldWithKey_rewrite" #-} + mapFoldlWithKey + (updateFact join dep_blocks) (todo1, fbase1) out_facts + loop todo2 blocks2 fbase2 + loop _ !blocks1 !fbase1 = return (blocks1, fbase1) + + +{- +Note [Unreachable blocks] +~~~~~~~~~~~~~~~~~~~~~~~~~ +A block that is not in the domain of tfb_fbase is "currently unreachable". +A currently-unreachable block is not even analyzed. Reason: consider +constant prop and this graph, with entry point L1: + L1: x:=3; goto L4 + L2: x:=4; goto L4 + L4: if x>3 goto L2 else goto L5 +Here L2 is actually unreachable, but if we process it with bottom input fact, +we'll propagate (x=4) to L4, and nuke the otherwise-good rewriting of L4. + +* If a currently-unreachable block is not analyzed, then its rewritten + graph will not be accumulated in tfb_rg. 
And that is good: + unreachable blocks simply do not appear in the output. + +* Note that clients must be careful to provide a fact (even if bottom) + for each entry point. Otherwise useful blocks may be garbage collected. + +* Note that updateFact must set the change-flag if a label goes from + not-in-fbase to in-fbase, even if its fact is bottom. In effect the + real fact lattice is + UNR + bottom + the points above bottom + +* Even if the fact is going from UNR to bottom, we still call the + client's fact_join function because it might give the client + some useful debugging information. + +* All of this only applies for *forward* fixpoints. For the backward + case we must treat every block as reachable; it might finish with a + 'return', and therefore have no successors, for example. +-} + + +----------------------------------------------------------------------------- +-- Pieces that are shared by fixpoint and fixpoint_anal +----------------------------------------------------------------------------- + +-- | Sort the blocks into the right order for analysis. This means reverse +-- postorder for a forward analysis. For the backward one, we simply reverse +-- that (see Note [Backward vs forward analysis]). +sortBlocks + :: NonLocal n + => Direction -> Label -> LabelMap (Block n C C) -> [Block n C C] +sortBlocks direction entry blockmap = + case direction of + Fwd -> fwd + Bwd -> reverse fwd + where + fwd = revPostorderFrom blockmap entry + +-- Note [Backward vs forward analysis] +-- +-- The forward and backward cases are not dual. In the forward case, the entry +-- points are known, and one simply traverses the body blocks from those points. +-- In the backward case, something is known about the exit points, but a +-- backward analysis must also include reachable blocks that don't reach the +-- exit, as in a procedure that loops forever and has side effects. +-- For instance, let E be the entry and X the exit blocks (arrows indicate +-- control flow) +-- E -> X +-- E -> B +-- B -> C +-- C -> B +-- We do need to include B and C even though they're unreachable in the +-- *reverse* graph (that we could use for backward analysis): +-- E <- X +-- E <- B +-- B <- C +-- C <- B +-- So when sorting the blocks for the backward analysis, we simply take the +-- reverse of what is used for the forward one. + + +-- | Construct a mapping from a @Label@ to the block indexes that should be +-- re-analyzed if the facts at that @Label@ change. +-- +-- Note that we're considering here the entry point of the block, so if the +-- facts change at the entry: +-- * for a backward analysis we need to re-analyze all the predecessors, but +-- * for a forward analysis, we only need to re-analyze the current block +-- (and that will in turn propagate facts into its successors). +mkDepBlocks :: Direction -> [CmmBlock] -> LabelMap IntSet +mkDepBlocks Fwd blocks = go blocks 0 mapEmpty + where + go [] !_ !dep_map = dep_map + go (b:bs) !n !dep_map = + go bs (n + 1) $ mapInsert (entryLabel b) (IntSet.singleton n) dep_map +mkDepBlocks Bwd blocks = go blocks 0 mapEmpty + where + go [] !_ !dep_map = dep_map + go (b:bs) !n !dep_map = + let insert m l = mapInsertWith IntSet.union l (IntSet.singleton n) m + in go bs (n + 1) $ foldl' insert dep_map (successors b) + +-- | After some new facts have been generated by analysing a block, we +-- fold this function over them to generate (a) a list of block +-- indices to (re-)analyse, and (b) the new FactBase. 
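+--
+-- To make the 'OldFact'/'NewFact' contract concrete, here is a small sketch
+-- of a lattice in the spirit of the liveness analysis (the register-set
+-- helpers come from GHC.Cmm.Expr; the name is illustrative and not part of
+-- this module): facts are joined by set union, and a change is reported only
+-- when the joined fact is strictly bigger than the old one.
+--
+-- > liveLatticeSketch :: DataflowLattice LocalRegSet
+-- > liveLatticeSketch = DataflowLattice
+-- >   { fact_bot  = emptyRegSet
+-- >   , fact_join = \(OldFact old) (NewFact new) ->
+-- >       let joined = plusRegSet old new
+-- >       in changedIf (sizeRegSet joined > sizeRegSet old) joined
+-- >   }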
+updateFact + :: JoinFun f + -> LabelMap IntSet + -> (IntHeap, FactBase f) + -> Label + -> f -- out fact + -> (IntHeap, FactBase f) +updateFact fact_join dep_blocks (todo, fbase) lbl new_fact + = case lookupFact lbl fbase of + Nothing -> + -- Note [No old fact] + let !z = mapInsert lbl new_fact fbase in (changed, z) + Just old_fact -> + case fact_join (OldFact old_fact) (NewFact new_fact) of + (NotChanged _) -> (todo, fbase) + (Changed f) -> let !z = mapInsert lbl f fbase in (changed, z) + where + changed = todo `IntSet.union` + mapFindWithDefault IntSet.empty lbl dep_blocks + +{- +Note [No old fact] + +We know that the new_fact is >= _|_, so we don't need to join. However, +if the new fact is also _|_, and we have already analysed its block, +we don't need to record a change. So there's a tradeoff here. It turns +out that always recording a change is faster. +-} + +---------------------------------------------------------------- +-- Utilities +---------------------------------------------------------------- + +-- Fact lookup: the fact `orelse` bottom +getFact :: DataflowLattice f -> Label -> FactBase f -> f +getFact lat l fb = case lookupFact l fb of Just f -> f + Nothing -> fact_bot lat + +-- | Returns the result of joining the facts from all the successors of the +-- provided node or block. +joinOutFacts :: (NonLocal n) => DataflowLattice f -> n e C -> FactBase f -> f +joinOutFacts lattice nonLocal fact_base = foldl' join (fact_bot lattice) facts + where + join new old = getJoined $ fact_join lattice (OldFact old) (NewFact new) + facts = + [ fromJust fact + | s <- successors nonLocal + , let fact = lookupFact s fact_base + , isJust fact + ] + +joinFacts :: DataflowLattice f -> [f] -> f +joinFacts lattice facts = foldl' join (fact_bot lattice) facts + where + join new old = getJoined $ fact_join lattice (OldFact old) (NewFact new) + +-- | Returns the joined facts for each label. +mkFactBase :: DataflowLattice f -> [(Label, f)] -> FactBase f +mkFactBase lattice = foldl' add mapEmpty + where + join = fact_join lattice + + add result (l, f1) = + let !newFact = + case mapLookup l result of + Nothing -> f1 + Just f2 -> getJoined $ join (OldFact f1) (NewFact f2) + in mapInsert l newFact result + +-- | Folds backward over all nodes of an open-open block. +-- Strict in the accumulator. +foldNodesBwdOO :: (CmmNode O O -> f -> f) -> Block CmmNode O O -> f -> f +foldNodesBwdOO funOO = go + where + go (BCat b1 b2) f = go b1 $! go b2 f + go (BSnoc h n) f = go h $! funOO n f + go (BCons n t) f = funOO n $! go t f + go (BMiddle n) f = funOO n f + go BNil f = f +{-# INLINABLE foldNodesBwdOO #-} + +-- | Folds backward over all the nodes of an open-open block and allows +-- rewriting them. The accumulator is both the block of nodes and @f@ (usually +-- dataflow facts). +-- Strict in both accumulated parts. +foldRewriteNodesBwdOO + :: forall f. 
+ (CmmNode O O -> f -> UniqSM (Block CmmNode O O, f)) + -> Block CmmNode O O + -> f + -> UniqSM (Block CmmNode O O, f) +foldRewriteNodesBwdOO rewriteOO initBlock initFacts = go initBlock initFacts + where + go (BCons node1 block1) !fact1 = (rewriteOO node1 `comp` go block1) fact1 + go (BSnoc block1 node1) !fact1 = (go block1 `comp` rewriteOO node1) fact1 + go (BCat blockA1 blockB1) !fact1 = (go blockA1 `comp` go blockB1) fact1 + go (BMiddle node) !fact1 = rewriteOO node fact1 + go BNil !fact = return (BNil, fact) + + comp rew1 rew2 = \f1 -> do + (b, f2) <- rew2 f1 + (a, !f3) <- rew1 f2 + let !c = joinBlocksOO a b + return (c, f3) + {-# INLINE comp #-} +{-# INLINABLE foldRewriteNodesBwdOO #-} + +joinBlocksOO :: Block n O O -> Block n O O -> Block n O O +joinBlocksOO BNil b = b +joinBlocksOO b BNil = b +joinBlocksOO (BMiddle n) b = blockCons n b +joinBlocksOO b (BMiddle n) = blockSnoc b n +joinBlocksOO b1 b2 = BCat b1 b2 + +type IntHeap = IntSet diff --git a/compiler/GHC/Cmm/Dataflow/Block.hs b/compiler/GHC/Cmm/Dataflow/Block.hs new file mode 100644 index 0000000000..d2e52a8904 --- /dev/null +++ b/compiler/GHC/Cmm/Dataflow/Block.hs @@ -0,0 +1,329 @@ +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE DeriveFunctor #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE PolyKinds #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE StandaloneDeriving #-} +{-# LANGUAGE TypeFamilies #-} +module GHC.Cmm.Dataflow.Block + ( Extensibility (..) + , O + , C + , MaybeO(..) + , IndexedCO + , Block(..) + , blockAppend + , blockCons + , blockFromList + , blockJoin + , blockJoinHead + , blockJoinTail + , blockSnoc + , blockSplit + , blockSplitHead + , blockSplitTail + , blockToList + , emptyBlock + , firstNode + , foldBlockNodesB + , foldBlockNodesB3 + , foldBlockNodesF + , isEmptyBlock + , lastNode + , mapBlock + , mapBlock' + , mapBlock3' + , replaceFirstNode + , replaceLastNode + ) where + +import GhcPrelude + +-- ----------------------------------------------------------------------------- +-- Shapes: Open and Closed + +-- | Used at the type level to indicate "open" vs "closed" structure. +data Extensibility + -- | An "open" structure with a unique, unnamed control-flow edge flowing in + -- or out. "Fallthrough" and concatenation are permitted at an open point. + = Open + -- | A "closed" structure which supports control transfer only through the use + -- of named labels---no "fallthrough" is permitted. The number of control-flow + -- edges is unconstrained. + | Closed + +type O = 'Open +type C = 'Closed + +-- | Either type indexed by closed/open using type families +type family IndexedCO (ex :: Extensibility) (a :: k) (b :: k) :: k +type instance IndexedCO C a _b = a +type instance IndexedCO O _a b = b + +-- | Maybe type indexed by open/closed +data MaybeO ex t where + JustO :: t -> MaybeO O t + NothingO :: MaybeO C t + +-- | Maybe type indexed by closed/open +data MaybeC ex t where + JustC :: t -> MaybeC C t + NothingC :: MaybeC O t + +deriving instance Functor (MaybeO ex) +deriving instance Functor (MaybeC ex) + +-- ----------------------------------------------------------------------------- +-- The Block type + +-- | A sequence of nodes. May be any of four shapes (O/O, O/C, C/O, C/C). +-- Open at the entry means single entry, mutatis mutandis for exit. +-- A closed/closed block is a /basic/ block and can't be extended further. +-- Clients should avoid manipulating blocks and should stick to either nodes +-- or graphs. 
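+--
+-- To illustrate the shapes (an informal sketch, not part of the original
+-- comment): given an entry node @e :: n C O@, middle nodes @m1, m2 :: n O O@
+-- and an exit node @x :: n O C@, a closed/closed basic block can be built
+-- with
+--
+-- > blockJoin e (blockFromList [m1, m2]) x :: Block n C C
+--
+-- and taken apart again with 'blockSplit'.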
+data Block n e x where + BlockCO :: n C O -> Block n O O -> Block n C O + BlockCC :: n C O -> Block n O O -> n O C -> Block n C C + BlockOC :: Block n O O -> n O C -> Block n O C + + BNil :: Block n O O + BMiddle :: n O O -> Block n O O + BCat :: Block n O O -> Block n O O -> Block n O O + BSnoc :: Block n O O -> n O O -> Block n O O + BCons :: n O O -> Block n O O -> Block n O O + + +-- ----------------------------------------------------------------------------- +-- Simple operations on Blocks + +-- Predicates + +isEmptyBlock :: Block n e x -> Bool +isEmptyBlock BNil = True +isEmptyBlock (BCat l r) = isEmptyBlock l && isEmptyBlock r +isEmptyBlock _ = False + + +-- Building + +emptyBlock :: Block n O O +emptyBlock = BNil + +blockCons :: n O O -> Block n O x -> Block n O x +blockCons n b = case b of + BlockOC b l -> (BlockOC $! (n `blockCons` b)) l + BNil{} -> BMiddle n + BMiddle{} -> n `BCons` b + BCat{} -> n `BCons` b + BSnoc{} -> n `BCons` b + BCons{} -> n `BCons` b + +blockSnoc :: Block n e O -> n O O -> Block n e O +blockSnoc b n = case b of + BlockCO f b -> BlockCO f $! (b `blockSnoc` n) + BNil{} -> BMiddle n + BMiddle{} -> b `BSnoc` n + BCat{} -> b `BSnoc` n + BSnoc{} -> b `BSnoc` n + BCons{} -> b `BSnoc` n + +blockJoinHead :: n C O -> Block n O x -> Block n C x +blockJoinHead f (BlockOC b l) = BlockCC f b l +blockJoinHead f b = BlockCO f BNil `cat` b + +blockJoinTail :: Block n e O -> n O C -> Block n e C +blockJoinTail (BlockCO f b) t = BlockCC f b t +blockJoinTail b t = b `cat` BlockOC BNil t + +blockJoin :: n C O -> Block n O O -> n O C -> Block n C C +blockJoin f b t = BlockCC f b t + +blockAppend :: Block n e O -> Block n O x -> Block n e x +blockAppend = cat + + +-- Taking apart + +firstNode :: Block n C x -> n C O +firstNode (BlockCO n _) = n +firstNode (BlockCC n _ _) = n + +lastNode :: Block n x C -> n O C +lastNode (BlockOC _ n) = n +lastNode (BlockCC _ _ n) = n + +blockSplitHead :: Block n C x -> (n C O, Block n O x) +blockSplitHead (BlockCO n b) = (n, b) +blockSplitHead (BlockCC n b t) = (n, BlockOC b t) + +blockSplitTail :: Block n e C -> (Block n e O, n O C) +blockSplitTail (BlockOC b n) = (b, n) +blockSplitTail (BlockCC f b t) = (BlockCO f b, t) + +-- | Split a closed block into its entry node, open middle block, and +-- exit node. +blockSplit :: Block n C C -> (n C O, Block n O O, n O C) +blockSplit (BlockCC f b t) = (f, b, t) + +blockToList :: Block n O O -> [n O O] +blockToList b = go b [] + where go :: Block n O O -> [n O O] -> [n O O] + go BNil r = r + go (BMiddle n) r = n : r + go (BCat b1 b2) r = go b1 $! go b2 r + go (BSnoc b1 n) r = go b1 (n:r) + go (BCons n b1) r = n : go b1 r + +blockFromList :: [n O O] -> Block n O O +blockFromList = foldr BCons BNil + +-- Modifying + +replaceFirstNode :: Block n C x -> n C O -> Block n C x +replaceFirstNode (BlockCO _ b) f = BlockCO f b +replaceFirstNode (BlockCC _ b n) f = BlockCC f b n + +replaceLastNode :: Block n x C -> n O C -> Block n x C +replaceLastNode (BlockOC b _) n = BlockOC b n +replaceLastNode (BlockCC l b _) n = BlockCC l b n + +-- ----------------------------------------------------------------------------- +-- General concatenation + +cat :: Block n e O -> Block n O x -> Block n e x +cat x y = case x of + BNil -> y + + BlockCO l b1 -> case y of + BlockOC b2 n -> (BlockCC l $! (b1 `cat` b2)) n + BNil -> x + BMiddle _ -> BlockCO l $! (b1 `cat` y) + BCat{} -> BlockCO l $! (b1 `cat` y) + BSnoc{} -> BlockCO l $! (b1 `cat` y) + BCons{} -> BlockCO l $! 
(b1 `cat` y) + + BMiddle n -> case y of + BlockOC b2 n2 -> (BlockOC $! (x `cat` b2)) n2 + BNil -> x + BMiddle{} -> BCons n y + BCat{} -> BCons n y + BSnoc{} -> BCons n y + BCons{} -> BCons n y + + BCat{} -> case y of + BlockOC b3 n2 -> (BlockOC $! (x `cat` b3)) n2 + BNil -> x + BMiddle n -> BSnoc x n + BCat{} -> BCat x y + BSnoc{} -> BCat x y + BCons{} -> BCat x y + + BSnoc{} -> case y of + BlockOC b2 n2 -> (BlockOC $! (x `cat` b2)) n2 + BNil -> x + BMiddle n -> BSnoc x n + BCat{} -> BCat x y + BSnoc{} -> BCat x y + BCons{} -> BCat x y + + + BCons{} -> case y of + BlockOC b2 n2 -> (BlockOC $! (x `cat` b2)) n2 + BNil -> x + BMiddle n -> BSnoc x n + BCat{} -> BCat x y + BSnoc{} -> BCat x y + BCons{} -> BCat x y + + +-- ----------------------------------------------------------------------------- +-- Mapping + +-- | map a function over the nodes of a 'Block' +mapBlock :: (forall e x. n e x -> n' e x) -> Block n e x -> Block n' e x +mapBlock f (BlockCO n b ) = BlockCO (f n) (mapBlock f b) +mapBlock f (BlockOC b n) = BlockOC (mapBlock f b) (f n) +mapBlock f (BlockCC n b m) = BlockCC (f n) (mapBlock f b) (f m) +mapBlock _ BNil = BNil +mapBlock f (BMiddle n) = BMiddle (f n) +mapBlock f (BCat b1 b2) = BCat (mapBlock f b1) (mapBlock f b2) +mapBlock f (BSnoc b n) = BSnoc (mapBlock f b) (f n) +mapBlock f (BCons n b) = BCons (f n) (mapBlock f b) + +-- | A strict 'mapBlock' +mapBlock' :: (forall e x. n e x -> n' e x) -> (Block n e x -> Block n' e x) +mapBlock' f = mapBlock3' (f, f, f) + +-- | map over a block, with different functions to apply to first nodes, +-- middle nodes and last nodes respectively. The map is strict. +-- +mapBlock3' :: forall n n' e x . + ( n C O -> n' C O + , n O O -> n' O O, + n O C -> n' O C) + -> Block n e x -> Block n' e x +mapBlock3' (f, m, l) b = go b + where go :: forall e x . Block n e x -> Block n' e x + go (BlockOC b y) = (BlockOC $! go b) $! l y + go (BlockCO x b) = (BlockCO $! f x) $! (go b) + go (BlockCC x b y) = ((BlockCC $! f x) $! go b) $! (l y) + go BNil = BNil + go (BMiddle n) = BMiddle $! m n + go (BCat x y) = (BCat $! go x) $! (go y) + go (BSnoc x n) = (BSnoc $! go x) $! (m n) + go (BCons n x) = (BCons $! m n) $! (go x) + +-- ----------------------------------------------------------------------------- +-- Folding + + +-- | Fold a function over every node in a block, forward or backward. +-- The fold function must be polymorphic in the shape of the nodes. +foldBlockNodesF3 :: forall n a b c . + ( n C O -> a -> b + , n O O -> b -> b + , n O C -> b -> c) + -> (forall e x . Block n e x -> IndexedCO e a b -> IndexedCO x c b) +foldBlockNodesF :: forall n a . + (forall e x . n e x -> a -> a) + -> (forall e x . Block n e x -> IndexedCO e a a -> IndexedCO x a a) +foldBlockNodesB3 :: forall n a b c . + ( n C O -> b -> c + , n O O -> b -> b + , n O C -> a -> b) + -> (forall e x . Block n e x -> IndexedCO x a b -> IndexedCO e c b) +foldBlockNodesB :: forall n a . + (forall e x . n e x -> a -> a) + -> (forall e x . Block n e x -> IndexedCO x a a -> IndexedCO e a a) + +foldBlockNodesF3 (ff, fm, fl) = block + where block :: forall e x . Block n e x -> IndexedCO e a b -> IndexedCO x c b + block (BlockCO f b ) = ff f `cat` block b + block (BlockCC f b l) = ff f `cat` block b `cat` fl l + block (BlockOC b l) = block b `cat` fl l + block BNil = id + block (BMiddle node) = fm node + block (b1 `BCat` b2) = block b1 `cat` block b2 + block (b1 `BSnoc` n) = block b1 `cat` fm n + block (n `BCons` b2) = fm n `cat` block b2 + cat :: forall a b c. 
(a -> b) -> (b -> c) -> a -> c + cat f f' = f' . f + +foldBlockNodesF f = foldBlockNodesF3 (f, f, f) + +foldBlockNodesB3 (ff, fm, fl) = block + where block :: forall e x . Block n e x -> IndexedCO x a b -> IndexedCO e c b + block (BlockCO f b ) = ff f `cat` block b + block (BlockCC f b l) = ff f `cat` block b `cat` fl l + block (BlockOC b l) = block b `cat` fl l + block BNil = id + block (BMiddle node) = fm node + block (b1 `BCat` b2) = block b1 `cat` block b2 + block (b1 `BSnoc` n) = block b1 `cat` fm n + block (n `BCons` b2) = fm n `cat` block b2 + cat :: forall a b c. (b -> c) -> (a -> b) -> a -> c + cat f f' = f . f' + +foldBlockNodesB f = foldBlockNodesB3 (f, f, f) + diff --git a/compiler/GHC/Cmm/Dataflow/Collections.hs b/compiler/GHC/Cmm/Dataflow/Collections.hs new file mode 100644 index 0000000000..f131f17cc1 --- /dev/null +++ b/compiler/GHC/Cmm/Dataflow/Collections.hs @@ -0,0 +1,177 @@ +{-# LANGUAGE DeriveFunctor #-} +{-# LANGUAGE DeriveTraversable #-} +{-# LANGUAGE DeriveFoldable #-} +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} + +module GHC.Cmm.Dataflow.Collections + ( IsSet(..) + , setInsertList, setDeleteList, setUnions + , IsMap(..) + , mapInsertList, mapDeleteList, mapUnions + , UniqueMap, UniqueSet + ) where + +import GhcPrelude + +import qualified Data.IntMap.Strict as M +import qualified Data.IntSet as S + +import Data.List (foldl1') + +class IsSet set where + type ElemOf set + + setNull :: set -> Bool + setSize :: set -> Int + setMember :: ElemOf set -> set -> Bool + + setEmpty :: set + setSingleton :: ElemOf set -> set + setInsert :: ElemOf set -> set -> set + setDelete :: ElemOf set -> set -> set + + setUnion :: set -> set -> set + setDifference :: set -> set -> set + setIntersection :: set -> set -> set + setIsSubsetOf :: set -> set -> Bool + setFilter :: (ElemOf set -> Bool) -> set -> set + + setFoldl :: (b -> ElemOf set -> b) -> b -> set -> b + setFoldr :: (ElemOf set -> b -> b) -> b -> set -> b + + setElems :: set -> [ElemOf set] + setFromList :: [ElemOf set] -> set + +-- Helper functions for IsSet class +setInsertList :: IsSet set => [ElemOf set] -> set -> set +setInsertList keys set = foldl' (flip setInsert) set keys + +setDeleteList :: IsSet set => [ElemOf set] -> set -> set +setDeleteList keys set = foldl' (flip setDelete) set keys + +setUnions :: IsSet set => [set] -> set +setUnions [] = setEmpty +setUnions sets = foldl1' setUnion sets + + +class IsMap map where + type KeyOf map + + mapNull :: map a -> Bool + mapSize :: map a -> Int + mapMember :: KeyOf map -> map a -> Bool + mapLookup :: KeyOf map -> map a -> Maybe a + mapFindWithDefault :: a -> KeyOf map -> map a -> a + + mapEmpty :: map a + mapSingleton :: KeyOf map -> a -> map a + mapInsert :: KeyOf map -> a -> map a -> map a + mapInsertWith :: (a -> a -> a) -> KeyOf map -> a -> map a -> map a + mapDelete :: KeyOf map -> map a -> map a + mapAlter :: (Maybe a -> Maybe a) -> KeyOf map -> map a -> map a + mapAdjust :: (a -> a) -> KeyOf map -> map a -> map a + + mapUnion :: map a -> map a -> map a + mapUnionWithKey :: (KeyOf map -> a -> a -> a) -> map a -> map a -> map a + mapDifference :: map a -> map a -> map a + mapIntersection :: map a -> map a -> map a + mapIsSubmapOf :: Eq a => map a -> map a -> Bool + + mapMap :: (a -> b) -> map a -> map b + mapMapWithKey :: (KeyOf map -> a -> b) -> map a -> map b + mapFoldl :: (b -> a -> b) -> b -> map a -> b + mapFoldr :: (a -> b -> b) -> b -> map a -> b + mapFoldlWithKey :: (b -> KeyOf map -> a -> b) -> b -> map a -> b + 
mapFoldMapWithKey :: Monoid m => (KeyOf map -> a -> m) -> map a -> m + mapFilter :: (a -> Bool) -> map a -> map a + mapFilterWithKey :: (KeyOf map -> a -> Bool) -> map a -> map a + + + mapElems :: map a -> [a] + mapKeys :: map a -> [KeyOf map] + mapToList :: map a -> [(KeyOf map, a)] + mapFromList :: [(KeyOf map, a)] -> map a + mapFromListWith :: (a -> a -> a) -> [(KeyOf map,a)] -> map a + +-- Helper functions for IsMap class +mapInsertList :: IsMap map => [(KeyOf map, a)] -> map a -> map a +mapInsertList assocs map = foldl' (flip (uncurry mapInsert)) map assocs + +mapDeleteList :: IsMap map => [KeyOf map] -> map a -> map a +mapDeleteList keys map = foldl' (flip mapDelete) map keys + +mapUnions :: IsMap map => [map a] -> map a +mapUnions [] = mapEmpty +mapUnions maps = foldl1' mapUnion maps + +----------------------------------------------------------------------------- +-- Basic instances +----------------------------------------------------------------------------- + +newtype UniqueSet = US S.IntSet deriving (Eq, Ord, Show, Semigroup, Monoid) + +instance IsSet UniqueSet where + type ElemOf UniqueSet = Int + + setNull (US s) = S.null s + setSize (US s) = S.size s + setMember k (US s) = S.member k s + + setEmpty = US S.empty + setSingleton k = US (S.singleton k) + setInsert k (US s) = US (S.insert k s) + setDelete k (US s) = US (S.delete k s) + + setUnion (US x) (US y) = US (S.union x y) + setDifference (US x) (US y) = US (S.difference x y) + setIntersection (US x) (US y) = US (S.intersection x y) + setIsSubsetOf (US x) (US y) = S.isSubsetOf x y + setFilter f (US s) = US (S.filter f s) + + setFoldl k z (US s) = S.foldl' k z s + setFoldr k z (US s) = S.foldr k z s + + setElems (US s) = S.elems s + setFromList ks = US (S.fromList ks) + +newtype UniqueMap v = UM (M.IntMap v) + deriving (Eq, Ord, Show, Functor, Foldable, Traversable) + +instance IsMap UniqueMap where + type KeyOf UniqueMap = Int + + mapNull (UM m) = M.null m + mapSize (UM m) = M.size m + mapMember k (UM m) = M.member k m + mapLookup k (UM m) = M.lookup k m + mapFindWithDefault def k (UM m) = M.findWithDefault def k m + + mapEmpty = UM M.empty + mapSingleton k v = UM (M.singleton k v) + mapInsert k v (UM m) = UM (M.insert k v m) + mapInsertWith f k v (UM m) = UM (M.insertWith f k v m) + mapDelete k (UM m) = UM (M.delete k m) + mapAlter f k (UM m) = UM (M.alter f k m) + mapAdjust f k (UM m) = UM (M.adjust f k m) + + mapUnion (UM x) (UM y) = UM (M.union x y) + mapUnionWithKey f (UM x) (UM y) = UM (M.unionWithKey f x y) + mapDifference (UM x) (UM y) = UM (M.difference x y) + mapIntersection (UM x) (UM y) = UM (M.intersection x y) + mapIsSubmapOf (UM x) (UM y) = M.isSubmapOf x y + + mapMap f (UM m) = UM (M.map f m) + mapMapWithKey f (UM m) = UM (M.mapWithKey f m) + mapFoldl k z (UM m) = M.foldl' k z m + mapFoldr k z (UM m) = M.foldr k z m + mapFoldlWithKey k z (UM m) = M.foldlWithKey' k z m + mapFoldMapWithKey f (UM m) = M.foldMapWithKey f m + mapFilter f (UM m) = UM (M.filter f m) + mapFilterWithKey f (UM m) = UM (M.filterWithKey f m) + + mapElems (UM m) = M.elems m + mapKeys (UM m) = M.keys m + mapToList (UM m) = M.toList m + mapFromList assocs = UM (M.fromList assocs) + mapFromListWith f assocs = UM (M.fromListWith f assocs) diff --git a/compiler/GHC/Cmm/Dataflow/Graph.hs b/compiler/GHC/Cmm/Dataflow/Graph.hs new file mode 100644 index 0000000000..3f361de0fb --- /dev/null +++ b/compiler/GHC/Cmm/Dataflow/Graph.hs @@ -0,0 +1,186 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE FlexibleInstances #-} +{-# 
LANGUAGE GADTs #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TypeFamilies #-} +module GHC.Cmm.Dataflow.Graph + ( Body + , Graph + , Graph'(..) + , NonLocal(..) + , addBlock + , bodyList + , emptyBody + , labelsDefined + , mapGraph + , mapGraphBlocks + , revPostorderFrom + ) where + + +import GhcPrelude +import Util + +import GHC.Cmm.Dataflow.Label +import GHC.Cmm.Dataflow.Block +import GHC.Cmm.Dataflow.Collections + +-- | A (possibly empty) collection of closed/closed blocks +type Body n = LabelMap (Block n C C) + +-- | @Body@ abstracted over @block@ +type Body' block (n :: Extensibility -> Extensibility -> *) = LabelMap (block n C C) + +------------------------------- +-- | Gives access to the anchor points for +-- nonlocal edges as well as the edges themselves +class NonLocal thing where + entryLabel :: thing C x -> Label -- ^ The label of a first node or block + successors :: thing e C -> [Label] -- ^ Gives control-flow successors + +instance NonLocal n => NonLocal (Block n) where + entryLabel (BlockCO f _) = entryLabel f + entryLabel (BlockCC f _ _) = entryLabel f + + successors (BlockOC _ n) = successors n + successors (BlockCC _ _ n) = successors n + + +emptyBody :: Body' block n +emptyBody = mapEmpty + +bodyList :: Body' block n -> [(Label,block n C C)] +bodyList body = mapToList body + +addBlock + :: (NonLocal block, HasDebugCallStack) + => block C C -> LabelMap (block C C) -> LabelMap (block C C) +addBlock block body = mapAlter add lbl body + where + lbl = entryLabel block + add Nothing = Just block + add _ = error $ "duplicate label " ++ show lbl ++ " in graph" + + +-- --------------------------------------------------------------------------- +-- Graph + +-- | A control-flow graph, which may take any of four shapes (O/O, +-- O/C, C/O, C/C). A graph open at the entry has a single, +-- distinguished, anonymous entry point; if a graph is closed at the +-- entry, its entry point(s) are supplied by a context. +type Graph = Graph' Block + +-- | @Graph'@ is abstracted over the block type, so that we can build +-- graphs of annotated blocks for example (Compiler.Hoopl.Dataflow +-- needs this). +data Graph' block (n :: Extensibility -> Extensibility -> *) e x where + GNil :: Graph' block n O O + GUnit :: block n O O -> Graph' block n O O + GMany :: MaybeO e (block n O C) + -> Body' block n + -> MaybeO x (block n C O) + -> Graph' block n e x + + +-- ----------------------------------------------------------------------------- +-- Mapping over graphs + +-- | Maps over all nodes in a graph. +mapGraph :: (forall e x. n e x -> n' e x) -> Graph n e x -> Graph n' e x +mapGraph f = mapGraphBlocks (mapBlock f) + +-- | Function 'mapGraphBlocks' enables a change of representation of blocks, +-- nodes, or both. It lifts a polymorphic block transform into a polymorphic +-- graph transform. When the block representation stabilizes, a similar +-- function should be provided for blocks. +mapGraphBlocks :: forall block n block' n' e x . + (forall e x . block n e x -> block' n' e x) + -> (Graph' block n e x -> Graph' block' n' e x) + +mapGraphBlocks f = map + where map :: Graph' block n e x -> Graph' block' n' e x + map GNil = GNil + map (GUnit b) = GUnit (f b) + map (GMany e b x) = GMany (fmap f e) (mapMap f b) (fmap f x) + +-- ----------------------------------------------------------------------------- +-- Extracting Labels from graphs + +labelsDefined :: forall block n e x . 
NonLocal (block n) => Graph' block n e x + -> LabelSet +labelsDefined GNil = setEmpty +labelsDefined (GUnit{}) = setEmpty +labelsDefined (GMany _ body x) = mapFoldlWithKey addEntry (exitLabel x) body + where addEntry :: forall a. LabelSet -> ElemOf LabelSet -> a -> LabelSet + addEntry labels label _ = setInsert label labels + exitLabel :: MaybeO x (block n C O) -> LabelSet + exitLabel NothingO = setEmpty + exitLabel (JustO b) = setSingleton (entryLabel b) + + +---------------------------------------------------------------- + +-- | Returns a list of blocks reachable from the provided Labels in the reverse +-- postorder. +-- +-- This is the most important traversal over this data structure. It drops +-- unreachable code and puts blocks in an order that is good for solving forward +-- dataflow problems quickly. The reverse order is good for solving backward +-- dataflow problems quickly. The forward order is also reasonably good for +-- emitting instructions, except that it will not usually exploit Forrest +-- Baskett's trick of eliminating the unconditional branch from a loop. For +-- that you would need a more serious analysis, probably based on dominators, to +-- identify loop headers. +-- +-- For forward analyses we want reverse postorder visitation, consider: +-- @ +-- A -> [B,C] +-- B -> D +-- C -> D +-- @ +-- Postorder: [D, C, B, A] (or [D, B, C, A]) +-- Reverse postorder: [A, B, C, D] (or [A, C, B, D]) +-- This matters for, e.g., forward analysis, because we want to analyze *both* +-- B and C before we analyze D. +revPostorderFrom + :: forall block. (NonLocal block) + => LabelMap (block C C) -> Label -> [block C C] +revPostorderFrom graph start = go start_worklist setEmpty [] + where + start_worklist = lookup_for_descend start Nil + + -- To compute the postorder we need to "visit" a block (mark as done) + -- *after* visiting all its successors. So we need to know whether we + -- already processed all successors of each block (and @NonLocal@ allows + -- arbitrary many successors). So we use an explicit stack with an extra bit + -- of information: + -- * @ConsTodo@ means to explore the block if it wasn't visited before + -- * @ConsMark@ means that all successors were already done and we can add + -- the block to the result. + -- + -- NOTE: We add blocks to the result list in postorder, but we *prepend* + -- them (i.e., we use @(:)@), which means that the final list is in reverse + -- postorder. + go :: DfsStack (block C C) -> LabelSet -> [block C C] -> [block C C] + go Nil !_ !result = result + go (ConsMark block rest) !wip_or_done !result = + go rest wip_or_done (block : result) + go (ConsTodo block rest) !wip_or_done !result + | entryLabel block `setMember` wip_or_done = go rest wip_or_done result + | otherwise = + let new_worklist = + foldr lookup_for_descend + (ConsMark block rest) + (successors block) + in go new_worklist (setInsert (entryLabel block) wip_or_done) result + + lookup_for_descend :: Label -> DfsStack (block C C) -> DfsStack (block C C) + lookup_for_descend label wl + | Just b <- mapLookup label graph = ConsTodo b wl + | otherwise = + error $ "Label that doesn't have a block?! 
" ++ show label + +data DfsStack a = ConsTodo a (DfsStack a) | ConsMark a (DfsStack a) | Nil diff --git a/compiler/GHC/Cmm/Dataflow/Label.hs b/compiler/GHC/Cmm/Dataflow/Label.hs new file mode 100644 index 0000000000..c571cedb48 --- /dev/null +++ b/compiler/GHC/Cmm/Dataflow/Label.hs @@ -0,0 +1,142 @@ +{-# LANGUAGE DeriveFoldable #-} +{-# LANGUAGE DeriveFunctor #-} +{-# LANGUAGE DeriveTraversable #-} +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} + +module GHC.Cmm.Dataflow.Label + ( Label + , LabelMap + , LabelSet + , FactBase + , lookupFact + , mkHooplLabel + ) where + +import GhcPrelude + +import Outputable + +-- TODO: This should really just use GHC's Unique and Uniq{Set,FM} +import GHC.Cmm.Dataflow.Collections + +import Unique (Uniquable(..)) +import TrieMap + + +----------------------------------------------------------------------------- +-- Label +----------------------------------------------------------------------------- + +newtype Label = Label { lblToUnique :: Int } + deriving (Eq, Ord) + +mkHooplLabel :: Int -> Label +mkHooplLabel = Label + +instance Show Label where + show (Label n) = "L" ++ show n + +instance Uniquable Label where + getUnique label = getUnique (lblToUnique label) + +instance Outputable Label where + ppr label = ppr (getUnique label) + +----------------------------------------------------------------------------- +-- LabelSet + +newtype LabelSet = LS UniqueSet deriving (Eq, Ord, Show, Monoid, Semigroup) + +instance IsSet LabelSet where + type ElemOf LabelSet = Label + + setNull (LS s) = setNull s + setSize (LS s) = setSize s + setMember (Label k) (LS s) = setMember k s + + setEmpty = LS setEmpty + setSingleton (Label k) = LS (setSingleton k) + setInsert (Label k) (LS s) = LS (setInsert k s) + setDelete (Label k) (LS s) = LS (setDelete k s) + + setUnion (LS x) (LS y) = LS (setUnion x y) + setDifference (LS x) (LS y) = LS (setDifference x y) + setIntersection (LS x) (LS y) = LS (setIntersection x y) + setIsSubsetOf (LS x) (LS y) = setIsSubsetOf x y + setFilter f (LS s) = LS (setFilter (f . mkHooplLabel) s) + setFoldl k z (LS s) = setFoldl (\a v -> k a (mkHooplLabel v)) z s + setFoldr k z (LS s) = setFoldr (\v a -> k (mkHooplLabel v) a) z s + + setElems (LS s) = map mkHooplLabel (setElems s) + setFromList ks = LS (setFromList (map lblToUnique ks)) + +----------------------------------------------------------------------------- +-- LabelMap + +newtype LabelMap v = LM (UniqueMap v) + deriving (Eq, Ord, Show, Functor, Foldable, Traversable) + +instance IsMap LabelMap where + type KeyOf LabelMap = Label + + mapNull (LM m) = mapNull m + mapSize (LM m) = mapSize m + mapMember (Label k) (LM m) = mapMember k m + mapLookup (Label k) (LM m) = mapLookup k m + mapFindWithDefault def (Label k) (LM m) = mapFindWithDefault def k m + + mapEmpty = LM mapEmpty + mapSingleton (Label k) v = LM (mapSingleton k v) + mapInsert (Label k) v (LM m) = LM (mapInsert k v m) + mapInsertWith f (Label k) v (LM m) = LM (mapInsertWith f k v m) + mapDelete (Label k) (LM m) = LM (mapDelete k m) + mapAlter f (Label k) (LM m) = LM (mapAlter f k m) + mapAdjust f (Label k) (LM m) = LM (mapAdjust f k m) + + mapUnion (LM x) (LM y) = LM (mapUnion x y) + mapUnionWithKey f (LM x) (LM y) = LM (mapUnionWithKey (f . mkHooplLabel) x y) + mapDifference (LM x) (LM y) = LM (mapDifference x y) + mapIntersection (LM x) (LM y) = LM (mapIntersection x y) + mapIsSubmapOf (LM x) (LM y) = mapIsSubmapOf x y + + mapMap f (LM m) = LM (mapMap f m) + mapMapWithKey f (LM m) = LM (mapMapWithKey (f . 
mkHooplLabel) m) + mapFoldl k z (LM m) = mapFoldl k z m + mapFoldr k z (LM m) = mapFoldr k z m + mapFoldlWithKey k z (LM m) = + mapFoldlWithKey (\a v -> k a (mkHooplLabel v)) z m + mapFoldMapWithKey f (LM m) = mapFoldMapWithKey (\k v -> f (mkHooplLabel k) v) m + mapFilter f (LM m) = LM (mapFilter f m) + mapFilterWithKey f (LM m) = LM (mapFilterWithKey (f . mkHooplLabel) m) + + mapElems (LM m) = mapElems m + mapKeys (LM m) = map mkHooplLabel (mapKeys m) + mapToList (LM m) = [(mkHooplLabel k, v) | (k, v) <- mapToList m] + mapFromList assocs = LM (mapFromList [(lblToUnique k, v) | (k, v) <- assocs]) + mapFromListWith f assocs = LM (mapFromListWith f [(lblToUnique k, v) | (k, v) <- assocs]) + +----------------------------------------------------------------------------- +-- Instances + +instance Outputable LabelSet where + ppr = ppr . setElems + +instance Outputable a => Outputable (LabelMap a) where + ppr = ppr . mapToList + +instance TrieMap LabelMap where + type Key LabelMap = Label + emptyTM = mapEmpty + lookupTM k m = mapLookup k m + alterTM k f m = mapAlter f k m + foldTM k m z = mapFoldr k z m + mapTM f m = mapMap f m + +----------------------------------------------------------------------------- +-- FactBase + +type FactBase f = LabelMap f + +lookupFact :: Label -> FactBase f -> Maybe f +lookupFact = mapLookup diff --git a/compiler/GHC/Cmm/DebugBlock.hs b/compiler/GHC/Cmm/DebugBlock.hs new file mode 100644 index 0000000000..70fc08ee94 --- /dev/null +++ b/compiler/GHC/Cmm/DebugBlock.hs @@ -0,0 +1,546 @@ +{-# LANGUAGE GADTs #-} +{-# LANGUAGE MultiWayIf #-} + +----------------------------------------------------------------------------- +-- +-- Debugging data +-- +-- Association of debug data on the Cmm level, with methods to encode it in +-- event log format for later inclusion in profiling event logs. +-- +----------------------------------------------------------------------------- + +module GHC.Cmm.DebugBlock ( + + DebugBlock(..), + cmmDebugGen, + cmmDebugLabels, + cmmDebugLink, + debugToMap, + + -- * Unwinding information + UnwindTable, UnwindPoint(..), + UnwindExpr(..), toUnwindExpr + ) where + +import GhcPrelude + +import GHC.Cmm.BlockId +import GHC.Cmm.CLabel +import GHC.Cmm +import GHC.Cmm.Utils +import CoreSyn +import FastString ( nilFS, mkFastString ) +import Module +import Outputable +import GHC.Cmm.Ppr.Expr ( pprExpr ) +import SrcLoc +import Util ( seqList ) + +import GHC.Cmm.Dataflow.Block +import GHC.Cmm.Dataflow.Collections +import GHC.Cmm.Dataflow.Graph +import GHC.Cmm.Dataflow.Label + +import Data.Maybe +import Data.List ( minimumBy, nubBy ) +import Data.Ord ( comparing ) +import qualified Data.Map as Map +import Data.Either ( partitionEithers ) + +-- | Debug information about a block of code. Ticks scope over nested +-- blocks. +data DebugBlock = + DebugBlock + { dblProcedure :: !Label -- ^ Entry label of containing proc + , dblLabel :: !Label -- ^ Hoopl label + , dblCLabel :: !CLabel -- ^ Output label + , dblHasInfoTbl :: !Bool -- ^ Has an info table? + , dblParent :: !(Maybe DebugBlock) + -- ^ The parent of this proc. See Note [Splitting DebugBlocks] + , dblTicks :: ![CmmTickish] -- ^ Ticks defined in this block + , dblSourceTick :: !(Maybe CmmTickish) -- ^ Best source tick covering block + , dblPosition :: !(Maybe Int) -- ^ Output position relative to + -- other blocks. 
@Nothing@ means + -- the block was optimized out + , dblUnwind :: [UnwindPoint] + , dblBlocks :: ![DebugBlock] -- ^ Nested blocks + } + +instance Outputable DebugBlock where + ppr blk = (if | dblProcedure blk == dblLabel blk + -> text "proc" + | dblHasInfoTbl blk + -> text "pp-blk" + | otherwise + -> text "blk") <+> + ppr (dblLabel blk) <+> parens (ppr (dblCLabel blk)) <+> + (maybe empty ppr (dblSourceTick blk)) <+> + (maybe (text "removed") ((text "pos " <>) . ppr) + (dblPosition blk)) <+> + (ppr (dblUnwind blk)) $+$ + (if null (dblBlocks blk) then empty else nest 4 (ppr (dblBlocks blk))) + +-- | Intermediate data structure holding debug-relevant context information +-- about a block. +type BlockContext = (CmmBlock, RawCmmDecl) + +-- | Extract debug data from a group of procedures. We will prefer +-- source notes that come from the given module (presumably the module +-- that we are currently compiling). +cmmDebugGen :: ModLocation -> RawCmmGroup -> [DebugBlock] +cmmDebugGen modLoc decls = map (blocksForScope Nothing) topScopes + where + blockCtxs :: Map.Map CmmTickScope [BlockContext] + blockCtxs = blockContexts decls + + -- Analyse tick scope structure: Each one is either a top-level + -- tick scope, or the child of another. + (topScopes, childScopes) + = partitionEithers $ map (\a -> findP a a) $ Map.keys blockCtxs + findP tsc GlobalScope = Left tsc -- top scope + findP tsc scp | scp' `Map.member` blockCtxs = Right (scp', tsc) + | otherwise = findP tsc scp' + where -- Note that we only following the left parent of + -- combined scopes. This loses us ticks, which we will + -- recover by copying ticks below. + scp' | SubScope _ scp' <- scp = scp' + | CombinedScope scp' _ <- scp = scp' + | otherwise = panic "findP impossible" + + scopeMap = foldr (uncurry insertMulti) Map.empty childScopes + + -- This allows us to recover ticks that we lost by flattening + -- the graph. Basically, if the parent is A but the child is + -- CBA, we know that there is no BA, because it would have taken + -- priority - but there might be a B scope, with ticks that + -- would not be associated with our child anymore. Note however + -- that there might be other childs (DB), which we have to + -- filter out. + -- + -- We expect this to be called rarely, which is why we are not + -- trying too hard to be efficient here. In many cases we won't + -- have to construct blockCtxsU in the first place. + ticksToCopy :: CmmTickScope -> [CmmTickish] + ticksToCopy (CombinedScope scp s) = go s + where go s | scp `isTickSubScope` s = [] -- done + | SubScope _ s' <- s = ticks ++ go s' + | CombinedScope s1 s2 <- s = ticks ++ go s1 ++ go s2 + | otherwise = panic "ticksToCopy impossible" + where ticks = bCtxsTicks $ fromMaybe [] $ Map.lookup s blockCtxs + ticksToCopy _ = [] + bCtxsTicks = concatMap (blockTicks . fst) + + -- Finding the "best" source tick is somewhat arbitrary -- we + -- select the first source span, while preferring source ticks + -- from the same source file. Furthermore, dumps take priority + -- (if we generated one, we probably want debug information to + -- refer to it). + bestSrcTick = minimumBy (comparing rangeRating) + rangeRating (SourceNote span _) + | srcSpanFile span == thisFile = 1 + | otherwise = 2 :: Int + rangeRating note = pprPanic "rangeRating" (ppr note) + thisFile = maybe nilFS mkFastString $ ml_hs_file modLoc + + -- Returns block tree for this scope as well as all nested + -- scopes. 
Note that if there are multiple blocks in the (exact) + -- same scope we elect one as the "branch" node and add the rest + -- as children. + blocksForScope :: Maybe CmmTickish -> CmmTickScope -> DebugBlock + blocksForScope cstick scope = mkBlock True (head bctxs) + where bctxs = fromJust $ Map.lookup scope blockCtxs + nested = fromMaybe [] $ Map.lookup scope scopeMap + childs = map (mkBlock False) (tail bctxs) ++ + map (blocksForScope stick) nested + + mkBlock :: Bool -> BlockContext -> DebugBlock + mkBlock top (block, prc) + = DebugBlock { dblProcedure = g_entry graph + , dblLabel = label + , dblCLabel = case info of + Just (Statics infoLbl _) -> infoLbl + Nothing + | g_entry graph == label -> entryLbl + | otherwise -> blockLbl label + , dblHasInfoTbl = isJust info + , dblParent = Nothing + , dblTicks = ticks + , dblPosition = Nothing -- see cmmDebugLink + , dblSourceTick = stick + , dblBlocks = blocks + , dblUnwind = [] + } + where (CmmProc infos entryLbl _ graph) = prc + label = entryLabel block + info = mapLookup label infos + blocks | top = seqList childs childs + | otherwise = [] + + -- A source tick scopes over all nested blocks. However + -- their source ticks might take priority. + isSourceTick SourceNote {} = True + isSourceTick _ = False + -- Collect ticks from all blocks inside the tick scope. + -- We attempt to filter out duplicates while we're at it. + ticks = nubBy (flip tickishContains) $ + bCtxsTicks bctxs ++ ticksToCopy scope + stick = case filter isSourceTick ticks of + [] -> cstick + sticks -> Just $! bestSrcTick (sticks ++ maybeToList cstick) + +-- | Build a map of blocks sorted by their tick scopes +-- +-- This involves a pre-order traversal, as we want blocks in rough +-- control flow order (so ticks have a chance to be sorted in the +-- right order). +blockContexts :: RawCmmGroup -> Map.Map CmmTickScope [BlockContext] +blockContexts decls = Map.map reverse $ foldr walkProc Map.empty decls + where walkProc :: RawCmmDecl + -> Map.Map CmmTickScope [BlockContext] + -> Map.Map CmmTickScope [BlockContext] + walkProc CmmData{} m = m + walkProc prc@(CmmProc _ _ _ graph) m + | mapNull blocks = m + | otherwise = snd $ walkBlock prc entry (emptyLbls, m) + where blocks = toBlockMap graph + entry = [mapFind (g_entry graph) blocks] + emptyLbls = setEmpty :: LabelSet + + walkBlock :: RawCmmDecl -> [Block CmmNode C C] + -> (LabelSet, Map.Map CmmTickScope [BlockContext]) + -> (LabelSet, Map.Map CmmTickScope [BlockContext]) + walkBlock _ [] c = c + walkBlock prc (block:blocks) (visited, m) + | lbl `setMember` visited + = walkBlock prc blocks (visited, m) + | otherwise + = walkBlock prc blocks $ + walkBlock prc succs + (lbl `setInsert` visited, + insertMulti scope (block, prc) m) + where CmmEntry lbl scope = firstNode block + (CmmProc _ _ _ graph) = prc + succs = map (flip mapFind (toBlockMap graph)) + (successors (lastNode block)) + mapFind = mapFindWithDefault (error "contextTree: block not found!") + +insertMulti :: Ord k => k -> a -> Map.Map k [a] -> Map.Map k [a] +insertMulti k v = Map.insertWith (const (v:)) k [v] + +cmmDebugLabels :: (i -> Bool) -> GenCmmGroup d g (ListGraph i) -> [Label] +cmmDebugLabels isMeta nats = seqList lbls lbls + where -- Find order in which procedures will be generated by the + -- back-end (that actually matters for DWARF generation). 
+ -- + -- Note that we might encounter blocks that are missing or only + -- consist of meta instructions -- we will declare them missing, + -- which will skip debug data generation without messing up the + -- block hierarchy. + lbls = map blockId $ filter (not . allMeta) $ concatMap getBlocks nats + getBlocks (CmmProc _ _ _ (ListGraph bs)) = bs + getBlocks _other = [] + allMeta (BasicBlock _ instrs) = all isMeta instrs + +-- | Sets position and unwind table fields in the debug block tree according to +-- native generated code. +cmmDebugLink :: [Label] -> LabelMap [UnwindPoint] + -> [DebugBlock] -> [DebugBlock] +cmmDebugLink labels unwindPts blocks = map link blocks + where blockPos :: LabelMap Int + blockPos = mapFromList $ flip zip [0..] labels + link block = block { dblPosition = mapLookup (dblLabel block) blockPos + , dblBlocks = map link (dblBlocks block) + , dblUnwind = fromMaybe mempty + $ mapLookup (dblLabel block) unwindPts + } + +-- | Converts debug blocks into a label map for easier lookups +debugToMap :: [DebugBlock] -> LabelMap DebugBlock +debugToMap = mapUnions . map go + where go b = mapInsert (dblLabel b) b $ mapUnions $ map go (dblBlocks b) + +{- +Note [What is this unwinding business?] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Unwinding tables are a variety of debugging information used by debugging tools +to reconstruct the execution history of a program at runtime. These tables +consist of sets of "instructions", one set for every instruction in the program, +which describe how to reconstruct the state of the machine at the point where +the current procedure was called. For instance, consider the following annotated +pseudo-code, + + a_fun: + add rsp, 8 -- unwind: rsp = rsp - 8 + mov rax, 1 -- unwind: rax = unknown + call another_block + sub rsp, 8 -- unwind: rsp = rsp + +We see that attached to each instruction there is an "unwind" annotation, which +provides a relationship between each updated register and its value at the +time of entry to a_fun. This is the sort of information that allows gdb to give +you a stack backtrace given the execution state of your program. This +unwinding information is captured in various ways by various debug information +formats; in the case of DWARF (the only format supported by GHC) it is known as +Call Frame Information (CFI) and can be found in the .debug.frames section of +your object files. + +Currently we only bother to produce unwinding information for registers which +are necessary to reconstruct flow-of-execution. On x86_64 this includes $rbp +(which is the STG stack pointer) and $rsp (the C stack pointer). + +Let's consider how GHC would annotate a C-- program with unwinding information +with a typical C-- procedure as would come from the STG-to-Cmm code generator, + + entry() + { c2fe: + v :: P64 = R2; + if ((Sp + 8) - 32 < SpLim) (likely: False) goto c2ff; else goto c2fg; + c2ff: + R2 = v :: P64; + R1 = test_closure; + call (stg_gc_fun)(R2, R1) args: 8, res: 0, upd: 8; + c2fg: + I64[Sp - 8] = c2dD; + R1 = v :: P64; + Sp = Sp - 8; // Sp updated here + if (R1 & 7 != 0) goto c2dD; else goto c2dE; + c2dE: + call (I64[R1])(R1) returns to c2dD, args: 8, res: 8, upd: 8; + c2dD: + w :: P64 = R1; + Hp = Hp + 48; + if (Hp > HpLim) (likely: False) goto c2fj; else goto c2fi; + ... + }, + +Let's consider how this procedure will be decorated with unwind information +(largely by GHC.Cmm.LayoutStack). Naturally, when we enter the procedure `entry` the +value of Sp is no different from what it was at its call site. 
Therefore we will +add an `unwind` statement saying this at the beginning of its unwind-annotated +code, + + entry() + { c2fe: + unwind Sp = Just Sp + 0; + v :: P64 = R2; + if ((Sp + 8) - 32 < SpLim) (likely: False) goto c2ff; else goto c2fg; + +After c2fe we may pass to either c2ff or c2fg; let's first consider the +former. In this case there is nothing in particular that we need to do other +than reiterate what we already know about Sp, + + c2ff: + unwind Sp = Just Sp + 0; + R2 = v :: P64; + R1 = test_closure; + call (stg_gc_fun)(R2, R1) args: 8, res: 0, upd: 8; + +In contrast, c2fg updates Sp midway through its body. To ensure that unwinding +can happen correctly after this point we must include an unwind statement there, +in addition to the usual beginning-of-block statement, + + c2fg: + unwind Sp = Just Sp + 0; + I64[Sp - 8] = c2dD; + R1 = v :: P64; + Sp = Sp - 8; + unwind Sp = Just Sp + 8; + if (R1 & 7 != 0) goto c2dD; else goto c2dE; + +The remaining blocks are simple, + + c2dE: + unwind Sp = Just Sp + 8; + call (I64[R1])(R1) returns to c2dD, args: 8, res: 8, upd: 8; + c2dD: + unwind Sp = Just Sp + 8; + w :: P64 = R1; + Hp = Hp + 48; + if (Hp > HpLim) (likely: False) goto c2fj; else goto c2fi; + ... + }, + + +The flow of unwinding information through the compiler is a bit convoluted: + + * C-- begins life in StgToCmm without any unwind information. This is because we + haven't actually done any register assignment or stack layout yet, so there + is no need for unwind information. + + * GHC.Cmm.LayoutStack figures out how to layout each procedure's stack, and produces + appropriate unwinding nodes for each adjustment of the STG Sp register. + + * The unwind nodes are carried through the sinking pass. Currently this is + guaranteed not to invalidate unwind information since it won't touch stores + to Sp, but this will need revisiting if CmmSink gets smarter in the future. + + * Eventually we make it to the native code generator backend which can then + preserve the unwind nodes in its machine-specific instructions. In so doing + the backend can also modify or add unwinding information; this is necessary, + for instance, in the case of x86-64, where adjustment of $rsp may be + necessary during calls to native foreign code due to the native calling + convention. + + * The NCG then retrieves the final unwinding table for each block from the + backend with extractUnwindPoints. + + * This unwind information is converted to DebugBlocks by Debug.cmmDebugGen + + * These DebugBlocks are then converted to, e.g., DWARF unwinding tables + (by the Dwarf module) and emitted in the final object. + +See also: + Note [Unwinding information in the NCG] in AsmCodeGen, + Note [Unwind pseudo-instruction in Cmm], + Note [Debugging DWARF unwinding info]. + + +Note [Debugging DWARF unwinding info] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +For debugging generated unwinding info I've found it most useful to dump the +disassembled binary with objdump -D and dump the debug info with +readelf --debug-dump=frames-interp. 
+ +You should get something like this: + + 0000000000000010 : + 10: 48 83 c5 18 add $0x18,%rbp + 14: ff 65 00 jmpq *0x0(%rbp) + +and: + + Contents of the .debug_frame section: + + 00000000 0000000000000014 ffffffff CIE "" cf=1 df=-8 ra=16 + LOC CFA rbp rsp ra + 0000000000000000 rbp+0 v+0 s c+0 + + 00000018 0000000000000024 00000000 FDE cie=00000000 pc=000000000000000f..0000000000000017 + LOC CFA rbp rsp ra + 000000000000000f rbp+0 v+0 s c+0 + 000000000000000f rbp+24 v+0 s c+0 + +To read it http://www.dwarfstd.org/doc/dwarf-2.0.0.pdf has a nice example in +Appendix 5 (page 101 of the pdf) and more details in the relevant section. + +The key thing to keep in mind is that the value at LOC is the value from +*before* the instruction at LOC executes. In other words it answers the +question: if my $rip is at LOC, how do I get the relevant values given the +values obtained through unwinding so far. + +If the readelf --debug-dump=frames-interp output looks wrong, it may also be +useful to look at readelf --debug-dump=frames, which is closer to the +information that GHC generated. + +It's also useful to dump the relevant Cmm with -ddump-cmm -ddump-opt-cmm +-ddump-cmm-proc -ddump-cmm-verbose. Note [Unwind pseudo-instruction in Cmm] +explains how to interpret it. + +Inside gdb there are a couple useful commands for inspecting frames. +For example: + + gdb> info frame + +It shows the values of registers obtained through unwinding. + +Another useful thing to try when debugging the DWARF unwinding is to enable +extra debugging output in GDB: + + gdb> set debug frame 1 + +This makes GDB produce a trace of its internal workings. Having gone this far, +it's just a tiny step to run GDB in GDB. Make sure you install debugging +symbols for gdb if you obtain it through a package manager. + +Keep in mind that the current release of GDB has an instruction pointer handling +heuristic that works well for C-like languages, but doesn't always work for +Haskell. See Note [Info Offset] in Dwarf.Types for more details. + +Note [Unwind pseudo-instruction in Cmm] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +One of the possible CmmNodes is a CmmUnwind pseudo-instruction. It doesn't +generate any assembly, but controls what DWARF unwinding information gets +generated. + +It's important to understand what ranges of code the unwind pseudo-instruction +refers to. +For a sequence of CmmNodes like: + + A // starts at addr X and ends at addr Y-1 + unwind Sp = Just Sp + 16; + B // starts at addr Y and ends at addr Z + +the unwind statement reflects the state after A has executed, but before B +has executed. If you consult the Note [Debugging DWARF unwinding info], the +LOC this information will end up in is Y. +-} + +-- | A label associated with an 'UnwindTable' +data UnwindPoint = UnwindPoint !CLabel !UnwindTable + +instance Outputable UnwindPoint where + ppr (UnwindPoint lbl uws) = + braces $ ppr lbl<>colon + <+> hsep (punctuate comma $ map pprUw $ Map.toList uws) + where + pprUw (g, expr) = ppr g <> char '=' <> ppr expr + +-- | Maps registers to expressions that yield their "old" values +-- further up the stack. Most interesting for the stack pointer @Sp@, +-- but might be useful to document saved registers, too. Note that a +-- register's value will be 'Nothing' when the register's previous +-- value cannot be reconstructed. 
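+--
+-- Purely as an illustration (not part of this patch): after the
+-- @Sp = Sp - 8@ adjustment discussed in the Note above, the table at that
+-- point could record that the caller's @Sp@ is found 8 bytes above the
+-- current one:
+--
+-- > Map.fromList [(Sp, Just (UwReg Sp 8))] :: UnwindTable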
+type UnwindTable = Map.Map GlobalReg (Maybe UnwindExpr) + +-- | Expressions, used for unwind information +data UnwindExpr = UwConst !Int -- ^ literal value + | UwReg !GlobalReg !Int -- ^ register plus offset + | UwDeref UnwindExpr -- ^ pointer dereferencing + | UwLabel CLabel + | UwPlus UnwindExpr UnwindExpr + | UwMinus UnwindExpr UnwindExpr + | UwTimes UnwindExpr UnwindExpr + deriving (Eq) + +instance Outputable UnwindExpr where + pprPrec _ (UwConst i) = ppr i + pprPrec _ (UwReg g 0) = ppr g + pprPrec p (UwReg g x) = pprPrec p (UwPlus (UwReg g 0) (UwConst x)) + pprPrec _ (UwDeref e) = char '*' <> pprPrec 3 e + pprPrec _ (UwLabel l) = pprPrec 3 l + pprPrec p (UwPlus e0 e1) | p <= 0 + = pprPrec 0 e0 <> char '+' <> pprPrec 0 e1 + pprPrec p (UwMinus e0 e1) | p <= 0 + = pprPrec 1 e0 <> char '-' <> pprPrec 1 e1 + pprPrec p (UwTimes e0 e1) | p <= 1 + = pprPrec 2 e0 <> char '*' <> pprPrec 2 e1 + pprPrec _ other = parens (pprPrec 0 other) + +-- | Conversion of Cmm expressions to unwind expressions. We check for +-- unsupported operator usages and simplify the expression as far as +-- possible. +toUnwindExpr :: CmmExpr -> UnwindExpr +toUnwindExpr (CmmLit (CmmInt i _)) = UwConst (fromIntegral i) +toUnwindExpr (CmmLit (CmmLabel l)) = UwLabel l +toUnwindExpr (CmmRegOff (CmmGlobal g) i) = UwReg g i +toUnwindExpr (CmmReg (CmmGlobal g)) = UwReg g 0 +toUnwindExpr (CmmLoad e _) = UwDeref (toUnwindExpr e) +toUnwindExpr e@(CmmMachOp op [e1, e2]) = + case (op, toUnwindExpr e1, toUnwindExpr e2) of + (MO_Add{}, UwReg r x, UwConst y) -> UwReg r (x + y) + (MO_Sub{}, UwReg r x, UwConst y) -> UwReg r (x - y) + (MO_Add{}, UwConst x, UwReg r y) -> UwReg r (x + y) + (MO_Add{}, UwConst x, UwConst y) -> UwConst (x + y) + (MO_Sub{}, UwConst x, UwConst y) -> UwConst (x - y) + (MO_Mul{}, UwConst x, UwConst y) -> UwConst (x * y) + (MO_Add{}, u1, u2 ) -> UwPlus u1 u2 + (MO_Sub{}, u1, u2 ) -> UwMinus u1 u2 + (MO_Mul{}, u1, u2 ) -> UwTimes u1 u2 + _otherwise -> pprPanic "Unsupported operator in unwind expression!" + (pprExpr e) +toUnwindExpr e + = pprPanic "Unsupported unwind expression!" (ppr e) diff --git a/compiler/GHC/Cmm/Expr.hs b/compiler/GHC/Cmm/Expr.hs new file mode 100644 index 0000000000..3b4f0156a0 --- /dev/null +++ b/compiler/GHC/Cmm/Expr.hs @@ -0,0 +1,619 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE UndecidableInstances #-} + +module GHC.Cmm.Expr + ( CmmExpr(..), cmmExprType, cmmExprWidth, cmmExprAlignment, maybeInvertCmmExpr + , CmmReg(..), cmmRegType, cmmRegWidth + , CmmLit(..), cmmLitType + , LocalReg(..), localRegType + , GlobalReg(..), isArgReg, globalRegType + , spReg, hpReg, spLimReg, hpLimReg, nodeReg + , currentTSOReg, currentNurseryReg, hpAllocReg, cccsReg + , node, baseReg + , VGcPtr(..) + + , DefinerOfRegs, UserOfRegs + , foldRegsDefd, foldRegsUsed + , foldLocalRegsDefd, foldLocalRegsUsed + + , RegSet, LocalRegSet, GlobalRegSet + , emptyRegSet, elemRegSet, extendRegSet, deleteFromRegSet, mkRegSet + , plusRegSet, minusRegSet, timesRegSet, sizeRegSet, nullRegSet + , regSetToList + + , Area(..) 
+ , module GHC.Cmm.MachOp + , module GHC.Cmm.Type + ) +where + +import GhcPrelude + +import GHC.Cmm.BlockId +import GHC.Cmm.CLabel +import GHC.Cmm.MachOp +import GHC.Cmm.Type +import DynFlags +import Outputable (panic) +import Unique + +import Data.Set (Set) +import qualified Data.Set as Set + +import BasicTypes (Alignment, mkAlignment, alignmentOf) + +----------------------------------------------------------------------------- +-- CmmExpr +-- An expression. Expressions have no side effects. +----------------------------------------------------------------------------- + +data CmmExpr + = CmmLit CmmLit -- Literal + | CmmLoad !CmmExpr !CmmType -- Read memory location + | CmmReg !CmmReg -- Contents of register + | CmmMachOp MachOp [CmmExpr] -- Machine operation (+, -, *, etc.) + | CmmStackSlot Area {-# UNPACK #-} !Int + -- addressing expression of a stack slot + -- See Note [CmmStackSlot aliasing] + | CmmRegOff !CmmReg Int + -- CmmRegOff reg i + -- ** is shorthand only, meaning ** + -- CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt (fromIntegral i) rep)] + -- where rep = typeWidth (cmmRegType reg) + +instance Eq CmmExpr where -- Equality ignores the types + CmmLit l1 == CmmLit l2 = l1==l2 + CmmLoad e1 _ == CmmLoad e2 _ = e1==e2 + CmmReg r1 == CmmReg r2 = r1==r2 + CmmRegOff r1 i1 == CmmRegOff r2 i2 = r1==r2 && i1==i2 + CmmMachOp op1 es1 == CmmMachOp op2 es2 = op1==op2 && es1==es2 + CmmStackSlot a1 i1 == CmmStackSlot a2 i2 = a1==a2 && i1==i2 + _e1 == _e2 = False + +data CmmReg + = CmmLocal {-# UNPACK #-} !LocalReg + | CmmGlobal GlobalReg + deriving( Eq, Ord ) + +-- | A stack area is either the stack slot where a variable is spilled +-- or the stack space where function arguments and results are passed. +data Area + = Old -- See Note [Old Area] + | Young {-# UNPACK #-} !BlockId -- Invariant: must be a continuation BlockId + -- See Note [Continuation BlockId] in GHC.Cmm.Node. + deriving (Eq, Ord) + +{- Note [Old Area] +~~~~~~~~~~~~~~~~~~ +There is a single call area 'Old', allocated at the extreme old +end of the stack frame (ie just younger than the return address) +which holds: + * incoming (overflow) parameters, + * outgoing (overflow) parameter to tail calls, + * outgoing (overflow) result values + * the update frame (if any) + +Its size is the max of all these requirements. On entry, the stack +pointer will point to the youngest incoming parameter, which is not +necessarily at the young end of the Old area. + +End of note -} + + +{- Note [CmmStackSlot aliasing] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +When do two CmmStackSlots alias? + + - T[old+N] aliases with U[young(L)+M] for all T, U, L, N and M + - T[old+N] aliases with U[old+M] only if the areas actually overlap + +Or more informally, different Areas may overlap with each other. + +An alternative semantics, that we previously had, was that different +Areas do not overlap. The problem that lead to redefining the +semantics of stack areas is described below. + +e.g. if we had + + x = Sp[old + 8] + y = Sp[old + 16] + + Sp[young(L) + 8] = L + Sp[young(L) + 16] = y + Sp[young(L) + 24] = x + call f() returns to L + +if areas semantically do not overlap, then we might optimise this to + + Sp[young(L) + 8] = L + Sp[young(L) + 16] = Sp[old + 8] + Sp[young(L) + 24] = Sp[old + 16] + call f() returns to L + +and now young(L) cannot be allocated at the same place as old, and we +are doomed to use more stack. 
+ + - old+8 conflicts with young(L)+8 + - old+16 conflicts with young(L)+16 and young(L)+8 + +so young(L)+8 == old+24 and we get + + Sp[-8] = L + Sp[-16] = Sp[8] + Sp[-24] = Sp[0] + Sp -= 24 + call f() returns to L + +However, if areas are defined to be "possibly overlapping" in the +semantics, then we cannot commute any loads/stores of old with +young(L), and we will be able to re-use both old+8 and old+16 for +young(L). + + x = Sp[8] + y = Sp[0] + + Sp[8] = L + Sp[0] = y + Sp[-8] = x + Sp = Sp - 8 + call f() returns to L + +Now, the assignments of y go away, + + x = Sp[8] + Sp[8] = L + Sp[-8] = x + Sp = Sp - 8 + call f() returns to L +-} + +data CmmLit + = CmmInt !Integer Width + -- Interpretation: the 2's complement representation of the value + -- is truncated to the specified size. This is easier than trying + -- to keep the value within range, because we don't know whether + -- it will be used as a signed or unsigned value (the CmmType doesn't + -- distinguish between signed & unsigned). + | CmmFloat Rational Width + | CmmVec [CmmLit] -- Vector literal + | CmmLabel CLabel -- Address of label + | CmmLabelOff CLabel Int -- Address of label + byte offset + + -- Due to limitations in the C backend, the following + -- MUST ONLY be used inside the info table indicated by label2 + -- (label2 must be the info label), and label1 must be an + -- SRT, a slow entrypoint or a large bitmap (see the Mangler) + -- Don't use it at all unless tablesNextToCode. + -- It is also used inside the NCG during when generating + -- position-independent code. + | CmmLabelDiffOff CLabel CLabel Int Width -- label1 - label2 + offset + -- In an expression, the width just has the effect of MO_SS_Conv + -- from wordWidth to the desired width. + -- + -- In a static literal, the supported Widths depend on the + -- architecture: wordWidth is supported on all + -- architectures. Additionally W32 is supported on x86_64 when + -- using the small memory model. + + | CmmBlock {-# UNPACK #-} !BlockId -- Code label + -- Invariant: must be a continuation BlockId + -- See Note [Continuation BlockId] in GHC.Cmm.Node. + + | CmmHighStackMark -- A late-bound constant that stands for the max + -- #bytes of stack space used during a procedure. 
+ -- During the stack-layout pass, CmmHighStackMark + -- is replaced by a CmmInt for the actual number + -- of bytes used + deriving Eq + +cmmExprType :: DynFlags -> CmmExpr -> CmmType +cmmExprType dflags (CmmLit lit) = cmmLitType dflags lit +cmmExprType _ (CmmLoad _ rep) = rep +cmmExprType dflags (CmmReg reg) = cmmRegType dflags reg +cmmExprType dflags (CmmMachOp op args) = machOpResultType dflags op (map (cmmExprType dflags) args) +cmmExprType dflags (CmmRegOff reg _) = cmmRegType dflags reg +cmmExprType dflags (CmmStackSlot _ _) = bWord dflags -- an address +-- Careful though: what is stored at the stack slot may be bigger than +-- an address + +cmmLitType :: DynFlags -> CmmLit -> CmmType +cmmLitType _ (CmmInt _ width) = cmmBits width +cmmLitType _ (CmmFloat _ width) = cmmFloat width +cmmLitType _ (CmmVec []) = panic "cmmLitType: CmmVec []" +cmmLitType cflags (CmmVec (l:ls)) = let ty = cmmLitType cflags l + in if all (`cmmEqType` ty) (map (cmmLitType cflags) ls) + then cmmVec (1+length ls) ty + else panic "cmmLitType: CmmVec" +cmmLitType dflags (CmmLabel lbl) = cmmLabelType dflags lbl +cmmLitType dflags (CmmLabelOff lbl _) = cmmLabelType dflags lbl +cmmLitType _ (CmmLabelDiffOff _ _ _ width) = cmmBits width +cmmLitType dflags (CmmBlock _) = bWord dflags +cmmLitType dflags (CmmHighStackMark) = bWord dflags + +cmmLabelType :: DynFlags -> CLabel -> CmmType +cmmLabelType dflags lbl + | isGcPtrLabel lbl = gcWord dflags + | otherwise = bWord dflags + +cmmExprWidth :: DynFlags -> CmmExpr -> Width +cmmExprWidth dflags e = typeWidth (cmmExprType dflags e) + +-- | Returns an alignment in bytes of a CmmExpr when it's a statically +-- known integer constant, otherwise returns an alignment of 1 byte. +-- The caller is responsible for using with a sensible CmmExpr +-- argument. +cmmExprAlignment :: CmmExpr -> Alignment +cmmExprAlignment (CmmLit (CmmInt intOff _)) = alignmentOf (fromInteger intOff) +cmmExprAlignment _ = mkAlignment 1 +-------- +--- Negation for conditional branches + +maybeInvertCmmExpr :: CmmExpr -> Maybe CmmExpr +maybeInvertCmmExpr (CmmMachOp op args) = do op' <- maybeInvertComparison op + return (CmmMachOp op' args) +maybeInvertCmmExpr _ = Nothing + +----------------------------------------------------------------------------- +-- Local registers +----------------------------------------------------------------------------- + +data LocalReg + = LocalReg {-# UNPACK #-} !Unique CmmType + -- ^ Parameters: + -- 1. Identifier + -- 2. Type + +instance Eq LocalReg where + (LocalReg u1 _) == (LocalReg u2 _) = u1 == u2 + +-- This is non-deterministic but we do not currently support deterministic +-- code-generation. See Note [Unique Determinism and code generation] +-- See Note [No Ord for Unique] +instance Ord LocalReg where + compare (LocalReg u1 _) (LocalReg u2 _) = nonDetCmpUnique u1 u2 + +instance Uniquable LocalReg where + getUnique (LocalReg uniq _) = uniq + +cmmRegType :: DynFlags -> CmmReg -> CmmType +cmmRegType _ (CmmLocal reg) = localRegType reg +cmmRegType dflags (CmmGlobal reg) = globalRegType dflags reg + +cmmRegWidth :: DynFlags -> CmmReg -> Width +cmmRegWidth dflags = typeWidth . 
cmmRegType dflags + +localRegType :: LocalReg -> CmmType +localRegType (LocalReg _ rep) = rep + +----------------------------------------------------------------------------- +-- Register-use information for expressions and other types +----------------------------------------------------------------------------- + +-- | Sets of registers + +-- These are used for dataflow facts, and a common operation is taking +-- the union of two RegSets and then asking whether the union is the +-- same as one of the inputs. UniqSet isn't good here, because +-- sizeUniqSet is O(n) whereas Set.size is O(1), so we use ordinary +-- Sets. + +type RegSet r = Set r +type LocalRegSet = RegSet LocalReg +type GlobalRegSet = RegSet GlobalReg + +emptyRegSet :: RegSet r +nullRegSet :: RegSet r -> Bool +elemRegSet :: Ord r => r -> RegSet r -> Bool +extendRegSet :: Ord r => RegSet r -> r -> RegSet r +deleteFromRegSet :: Ord r => RegSet r -> r -> RegSet r +mkRegSet :: Ord r => [r] -> RegSet r +minusRegSet, plusRegSet, timesRegSet :: Ord r => RegSet r -> RegSet r -> RegSet r +sizeRegSet :: RegSet r -> Int +regSetToList :: RegSet r -> [r] + +emptyRegSet = Set.empty +nullRegSet = Set.null +elemRegSet = Set.member +extendRegSet = flip Set.insert +deleteFromRegSet = flip Set.delete +mkRegSet = Set.fromList +minusRegSet = Set.difference +plusRegSet = Set.union +timesRegSet = Set.intersection +sizeRegSet = Set.size +regSetToList = Set.toList + +class Ord r => UserOfRegs r a where + foldRegsUsed :: DynFlags -> (b -> r -> b) -> b -> a -> b + +foldLocalRegsUsed :: UserOfRegs LocalReg a + => DynFlags -> (b -> LocalReg -> b) -> b -> a -> b +foldLocalRegsUsed = foldRegsUsed + +class Ord r => DefinerOfRegs r a where + foldRegsDefd :: DynFlags -> (b -> r -> b) -> b -> a -> b + +foldLocalRegsDefd :: DefinerOfRegs LocalReg a + => DynFlags -> (b -> LocalReg -> b) -> b -> a -> b +foldLocalRegsDefd = foldRegsDefd + +instance UserOfRegs LocalReg CmmReg where + foldRegsUsed _ f z (CmmLocal reg) = f z reg + foldRegsUsed _ _ z (CmmGlobal _) = z + +instance DefinerOfRegs LocalReg CmmReg where + foldRegsDefd _ f z (CmmLocal reg) = f z reg + foldRegsDefd _ _ z (CmmGlobal _) = z + +instance UserOfRegs GlobalReg CmmReg where + foldRegsUsed _ _ z (CmmLocal _) = z + foldRegsUsed _ f z (CmmGlobal reg) = f z reg + +instance DefinerOfRegs GlobalReg CmmReg where + foldRegsDefd _ _ z (CmmLocal _) = z + foldRegsDefd _ f z (CmmGlobal reg) = f z reg + +instance Ord r => UserOfRegs r r where + foldRegsUsed _ f z r = f z r + +instance Ord r => DefinerOfRegs r r where + foldRegsDefd _ f z r = f z r + +instance (Ord r, UserOfRegs r CmmReg) => UserOfRegs r CmmExpr where + -- The (Ord r) in the context is necessary here + -- See Note [Recursive superclasses] in TcInstDcls + foldRegsUsed dflags f !z e = expr z e + where expr z (CmmLit _) = z + expr z (CmmLoad addr _) = foldRegsUsed dflags f z addr + expr z (CmmReg r) = foldRegsUsed dflags f z r + expr z (CmmMachOp _ exprs) = foldRegsUsed dflags f z exprs + expr z (CmmRegOff r _) = foldRegsUsed dflags f z r + expr z (CmmStackSlot _ _) = z + +instance UserOfRegs r a => UserOfRegs r [a] where + foldRegsUsed dflags f set as = foldl' (foldRegsUsed dflags f) set as + {-# INLINABLE foldRegsUsed #-} + +instance DefinerOfRegs r a => DefinerOfRegs r [a] where + foldRegsDefd dflags f set as = foldl' (foldRegsDefd dflags f) set as + {-# INLINABLE foldRegsDefd #-} + +----------------------------------------------------------------------------- +-- Global STG registers 
+-----------------------------------------------------------------------------
+
+data VGcPtr = VGcPtr | VNonGcPtr deriving( Eq, Show )
+
+-----------------------------------------------------------------------------
+-- Global STG registers
+-----------------------------------------------------------------------------
+{-
+Note [Overlapping global registers]
+
+The backend might not faithfully implement the abstraction of the STG
+machine with independent registers for different values of type
+GlobalReg. Specifically, certain pairs of registers (r1, r2) may
+overlap in the sense that a store to r1 invalidates the value in r2,
+and vice versa.
+
+Currently this occurs only on the x86_64 architecture where FloatReg n
+and DoubleReg n are assigned the same microarchitectural register, in
+order to allow functions to receive more Float# or Double# arguments
+in registers (as opposed to on the stack).
+
+There are no specific rules about which registers might overlap with
+which other registers, but presumably it's safe to assume that nothing
+will overlap with special registers like Sp or BaseReg.
+
+Use GHC.Cmm.Utils.regsOverlap to determine whether two GlobalRegs overlap
+on a particular platform. The instance Eq GlobalReg is syntactic
+equality of STG registers and does not take overlap into
+account. However it is still used in UserOfRegs/DefinerOfRegs and
+there are likely still bugs there, beware!
+-}
+
+data GlobalReg
+  -- Argument and return registers
+  = VanillaReg                  -- pointers, unboxed ints and chars
+        {-# UNPACK #-} !Int     -- its number
+        VGcPtr
+
+  | FloatReg                    -- single-precision floating-point registers
+        {-# UNPACK #-} !Int     -- its number
+
+  | DoubleReg                   -- double-precision floating-point registers
+        {-# UNPACK #-} !Int     -- its number
+
+  | LongReg                     -- long int registers (64-bit, really)
+        {-# UNPACK #-} !Int     -- its number
+
+  | XmmReg                      -- 128-bit SIMD vector register
+        {-# UNPACK #-} !Int     -- its number
+
+  | YmmReg                      -- 256-bit SIMD vector register
+        {-# UNPACK #-} !Int     -- its number
+
+  | ZmmReg                      -- 512-bit SIMD vector register
+        {-# UNPACK #-} !Int     -- its number
+
+  -- STG registers
+  | Sp                  -- Stack ptr; points to last occupied stack location.
+  | SpLim               -- Stack limit
+  | Hp                  -- Heap ptr; points to last occupied heap location.
+  | HpLim               -- Heap limit register
+  | CCCS                -- Current cost-centre stack
+  | CurrentTSO          -- pointer to current thread's TSO
+  | CurrentNursery      -- pointer to allocation area
+  | HpAlloc             -- allocation count for heap check failure
+
+  -- We keep the address of some commonly-called
+  -- functions in the register table, to keep code
+  -- size down:
+  | EagerBlackholeInfo  -- stg_EAGER_BLACKHOLE_info
+  | GCEnter1            -- stg_gc_enter_1
+  | GCFun               -- stg_gc_fun
+
+  -- Base offset for the register table, used for accessing registers
+  -- which do not have real registers assigned to them. This register
+  -- will only appear after we have expanded GlobalReg into memory accesses
+  -- (where necessary) in the native code generator.
+  | BaseReg
+
+  -- The register used by the platform for the C stack pointer. This is
+  -- a break in the STG abstraction used exclusively to set up stack unwinding
+  -- information.
+  | MachSp
+
+  -- This is a dummy register used to indicate to the stack unwinder where
+  -- a routine would return to.
+  | UnwindReturnReg
+
+  -- Base Register for PIC (position-independent code) calculations
+  -- Only used inside the native code generator. Its exact meaning differs
+  -- from platform to platform (see module PositionIndependentCode).
+ | PicBaseReg + + deriving( Show ) + +instance Eq GlobalReg where + VanillaReg i _ == VanillaReg j _ = i==j -- Ignore type when seeking clashes + FloatReg i == FloatReg j = i==j + DoubleReg i == DoubleReg j = i==j + LongReg i == LongReg j = i==j + -- NOTE: XMM, YMM, ZMM registers actually are the same registers + -- at least with respect to store at YMM i and then read from XMM i + -- and similarly for ZMM etc. + XmmReg i == XmmReg j = i==j + YmmReg i == YmmReg j = i==j + ZmmReg i == ZmmReg j = i==j + Sp == Sp = True + SpLim == SpLim = True + Hp == Hp = True + HpLim == HpLim = True + CCCS == CCCS = True + CurrentTSO == CurrentTSO = True + CurrentNursery == CurrentNursery = True + HpAlloc == HpAlloc = True + EagerBlackholeInfo == EagerBlackholeInfo = True + GCEnter1 == GCEnter1 = True + GCFun == GCFun = True + BaseReg == BaseReg = True + MachSp == MachSp = True + UnwindReturnReg == UnwindReturnReg = True + PicBaseReg == PicBaseReg = True + _r1 == _r2 = False + +instance Ord GlobalReg where + compare (VanillaReg i _) (VanillaReg j _) = compare i j + -- Ignore type when seeking clashes + compare (FloatReg i) (FloatReg j) = compare i j + compare (DoubleReg i) (DoubleReg j) = compare i j + compare (LongReg i) (LongReg j) = compare i j + compare (XmmReg i) (XmmReg j) = compare i j + compare (YmmReg i) (YmmReg j) = compare i j + compare (ZmmReg i) (ZmmReg j) = compare i j + compare Sp Sp = EQ + compare SpLim SpLim = EQ + compare Hp Hp = EQ + compare HpLim HpLim = EQ + compare CCCS CCCS = EQ + compare CurrentTSO CurrentTSO = EQ + compare CurrentNursery CurrentNursery = EQ + compare HpAlloc HpAlloc = EQ + compare EagerBlackholeInfo EagerBlackholeInfo = EQ + compare GCEnter1 GCEnter1 = EQ + compare GCFun GCFun = EQ + compare BaseReg BaseReg = EQ + compare MachSp MachSp = EQ + compare UnwindReturnReg UnwindReturnReg = EQ + compare PicBaseReg PicBaseReg = EQ + compare (VanillaReg _ _) _ = LT + compare _ (VanillaReg _ _) = GT + compare (FloatReg _) _ = LT + compare _ (FloatReg _) = GT + compare (DoubleReg _) _ = LT + compare _ (DoubleReg _) = GT + compare (LongReg _) _ = LT + compare _ (LongReg _) = GT + compare (XmmReg _) _ = LT + compare _ (XmmReg _) = GT + compare (YmmReg _) _ = LT + compare _ (YmmReg _) = GT + compare (ZmmReg _) _ = LT + compare _ (ZmmReg _) = GT + compare Sp _ = LT + compare _ Sp = GT + compare SpLim _ = LT + compare _ SpLim = GT + compare Hp _ = LT + compare _ Hp = GT + compare HpLim _ = LT + compare _ HpLim = GT + compare CCCS _ = LT + compare _ CCCS = GT + compare CurrentTSO _ = LT + compare _ CurrentTSO = GT + compare CurrentNursery _ = LT + compare _ CurrentNursery = GT + compare HpAlloc _ = LT + compare _ HpAlloc = GT + compare GCEnter1 _ = LT + compare _ GCEnter1 = GT + compare GCFun _ = LT + compare _ GCFun = GT + compare BaseReg _ = LT + compare _ BaseReg = GT + compare MachSp _ = LT + compare _ MachSp = GT + compare UnwindReturnReg _ = LT + compare _ UnwindReturnReg = GT + compare EagerBlackholeInfo _ = LT + compare _ EagerBlackholeInfo = GT + +-- convenient aliases +baseReg, spReg, hpReg, spLimReg, hpLimReg, nodeReg, + currentTSOReg, currentNurseryReg, hpAllocReg, cccsReg :: CmmReg +baseReg = CmmGlobal BaseReg +spReg = CmmGlobal Sp +hpReg = CmmGlobal Hp +hpLimReg = CmmGlobal HpLim +spLimReg = CmmGlobal SpLim +nodeReg = CmmGlobal node +currentTSOReg = CmmGlobal CurrentTSO +currentNurseryReg = CmmGlobal CurrentNursery +hpAllocReg = CmmGlobal HpAlloc +cccsReg = CmmGlobal CCCS + +node :: GlobalReg +node = VanillaReg 1 VGcPtr + +globalRegType :: DynFlags -> GlobalReg -> 
CmmType +globalRegType dflags (VanillaReg _ VGcPtr) = gcWord dflags +globalRegType dflags (VanillaReg _ VNonGcPtr) = bWord dflags +globalRegType _ (FloatReg _) = cmmFloat W32 +globalRegType _ (DoubleReg _) = cmmFloat W64 +globalRegType _ (LongReg _) = cmmBits W64 +-- TODO: improve the internal model of SIMD/vectorized registers +-- the right design SHOULd improve handling of float and double code too. +-- see remarks in "NOTE [SIMD Design for the future]"" in GHC.StgToCmm.Prim +globalRegType _ (XmmReg _) = cmmVec 4 (cmmBits W32) +globalRegType _ (YmmReg _) = cmmVec 8 (cmmBits W32) +globalRegType _ (ZmmReg _) = cmmVec 16 (cmmBits W32) + +globalRegType dflags Hp = gcWord dflags + -- The initialiser for all + -- dynamically allocated closures +globalRegType dflags _ = bWord dflags + +isArgReg :: GlobalReg -> Bool +isArgReg (VanillaReg {}) = True +isArgReg (FloatReg {}) = True +isArgReg (DoubleReg {}) = True +isArgReg (LongReg {}) = True +isArgReg (XmmReg {}) = True +isArgReg (YmmReg {}) = True +isArgReg (ZmmReg {}) = True +isArgReg _ = False diff --git a/compiler/GHC/Cmm/Graph.hs b/compiler/GHC/Cmm/Graph.hs new file mode 100644 index 0000000000..8d19e7fdb9 --- /dev/null +++ b/compiler/GHC/Cmm/Graph.hs @@ -0,0 +1,484 @@ +{-# LANGUAGE BangPatterns, GADTs #-} + +module GHC.Cmm.Graph + ( CmmAGraph, CmmAGraphScoped, CgStmt(..) + , (<*>), catAGraphs + , mkLabel, mkMiddle, mkLast, outOfLine + , lgraphOfAGraph, labelAGraph + + , stackStubExpr + , mkNop, mkAssign, mkStore + , mkUnsafeCall, mkFinalCall, mkCallReturnsTo + , mkJumpReturnsTo + , mkJump, mkJumpExtra + , mkRawJump + , mkCbranch, mkSwitch + , mkReturn, mkComment, mkCallEntry, mkBranch + , mkUnwind + , copyInOflow, copyOutOflow + , noExtraStack + , toCall, Transfer(..) + ) +where + +import GhcPrelude hiding ( (<*>) ) -- avoid importing (<*>) + +import GHC.Cmm.BlockId +import GHC.Cmm +import GHC.Cmm.CallConv +import GHC.Cmm.Switch (SwitchTargets) + +import GHC.Cmm.Dataflow.Block +import GHC.Cmm.Dataflow.Graph +import GHC.Cmm.Dataflow.Label +import DynFlags +import FastString +import ForeignCall +import OrdList +import GHC.Runtime.Layout (ByteOff) +import UniqSupply +import Util +import Panic + + +----------------------------------------------------------------------------- +-- Building Graphs + + +-- | CmmAGraph is a chunk of code consisting of: +-- +-- * ordinary statements (assignments, stores etc.) +-- * jumps +-- * labels +-- * out-of-line labelled blocks +-- +-- The semantics is that control falls through labels and out-of-line +-- blocks. Everything after a jump up to the next label is by +-- definition unreachable code, and will be discarded. +-- +-- Two CmmAGraphs can be stuck together with <*>, with the meaning that +-- control flows from the first to the second. +-- +-- A 'CmmAGraph' can be turned into a 'CmmGraph' (closed at both ends) +-- by providing a label for the entry point and a tick scope; see +-- 'labelAGraph'. +type CmmAGraph = OrdList CgStmt +-- | Unlabeled graph with tick scope +type CmmAGraphScoped = (CmmAGraph, CmmTickScope) + +data CgStmt + = CgLabel BlockId CmmTickScope + | CgStmt (CmmNode O O) + | CgLast (CmmNode O C) + | CgFork BlockId CmmAGraph CmmTickScope + +flattenCmmAGraph :: BlockId -> CmmAGraphScoped -> CmmGraph +flattenCmmAGraph id (stmts_t, tscope) = + CmmGraph { g_entry = id, + g_graph = GMany NothingO body NothingO } + where + body = foldr addBlock emptyBody $ flatten id stmts_t tscope [] + + -- + -- flatten: given an entry label and a CmmAGraph, make a list of blocks. + -- + -- NB. 
avoid the quadratic-append trap by passing in the tail of the + -- list. This is important for Very Long Functions (e.g. in T783). + -- + flatten :: Label -> CmmAGraph -> CmmTickScope -> [Block CmmNode C C] + -> [Block CmmNode C C] + flatten id g tscope blocks + = flatten1 (fromOL g) block' blocks + where !block' = blockJoinHead (CmmEntry id tscope) emptyBlock + -- + -- flatten0: we are outside a block at this point: any code before + -- the first label is unreachable, so just drop it. + -- + flatten0 :: [CgStmt] -> [Block CmmNode C C] -> [Block CmmNode C C] + flatten0 [] blocks = blocks + + flatten0 (CgLabel id tscope : stmts) blocks + = flatten1 stmts block blocks + where !block = blockJoinHead (CmmEntry id tscope) emptyBlock + + flatten0 (CgFork fork_id stmts_t tscope : rest) blocks + = flatten fork_id stmts_t tscope $ flatten0 rest blocks + + flatten0 (CgLast _ : stmts) blocks = flatten0 stmts blocks + flatten0 (CgStmt _ : stmts) blocks = flatten0 stmts blocks + + -- + -- flatten1: we have a partial block, collect statements until the + -- next last node to make a block, then call flatten0 to get the rest + -- of the blocks + -- + flatten1 :: [CgStmt] -> Block CmmNode C O + -> [Block CmmNode C C] -> [Block CmmNode C C] + + -- The current block falls through to the end of a function or fork: + -- this code should not be reachable, but it may be referenced by + -- other code that is not reachable. We'll remove it later with + -- dead-code analysis, but for now we have to keep the graph + -- well-formed, so we terminate the block with a branch to the + -- beginning of the current block. + flatten1 [] block blocks + = blockJoinTail block (CmmBranch (entryLabel block)) : blocks + + flatten1 (CgLast stmt : stmts) block blocks + = block' : flatten0 stmts blocks + where !block' = blockJoinTail block stmt + + flatten1 (CgStmt stmt : stmts) block blocks + = flatten1 stmts block' blocks + where !block' = blockSnoc block stmt + + flatten1 (CgFork fork_id stmts_t tscope : rest) block blocks + = flatten fork_id stmts_t tscope $ flatten1 rest block blocks + + -- a label here means that we should start a new block, and the + -- current block should fall through to the new block. 
+ flatten1 (CgLabel id tscp : stmts) block blocks + = blockJoinTail block (CmmBranch id) : + flatten1 stmts (blockJoinHead (CmmEntry id tscp) emptyBlock) blocks + + + +---------- AGraph manipulation + +(<*>) :: CmmAGraph -> CmmAGraph -> CmmAGraph +(<*>) = appOL + +catAGraphs :: [CmmAGraph] -> CmmAGraph +catAGraphs = concatOL + +-- | creates a sequence "goto id; id:" as an AGraph +mkLabel :: BlockId -> CmmTickScope -> CmmAGraph +mkLabel bid scp = unitOL (CgLabel bid scp) + +-- | creates an open AGraph from a given node +mkMiddle :: CmmNode O O -> CmmAGraph +mkMiddle middle = unitOL (CgStmt middle) + +-- | creates a closed AGraph from a given node +mkLast :: CmmNode O C -> CmmAGraph +mkLast last = unitOL (CgLast last) + +-- | A labelled code block; should end in a last node +outOfLine :: BlockId -> CmmAGraphScoped -> CmmAGraph +outOfLine l (c,s) = unitOL (CgFork l c s) + +-- | allocate a fresh label for the entry point +lgraphOfAGraph :: CmmAGraphScoped -> UniqSM CmmGraph +lgraphOfAGraph g = do + u <- getUniqueM + return (labelAGraph (mkBlockId u) g) + +-- | use the given BlockId as the label of the entry point +labelAGraph :: BlockId -> CmmAGraphScoped -> CmmGraph +labelAGraph lbl ag = flattenCmmAGraph lbl ag + +---------- No-ops +mkNop :: CmmAGraph +mkNop = nilOL + +mkComment :: FastString -> CmmAGraph +mkComment fs + -- SDM: generating all those comments takes time, this saved about 4% for me + | debugIsOn = mkMiddle $ CmmComment fs + | otherwise = nilOL + +---------- Assignment and store +mkAssign :: CmmReg -> CmmExpr -> CmmAGraph +mkAssign l (CmmReg r) | l == r = mkNop +mkAssign l r = mkMiddle $ CmmAssign l r + +mkStore :: CmmExpr -> CmmExpr -> CmmAGraph +mkStore l r = mkMiddle $ CmmStore l r + +---------- Control transfer +mkJump :: DynFlags -> Convention -> CmmExpr + -> [CmmExpr] + -> UpdFrameOffset + -> CmmAGraph +mkJump dflags conv e actuals updfr_off = + lastWithArgs dflags Jump Old conv actuals updfr_off $ + toCall e Nothing updfr_off 0 + +-- | A jump where the caller says what the live GlobalRegs are. Used +-- for low-level hand-written Cmm. 
+mkRawJump :: DynFlags -> CmmExpr -> UpdFrameOffset -> [GlobalReg] + -> CmmAGraph +mkRawJump dflags e updfr_off vols = + lastWithArgs dflags Jump Old NativeNodeCall [] updfr_off $ + \arg_space _ -> toCall e Nothing updfr_off 0 arg_space vols + + +mkJumpExtra :: DynFlags -> Convention -> CmmExpr -> [CmmExpr] + -> UpdFrameOffset -> [CmmExpr] + -> CmmAGraph +mkJumpExtra dflags conv e actuals updfr_off extra_stack = + lastWithArgsAndExtraStack dflags Jump Old conv actuals updfr_off extra_stack $ + toCall e Nothing updfr_off 0 + +mkCbranch :: CmmExpr -> BlockId -> BlockId -> Maybe Bool -> CmmAGraph +mkCbranch pred ifso ifnot likely = + mkLast (CmmCondBranch pred ifso ifnot likely) + +mkSwitch :: CmmExpr -> SwitchTargets -> CmmAGraph +mkSwitch e tbl = mkLast $ CmmSwitch e tbl + +mkReturn :: DynFlags -> CmmExpr -> [CmmExpr] -> UpdFrameOffset + -> CmmAGraph +mkReturn dflags e actuals updfr_off = + lastWithArgs dflags Ret Old NativeReturn actuals updfr_off $ + toCall e Nothing updfr_off 0 + +mkBranch :: BlockId -> CmmAGraph +mkBranch bid = mkLast (CmmBranch bid) + +mkFinalCall :: DynFlags + -> CmmExpr -> CCallConv -> [CmmExpr] -> UpdFrameOffset + -> CmmAGraph +mkFinalCall dflags f _ actuals updfr_off = + lastWithArgs dflags Call Old NativeDirectCall actuals updfr_off $ + toCall f Nothing updfr_off 0 + +mkCallReturnsTo :: DynFlags -> CmmExpr -> Convention -> [CmmExpr] + -> BlockId + -> ByteOff + -> UpdFrameOffset + -> [CmmExpr] + -> CmmAGraph +mkCallReturnsTo dflags f callConv actuals ret_lbl ret_off updfr_off extra_stack = do + lastWithArgsAndExtraStack dflags Call (Young ret_lbl) callConv actuals + updfr_off extra_stack $ + toCall f (Just ret_lbl) updfr_off ret_off + +-- Like mkCallReturnsTo, but does not push the return address (it is assumed to be +-- already on the stack). +mkJumpReturnsTo :: DynFlags -> CmmExpr -> Convention -> [CmmExpr] + -> BlockId + -> ByteOff + -> UpdFrameOffset + -> CmmAGraph +mkJumpReturnsTo dflags f callConv actuals ret_lbl ret_off updfr_off = do + lastWithArgs dflags JumpRet (Young ret_lbl) callConv actuals updfr_off $ + toCall f (Just ret_lbl) updfr_off ret_off + +mkUnsafeCall :: ForeignTarget -> [CmmFormal] -> [CmmActual] -> CmmAGraph +mkUnsafeCall t fs as = mkMiddle $ CmmUnsafeForeignCall t fs as + +-- | Construct a 'CmmUnwind' node for the given register and unwinding +-- expression. +mkUnwind :: GlobalReg -> CmmExpr -> CmmAGraph +mkUnwind r e = mkMiddle $ CmmUnwind [(r, Just e)] + +-------------------------------------------------------------------------- + + + + +-- Why are we inserting extra blocks that simply branch to the successors? +-- Because in addition to the branch instruction, @mkBranch@ will insert +-- a necessary adjustment to the stack pointer. + + +-- For debugging purposes, we can stub out dead stack slots: +stackStubExpr :: Width -> CmmExpr +stackStubExpr w = CmmLit (CmmInt 0 w) + +-- When we copy in parameters, we usually want to put overflow +-- parameters on the stack, but sometimes we want to pass the +-- variables in their spill slots. Therefore, for copying arguments +-- and results, we provide different functions to pass the arguments +-- in an overflow area and to pass them in spill slots. 
+copyInOflow :: DynFlags -> Convention -> Area + -> [CmmFormal] + -> [CmmFormal] + -> (Int, [GlobalReg], CmmAGraph) + +copyInOflow dflags conv area formals extra_stk + = (offset, gregs, catAGraphs $ map mkMiddle nodes) + where (offset, gregs, nodes) = copyIn dflags conv area formals extra_stk + +-- Return the number of bytes used for copying arguments, as well as the +-- instructions to copy the arguments. +copyIn :: DynFlags -> Convention -> Area + -> [CmmFormal] + -> [CmmFormal] + -> (ByteOff, [GlobalReg], [CmmNode O O]) +copyIn dflags conv area formals extra_stk + = (stk_size, [r | (_, RegisterParam r) <- args], map ci (stk_args ++ args)) + where + -- See Note [Width of parameters] + ci (reg, RegisterParam r@(VanillaReg {})) = + let local = CmmLocal reg + global = CmmReg (CmmGlobal r) + width = cmmRegWidth dflags local + expr + | width == wordWidth dflags = global + | width < wordWidth dflags = + CmmMachOp (MO_XX_Conv (wordWidth dflags) width) [global] + | otherwise = panic "Parameter width greater than word width" + + in CmmAssign local expr + + -- Non VanillaRegs + ci (reg, RegisterParam r) = + CmmAssign (CmmLocal reg) (CmmReg (CmmGlobal r)) + + ci (reg, StackParam off) + | isBitsType $ localRegType reg + , typeWidth (localRegType reg) < wordWidth dflags = + let + stack_slot = (CmmLoad (CmmStackSlot area off) (cmmBits $ wordWidth dflags)) + local = CmmLocal reg + width = cmmRegWidth dflags local + expr = CmmMachOp (MO_XX_Conv (wordWidth dflags) width) [stack_slot] + in CmmAssign local expr + + | otherwise = + CmmAssign (CmmLocal reg) (CmmLoad (CmmStackSlot area off) ty) + where ty = localRegType reg + + init_offset = widthInBytes (wordWidth dflags) -- infotable + + (stk_off, stk_args) = assignStack dflags init_offset localRegType extra_stk + + (stk_size, args) = assignArgumentsPos dflags stk_off conv + localRegType formals + +-- Factoring out the common parts of the copyout functions yielded something +-- more complicated: + +data Transfer = Call | JumpRet | Jump | Ret deriving Eq + +copyOutOflow :: DynFlags -> Convention -> Transfer -> Area -> [CmmExpr] + -> UpdFrameOffset + -> [CmmExpr] -- extra stack args + -> (Int, [GlobalReg], CmmAGraph) + +-- Generate code to move the actual parameters into the locations +-- required by the calling convention. This includes a store for the +-- return address. +-- +-- The argument layout function ignores the pointer to the info table, +-- so we slot that in here. When copying-out to a young area, we set +-- the info table for return and adjust the offsets of the other +-- parameters. If this is a call instruction, we adjust the offsets +-- of the other parameters. 
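+--
+-- As an illustrative sketch of the 'Call' case below: for a call
+-- returning to @Young id@, the return address is slotted in as
+-- @(CmmLit (CmmBlock id), StackParam init_offset)@ and init_offset is
+-- one word, so the remaining arguments are assigned stack offsets
+-- past it.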
+copyOutOflow dflags conv transfer area actuals updfr_off extra_stack_stuff + = (stk_size, regs, graph) + where + (regs, graph) = foldr co ([], mkNop) (setRA ++ args ++ stack_params) + + -- See Note [Width of parameters] + co (v, RegisterParam r@(VanillaReg {})) (rs, ms) = + let width = cmmExprWidth dflags v + value + | width == wordWidth dflags = v + | width < wordWidth dflags = + CmmMachOp (MO_XX_Conv width (wordWidth dflags)) [v] + | otherwise = panic "Parameter width greater than word width" + + in (r:rs, mkAssign (CmmGlobal r) value <*> ms) + + -- Non VanillaRegs + co (v, RegisterParam r) (rs, ms) = + (r:rs, mkAssign (CmmGlobal r) v <*> ms) + + -- See Note [Width of parameters] + co (v, StackParam off) (rs, ms) + = (rs, mkStore (CmmStackSlot area off) (value v) <*> ms) + + width v = cmmExprWidth dflags v + value v + | isBitsType $ cmmExprType dflags v + , width v < wordWidth dflags = + CmmMachOp (MO_XX_Conv (width v) (wordWidth dflags)) [v] + | otherwise = v + + (setRA, init_offset) = + case area of + Young id -> -- Generate a store instruction for + -- the return address if making a call + case transfer of + Call -> + ([(CmmLit (CmmBlock id), StackParam init_offset)], + widthInBytes (wordWidth dflags)) + JumpRet -> + ([], + widthInBytes (wordWidth dflags)) + _other -> + ([], 0) + Old -> ([], updfr_off) + + (extra_stack_off, stack_params) = + assignStack dflags init_offset (cmmExprType dflags) extra_stack_stuff + + args :: [(CmmExpr, ParamLocation)] -- The argument and where to put it + (stk_size, args) = assignArgumentsPos dflags extra_stack_off conv + (cmmExprType dflags) actuals + + +-- Note [Width of parameters] +-- +-- Consider passing a small (< word width) primitive like Int8# to a function. +-- It's actually non-trivial to do this without extending/narrowing: +-- * Global registers are considered to have native word width (i.e., 64-bits on +-- x86-64), so CmmLint would complain if we assigned an 8-bit parameter to a +-- global register. +-- * Same problem exists with LLVM IR. +-- * Lowering gets harder since on x86-32 not every register exposes its lower +-- 8 bits (e.g., for %eax we can use %al, but there isn't a corresponding +-- 8-bit register for %edi). So we would either need to extend/narrow anyway, +-- or complicate the calling convention. +-- * Passing a small integer in a stack slot, which has native word width, +-- requires extending to word width when writing to the stack and narrowing +-- when reading off the stack (see #16258). +-- So instead, we always extend every parameter smaller than native word width +-- in copyOutOflow and then truncate it back to the expected width in copyIn. +-- Note that we do this in cmm using MO_XX_Conv to avoid requiring +-- zero-/sign-extending - it's up to a backend to handle this in a most +-- efficient way (e.g., a simple register move or a smaller size store). +-- This convention (of ignoring the upper bits) is different from some C ABIs, +-- e.g. all PowerPC ELF ABIs, that require sign or zero extending parameters. 
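+--
+-- A small worked example (illustrative only, assuming a 64-bit word):
+-- to pass an Int8# argument in a global register, copyOutOflow widens
+-- it with MO_XX_Conv W8 W64 before assigning to the register, and
+-- copyIn narrows it back with MO_XX_Conv W64 W8 on the callee side;
+-- the upper bits carry no meaning.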
+-- +-- There was some discussion about this on this PR: +-- https://github.com/ghc-proposals/ghc-proposals/pull/74 + + +mkCallEntry :: DynFlags -> Convention -> [CmmFormal] -> [CmmFormal] + -> (Int, [GlobalReg], CmmAGraph) +mkCallEntry dflags conv formals extra_stk + = copyInOflow dflags conv Old formals extra_stk + +lastWithArgs :: DynFlags -> Transfer -> Area -> Convention -> [CmmExpr] + -> UpdFrameOffset + -> (ByteOff -> [GlobalReg] -> CmmAGraph) + -> CmmAGraph +lastWithArgs dflags transfer area conv actuals updfr_off last = + lastWithArgsAndExtraStack dflags transfer area conv actuals + updfr_off noExtraStack last + +lastWithArgsAndExtraStack :: DynFlags + -> Transfer -> Area -> Convention -> [CmmExpr] + -> UpdFrameOffset -> [CmmExpr] + -> (ByteOff -> [GlobalReg] -> CmmAGraph) + -> CmmAGraph +lastWithArgsAndExtraStack dflags transfer area conv actuals updfr_off + extra_stack last = + copies <*> last outArgs regs + where + (outArgs, regs, copies) = copyOutOflow dflags conv transfer area actuals + updfr_off extra_stack + + +noExtraStack :: [CmmExpr] +noExtraStack = [] + +toCall :: CmmExpr -> Maybe BlockId -> UpdFrameOffset -> ByteOff + -> ByteOff -> [GlobalReg] + -> CmmAGraph +toCall e cont updfr_off res_space arg_space regs = + mkLast $ CmmCall e cont regs arg_space res_space updfr_off diff --git a/compiler/GHC/Cmm/Info.hs b/compiler/GHC/Cmm/Info.hs new file mode 100644 index 0000000000..a10db2b292 --- /dev/null +++ b/compiler/GHC/Cmm/Info.hs @@ -0,0 +1,593 @@ +{-# LANGUAGE CPP #-} +module GHC.Cmm.Info ( + mkEmptyContInfoTable, + cmmToRawCmm, + mkInfoTable, + srtEscape, + + -- info table accessors + closureInfoPtr, + entryCode, + getConstrTag, + cmmGetClosureType, + infoTable, + infoTableConstrTag, + infoTableSrtBitmap, + infoTableClosureType, + infoTablePtrs, + infoTableNonPtrs, + funInfoTable, + funInfoArity, + + -- info table sizes and offsets + stdInfoTableSizeW, + fixedInfoTableSizeW, + profInfoTableSizeW, + maxStdInfoTableSizeW, + maxRetInfoTableSizeW, + stdInfoTableSizeB, + conInfoTableSizeB, + stdSrtBitmapOffset, + stdClosureTypeOffset, + stdPtrsOffset, stdNonPtrsOffset, +) where + +#include "HsVersions.h" + +import GhcPrelude + +import GHC.Cmm +import GHC.Cmm.Utils +import GHC.Cmm.CLabel +import GHC.Runtime.Layout +import GHC.Data.Bitmap +import Stream (Stream) +import qualified Stream +import GHC.Cmm.Dataflow.Collections + +import GHC.Platform +import Maybes +import DynFlags +import ErrUtils (withTimingSilent) +import Panic +import UniqSupply +import MonadUtils +import Util +import Outputable + +import Data.ByteString (ByteString) +import Data.Bits + +-- When we split at proc points, we need an empty info table. +mkEmptyContInfoTable :: CLabel -> CmmInfoTable +mkEmptyContInfoTable info_lbl + = CmmInfoTable { cit_lbl = info_lbl + , cit_rep = mkStackRep [] + , cit_prof = NoProfilingInfo + , cit_srt = Nothing + , cit_clo = Nothing } + +cmmToRawCmm :: DynFlags -> Stream IO CmmGroup a + -> IO (Stream IO RawCmmGroup a) +cmmToRawCmm dflags cmms + = do { uniqs <- mkSplitUniqSupply 'i' + ; let do_one :: UniqSupply -> [CmmDecl] -> IO (UniqSupply, [RawCmmDecl]) + do_one uniqs cmm = + -- NB. strictness fixes a space leak. DO NOT REMOVE. 
+             withTimingSilent dflags (text "Cmm -> Raw Cmm")
+               forceRes $
+               case initUs uniqs $ concatMapM (mkInfoTable dflags) cmm of
+                 (b,uniqs') -> return (uniqs',b)
+       ; return (snd <$> Stream.mapAccumL_ do_one uniqs cmms)
+       }
+
+    where forceRes (uniqs, rawcmms) =
+            uniqs `seq` foldr (\decl r -> decl `seq` r) () rawcmms
+
+-- Make a concrete info table, represented as a list of CmmStatic
+-- (it can't be simply a list of Word, because the SRT field is
+-- represented by a label+offset expression).
+--
+-- With tablesNextToCode, the layout is
+--
+--      <reversed variable part>
+--      <normal forward StgInfoTable, but without
+--       an entry point at the front>
+--      <code>
+--
+-- Without tablesNextToCode, the layout of an info table is
+--
+--      <entry label> - the addr of the entry code
+--      <normal forward rest of StgInfoTable>
+--      <forward variable part>
+--
+--      <code>
+--
+-- See includes/rts/storage/InfoTables.h
+--
+-- For return-points these are as follows
+--
+-- Tables next to code:
+--
+--                      <srt slot>
+--                      <standard info table>
+--      ret-addr -->    <entry code (if any)>
+--
+-- Not tables-next-to-code:
+--
+--      ret-addr -->    <ptr to entry code>
+--                      <standard info table>
+--                      <srt slot>
+--
+--  * The SRT slot is only there if there is SRT info to record
+
+mkInfoTable :: DynFlags -> CmmDecl -> UniqSM [RawCmmDecl]
+mkInfoTable _ (CmmData sec dat)
+  = return [CmmData sec dat]
+
+mkInfoTable dflags proc@(CmmProc infos entry_lbl live blocks)
+  --
+  -- in the non-tables-next-to-code case, procs can have at most a
+  -- single info table associated with the entry label of the proc.
+  --
+  | not (tablesNextToCode dflags)
+  = case topInfoTable proc of   -- must be at most one
+      -- no info table
+      Nothing ->
+        return [CmmProc mapEmpty entry_lbl live blocks]
+
+      Just info@CmmInfoTable { cit_lbl = info_lbl } -> do
+        (top_decls, (std_info, extra_bits)) <-
+            mkInfoTableContents dflags info Nothing
+        let
+          rel_std_info   = map (makeRelativeRefTo dflags info_lbl) std_info
+          rel_extra_bits = map (makeRelativeRefTo dflags info_lbl) extra_bits
+        --
+        -- Separately emit info table (with the function entry
+        -- point as first entry) and the entry code
+        --
+        return (top_decls ++
+                [CmmProc mapEmpty entry_lbl live blocks,
+                 mkRODataLits info_lbl
+                   (CmmLabel entry_lbl : rel_std_info ++ rel_extra_bits)])
+
+  --
+  -- With tables-next-to-code, we can have many info tables,
+  -- associated with some of the BlockIds of the proc. For each info
+  -- table we need to turn it into CmmStatics, and collect any new
+  -- CmmDecls that arise from doing so.
+  --
+  | otherwise
+  = do
+    (top_declss, raw_infos) <-
+       unzip `fmap` mapM do_one_info (mapToList (info_tbls infos))
+    return (concat top_declss ++
+            [CmmProc (mapFromList raw_infos) entry_lbl live blocks])
+
+  where
+   do_one_info (lbl,itbl) = do
+     (top_decls, (std_info, extra_bits)) <-
+         mkInfoTableContents dflags itbl Nothing
+     let
+        info_lbl = cit_lbl itbl
+        rel_std_info   = map (makeRelativeRefTo dflags info_lbl) std_info
+        rel_extra_bits = map (makeRelativeRefTo dflags info_lbl) extra_bits
+     --
+     return (top_decls, (lbl, Statics info_lbl $ map CmmStaticLit $
+                              reverse rel_extra_bits ++ rel_std_info))
+
+-----------------------------------------------------
+type InfoTableContents = ( [CmmLit]          -- The standard part
+                         , [CmmLit] )        -- The "extra bits"
+-- These Lits have *not* had mkRelativeTo applied to them
+
+mkInfoTableContents :: DynFlags
+                    -> CmmInfoTable
+                    -> Maybe Int               -- Override default RTS type tag?
+ -> UniqSM ([RawCmmDecl], -- Auxiliary top decls + InfoTableContents) -- Info tbl + extra bits + +mkInfoTableContents dflags + info@(CmmInfoTable { cit_lbl = info_lbl + , cit_rep = smrep + , cit_prof = prof + , cit_srt = srt }) + mb_rts_tag + | RTSRep rts_tag rep <- smrep + = mkInfoTableContents dflags info{cit_rep = rep} (Just rts_tag) + -- Completely override the rts_tag that mkInfoTableContents would + -- otherwise compute, with the rts_tag stored in the RTSRep + -- (which in turn came from a handwritten .cmm file) + + | StackRep frame <- smrep + = do { (prof_lits, prof_data) <- mkProfLits dflags prof + ; let (srt_label, srt_bitmap) = mkSRTLit dflags info_lbl srt + ; (liveness_lit, liveness_data) <- mkLivenessBits dflags frame + ; let + std_info = mkStdInfoTable dflags prof_lits rts_tag srt_bitmap liveness_lit + rts_tag | Just tag <- mb_rts_tag = tag + | null liveness_data = rET_SMALL -- Fits in extra_bits + | otherwise = rET_BIG -- Does not; extra_bits is + -- a label + ; return (prof_data ++ liveness_data, (std_info, srt_label)) } + + | HeapRep _ ptrs nonptrs closure_type <- smrep + = do { let layout = packIntsCLit dflags ptrs nonptrs + ; (prof_lits, prof_data) <- mkProfLits dflags prof + ; let (srt_label, srt_bitmap) = mkSRTLit dflags info_lbl srt + ; (mb_srt_field, mb_layout, extra_bits, ct_data) + <- mk_pieces closure_type srt_label + ; let std_info = mkStdInfoTable dflags prof_lits + (mb_rts_tag `orElse` rtsClosureType smrep) + (mb_srt_field `orElse` srt_bitmap) + (mb_layout `orElse` layout) + ; return (prof_data ++ ct_data, (std_info, extra_bits)) } + where + mk_pieces :: ClosureTypeInfo -> [CmmLit] + -> UniqSM ( Maybe CmmLit -- Override the SRT field with this + , Maybe CmmLit -- Override the layout field with this + , [CmmLit] -- "Extra bits" for info table + , [RawCmmDecl]) -- Auxiliary data decls + mk_pieces (Constr con_tag con_descr) _no_srt -- A data constructor + = do { (descr_lit, decl) <- newStringLit con_descr + ; return ( Just (CmmInt (fromIntegral con_tag) + (halfWordWidth dflags)) + , Nothing, [descr_lit], [decl]) } + + mk_pieces Thunk srt_label + = return (Nothing, Nothing, srt_label, []) + + mk_pieces (ThunkSelector offset) _no_srt + = return (Just (CmmInt 0 (halfWordWidth dflags)), + Just (mkWordCLit dflags (fromIntegral offset)), [], []) + -- Layout known (one free var); we use the layout field for offset + + mk_pieces (Fun arity (ArgSpec fun_type)) srt_label + = do { let extra_bits = packIntsCLit dflags fun_type arity : srt_label + ; return (Nothing, Nothing, extra_bits, []) } + + mk_pieces (Fun arity (ArgGen arg_bits)) srt_label + = do { (liveness_lit, liveness_data) <- mkLivenessBits dflags arg_bits + ; let fun_type | null liveness_data = aRG_GEN + | otherwise = aRG_GEN_BIG + extra_bits = [ packIntsCLit dflags fun_type arity ] + ++ (if inlineSRT dflags then [] else [ srt_lit ]) + ++ [ liveness_lit, slow_entry ] + ; return (Nothing, Nothing, extra_bits, liveness_data) } + where + slow_entry = CmmLabel (toSlowEntryLbl info_lbl) + srt_lit = case srt_label of + [] -> mkIntCLit dflags 0 + (lit:_rest) -> ASSERT( null _rest ) lit + + mk_pieces other _ = pprPanic "mk_pieces" (ppr other) + +mkInfoTableContents _ _ _ = panic "mkInfoTableContents" -- NonInfoTable dealt with earlier + +packIntsCLit :: DynFlags -> Int -> Int -> CmmLit +packIntsCLit dflags a b = packHalfWordsCLit dflags + (toStgHalfWord dflags (fromIntegral a)) + (toStgHalfWord dflags (fromIntegral b)) + + +mkSRTLit :: DynFlags + -> CLabel + -> Maybe CLabel + -> ([CmmLit], -- srt_label, if any + CmmLit) -- 
srt_bitmap +mkSRTLit dflags info_lbl (Just lbl) + | inlineSRT dflags + = ([], CmmLabelDiffOff lbl info_lbl 0 (halfWordWidth dflags)) +mkSRTLit dflags _ Nothing = ([], CmmInt 0 (halfWordWidth dflags)) +mkSRTLit dflags _ (Just lbl) = ([CmmLabel lbl], CmmInt 1 (halfWordWidth dflags)) + + +-- | Is the SRT offset field inline in the info table on this platform? +-- +-- See the section "Referring to an SRT from the info table" in +-- Note [SRTs] in GHC.Cmm.Info.Build +inlineSRT :: DynFlags -> Bool +inlineSRT dflags = platformArch (targetPlatform dflags) == ArchX86_64 + && tablesNextToCode dflags + +------------------------------------------------------------------------- +-- +-- Lay out the info table and handle relative offsets +-- +------------------------------------------------------------------------- + +-- This function takes +-- * the standard info table portion (StgInfoTable) +-- * the "extra bits" (StgFunInfoExtraRev etc.) +-- * the entry label +-- * the code +-- and lays them out in memory, producing a list of RawCmmDecl + +------------------------------------------------------------------------- +-- +-- Position independent code +-- +------------------------------------------------------------------------- +-- In order to support position independent code, we mustn't put absolute +-- references into read-only space. Info tables in the tablesNextToCode +-- case must be in .text, which is read-only, so we doctor the CmmLits +-- to use relative offsets instead. + +-- Note that this is done even when the -fPIC flag is not specified, +-- as we want to keep binary compatibility between PIC and non-PIC. + +makeRelativeRefTo :: DynFlags -> CLabel -> CmmLit -> CmmLit + +makeRelativeRefTo dflags info_lbl (CmmLabel lbl) + | tablesNextToCode dflags + = CmmLabelDiffOff lbl info_lbl 0 (wordWidth dflags) +makeRelativeRefTo dflags info_lbl (CmmLabelOff lbl off) + | tablesNextToCode dflags + = CmmLabelDiffOff lbl info_lbl off (wordWidth dflags) +makeRelativeRefTo _ _ lit = lit + + +------------------------------------------------------------------------- +-- +-- Build a liveness mask for the stack layout +-- +------------------------------------------------------------------------- + +-- There are four kinds of things on the stack: +-- +-- - pointer variables (bound in the environment) +-- - non-pointer variables (bound in the environment) +-- - free slots (recorded in the stack free list) +-- - non-pointer data slots (recorded in the stack free list) +-- +-- The first two are represented with a 'Just' of a 'LocalReg'. +-- The last two with one or more 'Nothing' constructors. +-- Each 'Nothing' represents one used word. +-- +-- The head of the stack layout is the top of the stack and +-- the least-significant bit. + +mkLivenessBits :: DynFlags -> Liveness -> UniqSM (CmmLit, [RawCmmDecl]) + -- ^ Returns: + -- 1. The bitmap (literal value or label) + -- 2. Large bitmap CmmData if needed + +mkLivenessBits dflags liveness + | n_bits > mAX_SMALL_BITMAP_SIZE dflags -- does not fit in one word + = do { uniq <- getUniqueM + ; let bitmap_lbl = mkBitmapLabel uniq + ; return (CmmLabel bitmap_lbl, + [mkRODataLits bitmap_lbl lits]) } + + | otherwise -- Fits in one word + = return (mkStgWordCLit dflags bitmap_word, []) + where + n_bits = length liveness + + bitmap :: Bitmap + bitmap = mkBitmap dflags liveness + + small_bitmap = case bitmap of + [] -> toStgWord dflags 0 + [b] -> b + _ -> panic "mkLiveness" + bitmap_word = toStgWord dflags (fromIntegral n_bits) + .|. 
(small_bitmap `shiftL` bITMAP_BITS_SHIFT dflags) + + lits = mkWordCLit dflags (fromIntegral n_bits) + : map (mkStgWordCLit dflags) bitmap + -- The first word is the size. The structure must match + -- StgLargeBitmap in includes/rts/storage/InfoTable.h + +------------------------------------------------------------------------- +-- +-- Generating a standard info table +-- +------------------------------------------------------------------------- + +-- The standard bits of an info table. This part of the info table +-- corresponds to the StgInfoTable type defined in +-- includes/rts/storage/InfoTables.h. +-- +-- Its shape varies with ticky/profiling/tables next to code etc +-- so we can't use constant offsets from Constants + +mkStdInfoTable + :: DynFlags + -> (CmmLit,CmmLit) -- Closure type descr and closure descr (profiling) + -> Int -- Closure RTS tag + -> CmmLit -- SRT length + -> CmmLit -- layout field + -> [CmmLit] + +mkStdInfoTable dflags (type_descr, closure_descr) cl_type srt layout_lit + = -- Parallel revertible-black hole field + prof_info + -- Ticky info (none at present) + -- Debug info (none at present) + ++ [layout_lit, tag, srt] + + where + prof_info + | gopt Opt_SccProfilingOn dflags = [type_descr, closure_descr] + | otherwise = [] + + tag = CmmInt (fromIntegral cl_type) (halfWordWidth dflags) + +------------------------------------------------------------------------- +-- +-- Making string literals +-- +------------------------------------------------------------------------- + +mkProfLits :: DynFlags -> ProfilingInfo -> UniqSM ((CmmLit,CmmLit), [RawCmmDecl]) +mkProfLits dflags NoProfilingInfo = return ((zeroCLit dflags, zeroCLit dflags), []) +mkProfLits _ (ProfilingInfo td cd) + = do { (td_lit, td_decl) <- newStringLit td + ; (cd_lit, cd_decl) <- newStringLit cd + ; return ((td_lit,cd_lit), [td_decl,cd_decl]) } + +newStringLit :: ByteString -> UniqSM (CmmLit, GenCmmDecl CmmStatics info stmt) +newStringLit bytes + = do { uniq <- getUniqueM + ; return (mkByteStringCLit (mkStringLitLabel uniq) bytes) } + + +-- Misc utils + +-- | Value of the srt field of an info table when using an StgLargeSRT +srtEscape :: DynFlags -> StgHalfWord +srtEscape dflags = toStgHalfWord dflags (-1) + +------------------------------------------------------------------------- +-- +-- Accessing fields of an info table +-- +------------------------------------------------------------------------- + +-- | Wrap a 'CmmExpr' in an alignment check when @-falignment-sanitisation@ is +-- enabled. +wordAligned :: DynFlags -> CmmExpr -> CmmExpr +wordAligned dflags e + | gopt Opt_AlignmentSanitisation dflags + = CmmMachOp (MO_AlignmentCheck (wORD_SIZE dflags) (wordWidth dflags)) [e] + | otherwise + = e + +closureInfoPtr :: DynFlags -> CmmExpr -> CmmExpr +-- Takes a closure pointer and returns the info table pointer +closureInfoPtr dflags e = + CmmLoad (wordAligned dflags e) (bWord dflags) + +entryCode :: DynFlags -> CmmExpr -> CmmExpr +-- Takes an info pointer (the first word of a closure) +-- and returns its entry code +entryCode dflags e + | tablesNextToCode dflags = e + | otherwise = CmmLoad e (bWord dflags) + +getConstrTag :: DynFlags -> CmmExpr -> CmmExpr +-- Takes a closure pointer, and return the *zero-indexed* +-- constructor tag obtained from the info table +-- This lives in the SRT field of the info table +-- (constructors don't need SRTs). 
+getConstrTag dflags closure_ptr + = CmmMachOp (MO_UU_Conv (halfWordWidth dflags) (wordWidth dflags)) [infoTableConstrTag dflags info_table] + where + info_table = infoTable dflags (closureInfoPtr dflags closure_ptr) + +cmmGetClosureType :: DynFlags -> CmmExpr -> CmmExpr +-- Takes a closure pointer, and return the closure type +-- obtained from the info table +cmmGetClosureType dflags closure_ptr + = CmmMachOp (MO_UU_Conv (halfWordWidth dflags) (wordWidth dflags)) [infoTableClosureType dflags info_table] + where + info_table = infoTable dflags (closureInfoPtr dflags closure_ptr) + +infoTable :: DynFlags -> CmmExpr -> CmmExpr +-- Takes an info pointer (the first word of a closure) +-- and returns a pointer to the first word of the standard-form +-- info table, excluding the entry-code word (if present) +infoTable dflags info_ptr + | tablesNextToCode dflags = cmmOffsetB dflags info_ptr (- stdInfoTableSizeB dflags) + | otherwise = cmmOffsetW dflags info_ptr 1 -- Past the entry code pointer + +infoTableConstrTag :: DynFlags -> CmmExpr -> CmmExpr +-- Takes an info table pointer (from infoTable) and returns the constr tag +-- field of the info table (same as the srt_bitmap field) +infoTableConstrTag = infoTableSrtBitmap + +infoTableSrtBitmap :: DynFlags -> CmmExpr -> CmmExpr +-- Takes an info table pointer (from infoTable) and returns the srt_bitmap +-- field of the info table +infoTableSrtBitmap dflags info_tbl + = CmmLoad (cmmOffsetB dflags info_tbl (stdSrtBitmapOffset dflags)) (bHalfWord dflags) + +infoTableClosureType :: DynFlags -> CmmExpr -> CmmExpr +-- Takes an info table pointer (from infoTable) and returns the closure type +-- field of the info table. +infoTableClosureType dflags info_tbl + = CmmLoad (cmmOffsetB dflags info_tbl (stdClosureTypeOffset dflags)) (bHalfWord dflags) + +infoTablePtrs :: DynFlags -> CmmExpr -> CmmExpr +infoTablePtrs dflags info_tbl + = CmmLoad (cmmOffsetB dflags info_tbl (stdPtrsOffset dflags)) (bHalfWord dflags) + +infoTableNonPtrs :: DynFlags -> CmmExpr -> CmmExpr +infoTableNonPtrs dflags info_tbl + = CmmLoad (cmmOffsetB dflags info_tbl (stdNonPtrsOffset dflags)) (bHalfWord dflags) + +funInfoTable :: DynFlags -> CmmExpr -> CmmExpr +-- Takes the info pointer of a function, +-- and returns a pointer to the first word of the StgFunInfoExtra struct +-- in the info table. 
+funInfoTable dflags info_ptr + | tablesNextToCode dflags + = cmmOffsetB dflags info_ptr (- stdInfoTableSizeB dflags - sIZEOF_StgFunInfoExtraRev dflags) + | otherwise + = cmmOffsetW dflags info_ptr (1 + stdInfoTableSizeW dflags) + -- Past the entry code pointer + +-- Takes the info pointer of a function, returns the function's arity +funInfoArity :: DynFlags -> CmmExpr -> CmmExpr +funInfoArity dflags iptr + = cmmToWord dflags (cmmLoadIndex dflags rep fun_info (offset `div` rep_bytes)) + where + fun_info = funInfoTable dflags iptr + rep = cmmBits (widthFromBytes rep_bytes) + + (rep_bytes, offset) + | tablesNextToCode dflags = ( pc_REP_StgFunInfoExtraRev_arity pc + , oFFSET_StgFunInfoExtraRev_arity dflags ) + | otherwise = ( pc_REP_StgFunInfoExtraFwd_arity pc + , oFFSET_StgFunInfoExtraFwd_arity dflags ) + + pc = platformConstants dflags + +----------------------------------------------------------------------------- +-- +-- Info table sizes & offsets +-- +----------------------------------------------------------------------------- + +stdInfoTableSizeW :: DynFlags -> WordOff +-- The size of a standard info table varies with profiling/ticky etc, +-- so we can't get it from Constants +-- It must vary in sync with mkStdInfoTable +stdInfoTableSizeW dflags + = fixedInfoTableSizeW + + if gopt Opt_SccProfilingOn dflags + then profInfoTableSizeW + else 0 + +fixedInfoTableSizeW :: WordOff +fixedInfoTableSizeW = 2 -- layout, type + +profInfoTableSizeW :: WordOff +profInfoTableSizeW = 2 + +maxStdInfoTableSizeW :: WordOff +maxStdInfoTableSizeW = + 1 {- entry, when !tablesNextToCode -} + + fixedInfoTableSizeW + + profInfoTableSizeW + +maxRetInfoTableSizeW :: WordOff +maxRetInfoTableSizeW = + maxStdInfoTableSizeW + + 1 {- srt label -} + +stdInfoTableSizeB :: DynFlags -> ByteOff +stdInfoTableSizeB dflags = stdInfoTableSizeW dflags * wORD_SIZE dflags + +stdSrtBitmapOffset :: DynFlags -> ByteOff +-- Byte offset of the SRT bitmap half-word which is +-- in the *higher-addressed* part of the type_lit +stdSrtBitmapOffset dflags = stdInfoTableSizeB dflags - halfWordSize dflags + +stdClosureTypeOffset :: DynFlags -> ByteOff +-- Byte offset of the closure type half-word +stdClosureTypeOffset dflags = stdInfoTableSizeB dflags - wORD_SIZE dflags + +stdPtrsOffset, stdNonPtrsOffset :: DynFlags -> ByteOff +stdPtrsOffset dflags = stdInfoTableSizeB dflags - 2 * wORD_SIZE dflags +stdNonPtrsOffset dflags = stdInfoTableSizeB dflags - 2 * wORD_SIZE dflags + halfWordSize dflags + +conInfoTableSizeB :: DynFlags -> Int +conInfoTableSizeB dflags = stdInfoTableSizeB dflags + wORD_SIZE dflags diff --git a/compiler/GHC/Cmm/Info/Build.hs b/compiler/GHC/Cmm/Info/Build.hs new file mode 100644 index 0000000000..1ba79befcd --- /dev/null +++ b/compiler/GHC/Cmm/Info/Build.hs @@ -0,0 +1,892 @@ +{-# LANGUAGE GADTs, BangPatterns, RecordWildCards, + GeneralizedNewtypeDeriving, NondecreasingIndentation, TupleSections #-} + +module GHC.Cmm.Info.Build + ( CAFSet, CAFEnv, cafAnal + , doSRTs, ModuleSRTInfo, emptySRT + ) where + +import GhcPrelude hiding (succ) + +import Id +import GHC.Cmm.BlockId +import GHC.Cmm.Dataflow.Block +import GHC.Cmm.Dataflow.Graph +import GHC.Cmm.Dataflow.Label +import GHC.Cmm.Dataflow.Collections +import GHC.Cmm.Dataflow +import Module +import GHC.Platform +import Digraph +import GHC.Cmm.CLabel +import GHC.Cmm +import GHC.Cmm.Utils +import DynFlags +import Maybes +import Outputable +import GHC.Runtime.Layout +import UniqSupply +import CostCentre +import GHC.StgToCmm.Heap + +import Control.Monad +import Data.Map (Map) 
+import qualified Data.Map as Map +import Data.Set (Set) +import qualified Data.Set as Set +import Data.Tuple +import Control.Monad.Trans.State +import Control.Monad.Trans.Class + + +{- Note [SRTs] + +SRTs are the mechanism by which the garbage collector can determine +the live CAFs in the program. + +Representation +^^^^^^^^^^^^^^ + ++------+ +| info | +| | +-----+---+---+---+ +| -------->|SRT_2| | | | | 0 | +|------| +-----+-|-+-|-+---+ +| | | | +| code | | | +| | v v + +An SRT is simply an object in the program's data segment. It has the +same representation as a static constructor. There are 16 +pre-compiled SRT info tables: stg_SRT_1_info, .. stg_SRT_16_info, +representing SRT objects with 1-16 pointers, respectively. + +The entries of an SRT object point to static closures, which are either +- FUN_STATIC, THUNK_STATIC or CONSTR +- Another SRT (actually just a CONSTR) + +The final field of the SRT is the static link field, used by the +garbage collector to chain together static closures that it visits and +to determine whether a static closure has been visited or not. (see +Note [STATIC_LINK fields]) + +By traversing the transitive closure of an SRT, the GC will reach all +of the CAFs that are reachable from the code associated with this SRT. + +If we need to create an SRT with more than 16 entries, we build a +chain of SRT objects with all but the last having 16 entries. + ++-----+---+- -+---+---+ +|SRT16| | | | | | 0 | ++-----+-|-+- -+-|-+---+ + | | + v v + +----+---+---+---+ + |SRT2| | | | | 0 | + +----+-|-+-|-+---+ + | | + | | + v v + +Referring to an SRT from the info table +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The following things have SRTs: + +- Static functions (FUN) +- Static thunks (THUNK), ie. CAFs +- Continuations (RET_SMALL, etc.) + +In each case, the info table points to the SRT. + +- info->srt is zero if there's no SRT, otherwise: +- info->srt == 1 and info->f.srt_offset points to the SRT + +e.g. for a FUN with an SRT: + +StgFunInfoTable +------+ + info->f.srt_offset | ------------> offset to SRT object +StgStdInfoTable +------+ + info->layout.ptrs | ... | + info->layout.nptrs | ... | + info->srt | 1 | + info->type | ... | + |------| + +On x86_64, we optimise the info table representation further. The +offset to the SRT can be stored in 32 bits (all code lives within a +2GB region in x86_64's small memory model), so we can save a word in +the info table by storing the srt_offset in the srt field, which is +half a word. + +On x86_64 with TABLES_NEXT_TO_CODE (except on MachO, due to #15169): + +- info->srt is zero if there's no SRT, otherwise: +- info->srt is an offset from the info pointer to the SRT object + +StgStdInfoTable +------+ + info->layout.ptrs | | + info->layout.nptrs | | + info->srt | ------------> offset to SRT object + |------| + + +EXAMPLE +^^^^^^^ + +f = \x. ... g ... + where + g = \y. ... h ... c1 ... + h = \z. ... c2 ... + +c1 & c2 are CAFs + +g and h are local functions, but they have no static closures. When +we generate code for f, we start with a CmmGroup of four CmmDecls: + + [ f_closure, f_entry, g_entry, h_entry ] + +we process each CmmDecl separately in cpsTop, giving us a list of +CmmDecls. e.g. for f_entry, we might end up with + + [ f_entry, f1_ret, f2_proc ] + +where f1_ret is a return point, and f2_proc is a proc-point. 
We have +a CAFSet for each of these CmmDecls, let's suppose they are + + [ f_entry{g_info}, f1_ret{g_info}, f2_proc{} ] + [ g_entry{h_info, c1_closure} ] + [ h_entry{c2_closure} ] + +Next, we make an SRT for each of these functions: + + f_srt : [g_info] + g_srt : [h_info, c1_closure] + h_srt : [c2_closure] + +Now, for g_info and h_info, we want to refer to the SRTs for g and h +respectively, which we'll label g_srt and h_srt: + + f_srt : [g_srt] + g_srt : [h_srt, c1_closure] + h_srt : [c2_closure] + +Now, when an SRT has a single entry, we don't actually generate an SRT +closure for it, instead we just replace references to it with its +single element. So, since h_srt == c2_closure, we have + + f_srt : [g_srt] + g_srt : [c2_closure, c1_closure] + h_srt : [c2_closure] + +and the only SRT closure we generate is + + g_srt = SRT_2 [c2_closure, c1_closure] + + +Optimisations +^^^^^^^^^^^^^ + +To reduce the code size overhead and the cost of traversing SRTs in +the GC, we want to simplify SRTs where possible. We therefore apply +the following optimisations. Each has a [keyword]; search for the +keyword in the code below to see where the optimisation is +implemented. + +1. [Inline] we never create an SRT with a single entry, instead we + point to the single entry directly from the info table. + + i.e. instead of + + +------+ + | info | + | | +-----+---+---+ + | -------->|SRT_1| | | 0 | + |------| +-----+-|-+---+ + | | | + | code | | + | | v + C + + we can point directly to the closure: + + +------+ + | info | + | | + | -------->C + |------| + | | + | code | + | | + + + Furthermore, the SRT for any code that refers to this info table + can point directly to C. + + The exception to this is when we're doing dynamic linking. In that + case, if the closure is not locally defined then we can't point to + it directly from the info table, because this is the text section + which cannot contain runtime relocations. In this case we skip this + optimisation and generate the singleton SRT, because SRTs are in the + data section and *can* have relocatable references. + +2. [FUN] A static function closure can also be an SRT, we simply put + the SRT entries as fields in the static closure. This makes a lot + of sense: the static references are just like the free variables of + the FUN closure. + + i.e. instead of + + f_closure: + +-----+---+ + | | | 0 | + +- |--+---+ + | +------+ + | | info | f_srt: + | | | +-----+---+---+---+ + | | -------->|SRT_2| | | | + 0 | + `----------->|------| +-----+-|-+-|-+---+ + | | | | + | code | | | + | | v v + + + We can generate: + + f_closure: + +-----+---+---+---+ + | | | | | | | 0 | + +- |--+-|-+-|-+---+ + | | | +------+ + | v v | info | + | | | + | | 0 | + `----------->|------| + | | + | code | + | | + + + (note: we can't do this for THUNKs, because the thunk gets + overwritten when it is entered, so we wouldn't be able to share + this SRT with other info tables that want to refer to it (see + [Common] below). FUNs are immutable so don't have this problem.) + +3. [Common] Identical SRTs can be commoned up. + +4. [Filter] If an SRT A refers to an SRT B and a closure C, and B also + refers to C (perhaps transitively), then we can omit the reference + to C from A. + + +Note that there are many other optimisations that we could do, but +aren't implemented. In general, we could omit any reference from an +SRT if everything reachable from it is also reachable from the other +fields in the SRT. Our [Filter] optimisation is a special case of +this. 
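+
+For example (a made-up illustration, in the same notation as above):
+
+A = {B,X}
+B = {X,Y}
+
+[Filter] drops X from A and emits A = {B}: X is kept alive through B's
+SRT anyway, so listing it in A as well only costs space and GC
+traversal time.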
+ +Another opportunity we don't exploit is this: + +A = {X,Y,Z} +B = {Y,Z} +C = {X,B} + +Here we could use C = {A} and therefore [Inline] C = A. +-} + +-- --------------------------------------------------------------------- +{- Note [Invalid optimisation: shortcutting] + +You might think that if we have something like + +A's SRT = {B} +B's SRT = {X} + +that we could replace the reference to B in A's SRT with X. + +A's SRT = {X} +B's SRT = {X} + +and thereby perhaps save a little work at runtime, because we don't +have to visit B. + +But this is NOT valid. + +Consider these cases: + +0. B can't be a constructor, because constructors don't have SRTs + +1. B is a CAF. This is the easy one. Obviously we want A's SRT to + point to B, so that it keeps B alive. + +2. B is a function. This is the tricky one. The reason we can't +shortcut in this case is that we aren't allowed to resurrect static +objects. + +== How does this cause a problem? == + +The particular case that cropped up when we tried this was #15544. +- A is a thunk +- B is a static function +- X is a CAF +- suppose we GC when A is alive, and B is not otherwise reachable. +- B is "collected", meaning that it doesn't make it onto the static + objects list during this GC, but nothing bad happens yet. +- Next, suppose we enter A, and then call B. (remember that A refers to B) + At the entry point to B, we GC. This puts B on the stack, as part of the + RET_FUN stack frame that gets pushed when we GC at a function entry point. +- This GC will now reach B +- But because B was previous "collected", it breaks the assumption + that static objects are never resurrected. See Note [STATIC_LINK + fields] in rts/sm/Storage.h for why this is bad. +- In practice, the GC thinks that B has already been visited, and so + doesn't visit X, and catastrophe ensues. + +== Isn't this caused by the RET_FUN business? == + +Maybe, but could you prove that RET_FUN is the only way that +resurrection can occur? + +So, no shortcutting. +-} + +-- --------------------------------------------------------------------- +-- Label types + +-- Labels that come from cafAnal can be: +-- - _closure labels for static functions or CAFs +-- - _info labels for dynamic functions, thunks, or continuations +-- - _entry labels for functions or thunks +-- +-- Meanwhile the labels on top-level blocks are _entry labels. +-- +-- To put everything in the same namespace we convert all labels to +-- closure labels using toClosureLbl. Note that some of these +-- labels will not actually exist; that's ok because we're going to +-- map them to SRTEntry later, which ranges over labels that do exist. +-- +newtype CAFLabel = CAFLabel CLabel + deriving (Eq,Ord,Outputable) + +type CAFSet = Set CAFLabel +type CAFEnv = LabelMap CAFSet + +mkCAFLabel :: CLabel -> CAFLabel +mkCAFLabel lbl = CAFLabel (toClosureLbl lbl) + +-- This is a label that we can put in an SRT. It *must* be a closure label, +-- pointing to either a FUN_STATIC, THUNK_STATIC, or CONSTR. +newtype SRTEntry = SRTEntry CLabel + deriving (Eq, Ord, Outputable) + +-- --------------------------------------------------------------------- +-- CAF analysis + +-- | +-- For each code block: +-- - collect the references reachable from this code block to FUN, +-- THUNK or RET labels for which hasCAF == True +-- +-- This gives us a `CAFEnv`: a mapping from code block to sets of labels +-- +cafAnal + :: LabelSet -- The blocks representing continuations, ie. those + -- that will get RET info tables. 
These labels will + -- get their own SRTs, so we don't aggregate CAFs from + -- references to these labels, we just use the label. + -> CLabel -- The top label of the proc + -> CmmGraph + -> CAFEnv +cafAnal contLbls topLbl cmmGraph = + analyzeCmmBwd cafLattice + (cafTransfers contLbls (g_entry cmmGraph) topLbl) cmmGraph mapEmpty + + +cafLattice :: DataflowLattice CAFSet +cafLattice = DataflowLattice Set.empty add + where + add (OldFact old) (NewFact new) = + let !new' = old `Set.union` new + in changedIf (Set.size new' > Set.size old) new' + + +cafTransfers :: LabelSet -> Label -> CLabel -> TransferFun CAFSet +cafTransfers contLbls entry topLbl + (BlockCC eNode middle xNode) fBase = + let joined = cafsInNode xNode $! live' + !result = foldNodesBwdOO cafsInNode middle joined + + facts = mapMaybe successorFact (successors xNode) + live' = joinFacts cafLattice facts + + successorFact s + -- If this is a loop back to the entry, we can refer to the + -- entry label. + | s == entry = Just (add topLbl Set.empty) + -- If this is a continuation, we want to refer to the + -- SRT for the continuation's info table + | s `setMember` contLbls + = Just (Set.singleton (mkCAFLabel (infoTblLbl s))) + -- Otherwise, takes the CAF references from the destination + | otherwise + = lookupFact s fBase + + cafsInNode :: CmmNode e x -> CAFSet -> CAFSet + cafsInNode node set = foldExpDeep addCaf node set + + addCaf expr !set = + case expr of + CmmLit (CmmLabel c) -> add c set + CmmLit (CmmLabelOff c _) -> add c set + CmmLit (CmmLabelDiffOff c1 c2 _ _) -> add c1 $! add c2 set + _ -> set + add l s | hasCAF l = Set.insert (mkCAFLabel l) s + | otherwise = s + + in mapSingleton (entryLabel eNode) result + + +-- ----------------------------------------------------------------------------- +-- ModuleSRTInfo + +data ModuleSRTInfo = ModuleSRTInfo + { thisModule :: Module + -- ^ Current module being compiled. Required for calling labelDynamic. + , dedupSRTs :: Map (Set SRTEntry) SRTEntry + -- ^ previous SRTs we've emitted, so we can de-duplicate. + -- Used to implement the [Common] optimisation. + , flatSRTs :: Map SRTEntry (Set SRTEntry) + -- ^ The reverse mapping, so that we can remove redundant + -- entries. e.g. if we have an SRT [a,b,c], and we know that b + -- points to [c,d], we can omit c and emit [a,b]. + -- Used to implement the [Filter] optimisation. + } +instance Outputable ModuleSRTInfo where + ppr ModuleSRTInfo{..} = + text "ModuleSRTInfo:" <+> ppr dedupSRTs <+> ppr flatSRTs + +emptySRT :: Module -> ModuleSRTInfo +emptySRT mod = + ModuleSRTInfo + { thisModule = mod + , dedupSRTs = Map.empty + , flatSRTs = Map.empty } + +-- ----------------------------------------------------------------------------- +-- Constructing SRTs + +{- Implementation notes + +- In each CmmDecl there is a mapping info_tbls from Label -> CmmInfoTable + +- The entry in info_tbls corresponding to g_entry is the closure info + table, the rest are continuations. + +- Each entry in info_tbls possibly needs an SRT. We need to make a + label for each of these. 
+ +- We get the CAFSet for each entry from the CAFEnv + +-} + +-- | Return a (Label,CLabel) pair for each labelled block of a CmmDecl, +-- where the label is +-- - the info label for a continuation or dynamic closure +-- - the closure label for a top-level function (not a CAF) +getLabelledBlocks :: CmmDecl -> [(Label, CAFLabel)] +getLabelledBlocks (CmmData _ _) = [] +getLabelledBlocks (CmmProc top_info _ _ _) = + [ (blockId, mkCAFLabel (cit_lbl info)) + | (blockId, info) <- mapToList (info_tbls top_info) + , let rep = cit_rep info + , not (isStaticRep rep) || not (isThunkRep rep) + ] + + +-- | Put the labelled blocks that we will be annotating with SRTs into +-- dependency order. This is so that we can process them one at a +-- time, resolving references to earlier blocks to point to their +-- SRTs. CAFs themselves are not included here; see getCAFs below. +depAnalSRTs + :: CAFEnv + -> [CmmDecl] + -> [SCC (Label, CAFLabel, Set CAFLabel)] +depAnalSRTs cafEnv decls = + srtTrace "depAnalSRTs" (ppr graph) graph + where + labelledBlocks = concatMap getLabelledBlocks decls + labelToBlock = Map.fromList (map swap labelledBlocks) + graph = stronglyConnCompFromEdgedVerticesOrd + [ let cafs' = Set.delete lbl cafs in + DigraphNode (l,lbl,cafs') l + (mapMaybe (flip Map.lookup labelToBlock) (Set.toList cafs')) + | (l, lbl) <- labelledBlocks + , Just cafs <- [mapLookup l cafEnv] ] + + +-- | Get (Label, CAFLabel, Set CAFLabel) for each block that represents a CAF. +-- These are treated differently from other labelled blocks: +-- - we never shortcut a reference to a CAF to the contents of its +-- SRT, since the point of SRTs is to keep CAFs alive. +-- - CAFs therefore don't take part in the dependency analysis in depAnalSRTs. +-- instead we generate their SRTs after everything else. +getCAFs :: CAFEnv -> [CmmDecl] -> [(Label, CAFLabel, Set CAFLabel)] +getCAFs cafEnv decls = + [ (g_entry g, mkCAFLabel topLbl, cafs) + | CmmProc top_info topLbl _ g <- decls + , Just info <- [mapLookup (g_entry g) (info_tbls top_info)] + , let rep = cit_rep info + , isStaticRep rep && isThunkRep rep + , Just cafs <- [mapLookup (g_entry g) cafEnv] + ] + + +-- | Get the list of blocks that correspond to the entry points for +-- FUN_STATIC closures. These are the blocks for which if we have an +-- SRT we can merge it with the static closure. [FUN] +getStaticFuns :: [CmmDecl] -> [(BlockId, CLabel)] +getStaticFuns decls = + [ (g_entry g, lbl) + | CmmProc top_info _ _ g <- decls + , Just info <- [mapLookup (g_entry g) (info_tbls top_info)] + , Just (id, _) <- [cit_clo info] + , let rep = cit_rep info + , isStaticRep rep && isFunRep rep + , let lbl = mkLocalClosureLabel (idName id) (idCafInfo id) + ] + + +-- | Maps labels from 'cafAnal' to the final CLabel that will appear +-- in the SRT. +-- - closures with singleton SRTs resolve to their single entry +-- - closures with larger SRTs map to the label for that SRT +-- - CAFs must not map to anything! +-- - if a labels maps to Nothing, we found that this label's SRT +-- is empty, so we don't need to refer to it from other SRTs. +type SRTMap = Map CAFLabel (Maybe SRTEntry) + +-- | resolve a CAFLabel to its SRTEntry using the SRTMap +resolveCAF :: SRTMap -> CAFLabel -> Maybe SRTEntry +resolveCAF srtMap lbl@(CAFLabel l) = + Map.findWithDefault (Just (SRTEntry (toClosureLbl l))) lbl srtMap + + +-- | Attach SRTs to all info tables in the CmmDecls, and add SRT +-- declarations to the ModuleSRTInfo. 
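+--
+-- Callers are expected to thread the 'ModuleSRTInfo' through the
+-- successive Cmm groups of a module, along these lines (an
+-- illustrative sketch; 'step' is not a real API):
+--
+-- > step :: DynFlags -> ModuleSRTInfo -> (CAFEnv, [CmmDecl])
+-- >      -> IO (ModuleSRTInfo, [CmmDecl])
+-- > step dflags srtInfo grp = doSRTs dflags srtInfo [grp]
+--
+-- starting from 'emptySRT' for the module.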
+-- +doSRTs + :: DynFlags + -> ModuleSRTInfo + -> [(CAFEnv, [CmmDecl])] + -> IO (ModuleSRTInfo, [CmmDecl]) + +doSRTs dflags moduleSRTInfo tops = do + us <- mkSplitUniqSupply 'u' + + -- Ignore the original grouping of decls, and combine all the + -- CAFEnvs into a single CAFEnv. + let (cafEnvs, declss) = unzip tops + cafEnv = mapUnions cafEnvs + decls = concat declss + staticFuns = mapFromList (getStaticFuns decls) + + -- Put the decls in dependency order. Why? So that we can implement + -- [Inline] and [Filter]. If we need to refer to an SRT that has + -- a single entry, we use the entry itself, which means that we + -- don't need to generate the singleton SRT in the first place. But + -- to do this we need to process blocks before things that depend on + -- them. + let + sccs = depAnalSRTs cafEnv decls + cafsWithSRTs = getCAFs cafEnv decls + + -- On each strongly-connected group of decls, construct the SRT + -- closures and the SRT fields for info tables. + let result :: + [ ( [CmmDecl] -- generated SRTs + , [(Label, CLabel)] -- SRT fields for info tables + , [(Label, [SRTEntry])] -- SRTs to attach to static functions + ) ] + ((result, _srtMap), moduleSRTInfo') = + initUs_ us $ + flip runStateT moduleSRTInfo $ + flip runStateT Map.empty $ do + nonCAFs <- mapM (doSCC dflags staticFuns) sccs + cAFs <- forM cafsWithSRTs $ \(l, cafLbl, cafs) -> + oneSRT dflags staticFuns [l] [cafLbl] True{-is a CAF-} cafs + return (nonCAFs ++ cAFs) + + (declss, pairs, funSRTs) = unzip3 result + + -- Next, update the info tables with the SRTs + let + srtFieldMap = mapFromList (concat pairs) + funSRTMap = mapFromList (concat funSRTs) + decls' = concatMap (updInfoSRTs dflags srtFieldMap funSRTMap) decls + + return (moduleSRTInfo', concat declss ++ decls') + + +-- | Build the SRT for a strongly-connected component of blocks +doSCC + :: DynFlags + -> LabelMap CLabel -- which blocks are static function entry points + -> SCC (Label, CAFLabel, Set CAFLabel) + -> StateT SRTMap + (StateT ModuleSRTInfo UniqSM) + ( [CmmDecl] -- generated SRTs + , [(Label, CLabel)] -- SRT fields for info tables + , [(Label, [SRTEntry])] -- SRTs to attach to static functions + ) + +doSCC dflags staticFuns (AcyclicSCC (l, cafLbl, cafs)) = + oneSRT dflags staticFuns [l] [cafLbl] False cafs + +doSCC dflags staticFuns (CyclicSCC nodes) = do + -- build a single SRT for the whole cycle, see Note [recursive SRTs] + let (blockids, lbls, cafsets) = unzip3 nodes + cafs = Set.unions cafsets + oneSRT dflags staticFuns blockids lbls False cafs + + +{- Note [recursive SRTs] + +If the dependency analyser has found us a recursive group of +declarations, then we build a single SRT for the whole group, on the +grounds that everything in the group is reachable from everything +else, so we lose nothing by having a single SRT. + +However, there are a couple of wrinkles to be aware of. + +* The Set CAFLabel for this SRT will contain labels in the group +itself. The SRTMap will therefore not contain entries for these labels +yet, so we can't turn them into SRTEntries using resolveCAF. BUT we +can just remove recursive references from the Set CAFLabel before +generating the SRT - the SRT will still contain all the CAFLabels that +we need to refer to from this group's SRT. + +* That is, EXCEPT for static function closures. For the same reason +described in Note [Invalid optimisation: shortcutting], we cannot omit +references to static function closures. 
+ - But, since we will merge the SRT with one of the static function + closures (see [FUN]), we can omit references to *that* static + function closure from the SRT. +-} + +-- | Build an SRT for a set of blocks +oneSRT + :: DynFlags + -> LabelMap CLabel -- which blocks are static function entry points + -> [Label] -- blocks in this set + -> [CAFLabel] -- labels for those blocks + -> Bool -- True <=> this SRT is for a CAF + -> Set CAFLabel -- SRT for this set + -> StateT SRTMap + (StateT ModuleSRTInfo UniqSM) + ( [CmmDecl] -- SRT objects we built + , [(Label, CLabel)] -- SRT fields for these blocks' itbls + , [(Label, [SRTEntry])] -- SRTs to attach to static functions + ) + +oneSRT dflags staticFuns blockids lbls isCAF cafs = do + srtMap <- get + topSRT <- lift get + let + -- Can we merge this SRT with a FUN_STATIC closure? + (maybeFunClosure, otherFunLabels) = + case [ (l,b) | b <- blockids, Just l <- [mapLookup b staticFuns] ] of + [] -> (Nothing, []) + ((l,b):xs) -> (Just (l,b), map (mkCAFLabel . fst) xs) + + -- Remove recursive references from the SRT, except for (all but + -- one of the) static functions. See Note [recursive SRTs]. + nonRec = cafs `Set.difference` + (Set.fromList lbls `Set.difference` Set.fromList otherFunLabels) + + -- First resolve all the CAFLabels to SRTEntries + -- Implements the [Inline] optimisation. + resolved = mapMaybe (resolveCAF srtMap) (Set.toList nonRec) + + -- The set of all SRTEntries in SRTs that we refer to from here. + allBelow = + Set.unions [ lbls | caf <- resolved + , Just lbls <- [Map.lookup caf (flatSRTs topSRT)] ] + + -- Remove SRTEntries that are also in an SRT that we refer to. + -- Implements the [Filter] optimisation. + filtered = Set.difference (Set.fromList resolved) allBelow + + srtTrace "oneSRT:" + (ppr cafs <+> ppr resolved <+> ppr allBelow <+> ppr filtered) $ return () + + let + isStaticFun = isJust maybeFunClosure + + -- For a label without a closure (e.g. a continuation), we must + -- update the SRTMap for the label to point to a closure. It's + -- important that we don't do this for static functions or CAFs, + -- see Note [Invalid optimisation: shortcutting]. + updateSRTMap srtEntry = + when (not isCAF && (not isStaticFun || isNothing srtEntry)) $ do + let newSRTMap = Map.fromList [(cafLbl, srtEntry) | cafLbl <- lbls] + put (Map.union newSRTMap srtMap) + + this_mod = thisModule topSRT + + case Set.toList filtered of + [] -> do + srtTrace "oneSRT: empty" (ppr lbls) $ return () + updateSRTMap Nothing + return ([], [], []) + + -- [Inline] - when we have only one entry there is no need to + -- build an SRT object at all, instead we put the singleton SRT + -- entry in the info table. + [one@(SRTEntry lbl)] + | -- Info tables refer to SRTs by offset (as noted in the section + -- "Referring to an SRT from the info table" of Note [SRTs]). However, + -- when dynamic linking is used we cannot guarantee that the offset + -- between the SRT and the info table will fit in the offset field. + -- Consequently we build a singleton SRT in in this case. + not (labelDynamic dflags this_mod lbl) + + -- MachO relocations can't express offsets between compilation units at + -- all, so we are always forced to build a singleton SRT in this case. + && (not (osMachOTarget $ platformOS $ targetPlatform dflags) + || isLocalCLabel this_mod lbl) -> do + + -- If we have a static function closure, then it becomes the + -- SRT object, and everything else points to it. 
(the only way + -- we could have multiple labels here is if this is a + -- recursive group, see Note [recursive SRTs]) + case maybeFunClosure of + Just (staticFunLbl,staticFunBlock) -> return ([], withLabels, []) + where + withLabels = + [ (b, if b == staticFunBlock then lbl else staticFunLbl) + | b <- blockids ] + Nothing -> do + updateSRTMap (Just one) + return ([], map (,lbl) blockids, []) + + cafList -> + -- Check whether an SRT with the same entries has been emitted already. + -- Implements the [Common] optimisation. + case Map.lookup filtered (dedupSRTs topSRT) of + Just srtEntry@(SRTEntry srtLbl) -> do + srtTrace "oneSRT [Common]" (ppr lbls <+> ppr srtLbl) $ return () + updateSRTMap (Just srtEntry) + return ([], map (,srtLbl) blockids, []) + Nothing -> do + -- No duplicates: we have to build a new SRT object + srtTrace "oneSRT: new" (ppr lbls <+> ppr filtered) $ return () + (decls, funSRTs, srtEntry) <- + case maybeFunClosure of + Just (fun,block) -> + return ( [], [(block, cafList)], SRTEntry fun ) + Nothing -> do + (decls, entry) <- lift . lift $ buildSRTChain dflags cafList + return (decls, [], entry) + updateSRTMap (Just srtEntry) + let allBelowThis = Set.union allBelow filtered + oldFlatSRTs = flatSRTs topSRT + newFlatSRTs = Map.insert srtEntry allBelowThis oldFlatSRTs + newDedupSRTs = Map.insert filtered srtEntry (dedupSRTs topSRT) + lift (put (topSRT { dedupSRTs = newDedupSRTs + , flatSRTs = newFlatSRTs })) + let SRTEntry lbl = srtEntry + return (decls, map (,lbl) blockids, funSRTs) + + +-- | build a static SRT object (or a chain of objects) from a list of +-- SRTEntries. +buildSRTChain + :: DynFlags + -> [SRTEntry] + -> UniqSM + ( [CmmDecl] -- The SRT object(s) + , SRTEntry -- label to use in the info table + ) +buildSRTChain _ [] = panic "buildSRT: empty" +buildSRTChain dflags cafSet = + case splitAt mAX_SRT_SIZE cafSet of + (these, []) -> do + (decl,lbl) <- buildSRT dflags these + return ([decl], lbl) + (these,those) -> do + (rest, rest_lbl) <- buildSRTChain dflags (head these : those) + (decl,lbl) <- buildSRT dflags (rest_lbl : tail these) + return (decl:rest, lbl) + where + mAX_SRT_SIZE = 16 + + +buildSRT :: DynFlags -> [SRTEntry] -> UniqSM (CmmDecl, SRTEntry) +buildSRT dflags refs = do + id <- getUniqueM + let + lbl = mkSRTLabel id + srt_n_info = mkSRTInfoLabel (length refs) + fields = + mkStaticClosure dflags srt_n_info dontCareCCS + [ CmmLabel lbl | SRTEntry lbl <- refs ] + [] -- no padding + [mkIntCLit dflags 0] -- link field + [] -- no saved info + return (mkDataLits (Section Data lbl) lbl fields, SRTEntry lbl) + + +-- | Update info tables with references to their SRTs. Also generate +-- static closures, splicing in SRT fields as necessary. +updInfoSRTs + :: DynFlags + -> LabelMap CLabel -- SRT labels for each block + -> LabelMap [SRTEntry] -- SRTs to merge into FUN_STATIC closures + -> CmmDecl + -> [CmmDecl] + +updInfoSRTs dflags srt_env funSRTEnv (CmmProc top_info top_l live g) + | Just (_,closure) <- maybeStaticClosure = [ proc, closure ] + | otherwise = [ proc ] + where + proc = CmmProc top_info { info_tbls = newTopInfo } top_l live g + newTopInfo = mapMapWithKey updInfoTbl (info_tbls top_info) + updInfoTbl l info_tbl + | l == g_entry g, Just (inf, _) <- maybeStaticClosure = inf + | otherwise = info_tbl { cit_srt = mapLookup l srt_env } + + -- Generate static closures [FUN]. Note that this also generates + -- static closures for thunks (CAFs), because it's easier to treat + -- them uniformly in the code generator. 
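+    -- A sketch of the effect (made-up names): a FUN_STATIC f whose SRT
+    -- would otherwise be {c1,c2} instead carries those entries as
+    -- extra pointer fields of its own static closure,
+    --
+    --     f_closure: [ f_info, c1, c2, ...static link... ]
+    --
+    -- and cit_rep is widened by that many pointers so that the GC
+    -- scavenges them.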
+ maybeStaticClosure :: Maybe (CmmInfoTable, CmmDecl) + maybeStaticClosure + | Just info_tbl@CmmInfoTable{..} <- + mapLookup (g_entry g) (info_tbls top_info) + , Just (id, ccs) <- cit_clo + , isStaticRep cit_rep = + let + (newInfo, srtEntries) = case mapLookup (g_entry g) funSRTEnv of + Nothing -> + -- if we don't add SRT entries to this closure, then we + -- want to set the srt field in its info table as usual + (info_tbl { cit_srt = mapLookup (g_entry g) srt_env }, []) + Just srtEntries -> srtTrace "maybeStaticFun" (ppr res) + (info_tbl { cit_rep = new_rep }, res) + where res = [ CmmLabel lbl | SRTEntry lbl <- srtEntries ] + fields = mkStaticClosureFields dflags info_tbl ccs (idCafInfo id) + srtEntries + new_rep = case cit_rep of + HeapRep sta ptrs nptrs ty -> + HeapRep sta (ptrs + length srtEntries) nptrs ty + _other -> panic "maybeStaticFun" + lbl = mkLocalClosureLabel (idName id) (idCafInfo id) + in + Just (newInfo, mkDataLits (Section Data lbl) lbl fields) + | otherwise = Nothing + +updInfoSRTs _ _ _ t = [t] + + +srtTrace :: String -> SDoc -> b -> b +-- srtTrace = pprTrace +srtTrace _ _ b = b diff --git a/compiler/GHC/Cmm/LayoutStack.hs b/compiler/GHC/Cmm/LayoutStack.hs new file mode 100644 index 0000000000..f6dda7728c --- /dev/null +++ b/compiler/GHC/Cmm/LayoutStack.hs @@ -0,0 +1,1236 @@ +{-# LANGUAGE BangPatterns, RecordWildCards, GADTs #-} +module GHC.Cmm.LayoutStack ( + cmmLayoutStack, setInfoTableStackMap + ) where + +import GhcPrelude hiding ((<*>)) + +import GHC.StgToCmm.Utils ( callerSaveVolatileRegs, newTemp ) -- XXX layering violation +import GHC.StgToCmm.Foreign ( saveThreadState, loadThreadState ) -- XXX layering violation + +import BasicTypes +import GHC.Cmm +import GHC.Cmm.Info +import GHC.Cmm.BlockId +import GHC.Cmm.CLabel +import GHC.Cmm.Utils +import GHC.Cmm.Graph +import ForeignCall +import GHC.Cmm.Liveness +import GHC.Cmm.ProcPoint +import GHC.Runtime.Layout +import GHC.Cmm.Dataflow.Block +import GHC.Cmm.Dataflow.Collections +import GHC.Cmm.Dataflow +import GHC.Cmm.Dataflow.Graph +import GHC.Cmm.Dataflow.Label +import UniqSupply +import Maybes +import UniqFM +import Util + +import DynFlags +import FastString +import Outputable hiding ( isEmpty ) +import qualified Data.Set as Set +import Control.Monad.Fix +import Data.Array as Array +import Data.Bits +import Data.List (nub) + +{- Note [Stack Layout] + +The job of this pass is to + + - replace references to abstract stack Areas with fixed offsets from Sp. + + - replace the CmmHighStackMark constant used in the stack check with + the maximum stack usage of the proc. + + - save any variables that are live across a call, and reload them as + necessary. + +Before stack allocation, local variables remain live across native +calls (CmmCall{ cmm_cont = Just _ }), and after stack allocation local +variables are clobbered by native calls. + +We want to do stack allocation so that as far as possible + - stack use is minimized, and + - unnecessary stack saves and loads are avoided. + +The algorithm we use is a variant of linear-scan register allocation, +where the stack is our register file. + +We proceed in two passes, see Note [Two pass approach] for why they are not easy +to merge into one. + +Pass 1: + + - First, we do a liveness analysis, which annotates every block with + the variables live on entry to the block. + + - We traverse blocks in reverse postorder DFS; that is, we visit at + least one predecessor of a block before the block itself. 
The + stack layout flowing from the predecessor of the block will + determine the stack layout on entry to the block. + + - We maintain a data structure + + Map Label StackMap + + which describes the contents of the stack and the stack pointer on + entry to each block that is a successor of a block that we have + visited. + + - For each block we visit: + + - Look up the StackMap for this block. + + - If this block is a proc point (or a call continuation, if we aren't + splitting proc points), we need to reload all the live variables from the + stack - but this is done in Pass 2, which calculates more precise liveness + information (see description of Pass 2). + + - Walk forwards through the instructions: + - At an assignment x = Sp[loc] + - Record the fact that Sp[loc] contains x, so that we won't + need to save x if it ever needs to be spilled. + - At an assignment x = E + - If x was previously on the stack, it isn't any more + - At the last node, if it is a call or a jump to a proc point + - Lay out the stack frame for the call (see setupStackFrame) + - emit instructions to save all the live variables + - Remember the StackMaps for all the successors + - emit an instruction to adjust Sp + - If the last node is a branch, then the current StackMap is the + StackMap for the successors. + + - Manifest Sp: replace references to stack areas in this block + with real Sp offsets. We cannot do this until we have laid out + the stack area for the successors above. + + In this phase we also eliminate redundant stores to the stack; + see elimStackStores. + + - There is one important gotcha: sometimes we'll encounter a control + transfer to a block that we've already processed (a join point), + and in that case we might need to rearrange the stack to match + what the block is expecting. (exactly the same as in linear-scan + register allocation, except here we have the luxury of an infinite + supply of temporary variables). + + - Finally, we update the magic CmmHighStackMark constant with the + stack usage of the function, and eliminate the whole stack check + if there was no stack use. (in fact this is done as part of the + main traversal, by feeding the high-water-mark output back in as + an input. I hate cyclic programming, but it's just too convenient + sometimes.) + + There are plenty of tricky details: update frames, proc points, return + addresses, foreign calls, and some ad-hoc optimisations that are + convenient to do here and effective in common cases. Comments in the + code below explain these. + +Pass 2: + +- Calculate live registers, but taking into account that nothing is live at the + entry to a proc point. + +- At each proc point and call continuation insert reloads of live registers from + the stack (they were saved by Pass 1). + + +Note [Two pass approach] + +The main reason for Pass 2 is being able to insert only the reloads that are +needed and the fact that the two passes need different liveness information. +Let's consider an example: + + ..... + \ / + D <- proc point + / \ + E F + \ / + G <- proc point + | + X + +Pass 1 needs liveness assuming that local variables are preserved across calls. +This is important because it needs to save any local registers to the stack +(e.g., if register a is used in block X, it must be saved before any native +call). 
+However, for Pass 2, where we want to reload registers from stack (in a proc +point), this is overly conservative and would lead us to generate reloads in D +for things used in X, even though we're going to generate reloads in G anyway +(since it's also a proc point). +So Pass 2 calculates liveness knowing that nothing is live at the entry to a +proc point. This means that in D we only need to reload things used in E or F. +This can be quite important, for an extreme example see testcase for #3294. + +Merging the two passes is not trivial - Pass 2 is a backward rewrite and Pass 1 +is a forward one. Furthermore, Pass 1 is creating code that uses local registers +(saving them before a call), which the liveness analysis for Pass 2 must see to +be correct. + +-} + + +-- All stack locations are expressed as positive byte offsets from the +-- "base", which is defined to be the address above the return address +-- on the stack on entry to this CmmProc. +-- +-- Lower addresses have higher StackLocs. +-- +type StackLoc = ByteOff + +{- + A StackMap describes the stack at any given point. At a continuation + it has a particular layout, like this: + + | | <- base + |-------------| + | ret0 | <- base + 8 + |-------------| + . upd frame . <- base + sm_ret_off + |-------------| + | | + . vars . + . (live/dead) . + | | <- base + sm_sp - sm_args + |-------------| + | ret1 | + . ret vals . <- base + sm_sp (<--- Sp points here) + |-------------| + +Why do we include the final return address (ret0) in our stack map? I +have absolutely no idea, but it seems to be done that way consistently +in the rest of the code generator, so I played along here. --SDM + +Note that we will be constructing an info table for the continuation +(ret1), which needs to describe the stack down to, but not including, +the update frame (or ret0, if there is no update frame). +-} + +data StackMap = StackMap + { sm_sp :: StackLoc + -- ^ the offset of Sp relative to the base on entry + -- to this block. + , sm_args :: ByteOff + -- ^ the number of bytes of arguments in the area for this block + -- Defn: the offset of young(L) relative to the base is given by + -- (sm_sp - sm_args) of the StackMap for block L. + , sm_ret_off :: ByteOff + -- ^ Number of words of stack that we do not describe with an info + -- table, because it contains an update frame. + , sm_regs :: UniqFM (LocalReg,StackLoc) + -- ^ regs on the stack + } + +instance Outputable StackMap where + ppr StackMap{..} = + text "Sp = " <> int sm_sp $$ + text "sm_args = " <> int sm_args $$ + text "sm_ret_off = " <> int sm_ret_off $$ + text "sm_regs = " <> pprUFM sm_regs ppr + + +cmmLayoutStack :: DynFlags -> ProcPointSet -> ByteOff -> CmmGraph + -> UniqSM (CmmGraph, LabelMap StackMap) +cmmLayoutStack dflags procpoints entry_args + graph@(CmmGraph { g_entry = entry }) + = do + -- We need liveness info. Dead assignments are removed later + -- by the sinking pass. 
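+    -- (The subsequent layout call is knot-tied with mfix: its own
+    -- final stack maps and Sp high-water mark are fed back in as
+    -- arguments, which is how CmmHighStackMark and references to
+    -- successors laid out later in the traversal get resolved in a
+    -- single pass; see Note [Stack Layout].)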
+ let liveness = cmmLocalLiveness dflags graph + blocks = revPostorder graph + + (final_stackmaps, _final_high_sp, new_blocks) <- + mfix $ \ ~(rec_stackmaps, rec_high_sp, _new_blocks) -> + layout dflags procpoints liveness entry entry_args + rec_stackmaps rec_high_sp blocks + + blocks_with_reloads <- + insertReloadsAsNeeded dflags procpoints final_stackmaps entry new_blocks + new_blocks' <- mapM (lowerSafeForeignCall dflags) blocks_with_reloads + return (ofBlockList entry new_blocks', final_stackmaps) + +-- ----------------------------------------------------------------------------- +-- Pass 1 +-- ----------------------------------------------------------------------------- + +layout :: DynFlags + -> LabelSet -- proc points + -> LabelMap CmmLocalLive -- liveness + -> BlockId -- entry + -> ByteOff -- stack args on entry + + -> LabelMap StackMap -- [final] stack maps + -> ByteOff -- [final] Sp high water mark + + -> [CmmBlock] -- [in] blocks + + -> UniqSM + ( LabelMap StackMap -- [out] stack maps + , ByteOff -- [out] Sp high water mark + , [CmmBlock] -- [out] new blocks + ) + +layout dflags procpoints liveness entry entry_args final_stackmaps final_sp_high blocks + = go blocks init_stackmap entry_args [] + where + (updfr, cont_info) = collectContInfo blocks + + init_stackmap = mapSingleton entry StackMap{ sm_sp = entry_args + , sm_args = entry_args + , sm_ret_off = updfr + , sm_regs = emptyUFM + } + + go [] acc_stackmaps acc_hwm acc_blocks + = return (acc_stackmaps, acc_hwm, acc_blocks) + + go (b0 : bs) acc_stackmaps acc_hwm acc_blocks + = do + let (entry0@(CmmEntry entry_lbl tscope), middle0, last0) = blockSplit b0 + + let stack0@StackMap { sm_sp = sp0 } + = mapFindWithDefault + (pprPanic "no stack map for" (ppr entry_lbl)) + entry_lbl acc_stackmaps + + -- (a) Update the stack map to include the effects of + -- assignments in this block + let stack1 = foldBlockNodesF (procMiddle acc_stackmaps) middle0 stack0 + + -- (b) Look at the last node and if we are making a call or + -- jumping to a proc point, we must save the live + -- variables, adjust Sp, and construct the StackMaps for + -- each of the successor blocks. See handleLastNode for + -- details. + (middle1, sp_off, last1, fixup_blocks, out) + <- handleLastNode dflags procpoints liveness cont_info + acc_stackmaps stack1 tscope middle0 last0 + + -- (c) Manifest Sp: run over the nodes in the block and replace + -- CmmStackSlot with CmmLoad from Sp with a concrete offset. + -- + -- our block: + -- middle0 -- the original middle nodes + -- middle1 -- live variable saves from handleLastNode + -- Sp = Sp + sp_off -- Sp adjustment goes here + -- last1 -- the last node + -- + let middle_pre = blockToList $ foldl' blockSnoc middle0 middle1 + + let final_blocks = + manifestSp dflags final_stackmaps stack0 sp0 final_sp_high + entry0 middle_pre sp_off last1 fixup_blocks + + let acc_stackmaps' = mapUnion acc_stackmaps out + + -- If this block jumps to the GC, then we do not take its + -- stack usage into account for the high-water mark. + -- Otherwise, if the only stack usage is in the stack-check + -- failure block itself, we will do a redundant stack + -- check. The stack has a buffer designed to accommodate + -- the largest amount of stack needed for calling the GC. 
+ -- + this_sp_hwm | isGcJump last0 = 0 + | otherwise = sp0 - sp_off + + hwm' = maximum (acc_hwm : this_sp_hwm : map sm_sp (mapElems out)) + + go bs acc_stackmaps' hwm' (final_blocks ++ acc_blocks) + + +-- ----------------------------------------------------------------------------- + +-- Not foolproof, but GCFun is the culprit we most want to catch +isGcJump :: CmmNode O C -> Bool +isGcJump (CmmCall { cml_target = CmmReg (CmmGlobal l) }) + = l == GCFun || l == GCEnter1 +isGcJump _something_else = False + +-- ----------------------------------------------------------------------------- + +-- This doesn't seem right somehow. We need to find out whether this +-- proc will push some update frame material at some point, so that we +-- can avoid using that area of the stack for spilling. The +-- updfr_space field of the CmmProc *should* tell us, but it doesn't +-- (I think maybe it gets filled in later when we do proc-point +-- splitting). +-- +-- So we'll just take the max of all the cml_ret_offs. This could be +-- unnecessarily pessimistic, but probably not in the code we +-- generate. + +collectContInfo :: [CmmBlock] -> (ByteOff, LabelMap ByteOff) +collectContInfo blocks + = (maximum ret_offs, mapFromList (catMaybes mb_argss)) + where + (mb_argss, ret_offs) = mapAndUnzip get_cont blocks + + get_cont :: Block CmmNode x C -> (Maybe (Label, ByteOff), ByteOff) + get_cont b = + case lastNode b of + CmmCall { cml_cont = Just l, .. } + -> (Just (l, cml_ret_args), cml_ret_off) + CmmForeignCall { .. } + -> (Just (succ, ret_args), ret_off) + _other -> (Nothing, 0) + + +-- ----------------------------------------------------------------------------- +-- Updating the StackMap from middle nodes + +-- Look for loads from stack slots, and update the StackMap. This is +-- purely for optimisation reasons, so that we can avoid saving a +-- variable back to a different stack slot if it is already on the +-- stack. +-- +-- This happens a lot: for example when function arguments are passed +-- on the stack and need to be immediately saved across a call, we +-- want to just leave them where they are on the stack. +-- +procMiddle :: LabelMap StackMap -> CmmNode e x -> StackMap -> StackMap +procMiddle stackmaps node sm + = case node of + CmmAssign (CmmLocal r) (CmmLoad (CmmStackSlot area off) _) + -> sm { sm_regs = addToUFM (sm_regs sm) r (r,loc) } + where loc = getStackLoc area off stackmaps + CmmAssign (CmmLocal r) _other + -> sm { sm_regs = delFromUFM (sm_regs sm) r } + _other + -> sm + +getStackLoc :: Area -> ByteOff -> LabelMap StackMap -> StackLoc +getStackLoc Old n _ = n +getStackLoc (Young l) n stackmaps = + case mapLookup l stackmaps of + Nothing -> pprPanic "getStackLoc" (ppr l) + Just sm -> sm_sp sm - sm_args sm + n + + +-- ----------------------------------------------------------------------------- +-- Handling stack allocation for a last node + +-- We take a single last node and turn it into: +-- +-- C1 (some statements) +-- Sp = Sp + N +-- C2 (some more statements) +-- call f() -- the actual last node +-- +-- plus possibly some more blocks (we may have to add some fixup code +-- between the last node and the continuation). +-- +-- C1: is the code for saving the variables across this last node onto +-- the stack, if the continuation is a call or jumps to a proc point. +-- +-- C2: if the last node is a safe foreign call, we have to inject some +-- extra code that goes *after* the Sp adjustment. 
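+--
+-- Schematically, for a call that must save a live variable y
+-- (a made-up example; stack-area references stay abstract here and
+-- only become real Sp offsets later, in manifestSp):
+--
+--     y = R1                        y = R1
+--     Sp[young(L)] = L     ==>      Sp[young(L)] = L
+--     call f returns to L           Sp[old + 16] = y      -- C1
+--                                   Sp = Sp + N
+--                                   call f returns to L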
+ +handleLastNode + :: DynFlags -> ProcPointSet -> LabelMap CmmLocalLive -> LabelMap ByteOff + -> LabelMap StackMap -> StackMap -> CmmTickScope + -> Block CmmNode O O + -> CmmNode O C + -> UniqSM + ( [CmmNode O O] -- nodes to go *before* the Sp adjustment + , ByteOff -- amount to adjust Sp + , CmmNode O C -- new last node + , [CmmBlock] -- new blocks + , LabelMap StackMap -- stackmaps for the continuations + ) + +handleLastNode dflags procpoints liveness cont_info stackmaps + stack0@StackMap { sm_sp = sp0 } tscp middle last + = case last of + -- At each return / tail call, + -- adjust Sp to point to the last argument pushed, which + -- is cml_args, after popping any other junk from the stack. + CmmCall{ cml_cont = Nothing, .. } -> do + let sp_off = sp0 - cml_args + return ([], sp_off, last, [], mapEmpty) + + -- At each CmmCall with a continuation: + CmmCall{ cml_cont = Just cont_lbl, .. } -> + return $ lastCall cont_lbl cml_args cml_ret_args cml_ret_off + + CmmForeignCall{ succ = cont_lbl, .. } -> do + return $ lastCall cont_lbl (wORD_SIZE dflags) ret_args ret_off + -- one word of args: the return address + + CmmBranch {} -> handleBranches + CmmCondBranch {} -> handleBranches + CmmSwitch {} -> handleBranches + + where + -- Calls and ForeignCalls are handled the same way: + lastCall :: BlockId -> ByteOff -> ByteOff -> ByteOff + -> ( [CmmNode O O] + , ByteOff + , CmmNode O C + , [CmmBlock] + , LabelMap StackMap + ) + lastCall lbl cml_args cml_ret_args cml_ret_off + = ( assignments + , spOffsetForCall sp0 cont_stack cml_args + , last + , [] -- no new blocks + , mapSingleton lbl cont_stack ) + where + (assignments, cont_stack) = prepareStack lbl cml_ret_args cml_ret_off + + + prepareStack lbl cml_ret_args cml_ret_off + | Just cont_stack <- mapLookup lbl stackmaps + -- If we have already seen this continuation before, then + -- we just have to make the stack look the same: + = (fixupStack stack0 cont_stack, cont_stack) + -- Otherwise, we have to allocate the stack frame + | otherwise + = (save_assignments, new_cont_stack) + where + (new_cont_stack, save_assignments) + = setupStackFrame dflags lbl liveness cml_ret_off cml_ret_args stack0 + + + -- For other last nodes (branches), if any of the targets is a + -- proc point, we have to set up the stack to match what the proc + -- point is expecting. + -- + handleBranches :: UniqSM ( [CmmNode O O] + , ByteOff + , CmmNode O C + , [CmmBlock] + , LabelMap StackMap ) + + handleBranches + -- Note [diamond proc point] + | Just l <- futureContinuation middle + , (nub $ filter (`setMember` procpoints) $ successors last) == [l] + = do + let cont_args = mapFindWithDefault 0 l cont_info + (assigs, cont_stack) = prepareStack l cont_args (sm_ret_off stack0) + out = mapFromList [ (l', cont_stack) + | l' <- successors last ] + return ( assigs + , spOffsetForCall sp0 cont_stack (wORD_SIZE dflags) + , last + , [] + , out) + + | otherwise = do + pps <- mapM handleBranch (successors last) + let lbl_map :: LabelMap Label + lbl_map = mapFromList [ (l,tmp) | (l,tmp,_,_) <- pps ] + fix_lbl l = mapFindWithDefault l l lbl_map + return ( [] + , 0 + , mapSuccessors fix_lbl last + , concat [ blk | (_,_,_,blk) <- pps ] + , mapFromList [ (l, sm) | (l,_,sm,_) <- pps ] ) + + -- For each successor of this block + handleBranch :: BlockId -> UniqSM (BlockId, BlockId, StackMap, [CmmBlock]) + handleBranch l + -- (a) if the successor already has a stackmap, we need to + -- shuffle the current stack to make it look the same. + -- We have to insert a new block to make this happen. 
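+       --       ("Shuffle" here means: emit the register-to-slot moves
+       --       computed by fixupStack; makeFixupBlock wraps them, plus
+       --       any Sp adjustment, in a fresh block that jumps on to l.)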
+ | Just stack2 <- mapLookup l stackmaps + = do + let assigs = fixupStack stack0 stack2 + (tmp_lbl, block) <- makeFixupBlock dflags sp0 l stack2 tscp assigs + return (l, tmp_lbl, stack2, block) + + -- (b) if the successor is a proc point, save everything + -- on the stack. + | l `setMember` procpoints + = do + let cont_args = mapFindWithDefault 0 l cont_info + (stack2, assigs) = + setupStackFrame dflags l liveness (sm_ret_off stack0) + cont_args stack0 + (tmp_lbl, block) <- makeFixupBlock dflags sp0 l stack2 tscp assigs + return (l, tmp_lbl, stack2, block) + + -- (c) otherwise, the current StackMap is the StackMap for + -- the continuation. But we must remember to remove any + -- variables from the StackMap that are *not* live at + -- the destination, because this StackMap might be used + -- by fixupStack if this is a join point. + | otherwise = return (l, l, stack1, []) + where live = mapFindWithDefault (panic "handleBranch") l liveness + stack1 = stack0 { sm_regs = filterUFM is_live (sm_regs stack0) } + is_live (r,_) = r `elemRegSet` live + + +makeFixupBlock :: DynFlags -> ByteOff -> Label -> StackMap + -> CmmTickScope -> [CmmNode O O] + -> UniqSM (Label, [CmmBlock]) +makeFixupBlock dflags sp0 l stack tscope assigs + | null assigs && sp0 == sm_sp stack = return (l, []) + | otherwise = do + tmp_lbl <- newBlockId + let sp_off = sp0 - sm_sp stack + block = blockJoin (CmmEntry tmp_lbl tscope) + ( maybeAddSpAdj dflags sp0 sp_off + $ blockFromList assigs ) + (CmmBranch l) + return (tmp_lbl, [block]) + + +-- Sp is currently pointing to current_sp, +-- we want it to point to +-- (sm_sp cont_stack - sm_args cont_stack + args) +-- so the difference is +-- sp0 - (sm_sp cont_stack - sm_args cont_stack + args) +spOffsetForCall :: ByteOff -> StackMap -> ByteOff -> ByteOff +spOffsetForCall current_sp cont_stack args + = current_sp - (sm_sp cont_stack - sm_args cont_stack + args) + + +-- | create a sequence of assignments to establish the new StackMap, +-- given the old StackMap. +fixupStack :: StackMap -> StackMap -> [CmmNode O O] +fixupStack old_stack new_stack = concatMap move new_locs + where + old_map = sm_regs old_stack + new_locs = stackSlotRegs new_stack + + move (r,n) + | Just (_,m) <- lookupUFM old_map r, n == m = [] + | otherwise = [CmmStore (CmmStackSlot Old n) + (CmmReg (CmmLocal r))] + + + +setupStackFrame + :: DynFlags + -> BlockId -- label of continuation + -> LabelMap CmmLocalLive -- liveness + -> ByteOff -- updfr + -> ByteOff -- bytes of return values on stack + -> StackMap -- current StackMap + -> (StackMap, [CmmNode O O]) + +setupStackFrame dflags lbl liveness updfr_off ret_args stack0 + = (cont_stack, assignments) + where + -- get the set of LocalRegs live in the continuation + live = mapFindWithDefault Set.empty lbl liveness + + -- the stack from the base to updfr_off is off-limits. + -- our new stack frame contains: + -- * saved live variables + -- * the return address [young(C) + 8] + -- * the args for the call, + -- which are replaced by the return values at the return + -- point. 
+ + -- everything up to updfr_off is off-limits + -- stack1 contains updfr_off, plus everything we need to save + (stack1, assignments) = allocate dflags updfr_off live stack0 + + -- And the Sp at the continuation is: + -- sm_sp stack1 + ret_args + cont_stack = stack1{ sm_sp = sm_sp stack1 + ret_args + , sm_args = ret_args + , sm_ret_off = updfr_off + } + + +-- ----------------------------------------------------------------------------- +-- Note [diamond proc point] +-- +-- This special case looks for the pattern we get from a typical +-- tagged case expression: +-- +-- Sp[young(L1)] = L1 +-- if (R1 & 7) != 0 goto L1 else goto L2 +-- L2: +-- call [R1] returns to L1 +-- L1: live: {y} +-- x = R1 +-- +-- If we let the generic case handle this, we get +-- +-- Sp[-16] = L1 +-- if (R1 & 7) != 0 goto L1a else goto L2 +-- L2: +-- Sp[-8] = y +-- Sp = Sp - 16 +-- call [R1] returns to L1 +-- L1a: +-- Sp[-8] = y +-- Sp = Sp - 16 +-- goto L1 +-- L1: +-- x = R1 +-- +-- The code for saving the live vars is duplicated in each branch, and +-- furthermore there is an extra jump in the fast path (assuming L1 is +-- a proc point, which it probably is if there is a heap check). +-- +-- So to fix this we want to set up the stack frame before the +-- conditional jump. How do we know when to do this, and when it is +-- safe? The basic idea is, when we see the assignment +-- +-- Sp[young(L)] = L +-- +-- we know that +-- * we are definitely heading for L +-- * there can be no more reads from another stack area, because young(L) +-- overlaps with it. +-- +-- We don't necessarily know that everything live at L is live now +-- (some might be assigned between here and the jump to L). So we +-- simplify and only do the optimisation when we see +-- +-- (1) a block containing an assignment of a return address L +-- (2) ending in a branch where one (and only) continuation goes to L, +-- and no other continuations go to proc points. +-- +-- then we allocate the stack frame for L at the end of the block, +-- before the branch. +-- +-- We could generalise (2), but that would make it a bit more +-- complicated to handle, and this currently catches the common case. + +futureContinuation :: Block CmmNode O O -> Maybe BlockId +futureContinuation middle = foldBlockNodesB f middle Nothing + where f :: CmmNode a b -> Maybe BlockId -> Maybe BlockId + f (CmmStore (CmmStackSlot (Young l) _) (CmmLit (CmmBlock _))) _ + = Just l + f _ r = r + +-- ----------------------------------------------------------------------------- +-- Saving live registers + +-- | Given a set of live registers and a StackMap, save all the registers +-- on the stack and return the new StackMap and the assignments to do +-- the saving. +-- +allocate :: DynFlags -> ByteOff -> LocalRegSet -> StackMap + -> (StackMap, [CmmNode O O]) +allocate dflags ret_off live stackmap@StackMap{ sm_sp = sp0 + , sm_regs = regs0 } + = + -- we only have to save regs that are not already in a slot + let to_save = filter (not . (`elemUFM` regs0)) (Set.elems live) + regs1 = filterUFM (\(r,_) -> elemRegSet r live) regs0 + in + + -- make a map of the stack + let stack = reverse $ Array.elems $ + accumArray (\_ x -> x) Empty (1, toWords dflags (max sp0 ret_off)) $ + ret_words ++ live_words + where ret_words = + [ (x, Occupied) + | x <- [ 1 .. toWords dflags ret_off] ] + live_words = + [ (toWords dflags x, Occupied) + | (r,off) <- nonDetEltsUFM regs1, + -- See Note [Unique Determinism and code generation] + let w = localRegBytes dflags r, + x <- [ off, off - wORD_SIZE dflags .. 
off - w + 1] ] + in + + -- Pass over the stack: find slots to save all the new live variables, + -- choosing the oldest slots first (hence a foldr). + let + save slot ([], stack, n, assigs, regs) -- no more regs to save + = ([], slot:stack, plusW dflags n 1, assigs, regs) + save slot (to_save, stack, n, assigs, regs) + = case slot of + Occupied -> (to_save, Occupied:stack, plusW dflags n 1, assigs, regs) + Empty + | Just (stack', r, to_save') <- + select_save to_save (slot:stack) + -> let assig = CmmStore (CmmStackSlot Old n') + (CmmReg (CmmLocal r)) + n' = plusW dflags n 1 + in + (to_save', stack', n', assig : assigs, (r,(r,n')):regs) + + | otherwise + -> (to_save, slot:stack, plusW dflags n 1, assigs, regs) + + -- we should do better here: right now we'll fit the smallest first, + -- but it would make more sense to fit the biggest first. + select_save :: [LocalReg] -> [StackSlot] + -> Maybe ([StackSlot], LocalReg, [LocalReg]) + select_save regs stack = go regs [] + where go [] _no_fit = Nothing + go (r:rs) no_fit + | Just rest <- dropEmpty words stack + = Just (replicate words Occupied ++ rest, r, rs++no_fit) + | otherwise + = go rs (r:no_fit) + where words = localRegWords dflags r + + -- fill in empty slots as much as possible + (still_to_save, save_stack, n, save_assigs, save_regs) + = foldr save (to_save, [], 0, [], []) stack + + -- push any remaining live vars on the stack + (push_sp, push_assigs, push_regs) + = foldr push (n, [], []) still_to_save + where + push r (n, assigs, regs) + = (n', assig : assigs, (r,(r,n')) : regs) + where + n' = n + localRegBytes dflags r + assig = CmmStore (CmmStackSlot Old n') + (CmmReg (CmmLocal r)) + + trim_sp + | not (null push_regs) = push_sp + | otherwise + = plusW dflags n (- length (takeWhile isEmpty save_stack)) + + final_regs = regs1 `addListToUFM` push_regs + `addListToUFM` save_regs + + in + -- XXX should be an assert + if ( n /= max sp0 ret_off ) then pprPanic "allocate" (ppr n <+> ppr sp0 <+> ppr ret_off) else + + if (trim_sp .&. (wORD_SIZE dflags - 1)) /= 0 then pprPanic "allocate2" (ppr trim_sp <+> ppr final_regs <+> ppr push_sp) else + + ( stackmap { sm_regs = final_regs , sm_sp = trim_sp } + , push_assigs ++ save_assigs ) + + +-- ----------------------------------------------------------------------------- +-- Manifesting Sp + +-- | Manifest Sp: turn all the CmmStackSlots into CmmLoads from Sp. The +-- block looks like this: +-- +-- middle_pre -- the middle nodes +-- Sp = Sp + sp_off -- Sp adjustment goes here +-- last -- the last node +-- +-- And we have some extra blocks too (that don't contain Sp adjustments) +-- +-- The adjustment for middle_pre will be different from that for +-- middle_post, because the Sp adjustment intervenes. +-- +manifestSp + :: DynFlags + -> LabelMap StackMap -- StackMaps for other blocks + -> StackMap -- StackMap for this block + -> ByteOff -- Sp on entry to the block + -> ByteOff -- SpHigh + -> CmmNode C O -- first node + -> [CmmNode O O] -- middle + -> ByteOff -- sp_off + -> CmmNode O C -- last node + -> [CmmBlock] -- new blocks + -> [CmmBlock] -- final blocks with Sp manifest + +manifestSp dflags stackmaps stack0 sp0 sp_high + first middle_pre sp_off last fixup_blocks + = final_block : fixup_blocks' + where + area_off = getAreaOff stackmaps + + adj_pre_sp, adj_post_sp :: CmmNode e x -> CmmNode e x + adj_pre_sp = mapExpDeep (areaToSp dflags sp0 sp_high area_off) + adj_post_sp = mapExpDeep (areaToSp dflags (sp0 - sp_off) sp_high area_off) + + final_middle = maybeAddSpAdj dflags sp0 sp_off + . 
blockFromList
+                   . map adj_pre_sp
+                   . elimStackStores stack0 stackmaps area_off
+                   $ middle_pre
+     final_last    = optStackCheck (adj_post_sp last)
+
+     final_block   = blockJoin first final_middle final_last
+
+     fixup_blocks' = map (mapBlock3' (id, adj_post_sp, id)) fixup_blocks
+
+getAreaOff :: LabelMap StackMap -> (Area -> StackLoc)
+getAreaOff _ Old = 0
+getAreaOff stackmaps (Young l) =
+  case mapLookup l stackmaps of
+    Just sm -> sm_sp sm - sm_args sm
+    Nothing -> pprPanic "getAreaOff" (ppr l)
+
+
+maybeAddSpAdj
+  :: DynFlags -> ByteOff -> ByteOff -> Block CmmNode O O -> Block CmmNode O O
+maybeAddSpAdj dflags sp0 sp_off block =
+  add_initial_unwind $ add_adj_unwind $ adj block
+  where
+    adj block
+      | sp_off /= 0
+      = block `blockSnoc` CmmAssign spReg (cmmOffset dflags spExpr sp_off)
+      | otherwise = block
+    -- Add unwind pseudo-instruction at the beginning of each block to
+    -- document Sp level for debugging
+    add_initial_unwind block
+      | debugLevel dflags > 0
+      = CmmUnwind [(Sp, Just sp_unwind)] `blockCons` block
+      | otherwise
+      = block
+      where sp_unwind = CmmRegOff spReg (sp0 - wORD_SIZE dflags)
+
+    -- Add unwind pseudo-instruction right after the Sp adjustment
+    -- if there is one.
+    add_adj_unwind block
+      | debugLevel dflags > 0
+      , sp_off /= 0
+      = block `blockSnoc` CmmUnwind [(Sp, Just sp_unwind)]
+      | otherwise
+      = block
+      where sp_unwind = CmmRegOff spReg (sp0 - wORD_SIZE dflags - sp_off)
+
+{- Note [SP old/young offsets]
+
+Sp(L) is the Sp offset on entry to block L relative to the base of the
+OLD area.
+
+SpArgs(L) is the size of the young area for L, i.e. the number of
+arguments.
+
+ - in block L, each reference to [old + N] turns into
+   [Sp + Sp(L) - N]
+
+ - in block L, each reference to [young(L') + N] turns into
+   [Sp + Sp(L) - Sp(L') + SpArgs(L') - N]
+
+ - be careful with the last node of each block: Sp has already been adjusted
+   to be Sp + Sp(L) - Sp(L')
+-}
+
+areaToSp :: DynFlags -> ByteOff -> ByteOff -> (Area -> StackLoc) -> CmmExpr -> CmmExpr
+
+areaToSp dflags sp_old _sp_hwm area_off (CmmStackSlot area n)
+  = cmmOffset dflags spExpr (sp_old - area_off area - n)
+    -- Replace (CmmStackSlot area n) with an offset from Sp
+
+areaToSp dflags _ sp_hwm _ (CmmLit CmmHighStackMark)
+  = mkIntExpr dflags sp_hwm
+    -- Replace CmmHighStackMark with the number of bytes of stack used,
+    -- the sp_hwm.  See Note [Stack usage] in GHC.StgToCmm.Heap
+
+areaToSp dflags _ _ _ (CmmMachOp (MO_U_Lt _) args)
+  | falseStackCheck args
+  = zeroExpr dflags
+areaToSp dflags _ _ _ (CmmMachOp (MO_U_Ge _) args)
+  | falseStackCheck args
+  = mkIntExpr dflags 1
+    -- Replace a stack-overflow test that cannot fail with a no-op
+    -- See Note [Always false stack check]
+
+areaToSp _ _ _ _ other = other
+
+-- | Determine whether a stack check cannot fail.
+falseStackCheck :: [CmmExpr] -> Bool
+falseStackCheck [ CmmMachOp (MO_Sub _)
+                      [ CmmRegOff (CmmGlobal Sp) x_off
+                      , CmmLit (CmmInt y_lit _)]
+                , CmmReg (CmmGlobal SpLim)]
+  = fromIntegral x_off >= y_lit
+falseStackCheck _ = False
+
+-- Note [Always false stack check]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-- We can optimise stack checks of the form
+--
+--   if ((Sp + x) - y < SpLim) then .. else ..
+--
+-- where x and y are non-negative integer byte offsets.  Since we know that
+-- SpLim <= Sp (remember the stack grows downwards), this test must
+-- yield False if (x >= y), so we can rewrite the comparison to False.
+-- A subsequent sinking pass will drop the dead code.
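+--
+-- As a concrete, illustrative instance (the offsets are made up): with
+-- x = 16 and y = 8 the test is ((Sp + 16) - 8 < SpLim), i.e.
+-- (Sp + 8 < SpLim), which contradicts SpLim <= Sp and so can never be True.
+--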
+-- Optimising this away depends on knowing that SpLim <= Sp, so it is +-- really the job of the stack layout algorithm, hence we do it now. +-- +-- The control flow optimiser may negate a conditional to increase +-- the likelihood of a fallthrough if the branch is not taken. But +-- not every conditional is inverted as the control flow optimiser +-- places some requirements on the predecessors of both branch targets. +-- So we better look for the inverted comparison too. + +optStackCheck :: CmmNode O C -> CmmNode O C +optStackCheck n = -- Note [Always false stack check] + case n of + CmmCondBranch (CmmLit (CmmInt 0 _)) _true false _ -> CmmBranch false + CmmCondBranch (CmmLit (CmmInt _ _)) true _false _ -> CmmBranch true + other -> other + + +-- ----------------------------------------------------------------------------- + +-- | Eliminate stores of the form +-- +-- Sp[area+n] = r +-- +-- when we know that r is already in the same slot as Sp[area+n]. We +-- could do this in a later optimisation pass, but that would involve +-- a separate analysis and we already have the information to hand +-- here. It helps clean up some extra stack stores in common cases. +-- +-- Note that we may have to modify the StackMap as we walk through the +-- code using procMiddle, since an assignment to a variable in the +-- StackMap will invalidate its mapping there. +-- +elimStackStores :: StackMap + -> LabelMap StackMap + -> (Area -> ByteOff) + -> [CmmNode O O] + -> [CmmNode O O] +elimStackStores stackmap stackmaps area_off nodes + = go stackmap nodes + where + go _stackmap [] = [] + go stackmap (n:ns) + = case n of + CmmStore (CmmStackSlot area m) (CmmReg (CmmLocal r)) + | Just (_,off) <- lookupUFM (sm_regs stackmap) r + , area_off area + m == off + -> go stackmap ns + _otherwise + -> n : go (procMiddle stackmaps n stackmap) ns + + +-- ----------------------------------------------------------------------------- +-- Update info tables to include stack liveness + + +setInfoTableStackMap :: DynFlags -> LabelMap StackMap -> CmmDecl -> CmmDecl +setInfoTableStackMap dflags stackmaps (CmmProc top_info@TopInfo{..} l v g) + = CmmProc top_info{ info_tbls = mapMapWithKey fix_info info_tbls } l v g + where + fix_info lbl info_tbl@CmmInfoTable{ cit_rep = StackRep _ } = + info_tbl { cit_rep = StackRep (get_liveness lbl) } + fix_info _ other = other + + get_liveness :: BlockId -> Liveness + get_liveness lbl + = case mapLookup lbl stackmaps of + Nothing -> pprPanic "setInfoTableStackMap" (ppr lbl <+> ppr info_tbls) + Just sm -> stackMapToLiveness dflags sm + +setInfoTableStackMap _ _ d = d + + +stackMapToLiveness :: DynFlags -> StackMap -> Liveness +stackMapToLiveness dflags StackMap{..} = + reverse $ Array.elems $ + accumArray (\_ x -> x) True (toWords dflags sm_ret_off + 1, + toWords dflags (sm_sp - sm_args)) live_words + where + live_words = [ (toWords dflags off, False) + | (r,off) <- nonDetEltsUFM sm_regs + , isGcPtrType (localRegType r) ] + -- See Note [Unique Determinism and code generation] + +-- ----------------------------------------------------------------------------- +-- Pass 2 +-- ----------------------------------------------------------------------------- + +insertReloadsAsNeeded + :: DynFlags + -> ProcPointSet + -> LabelMap StackMap + -> BlockId + -> [CmmBlock] + -> UniqSM [CmmBlock] +insertReloadsAsNeeded dflags procpoints final_stackmaps entry blocks = do + toBlockList . 
fst <$>
+        rewriteCmmBwd liveLattice rewriteCC (ofBlockList entry blocks) mapEmpty
+  where
+    rewriteCC :: RewriteFun CmmLocalLive
+    rewriteCC (BlockCC e_node middle0 x_node) fact_base0 = do
+        let entry_label = entryLabel e_node
+            stackmap = case mapLookup entry_label final_stackmaps of
+                Just sm -> sm
+                Nothing -> panic "insertReloadsAsNeeded: rewriteCC: stackmap"
+
+            -- Merge the liveness from successor blocks and analyse the last
+            -- node.
+            joined = gen_kill dflags x_node $!
+                         joinOutFacts liveLattice x_node fact_base0
+            -- What is live at the start of middle0.
+            live_at_middle0 = foldNodesBwdOO (gen_kill dflags) middle0 joined
+
+            -- If this is a procpoint we need to add the reloads, but only if
+            -- they're actually live. Furthermore, nothing is live at the entry
+            -- to a proc point.
+            (middle1, live_with_reloads)
+                | entry_label `setMember` procpoints
+                = let reloads = insertReloads dflags stackmap live_at_middle0
+                  in (foldr blockCons middle0 reloads, emptyRegSet)
+                | otherwise
+                = (middle0, live_at_middle0)
+
+            -- Final liveness for this block.
+            !fact_base2 = mapSingleton entry_label live_with_reloads
+
+        return (BlockCC e_node middle1 x_node, fact_base2)
+
+insertReloads :: DynFlags -> StackMap -> CmmLocalLive -> [CmmNode O O]
+insertReloads dflags stackmap live =
+     [ CmmAssign (CmmLocal reg)
+                 -- This cmmOffset basically corresponds to manifesting
+                 -- @CmmStackSlot Old sp_off@, see Note [SP old/young offsets]
+                 (CmmLoad (cmmOffset dflags spExpr (sp_off - reg_off))
+                          (localRegType reg))
+     | (reg, reg_off) <- stackSlotRegs stackmap
+     , reg `elemRegSet` live
+     ]
+   where
+     sp_off = sm_sp stackmap
+
+-- -----------------------------------------------------------------------------
+-- Lowering safe foreign calls
+
+{-
+Note [Lower safe foreign calls]
+
+We start with
+
+   Sp[young(L1)] = L1
+ ,-----------------------
+ | r1 = foo(x,y,z) returns to L1
+ '-----------------------
+ L1:
+   R1 = r1 -- copyIn, inserted by mkSafeCall
+   ...
+
+the stack layout algorithm will arrange to save and reload everything
+live across the call. Our job now is to expand the call so we get
+
+   Sp[young(L1)] = L1
+ ,-----------------------
+ | SAVE_THREAD_STATE()
+ | token = suspendThread(BaseReg, interruptible)
+ | r = foo(x,y,z)
+ | BaseReg = resumeThread(token)
+ | LOAD_THREAD_STATE()
+ | R1 = r  -- copyOut
+ | jump Sp[0]
+ '-----------------------
+ L1:
+   r = R1 -- copyIn, inserted by mkSafeCall
+   ...
+
+Note the copyOut, which saves the results in the places that L1 is
+expecting them (see Note [safe foreign call convention]). Note also
+that the safe foreign call is replaced by an unsafe one in the Cmm graph.
+-}
+
+lowerSafeForeignCall :: DynFlags -> CmmBlock -> UniqSM CmmBlock
+lowerSafeForeignCall dflags block
+  | (entry@(CmmEntry _ tscp), middle, CmmForeignCall { .. }) <- blockSplit block
+  = do
+    -- Both 'id' and 'new_base' are KindNonPtr because they're
+    -- RTS-only objects and are not subject to garbage collection
+    id <- newTemp (bWord dflags)
+    new_base <- newTemp (cmmRegType dflags baseReg)
+    let (caller_save, caller_load) = callerSaveVolatileRegs dflags
+    save_state_code <- saveThreadState dflags
+    load_state_code <- loadThreadState dflags
+    let suspend = save_state_code <*>
+                  caller_save <*>
+                  mkMiddle (callSuspendThread dflags id intrbl)
+        midCall = mkUnsafeCall tgt res args
+        resume  = mkMiddle (callResumeThread new_base id) <*>
+                  -- Assign the result to BaseReg: we
+                  -- might now have a different Capability!
+ mkAssign baseReg (CmmReg (CmmLocal new_base)) <*> + caller_load <*> + load_state_code + + (_, regs, copyout) = + copyOutOflow dflags NativeReturn Jump (Young succ) + (map (CmmReg . CmmLocal) res) + ret_off [] + + -- NB. after resumeThread returns, the top-of-stack probably contains + -- the stack frame for succ, but it might not: if the current thread + -- received an exception during the call, then the stack might be + -- different. Hence we continue by jumping to the top stack frame, + -- not by jumping to succ. + jump = CmmCall { cml_target = entryCode dflags $ + CmmLoad spExpr (bWord dflags) + , cml_cont = Just succ + , cml_args_regs = regs + , cml_args = widthInBytes (wordWidth dflags) + , cml_ret_args = ret_args + , cml_ret_off = ret_off } + + graph' <- lgraphOfAGraph ( suspend <*> + midCall <*> + resume <*> + copyout <*> + mkLast jump, tscp) + + case toBlockList graph' of + [one] -> let (_, middle', last) = blockSplit one + in return (blockJoin entry (middle `blockAppend` middle') last) + _ -> panic "lowerSafeForeignCall0" + + -- Block doesn't end in a safe foreign call: + | otherwise = return block + + +foreignLbl :: FastString -> CmmExpr +foreignLbl name = CmmLit (CmmLabel (mkForeignLabel name Nothing ForeignLabelInExternalPackage IsFunction)) + +callSuspendThread :: DynFlags -> LocalReg -> Bool -> CmmNode O O +callSuspendThread dflags id intrbl = + CmmUnsafeForeignCall + (ForeignTarget (foreignLbl (fsLit "suspendThread")) + (ForeignConvention CCallConv [AddrHint, NoHint] [AddrHint] CmmMayReturn)) + [id] [baseExpr, mkIntExpr dflags (fromEnum intrbl)] + +callResumeThread :: LocalReg -> LocalReg -> CmmNode O O +callResumeThread new_base id = + CmmUnsafeForeignCall + (ForeignTarget (foreignLbl (fsLit "resumeThread")) + (ForeignConvention CCallConv [AddrHint] [AddrHint] CmmMayReturn)) + [new_base] [CmmReg (CmmLocal id)] + +-- ----------------------------------------------------------------------------- + +plusW :: DynFlags -> ByteOff -> WordOff -> ByteOff +plusW dflags b w = b + w * wORD_SIZE dflags + +data StackSlot = Occupied | Empty + -- Occupied: a return address or part of an update frame + +instance Outputable StackSlot where + ppr Occupied = text "XXX" + ppr Empty = text "---" + +dropEmpty :: WordOff -> [StackSlot] -> Maybe [StackSlot] +dropEmpty 0 ss = Just ss +dropEmpty n (Empty : ss) = dropEmpty (n-1) ss +dropEmpty _ _ = Nothing + +isEmpty :: StackSlot -> Bool +isEmpty Empty = True +isEmpty _ = False + +localRegBytes :: DynFlags -> LocalReg -> ByteOff +localRegBytes dflags r + = roundUpToWords dflags (widthInBytes (typeWidth (localRegType r))) + +localRegWords :: DynFlags -> LocalReg -> WordOff +localRegWords dflags = toWords dflags . localRegBytes dflags + +toWords :: DynFlags -> ByteOff -> WordOff +toWords dflags x = x `quot` wORD_SIZE dflags + + +stackSlotRegs :: StackMap -> [(LocalReg, StackLoc)] +stackSlotRegs sm = nonDetEltsUFM (sm_regs sm) + -- See Note [Unique Determinism and code generation] diff --git a/compiler/GHC/Cmm/Lexer.x b/compiler/GHC/Cmm/Lexer.x new file mode 100644 index 0000000000..d8f15b916c --- /dev/null +++ b/compiler/GHC/Cmm/Lexer.x @@ -0,0 +1,368 @@ +----------------------------------------------------------------------------- +-- +-- (c) The University of Glasgow, 2004-2006 +-- +-- Lexer for concrete Cmm. We try to stay close to the C-- spec, but there +-- are a few minor differences: +-- +-- * extra keywords for our macros, and float32/float64 types +-- * global registers (Sp,Hp, etc.) 
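+--
+-- For illustration, a tiny hypothetical fragment of the concrete syntax this
+-- lexer tokenises (the procedure name and label are made up):
+--
+--   foo() {
+--       bits64 x;
+--       x = Sp + 8;
+--       if (x >= SpLim) {
+--           return (x);
+--       }
+--       jump bar();
+--   }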
+--
+-----------------------------------------------------------------------------
+
+{
+module GHC.Cmm.Lexer (
+  CmmToken(..), cmmlex,
+  ) where
+
+import GhcPrelude
+
+import GHC.Cmm.Expr
+
+import Lexer
+import GHC.Cmm.Monad
+import SrcLoc
+import UniqFM
+import StringBuffer
+import FastString
+import Ctype
+import Util
+--import TRACE
+
+import Data.Word
+import Data.Char
+}
+
+$whitechar   = [\ \t\n\r\f\v\xa0] -- \xa0 is Unicode no-break space
+$white_no_nl = $whitechar # \n
+
+$ascdigit  = 0-9
+$unidigit  = \x01 -- Trick Alex into handling Unicode. See alexGetChar.
+$digit     = [$ascdigit $unidigit]
+$octit     = 0-7
+$hexit     = [$digit A-F a-f]
+
+$unilarge  = \x03 -- Trick Alex into handling Unicode. See alexGetChar.
+$asclarge  = [A-Z \xc0-\xd6 \xd8-\xde]
+$large     = [$asclarge $unilarge]
+
+$unismall  = \x04 -- Trick Alex into handling Unicode. See alexGetChar.
+$ascsmall  = [a-z \xdf-\xf6 \xf8-\xff]
+$small     = [$ascsmall $unismall \_]
+
+$namebegin = [$large $small \. \$ \@]
+$namechar  = [$namebegin $digit]
+
+@decimal     = $digit+
+@octal       = $octit+
+@hexadecimal = $hexit+
+@exponent    = [eE] [\-\+]? @decimal
+
+@floating_point = @decimal \. @decimal @exponent? | @decimal @exponent
+
+@escape      = \\ ([abfnrt\\\'\"\?] | x $hexit{1,2} | $octit{1,3})
+@strchar     = ($printable # [\"\\]) | @escape
+
+cmm :-
+
+$white_no_nl+           ;
+^\# pragma .* \n        ; -- Apple GCC 3.3 CPP generates pragmas in its output
+
+^\# (line)?             { begin line_prag }
+
+-- single-line line pragmas, of the form
+--    # <line> "<file>" <extra-stuff> \n
+<line_prag> $digit+       { setLine line_prag1 }
+<line_prag1> \" [^\"]* \" { setFile line_prag2 }
+<line_prag2> .*           { pop }
+
+<0> {
+  \n                    ;
+
+  [\:\;\{\}\[\]\(\)\=\`\~\/\*\%\-\+\&\^\|\>\<\,\!]      { special_char }
+
+  ".."                  { kw CmmT_DotDot }
+  "::"                  { kw CmmT_DoubleColon }
+  ">>"                  { kw CmmT_Shr }
+  "<<"                  { kw CmmT_Shl }
+  ">="                  { kw CmmT_Ge }
+  "<="                  { kw CmmT_Le }
+  "=="                  { kw CmmT_Eq }
+  "!="                  { kw CmmT_Ne }
+  "&&"                  { kw CmmT_BoolAnd }
+  "||"                  { kw CmmT_BoolOr }
+
+  "True"                { kw CmmT_True  }
+  "False"               { kw CmmT_False }
+  "likely"              { kw CmmT_likely}
+
+  P@decimal             { global_regN (\n -> VanillaReg n VGcPtr) }
+  R@decimal             { global_regN (\n -> VanillaReg n VNonGcPtr) }
+  F@decimal             { global_regN FloatReg }
+  D@decimal             { global_regN DoubleReg }
+  L@decimal             { global_regN LongReg }
+  Sp                    { global_reg Sp }
+  SpLim                 { global_reg SpLim }
+  Hp                    { global_reg Hp }
+  HpLim                 { global_reg HpLim }
+  CCCS                  { global_reg CCCS }
+  CurrentTSO            { global_reg CurrentTSO }
+  CurrentNursery        { global_reg CurrentNursery }
+  HpAlloc               { global_reg HpAlloc }
+  BaseReg               { global_reg BaseReg }
+  MachSp                { global_reg MachSp }
+  UnwindReturnReg       { global_reg UnwindReturnReg }
+
+  $namebegin $namechar* { name }
+
+  0 @octal              { tok_octal }
+  @decimal              { tok_decimal }
+  0[xX] @hexadecimal    { tok_hexadecimal }
+  @floating_point       { strtoken tok_float }
+
+  \" @strchar* \"       { strtoken tok_string }
+}
+
+{
+data CmmToken
+  = CmmT_SpecChar  Char
+  | CmmT_DotDot
+  | CmmT_DoubleColon
+  | CmmT_Shr
+  | CmmT_Shl
+  | CmmT_Ge
+  | CmmT_Le
+  | CmmT_Eq
+  | CmmT_Ne
+  | CmmT_BoolAnd
+  | CmmT_BoolOr
+  | CmmT_CLOSURE
+  | CmmT_INFO_TABLE
+  | CmmT_INFO_TABLE_RET
+  | CmmT_INFO_TABLE_FUN
+  | CmmT_INFO_TABLE_CONSTR
+  | CmmT_INFO_TABLE_SELECTOR
+  | CmmT_else
+  | CmmT_export
+  | CmmT_section
+  | CmmT_goto
+  | CmmT_if
+  | CmmT_call
+  | CmmT_jump
+  | CmmT_foreign
+  | CmmT_never
+  | CmmT_prim
+  | CmmT_reserve
+  | CmmT_return
+  | CmmT_returns
+  | CmmT_import
+  | CmmT_switch
+  | CmmT_case
+  | CmmT_default
+  | CmmT_push
+  | CmmT_unwind
+  | CmmT_bits8
+  | CmmT_bits16
+  | CmmT_bits32
+  | CmmT_bits64
+  | CmmT_bits128
+  | CmmT_bits256
+  | CmmT_bits512
+  | CmmT_float32
+  | 
CmmT_float64 + | CmmT_gcptr + | CmmT_GlobalReg GlobalReg + | CmmT_Name FastString + | CmmT_String String + | CmmT_Int Integer + | CmmT_Float Rational + | CmmT_EOF + | CmmT_False + | CmmT_True + | CmmT_likely + deriving (Show) + +-- ----------------------------------------------------------------------------- +-- Lexer actions + +type Action = RealSrcSpan -> StringBuffer -> Int -> PD (RealLocated CmmToken) + +begin :: Int -> Action +begin code _span _str _len = do liftP (pushLexState code); lexToken + +pop :: Action +pop _span _buf _len = liftP popLexState >> lexToken + +special_char :: Action +special_char span buf _len = return (L span (CmmT_SpecChar (currentChar buf))) + +kw :: CmmToken -> Action +kw tok span _buf _len = return (L span tok) + +global_regN :: (Int -> GlobalReg) -> Action +global_regN con span buf len + = return (L span (CmmT_GlobalReg (con (fromIntegral n)))) + where buf' = stepOn buf + n = parseUnsignedInteger buf' (len-1) 10 octDecDigit + +global_reg :: GlobalReg -> Action +global_reg r span _buf _len = return (L span (CmmT_GlobalReg r)) + +strtoken :: (String -> CmmToken) -> Action +strtoken f span buf len = + return (L span $! (f $! lexemeToString buf len)) + +name :: Action +name span buf len = + case lookupUFM reservedWordsFM fs of + Just tok -> return (L span tok) + Nothing -> return (L span (CmmT_Name fs)) + where + fs = lexemeToFastString buf len + +reservedWordsFM = listToUFM $ + map (\(x, y) -> (mkFastString x, y)) [ + ( "CLOSURE", CmmT_CLOSURE ), + ( "INFO_TABLE", CmmT_INFO_TABLE ), + ( "INFO_TABLE_RET", CmmT_INFO_TABLE_RET ), + ( "INFO_TABLE_FUN", CmmT_INFO_TABLE_FUN ), + ( "INFO_TABLE_CONSTR", CmmT_INFO_TABLE_CONSTR ), + ( "INFO_TABLE_SELECTOR",CmmT_INFO_TABLE_SELECTOR ), + ( "else", CmmT_else ), + ( "export", CmmT_export ), + ( "section", CmmT_section ), + ( "goto", CmmT_goto ), + ( "if", CmmT_if ), + ( "call", CmmT_call ), + ( "jump", CmmT_jump ), + ( "foreign", CmmT_foreign ), + ( "never", CmmT_never ), + ( "prim", CmmT_prim ), + ( "reserve", CmmT_reserve ), + ( "return", CmmT_return ), + ( "returns", CmmT_returns ), + ( "import", CmmT_import ), + ( "switch", CmmT_switch ), + ( "case", CmmT_case ), + ( "default", CmmT_default ), + ( "push", CmmT_push ), + ( "unwind", CmmT_unwind ), + ( "bits8", CmmT_bits8 ), + ( "bits16", CmmT_bits16 ), + ( "bits32", CmmT_bits32 ), + ( "bits64", CmmT_bits64 ), + ( "bits128", CmmT_bits128 ), + ( "bits256", CmmT_bits256 ), + ( "bits512", CmmT_bits512 ), + ( "float32", CmmT_float32 ), + ( "float64", CmmT_float64 ), +-- New forms + ( "b8", CmmT_bits8 ), + ( "b16", CmmT_bits16 ), + ( "b32", CmmT_bits32 ), + ( "b64", CmmT_bits64 ), + ( "b128", CmmT_bits128 ), + ( "b256", CmmT_bits256 ), + ( "b512", CmmT_bits512 ), + ( "f32", CmmT_float32 ), + ( "f64", CmmT_float64 ), + ( "gcptr", CmmT_gcptr ), + ( "likely", CmmT_likely), + ( "True", CmmT_True ), + ( "False", CmmT_False ) + ] + +tok_decimal span buf len + = return (L span (CmmT_Int $! parseUnsignedInteger buf len 10 octDecDigit)) + +tok_octal span buf len + = return (L span (CmmT_Int $! parseUnsignedInteger (offsetBytes 1 buf) (len-1) 8 octDecDigit)) + +tok_hexadecimal span buf len + = return (L span (CmmT_Int $! parseUnsignedInteger (offsetBytes 2 buf) (len-2) 16 hexDigit)) + +tok_float str = CmmT_Float $! 
readRational str + +tok_string str = CmmT_String (read str) + -- urk, not quite right, but it'll do for now + +-- ----------------------------------------------------------------------------- +-- Line pragmas + +setLine :: Int -> Action +setLine code span buf len = do + let line = parseUnsignedInteger buf len 10 octDecDigit + liftP $ do + setSrcLoc (mkRealSrcLoc (srcSpanFile span) (fromIntegral line - 1) 1) + -- subtract one: the line number refers to the *following* line + -- trace ("setLine " ++ show line) $ do + popLexState >> pushLexState code + lexToken + +setFile :: Int -> Action +setFile code span buf len = do + let file = lexemeToFastString (stepOn buf) (len-2) + liftP $ do + setSrcLoc (mkRealSrcLoc file (srcSpanEndLine span) (srcSpanEndCol span)) + popLexState >> pushLexState code + lexToken + +-- ----------------------------------------------------------------------------- +-- This is the top-level function: called from the parser each time a +-- new token is to be read from the input. + +cmmlex :: (Located CmmToken -> PD a) -> PD a +cmmlex cont = do + (L span tok) <- lexToken + --trace ("token: " ++ show tok) $ do + cont (L (RealSrcSpan span) tok) + +lexToken :: PD (RealLocated CmmToken) +lexToken = do + inp@(loc1,buf) <- getInput + sc <- liftP getLexState + case alexScan inp sc of + AlexEOF -> do let span = mkRealSrcSpan loc1 loc1 + liftP (setLastToken span 0) + return (L span CmmT_EOF) + AlexError (loc2,_) -> liftP $ failLocMsgP loc1 loc2 "lexical error" + AlexSkip inp2 _ -> do + setInput inp2 + lexToken + AlexToken inp2@(end,_buf2) len t -> do + setInput inp2 + let span = mkRealSrcSpan loc1 end + span `seq` liftP (setLastToken span len) + t span buf len + +-- ----------------------------------------------------------------------------- +-- Monad stuff + +-- Stuff that Alex needs to know about our input type: +type AlexInput = (RealSrcLoc,StringBuffer) + +alexInputPrevChar :: AlexInput -> Char +alexInputPrevChar (_,s) = prevChar s '\n' + +-- backwards compatibility for Alex 2.x +alexGetChar :: AlexInput -> Maybe (Char,AlexInput) +alexGetChar inp = case alexGetByte inp of + Nothing -> Nothing + Just (b,i) -> c `seq` Just (c,i) + where c = chr $ fromIntegral b + +alexGetByte :: AlexInput -> Maybe (Word8,AlexInput) +alexGetByte (loc,s) + | atEnd s = Nothing + | otherwise = b `seq` loc' `seq` s' `seq` Just (b, (loc', s')) + where c = currentChar s + b = fromIntegral $ ord $ c + loc' = advanceSrcLoc loc c + s' = stepOn s + +getInput :: PD AlexInput +getInput = PD $ \_ s@PState{ loc=l, buffer=b } -> POk s (l,b) + +setInput :: AlexInput -> PD () +setInput (l,b) = PD $ \_ s -> POk s{ loc=l, buffer=b } () +} diff --git a/compiler/GHC/Cmm/Lint.hs b/compiler/GHC/Cmm/Lint.hs new file mode 100644 index 0000000000..d70fed3b9e --- /dev/null +++ b/compiler/GHC/Cmm/Lint.hs @@ -0,0 +1,261 @@ +----------------------------------------------------------------------------- +-- +-- (c) The University of Glasgow 2011 +-- +-- CmmLint: checking the correctness of Cmm statements and expressions +-- +----------------------------------------------------------------------------- +{-# LANGUAGE DeriveFunctor #-} +{-# LANGUAGE GADTs #-} +module GHC.Cmm.Lint ( + cmmLint, cmmLintGraph + ) where + +import GhcPrelude + +import GHC.Cmm.Dataflow.Block +import GHC.Cmm.Dataflow.Collections +import GHC.Cmm.Dataflow.Graph +import GHC.Cmm.Dataflow.Label +import GHC.Cmm +import GHC.Cmm.Utils +import GHC.Cmm.Liveness +import GHC.Cmm.Switch (switchTargetsToList) +import GHC.Cmm.Ppr () -- For Outputable instances +import 
Outputable +import DynFlags + +import Control.Monad (ap) + +-- Things to check: +-- - invariant on CmmBlock in GHC.Cmm.Expr (see comment there) +-- - check for branches to blocks that don't exist +-- - check types + +-- ----------------------------------------------------------------------------- +-- Exported entry points: + +cmmLint :: (Outputable d, Outputable h) + => DynFlags -> GenCmmGroup d h CmmGraph -> Maybe SDoc +cmmLint dflags tops = runCmmLint dflags (mapM_ (lintCmmDecl dflags)) tops + +cmmLintGraph :: DynFlags -> CmmGraph -> Maybe SDoc +cmmLintGraph dflags g = runCmmLint dflags (lintCmmGraph dflags) g + +runCmmLint :: Outputable a => DynFlags -> (a -> CmmLint b) -> a -> Maybe SDoc +runCmmLint dflags l p = + case unCL (l p) dflags of + Left err -> Just (vcat [text "Cmm lint error:", + nest 2 err, + text "Program was:", + nest 2 (ppr p)]) + Right _ -> Nothing + +lintCmmDecl :: DynFlags -> GenCmmDecl h i CmmGraph -> CmmLint () +lintCmmDecl dflags (CmmProc _ lbl _ g) + = addLintInfo (text "in proc " <> ppr lbl) $ lintCmmGraph dflags g +lintCmmDecl _ (CmmData {}) + = return () + + +lintCmmGraph :: DynFlags -> CmmGraph -> CmmLint () +lintCmmGraph dflags g = + cmmLocalLiveness dflags g `seq` mapM_ (lintCmmBlock labels) blocks + -- cmmLiveness throws an error if there are registers + -- live on entry to the graph (i.e. undefined + -- variables) + where + blocks = toBlockList g + labels = setFromList (map entryLabel blocks) + + +lintCmmBlock :: LabelSet -> CmmBlock -> CmmLint () +lintCmmBlock labels block + = addLintInfo (text "in basic block " <> ppr (entryLabel block)) $ do + let (_, middle, last) = blockSplit block + mapM_ lintCmmMiddle (blockToList middle) + lintCmmLast labels last + +-- ----------------------------------------------------------------------------- +-- lintCmmExpr + +-- Checks whether a CmmExpr is "type-correct", and check for obvious-looking +-- byte/word mismatches. + +lintCmmExpr :: CmmExpr -> CmmLint CmmType +lintCmmExpr (CmmLoad expr rep) = do + _ <- lintCmmExpr expr + -- Disabled, if we have the inlining phase before the lint phase, + -- we can have funny offsets due to pointer tagging. -- EZY + -- when (widthInBytes (typeWidth rep) >= wORD_SIZE) $ + -- cmmCheckWordAddress expr + return rep +lintCmmExpr expr@(CmmMachOp op args) = do + dflags <- getDynFlags + tys <- mapM lintCmmExpr args + if map (typeWidth . cmmExprType dflags) args == machOpArgReps dflags op + then cmmCheckMachOp op args tys + else cmmLintMachOpErr expr (map (cmmExprType dflags) args) (machOpArgReps dflags op) +lintCmmExpr (CmmRegOff reg offset) + = do dflags <- getDynFlags + let rep = typeWidth (cmmRegType dflags reg) + lintCmmExpr (CmmMachOp (MO_Add rep) + [CmmReg reg, CmmLit (CmmInt (fromIntegral offset) rep)]) +lintCmmExpr expr = + do dflags <- getDynFlags + return (cmmExprType dflags expr) + +-- Check for some common byte/word mismatches (eg. Sp + 1) +cmmCheckMachOp :: MachOp -> [CmmExpr] -> [CmmType] -> CmmLint CmmType +cmmCheckMachOp op [lit@(CmmLit (CmmInt { })), reg@(CmmReg _)] tys + = cmmCheckMachOp op [reg, lit] tys +cmmCheckMachOp op _ tys + = do dflags <- getDynFlags + return (machOpResultType dflags op tys) + +{- +isOffsetOp :: MachOp -> Bool +isOffsetOp (MO_Add _) = True +isOffsetOp (MO_Sub _) = True +isOffsetOp _ = False + +-- This expression should be an address from which a word can be loaded: +-- check for funny-looking sub-word offsets. 
+_cmmCheckWordAddress :: CmmExpr -> CmmLint () +_cmmCheckWordAddress e@(CmmMachOp op [arg, CmmLit (CmmInt i _)]) + | isOffsetOp op && notNodeReg arg && i `rem` fromIntegral (wORD_SIZE dflags) /= 0 + = cmmLintDubiousWordOffset e +_cmmCheckWordAddress e@(CmmMachOp op [CmmLit (CmmInt i _), arg]) + | isOffsetOp op && notNodeReg arg && i `rem` fromIntegral (wORD_SIZE dflags) /= 0 + = cmmLintDubiousWordOffset e +_cmmCheckWordAddress _ + = return () + +-- No warnings for unaligned arithmetic with the node register, +-- which is used to extract fields from tagged constructor closures. +notNodeReg :: CmmExpr -> Bool +notNodeReg (CmmReg reg) | reg == nodeReg = False +notNodeReg _ = True +-} + +lintCmmMiddle :: CmmNode O O -> CmmLint () +lintCmmMiddle node = case node of + CmmComment _ -> return () + CmmTick _ -> return () + CmmUnwind{} -> return () + + CmmAssign reg expr -> do + dflags <- getDynFlags + erep <- lintCmmExpr expr + let reg_ty = cmmRegType dflags reg + if (erep `cmmEqType_ignoring_ptrhood` reg_ty) + then return () + else cmmLintAssignErr (CmmAssign reg expr) erep reg_ty + + CmmStore l r -> do + _ <- lintCmmExpr l + _ <- lintCmmExpr r + return () + + CmmUnsafeForeignCall target _formals actuals -> do + lintTarget target + mapM_ lintCmmExpr actuals + + +lintCmmLast :: LabelSet -> CmmNode O C -> CmmLint () +lintCmmLast labels node = case node of + CmmBranch id -> checkTarget id + + CmmCondBranch e t f _ -> do + dflags <- getDynFlags + mapM_ checkTarget [t,f] + _ <- lintCmmExpr e + checkCond dflags e + + CmmSwitch e ids -> do + dflags <- getDynFlags + mapM_ checkTarget $ switchTargetsToList ids + erep <- lintCmmExpr e + if (erep `cmmEqType_ignoring_ptrhood` bWord dflags) + then return () + else cmmLintErr (text "switch scrutinee is not a word: " <> + ppr e <> text " :: " <> ppr erep) + + CmmCall { cml_target = target, cml_cont = cont } -> do + _ <- lintCmmExpr target + maybe (return ()) checkTarget cont + + CmmForeignCall tgt _ args succ _ _ _ -> do + lintTarget tgt + mapM_ lintCmmExpr args + checkTarget succ + where + checkTarget id + | setMember id labels = return () + | otherwise = cmmLintErr (text "Branch to nonexistent id" <+> ppr id) + + +lintTarget :: ForeignTarget -> CmmLint () +lintTarget (ForeignTarget e _) = lintCmmExpr e >> return () +lintTarget (PrimTarget {}) = return () + + +checkCond :: DynFlags -> CmmExpr -> CmmLint () +checkCond _ (CmmMachOp mop _) | isComparisonMachOp mop = return () +checkCond dflags (CmmLit (CmmInt x t)) | x == 0 || x == 1, t == wordWidth dflags = return () -- constant values +checkCond _ expr + = cmmLintErr (hang (text "expression is not a conditional:") 2 + (ppr expr)) + +-- ----------------------------------------------------------------------------- +-- CmmLint monad + +-- just a basic error monad: + +newtype CmmLint a = CmmLint { unCL :: DynFlags -> Either SDoc a } + deriving (Functor) + +instance Applicative CmmLint where + pure a = CmmLint (\_ -> Right a) + (<*>) = ap + +instance Monad CmmLint where + CmmLint m >>= k = CmmLint $ \dflags -> + case m dflags of + Left e -> Left e + Right a -> unCL (k a) dflags + +instance HasDynFlags CmmLint where + getDynFlags = CmmLint (\dflags -> Right dflags) + +cmmLintErr :: SDoc -> CmmLint a +cmmLintErr msg = CmmLint (\_ -> Left msg) + +addLintInfo :: SDoc -> CmmLint a -> CmmLint a +addLintInfo info thing = CmmLint $ \dflags -> + case unCL thing dflags of + Left err -> Left (hang info 2 err) + Right a -> Right a + +cmmLintMachOpErr :: CmmExpr -> [CmmType] -> [Width] -> CmmLint a +cmmLintMachOpErr expr argsRep 
opExpectsRep + = cmmLintErr (text "in MachOp application: " $$ + nest 2 (ppr expr) $$ + (text "op is expecting: " <+> ppr opExpectsRep) $$ + (text "arguments provide: " <+> ppr argsRep)) + +cmmLintAssignErr :: CmmNode e x -> CmmType -> CmmType -> CmmLint a +cmmLintAssignErr stmt e_ty r_ty + = cmmLintErr (text "in assignment: " $$ + nest 2 (vcat [ppr stmt, + text "Reg ty:" <+> ppr r_ty, + text "Rhs ty:" <+> ppr e_ty])) + + +{- +cmmLintDubiousWordOffset :: CmmExpr -> CmmLint a +cmmLintDubiousWordOffset expr + = cmmLintErr (text "offset is not a multiple of words: " $$ + nest 2 (ppr expr)) +-} + diff --git a/compiler/GHC/Cmm/Liveness.hs b/compiler/GHC/Cmm/Liveness.hs new file mode 100644 index 0000000000..2b598f52e5 --- /dev/null +++ b/compiler/GHC/Cmm/Liveness.hs @@ -0,0 +1,93 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE ScopedTypeVariables #-} + +module GHC.Cmm.Liveness + ( CmmLocalLive + , cmmLocalLiveness + , cmmGlobalLiveness + , liveLattice + , gen_kill + ) +where + +import GhcPrelude + +import DynFlags +import GHC.Cmm.BlockId +import GHC.Cmm +import GHC.Cmm.Ppr.Expr () -- For Outputable instances +import GHC.Cmm.Dataflow.Block +import GHC.Cmm.Dataflow.Collections +import GHC.Cmm.Dataflow +import GHC.Cmm.Dataflow.Label + +import Maybes +import Outputable + +----------------------------------------------------------------------------- +-- Calculating what variables are live on entry to a basic block +----------------------------------------------------------------------------- + +-- | The variables live on entry to a block +type CmmLive r = RegSet r +type CmmLocalLive = CmmLive LocalReg + +-- | The dataflow lattice +liveLattice :: Ord r => DataflowLattice (CmmLive r) +{-# SPECIALIZE liveLattice :: DataflowLattice (CmmLive LocalReg) #-} +{-# SPECIALIZE liveLattice :: DataflowLattice (CmmLive GlobalReg) #-} +liveLattice = DataflowLattice emptyRegSet add + where + add (OldFact old) (NewFact new) = + let !join = plusRegSet old new + in changedIf (sizeRegSet join > sizeRegSet old) join + +-- | A mapping from block labels to the variables live on entry +type BlockEntryLiveness r = LabelMap (CmmLive r) + +----------------------------------------------------------------------------- +-- | Calculated liveness info for a CmmGraph +----------------------------------------------------------------------------- + +cmmLocalLiveness :: DynFlags -> CmmGraph -> BlockEntryLiveness LocalReg +cmmLocalLiveness dflags graph = + check $ analyzeCmmBwd liveLattice (xferLive dflags) graph mapEmpty + where + entry = g_entry graph + check facts = + noLiveOnEntry entry (expectJust "check" $ mapLookup entry facts) facts + +cmmGlobalLiveness :: DynFlags -> CmmGraph -> BlockEntryLiveness GlobalReg +cmmGlobalLiveness dflags graph = + analyzeCmmBwd liveLattice (xferLive dflags) graph mapEmpty + +-- | On entry to the procedure, there had better not be any LocalReg's live-in. +noLiveOnEntry :: BlockId -> CmmLive LocalReg -> a -> a +noLiveOnEntry bid in_fact x = + if nullRegSet in_fact then x + else pprPanic "LocalReg's live-in to graph" (ppr bid <+> ppr in_fact) + +gen_kill + :: (DefinerOfRegs r n, UserOfRegs r n) + => DynFlags -> n -> CmmLive r -> CmmLive r +gen_kill dflags node set = + let !afterKill = foldRegsDefd dflags deleteFromRegSet set node + in foldRegsUsed dflags extendRegSet afterKill node +{-# INLINE gen_kill #-} + +xferLive + :: forall r. 
+ ( UserOfRegs r (CmmNode O O) + , DefinerOfRegs r (CmmNode O O) + , UserOfRegs r (CmmNode O C) + , DefinerOfRegs r (CmmNode O C) + ) + => DynFlags -> TransferFun (CmmLive r) +xferLive dflags (BlockCC eNode middle xNode) fBase = + let joined = gen_kill dflags xNode $! joinOutFacts liveLattice xNode fBase + !result = foldNodesBwdOO (gen_kill dflags) middle joined + in mapSingleton (entryLabel eNode) result +{-# SPECIALIZE xferLive :: DynFlags -> TransferFun (CmmLive LocalReg) #-} +{-# SPECIALIZE xferLive :: DynFlags -> TransferFun (CmmLive GlobalReg) #-} diff --git a/compiler/GHC/Cmm/MachOp.hs b/compiler/GHC/Cmm/MachOp.hs new file mode 100644 index 0000000000..234001545c --- /dev/null +++ b/compiler/GHC/Cmm/MachOp.hs @@ -0,0 +1,664 @@ +module GHC.Cmm.MachOp + ( MachOp(..) + , pprMachOp, isCommutableMachOp, isAssociativeMachOp + , isComparisonMachOp, maybeIntComparison, machOpResultType + , machOpArgReps, maybeInvertComparison, isFloatComparison + + -- MachOp builders + , mo_wordAdd, mo_wordSub, mo_wordEq, mo_wordNe,mo_wordMul, mo_wordSQuot + , mo_wordSRem, mo_wordSNeg, mo_wordUQuot, mo_wordURem + , mo_wordSGe, mo_wordSLe, mo_wordSGt, mo_wordSLt, mo_wordUGe + , mo_wordULe, mo_wordUGt, mo_wordULt + , mo_wordAnd, mo_wordOr, mo_wordXor, mo_wordNot + , mo_wordShl, mo_wordSShr, mo_wordUShr + , mo_u_8To32, mo_s_8To32, mo_u_16To32, mo_s_16To32 + , mo_u_8ToWord, mo_s_8ToWord, mo_u_16ToWord, mo_s_16ToWord + , mo_u_32ToWord, mo_s_32ToWord + , mo_32To8, mo_32To16, mo_WordTo8, mo_WordTo16, mo_WordTo32, mo_WordTo64 + + -- CallishMachOp + , CallishMachOp(..), callishMachOpHints + , pprCallishMachOp + , machOpMemcpyishAlign + + -- Atomic read-modify-write + , AtomicMachOp(..) + ) +where + +import GhcPrelude + +import GHC.Cmm.Type +import Outputable +import DynFlags + +----------------------------------------------------------------------------- +-- MachOp +----------------------------------------------------------------------------- + +{- | +Machine-level primops; ones which we can reasonably delegate to the +native code generators to handle. + +Most operations are parameterised by the 'Width' that they operate on. +Some operations have separate signed and unsigned versions, and float +and integer versions. +-} + +data MachOp + -- Integer operations (insensitive to signed/unsigned) + = MO_Add Width + | MO_Sub Width + | MO_Eq Width + | MO_Ne Width + | MO_Mul Width -- low word of multiply + + -- Signed multiply/divide + | MO_S_MulMayOflo Width -- nonzero if signed multiply overflows + | MO_S_Quot Width -- signed / (same semantics as IntQuotOp) + | MO_S_Rem Width -- signed % (same semantics as IntRemOp) + | MO_S_Neg Width -- unary - + + -- Unsigned multiply/divide + | MO_U_MulMayOflo Width -- nonzero if unsigned multiply overflows + | MO_U_Quot Width -- unsigned / (same semantics as WordQuotOp) + | MO_U_Rem Width -- unsigned % (same semantics as WordRemOp) + + -- Signed comparisons + | MO_S_Ge Width + | MO_S_Le Width + | MO_S_Gt Width + | MO_S_Lt Width + + -- Unsigned comparisons + | MO_U_Ge Width + | MO_U_Le Width + | MO_U_Gt Width + | MO_U_Lt Width + + -- Floating point arithmetic + | MO_F_Add Width + | MO_F_Sub Width + | MO_F_Neg Width -- unary - + | MO_F_Mul Width + | MO_F_Quot Width + + -- Floating point comparison + | MO_F_Eq Width + | MO_F_Ne Width + | MO_F_Ge Width + | MO_F_Le Width + | MO_F_Gt Width + | MO_F_Lt Width + + -- Bitwise operations. Not all of these may be supported + -- at all sizes, and only integral Widths are valid. 
+ | MO_And Width + | MO_Or Width + | MO_Xor Width + | MO_Not Width + | MO_Shl Width + | MO_U_Shr Width -- unsigned shift right + | MO_S_Shr Width -- signed shift right + + -- Conversions. Some of these will be NOPs. + -- Floating-point conversions use the signed variant. + | MO_SF_Conv Width Width -- Signed int -> Float + | MO_FS_Conv Width Width -- Float -> Signed int + | MO_SS_Conv Width Width -- Signed int -> Signed int + | MO_UU_Conv Width Width -- unsigned int -> unsigned int + | MO_XX_Conv Width Width -- int -> int; puts no requirements on the + -- contents of upper bits when extending; + -- narrowing is simply truncation; the only + -- expectation is that we can recover the + -- original value by applying the opposite + -- MO_XX_Conv, e.g., + -- MO_XX_CONV W64 W8 (MO_XX_CONV W8 W64 x) + -- is equivalent to just x. + | MO_FF_Conv Width Width -- Float -> Float + + -- Vector element insertion and extraction operations + | MO_V_Insert Length Width -- Insert scalar into vector + | MO_V_Extract Length Width -- Extract scalar from vector + + -- Integer vector operations + | MO_V_Add Length Width + | MO_V_Sub Length Width + | MO_V_Mul Length Width + + -- Signed vector multiply/divide + | MO_VS_Quot Length Width + | MO_VS_Rem Length Width + | MO_VS_Neg Length Width + + -- Unsigned vector multiply/divide + | MO_VU_Quot Length Width + | MO_VU_Rem Length Width + + -- Floating point vector element insertion and extraction operations + | MO_VF_Insert Length Width -- Insert scalar into vector + | MO_VF_Extract Length Width -- Extract scalar from vector + + -- Floating point vector operations + | MO_VF_Add Length Width + | MO_VF_Sub Length Width + | MO_VF_Neg Length Width -- unary negation + | MO_VF_Mul Length Width + | MO_VF_Quot Length Width + + -- Alignment check (for -falignment-sanitisation) + | MO_AlignmentCheck Int Width + deriving (Eq, Show) + +pprMachOp :: MachOp -> SDoc +pprMachOp mo = text (show mo) + + + +-- ----------------------------------------------------------------------------- +-- Some common MachReps + +-- A 'wordRep' is a machine word on the target architecture +-- Specifically, it is the size of an Int#, Word#, Addr# +-- and the unit of allocation on the stack and the heap +-- Any pointer is also guaranteed to be a wordRep. 
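+--
+-- For illustration, a minimal sketch of how the word-sized builders below are
+-- typically combined with the constructors from GHC.Cmm.Expr (the helper name
+-- here is hypothetical and not defined anywhere):
+--
+-- > wordIncr :: DynFlags -> CmmReg -> Int -> CmmExpr
+-- > wordIncr dflags reg n =
+-- >   CmmMachOp (mo_wordAdd dflags)
+-- >             [CmmReg reg, CmmLit (CmmInt (fromIntegral n) (wordWidth dflags))]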
+ +mo_wordAdd, mo_wordSub, mo_wordEq, mo_wordNe,mo_wordMul, mo_wordSQuot + , mo_wordSRem, mo_wordSNeg, mo_wordUQuot, mo_wordURem + , mo_wordSGe, mo_wordSLe, mo_wordSGt, mo_wordSLt, mo_wordUGe + , mo_wordULe, mo_wordUGt, mo_wordULt + , mo_wordAnd, mo_wordOr, mo_wordXor, mo_wordNot, mo_wordShl, mo_wordSShr, mo_wordUShr + , mo_u_8ToWord, mo_s_8ToWord, mo_u_16ToWord, mo_s_16ToWord, mo_u_32ToWord, mo_s_32ToWord + , mo_WordTo8, mo_WordTo16, mo_WordTo32, mo_WordTo64 + :: DynFlags -> MachOp + +mo_u_8To32, mo_s_8To32, mo_u_16To32, mo_s_16To32 + , mo_32To8, mo_32To16 + :: MachOp + +mo_wordAdd dflags = MO_Add (wordWidth dflags) +mo_wordSub dflags = MO_Sub (wordWidth dflags) +mo_wordEq dflags = MO_Eq (wordWidth dflags) +mo_wordNe dflags = MO_Ne (wordWidth dflags) +mo_wordMul dflags = MO_Mul (wordWidth dflags) +mo_wordSQuot dflags = MO_S_Quot (wordWidth dflags) +mo_wordSRem dflags = MO_S_Rem (wordWidth dflags) +mo_wordSNeg dflags = MO_S_Neg (wordWidth dflags) +mo_wordUQuot dflags = MO_U_Quot (wordWidth dflags) +mo_wordURem dflags = MO_U_Rem (wordWidth dflags) + +mo_wordSGe dflags = MO_S_Ge (wordWidth dflags) +mo_wordSLe dflags = MO_S_Le (wordWidth dflags) +mo_wordSGt dflags = MO_S_Gt (wordWidth dflags) +mo_wordSLt dflags = MO_S_Lt (wordWidth dflags) + +mo_wordUGe dflags = MO_U_Ge (wordWidth dflags) +mo_wordULe dflags = MO_U_Le (wordWidth dflags) +mo_wordUGt dflags = MO_U_Gt (wordWidth dflags) +mo_wordULt dflags = MO_U_Lt (wordWidth dflags) + +mo_wordAnd dflags = MO_And (wordWidth dflags) +mo_wordOr dflags = MO_Or (wordWidth dflags) +mo_wordXor dflags = MO_Xor (wordWidth dflags) +mo_wordNot dflags = MO_Not (wordWidth dflags) +mo_wordShl dflags = MO_Shl (wordWidth dflags) +mo_wordSShr dflags = MO_S_Shr (wordWidth dflags) +mo_wordUShr dflags = MO_U_Shr (wordWidth dflags) + +mo_u_8To32 = MO_UU_Conv W8 W32 +mo_s_8To32 = MO_SS_Conv W8 W32 +mo_u_16To32 = MO_UU_Conv W16 W32 +mo_s_16To32 = MO_SS_Conv W16 W32 + +mo_u_8ToWord dflags = MO_UU_Conv W8 (wordWidth dflags) +mo_s_8ToWord dflags = MO_SS_Conv W8 (wordWidth dflags) +mo_u_16ToWord dflags = MO_UU_Conv W16 (wordWidth dflags) +mo_s_16ToWord dflags = MO_SS_Conv W16 (wordWidth dflags) +mo_s_32ToWord dflags = MO_SS_Conv W32 (wordWidth dflags) +mo_u_32ToWord dflags = MO_UU_Conv W32 (wordWidth dflags) + +mo_WordTo8 dflags = MO_UU_Conv (wordWidth dflags) W8 +mo_WordTo16 dflags = MO_UU_Conv (wordWidth dflags) W16 +mo_WordTo32 dflags = MO_UU_Conv (wordWidth dflags) W32 +mo_WordTo64 dflags = MO_UU_Conv (wordWidth dflags) W64 + +mo_32To8 = MO_UU_Conv W32 W8 +mo_32To16 = MO_UU_Conv W32 W16 + + +-- ---------------------------------------------------------------------------- +-- isCommutableMachOp + +{- | +Returns 'True' if the MachOp has commutable arguments. This is used +in the platform-independent Cmm optimisations. + +If in doubt, return 'False'. This generates worse code on the +native routes, but is otherwise harmless. +-} +isCommutableMachOp :: MachOp -> Bool +isCommutableMachOp mop = + case mop of + MO_Add _ -> True + MO_Eq _ -> True + MO_Ne _ -> True + MO_Mul _ -> True + MO_S_MulMayOflo _ -> True + MO_U_MulMayOflo _ -> True + MO_And _ -> True + MO_Or _ -> True + MO_Xor _ -> True + MO_F_Add _ -> True + MO_F_Mul _ -> True + _other -> False + +-- ---------------------------------------------------------------------------- +-- isAssociativeMachOp + +{- | +Returns 'True' if the MachOp is associative (i.e. @(x+y)+z == x+(y+z)@) +This is used in the platform-independent Cmm optimisations. + +If in doubt, return 'False'. 
This generates worse code on the
+native routes, but is otherwise harmless.
+-}
+isAssociativeMachOp :: MachOp -> Bool
+isAssociativeMachOp mop =
+  case mop of
+        MO_Add {} -> True       -- NB: does not include
+        MO_Mul {} -> True       --     floating point!
+        MO_And {} -> True
+        MO_Or  {} -> True
+        MO_Xor {} -> True
+        _other    -> False
+
+
+-- ----------------------------------------------------------------------------
+-- isComparisonMachOp
+
+{- |
+Returns 'True' if the MachOp is a comparison.
+
+If in doubt, return False.  This generates worse code on the
+native routes, but is otherwise harmless.
+-}
+isComparisonMachOp :: MachOp -> Bool
+isComparisonMachOp mop =
+  case mop of
+    MO_Eq   _  -> True
+    MO_Ne   _  -> True
+    MO_S_Ge _  -> True
+    MO_S_Le _  -> True
+    MO_S_Gt _  -> True
+    MO_S_Lt _  -> True
+    MO_U_Ge _  -> True
+    MO_U_Le _  -> True
+    MO_U_Gt _  -> True
+    MO_U_Lt _  -> True
+    MO_F_Eq {} -> True
+    MO_F_Ne {} -> True
+    MO_F_Ge {} -> True
+    MO_F_Le {} -> True
+    MO_F_Gt {} -> True
+    MO_F_Lt {} -> True
+    _other     -> False
+
+{- |
+Returns @Just w@ if the operation is an integer comparison with width
+@w@, or @Nothing@ otherwise.
+-}
+maybeIntComparison :: MachOp -> Maybe Width
+maybeIntComparison mop =
+  case mop of
+    MO_Eq   w  -> Just w
+    MO_Ne   w  -> Just w
+    MO_S_Ge w  -> Just w
+    MO_S_Le w  -> Just w
+    MO_S_Gt w  -> Just w
+    MO_S_Lt w  -> Just w
+    MO_U_Ge w  -> Just w
+    MO_U_Le w  -> Just w
+    MO_U_Gt w  -> Just w
+    MO_U_Lt w  -> Just w
+    _ -> Nothing
+
+isFloatComparison :: MachOp -> Bool
+isFloatComparison mop =
+  case mop of
+    MO_F_Eq {} -> True
+    MO_F_Ne {} -> True
+    MO_F_Ge {} -> True
+    MO_F_Le {} -> True
+    MO_F_Gt {} -> True
+    MO_F_Lt {} -> True
+    _other     -> False
+
+-- -----------------------------------------------------------------------------
+-- Inverting conditions
+
+-- Sometimes it's useful to be able to invert the sense of a
+-- condition.  Not all conditional tests are invertible: in
+-- particular, floating point conditionals cannot be inverted, because
+-- there exist floating-point values which return False for both senses
+-- of a condition (eg. !(NaN > NaN) && !(NaN /<= NaN)).
+
+maybeInvertComparison :: MachOp -> Maybe MachOp
+maybeInvertComparison op
+  = case op of  -- None of these Just cases include floating point
+        MO_Eq r   -> Just (MO_Ne r)
+        MO_Ne r   -> Just (MO_Eq r)
+        MO_U_Lt r -> Just (MO_U_Ge r)
+        MO_U_Gt r -> Just (MO_U_Le r)
+        MO_U_Le r -> Just (MO_U_Gt r)
+        MO_U_Ge r -> Just (MO_U_Lt r)
+        MO_S_Lt r -> Just (MO_S_Ge r)
+        MO_S_Gt r -> Just (MO_S_Le r)
+        MO_S_Le r -> Just (MO_S_Gt r)
+        MO_S_Ge r -> Just (MO_S_Lt r)
+        _other    -> Nothing
+
+-- ----------------------------------------------------------------------------
+-- machOpResultType
+
+{- |
+Returns the MachRep of the result of a MachOp.
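+
+For example (illustrative): @machOpResultType dflags (MO_Eq W64) [t1,t2]@ is
+the platform word type (via 'comparisonResultRep'), while
+@machOpResultType dflags (MO_FF_Conv W32 W64) [t]@ is @cmmFloat W64@.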
+-} +machOpResultType :: DynFlags -> MachOp -> [CmmType] -> CmmType +machOpResultType dflags mop tys = + case mop of + MO_Add {} -> ty1 -- Preserve GC-ptr-hood + MO_Sub {} -> ty1 -- of first arg + MO_Mul r -> cmmBits r + MO_S_MulMayOflo r -> cmmBits r + MO_S_Quot r -> cmmBits r + MO_S_Rem r -> cmmBits r + MO_S_Neg r -> cmmBits r + MO_U_MulMayOflo r -> cmmBits r + MO_U_Quot r -> cmmBits r + MO_U_Rem r -> cmmBits r + + MO_Eq {} -> comparisonResultRep dflags + MO_Ne {} -> comparisonResultRep dflags + MO_S_Ge {} -> comparisonResultRep dflags + MO_S_Le {} -> comparisonResultRep dflags + MO_S_Gt {} -> comparisonResultRep dflags + MO_S_Lt {} -> comparisonResultRep dflags + + MO_U_Ge {} -> comparisonResultRep dflags + MO_U_Le {} -> comparisonResultRep dflags + MO_U_Gt {} -> comparisonResultRep dflags + MO_U_Lt {} -> comparisonResultRep dflags + + MO_F_Add r -> cmmFloat r + MO_F_Sub r -> cmmFloat r + MO_F_Mul r -> cmmFloat r + MO_F_Quot r -> cmmFloat r + MO_F_Neg r -> cmmFloat r + MO_F_Eq {} -> comparisonResultRep dflags + MO_F_Ne {} -> comparisonResultRep dflags + MO_F_Ge {} -> comparisonResultRep dflags + MO_F_Le {} -> comparisonResultRep dflags + MO_F_Gt {} -> comparisonResultRep dflags + MO_F_Lt {} -> comparisonResultRep dflags + + MO_And {} -> ty1 -- Used for pointer masking + MO_Or {} -> ty1 + MO_Xor {} -> ty1 + MO_Not r -> cmmBits r + MO_Shl r -> cmmBits r + MO_U_Shr r -> cmmBits r + MO_S_Shr r -> cmmBits r + + MO_SS_Conv _ to -> cmmBits to + MO_UU_Conv _ to -> cmmBits to + MO_XX_Conv _ to -> cmmBits to + MO_FS_Conv _ to -> cmmBits to + MO_SF_Conv _ to -> cmmFloat to + MO_FF_Conv _ to -> cmmFloat to + + MO_V_Insert l w -> cmmVec l (cmmBits w) + MO_V_Extract _ w -> cmmBits w + + MO_V_Add l w -> cmmVec l (cmmBits w) + MO_V_Sub l w -> cmmVec l (cmmBits w) + MO_V_Mul l w -> cmmVec l (cmmBits w) + + MO_VS_Quot l w -> cmmVec l (cmmBits w) + MO_VS_Rem l w -> cmmVec l (cmmBits w) + MO_VS_Neg l w -> cmmVec l (cmmBits w) + + MO_VU_Quot l w -> cmmVec l (cmmBits w) + MO_VU_Rem l w -> cmmVec l (cmmBits w) + + MO_VF_Insert l w -> cmmVec l (cmmFloat w) + MO_VF_Extract _ w -> cmmFloat w + + MO_VF_Add l w -> cmmVec l (cmmFloat w) + MO_VF_Sub l w -> cmmVec l (cmmFloat w) + MO_VF_Mul l w -> cmmVec l (cmmFloat w) + MO_VF_Quot l w -> cmmVec l (cmmFloat w) + MO_VF_Neg l w -> cmmVec l (cmmFloat w) + + MO_AlignmentCheck _ _ -> ty1 + where + (ty1:_) = tys + +comparisonResultRep :: DynFlags -> CmmType +comparisonResultRep = bWord -- is it? + + +-- ----------------------------------------------------------------------------- +-- machOpArgReps + +-- | This function is used for debugging only: we can check whether an +-- application of a MachOp is "type-correct" by checking that the MachReps of +-- its arguments are the same as the MachOp expects. This is used when +-- linting a CmmExpr. 
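+--
+-- For illustration, this is (in sketch form) the check that GHC.Cmm.Lint
+-- performs on an application @CmmMachOp op args@:
+--
+-- >   map (typeWidth . cmmExprType dflags) args == machOpArgReps dflags op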
+ +machOpArgReps :: DynFlags -> MachOp -> [Width] +machOpArgReps dflags op = + case op of + MO_Add r -> [r,r] + MO_Sub r -> [r,r] + MO_Eq r -> [r,r] + MO_Ne r -> [r,r] + MO_Mul r -> [r,r] + MO_S_MulMayOflo r -> [r,r] + MO_S_Quot r -> [r,r] + MO_S_Rem r -> [r,r] + MO_S_Neg r -> [r] + MO_U_MulMayOflo r -> [r,r] + MO_U_Quot r -> [r,r] + MO_U_Rem r -> [r,r] + + MO_S_Ge r -> [r,r] + MO_S_Le r -> [r,r] + MO_S_Gt r -> [r,r] + MO_S_Lt r -> [r,r] + + MO_U_Ge r -> [r,r] + MO_U_Le r -> [r,r] + MO_U_Gt r -> [r,r] + MO_U_Lt r -> [r,r] + + MO_F_Add r -> [r,r] + MO_F_Sub r -> [r,r] + MO_F_Mul r -> [r,r] + MO_F_Quot r -> [r,r] + MO_F_Neg r -> [r] + MO_F_Eq r -> [r,r] + MO_F_Ne r -> [r,r] + MO_F_Ge r -> [r,r] + MO_F_Le r -> [r,r] + MO_F_Gt r -> [r,r] + MO_F_Lt r -> [r,r] + + MO_And r -> [r,r] + MO_Or r -> [r,r] + MO_Xor r -> [r,r] + MO_Not r -> [r] + MO_Shl r -> [r, wordWidth dflags] + MO_U_Shr r -> [r, wordWidth dflags] + MO_S_Shr r -> [r, wordWidth dflags] + + MO_SS_Conv from _ -> [from] + MO_UU_Conv from _ -> [from] + MO_XX_Conv from _ -> [from] + MO_SF_Conv from _ -> [from] + MO_FS_Conv from _ -> [from] + MO_FF_Conv from _ -> [from] + + MO_V_Insert l r -> [typeWidth (vec l (cmmBits r)),r,wordWidth dflags] + MO_V_Extract l r -> [typeWidth (vec l (cmmBits r)),wordWidth dflags] + + MO_V_Add _ r -> [r,r] + MO_V_Sub _ r -> [r,r] + MO_V_Mul _ r -> [r,r] + + MO_VS_Quot _ r -> [r,r] + MO_VS_Rem _ r -> [r,r] + MO_VS_Neg _ r -> [r] + + MO_VU_Quot _ r -> [r,r] + MO_VU_Rem _ r -> [r,r] + + MO_VF_Insert l r -> [typeWidth (vec l (cmmFloat r)),r,wordWidth dflags] + MO_VF_Extract l r -> [typeWidth (vec l (cmmFloat r)),wordWidth dflags] + + MO_VF_Add _ r -> [r,r] + MO_VF_Sub _ r -> [r,r] + MO_VF_Mul _ r -> [r,r] + MO_VF_Quot _ r -> [r,r] + MO_VF_Neg _ r -> [r] + + MO_AlignmentCheck _ r -> [r] + +----------------------------------------------------------------------------- +-- CallishMachOp +----------------------------------------------------------------------------- + +-- CallishMachOps tend to be implemented by foreign calls in some backends, +-- so we separate them out. In Cmm, these can only occur in a +-- statement position, in contrast to an ordinary MachOp which can occur +-- anywhere in an expression. +data CallishMachOp + = MO_F64_Pwr + | MO_F64_Sin + | MO_F64_Cos + | MO_F64_Tan + | MO_F64_Sinh + | MO_F64_Cosh + | MO_F64_Tanh + | MO_F64_Asin + | MO_F64_Acos + | MO_F64_Atan + | MO_F64_Asinh + | MO_F64_Acosh + | MO_F64_Atanh + | MO_F64_Log + | MO_F64_Log1P + | MO_F64_Exp + | MO_F64_ExpM1 + | MO_F64_Fabs + | MO_F64_Sqrt + | MO_F32_Pwr + | MO_F32_Sin + | MO_F32_Cos + | MO_F32_Tan + | MO_F32_Sinh + | MO_F32_Cosh + | MO_F32_Tanh + | MO_F32_Asin + | MO_F32_Acos + | MO_F32_Atan + | MO_F32_Asinh + | MO_F32_Acosh + | MO_F32_Atanh + | MO_F32_Log + | MO_F32_Log1P + | MO_F32_Exp + | MO_F32_ExpM1 + | MO_F32_Fabs + | MO_F32_Sqrt + + | MO_UF_Conv Width + + | MO_S_Mul2 Width + | MO_S_QuotRem Width + | MO_U_QuotRem Width + | MO_U_QuotRem2 Width + | MO_Add2 Width + | MO_AddWordC Width + | MO_SubWordC Width + | MO_AddIntC Width + | MO_SubIntC Width + | MO_U_Mul2 Width + + | MO_ReadBarrier + | MO_WriteBarrier + | MO_Touch -- Keep variables live (when using interior pointers) + + -- Prefetch + | MO_Prefetch_Data Int -- Prefetch hint. May change program performance but not + -- program behavior. + -- the Int can be 0-3. Needs to be known at compile time + -- to interact with code generation correctly. 
+                              -- TODO: add support for prefetch WRITES,
+                              -- currently only exposes prefetch reads, which
+                              -- would cover the majority of use cases in ghc anyway
+
+
+  -- These three MachOps are parameterised by the known alignment
+  -- of the destination and source (for memcpy/memmove) pointers.
+  -- This information may be used for optimisation in backends.
+  | MO_Memcpy Int
+  | MO_Memset Int
+  | MO_Memmove Int
+  | MO_Memcmp Int
+
+  | MO_PopCnt Width
+  | MO_Pdep Width
+  | MO_Pext Width
+  | MO_Clz Width
+  | MO_Ctz Width
+
+  | MO_BSwap Width
+  | MO_BRev Width
+
+  -- Atomic read-modify-write.
+  | MO_AtomicRMW Width AtomicMachOp
+  | MO_AtomicRead Width
+  | MO_AtomicWrite Width
+  | MO_Cmpxchg Width
+  deriving (Eq, Show)
+
+-- | The operation to perform atomically.
+data AtomicMachOp =
+      AMO_Add
+    | AMO_Sub
+    | AMO_And
+    | AMO_Nand
+    | AMO_Or
+    | AMO_Xor
+      deriving (Eq, Show)
+
+pprCallishMachOp :: CallishMachOp -> SDoc
+pprCallishMachOp mo = text (show mo)
+
+callishMachOpHints :: CallishMachOp -> ([ForeignHint], [ForeignHint])
+callishMachOpHints op = case op of
+  MO_Memcpy _  -> ([], [AddrHint,AddrHint,NoHint])
+  MO_Memset _  -> ([], [AddrHint,NoHint,NoHint])
+  MO_Memmove _ -> ([], [AddrHint,AddrHint,NoHint])
+  MO_Memcmp _  -> ([], [AddrHint, AddrHint, NoHint])
+  _            -> ([],[])
+  -- empty lists indicate NoHint
+
+-- | The alignment of a 'memcpy'-ish operation.
+machOpMemcpyishAlign :: CallishMachOp -> Maybe Int
+machOpMemcpyishAlign op = case op of
+  MO_Memcpy align  -> Just align
+  MO_Memset align  -> Just align
+  MO_Memmove align -> Just align
+  MO_Memcmp align  -> Just align
+  _                -> Nothing
diff --git a/compiler/GHC/Cmm/Monad.hs b/compiler/GHC/Cmm/Monad.hs
new file mode 100644
index 0000000000..6b8d00a118
--- /dev/null
+++ b/compiler/GHC/Cmm/Monad.hs
@@ -0,0 +1,59 @@
+{-# LANGUAGE CPP #-}
+
+-----------------------------------------------------------------------------
+-- A Parser monad with access to the 'DynFlags'.
+--
+-- The 'P' monad only has access to the subset of 'DynFlags'
+-- required for parsing Haskell.
+
+-- The parser for C-- requires access to a lot more of the 'DynFlags',
+-- so 'PD' provides access to 'DynFlags' via a 'HasDynFlags' instance.
+-----------------------------------------------------------------------------
+module GHC.Cmm.Monad (
+    PD(..)
+  , liftP
+  ) where
+
+import GhcPrelude
+
+import Control.Monad
+import qualified Control.Monad.Fail as MonadFail
+
+import DynFlags
+import Lexer
+
+newtype PD a = PD { unPD :: DynFlags -> PState -> ParseResult a }
+
+instance Functor PD where
+  fmap = liftM
+
+instance Applicative PD where
+  pure = returnPD
+  (<*>) = ap
+
+instance Monad PD where
+  (>>=) = thenPD
+#if !MIN_VERSION_base(4,13,0)
+  fail = MonadFail.fail
+#endif
+
+instance MonadFail.MonadFail PD where
+  fail = failPD
+
+liftP :: P a -> PD a
+liftP (P f) = PD $ \_ s -> f s
+
+returnPD :: a -> PD a
+returnPD = liftP . return
+
+thenPD :: PD a -> (a -> PD b) -> PD b
+(PD m) `thenPD` k = PD $ \d s ->
+  case m d s of
+    POk s1 a   -> unPD (k a) d s1
+    PFailed s1 -> PFailed s1
+
+failPD :: String -> PD a
+failPD = liftP . 
fail + +instance HasDynFlags PD where + getDynFlags = PD $ \d s -> POk s d diff --git a/compiler/GHC/Cmm/Node.hs b/compiler/GHC/Cmm/Node.hs new file mode 100644 index 0000000000..bb74647910 --- /dev/null +++ b/compiler/GHC/Cmm/Node.hs @@ -0,0 +1,724 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE CPP #-} +{-# LANGUAGE ExplicitForAll #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE StandaloneDeriving #-} +{-# LANGUAGE UndecidableInstances #-} +{-# LANGUAGE ScopedTypeVariables #-} + + +-- CmmNode type for representation using Hoopl graphs. + +module GHC.Cmm.Node ( + CmmNode(..), CmmFormal, CmmActual, CmmTickish, + UpdFrameOffset, Convention(..), + ForeignConvention(..), ForeignTarget(..), foreignTargetHints, + CmmReturnInfo(..), + mapExp, mapExpDeep, wrapRecExp, foldExp, foldExpDeep, wrapRecExpf, + mapExpM, mapExpDeepM, wrapRecExpM, mapSuccessors, mapCollectSuccessors, + + -- * Tick scopes + CmmTickScope(..), isTickSubScope, combineTickScopes, + ) where + +import GhcPrelude hiding (succ) + +import GHC.Platform.Regs +import GHC.Cmm.Expr +import GHC.Cmm.Switch +import DynFlags +import FastString +import ForeignCall +import Outputable +import GHC.Runtime.Layout +import CoreSyn (Tickish) +import qualified Unique as U + +import GHC.Cmm.Dataflow.Block +import GHC.Cmm.Dataflow.Graph +import GHC.Cmm.Dataflow.Collections +import GHC.Cmm.Dataflow.Label +import Data.Maybe +import Data.List (tails,sortBy) +import Unique (nonDetCmpUnique) +import Util + + +------------------------ +-- CmmNode + +#define ULabel {-# UNPACK #-} !Label + +data CmmNode e x where + CmmEntry :: ULabel -> CmmTickScope -> CmmNode C O + + CmmComment :: FastString -> CmmNode O O + + -- Tick annotation, covering Cmm code in our tick scope. We only + -- expect non-code @Tickish@ at this point (e.g. @SourceNote@). + -- See Note [CmmTick scoping details] + CmmTick :: !CmmTickish -> CmmNode O O + + -- Unwind pseudo-instruction, encoding stack unwinding + -- instructions for a debugger. This describes how to reconstruct + -- the "old" value of a register if we want to navigate the stack + -- up one frame. Having unwind information for @Sp@ will allow the + -- debugger to "walk" the stack. + -- + -- See Note [What is this unwinding business?] in Debug + CmmUnwind :: [(GlobalReg, Maybe CmmExpr)] -> CmmNode O O + + CmmAssign :: !CmmReg -> !CmmExpr -> CmmNode O O + -- Assign to register + + CmmStore :: !CmmExpr -> !CmmExpr -> CmmNode O O + -- Assign to memory location. Size is + -- given by cmmExprType of the rhs. + + CmmUnsafeForeignCall :: -- An unsafe foreign call; + -- see Note [Foreign calls] + -- Like a "fat machine instruction"; can occur + -- in the middle of a block + ForeignTarget -> -- call target + [CmmFormal] -> -- zero or more results + [CmmActual] -> -- zero or more arguments + CmmNode O O + -- Semantics: clobbers any GlobalRegs for which callerSaves r == True + -- See Note [Unsafe foreign calls clobber caller-save registers] + -- + -- Invariant: the arguments and the ForeignTarget must not + -- mention any registers for which GHC.Platform.callerSaves + -- is True. See Note [Register Parameter Passing]. 
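+      -- For illustration (the names 'fn', 'res' and 'arg' are hypothetical):
+      -- an unsafe call such as
+      --
+      --   CmmUnsafeForeignCall
+      --     (ForeignTarget fn (ForeignConvention CCallConv [NoHint] [NoHint] CmmMayReturn))
+      --     [res] [arg]
+      --
+      -- behaves like a single instruction that writes 'res' and clobbers
+      -- exactly the caller-saved GlobalRegs of the platform.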
+ + CmmBranch :: ULabel -> CmmNode O C + -- Goto another block in the same procedure + + CmmCondBranch :: { -- conditional branch + cml_pred :: CmmExpr, + cml_true, cml_false :: ULabel, + cml_likely :: Maybe Bool -- likely result of the conditional, + -- if known + } -> CmmNode O C + + CmmSwitch + :: CmmExpr -- Scrutinee, of some integral type + -> SwitchTargets -- Cases. See [Note SwitchTargets] + -> CmmNode O C + + CmmCall :: { -- A native call or tail call + cml_target :: CmmExpr, -- never a CmmPrim to a CallishMachOp! + + cml_cont :: Maybe Label, + -- Label of continuation (Nothing for return or tail call) + -- + -- Note [Continuation BlockIds]: these BlockIds are called + -- Continuation BlockIds, and are the only BlockIds that can + -- occur in CmmExprs, namely as (CmmLit (CmmBlock b)) or + -- (CmmStackSlot (Young b) _). + + cml_args_regs :: [GlobalReg], + -- The argument GlobalRegs (Rx, Fx, Dx, Lx) that are passed + -- to the call. This is essential information for the + -- native code generator's register allocator; without + -- knowing which GlobalRegs are live it has to assume that + -- they are all live. This list should only include + -- GlobalRegs that are mapped to real machine registers on + -- the target platform. + + cml_args :: ByteOff, + -- Byte offset, from the *old* end of the Area associated with + -- the Label (if cml_cont = Nothing, then Old area), of + -- youngest outgoing arg. Set the stack pointer to this before + -- transferring control. + -- (NB: an update frame might also have been stored in the Old + -- area, but it'll be in an older part than the args.) + + cml_ret_args :: ByteOff, + -- For calls *only*, the byte offset for youngest returned value + -- This is really needed at the *return* point rather than here + -- at the call, but in practice it's convenient to record it here. + + cml_ret_off :: ByteOff + -- For calls *only*, the byte offset of the base of the frame that + -- must be described by the info table for the return point. + -- The older words are an update frames, which have their own + -- info-table and layout information + + -- From a liveness point of view, the stack words older than + -- cml_ret_off are treated as live, even if the sequel of + -- the call goes into a loop. + } -> CmmNode O C + + CmmForeignCall :: { -- A safe foreign call; see Note [Foreign calls] + -- Always the last node of a block + tgt :: ForeignTarget, -- call target and convention + res :: [CmmFormal], -- zero or more results + args :: [CmmActual], -- zero or more arguments; see Note [Register parameter passing] + succ :: ULabel, -- Label of continuation + ret_args :: ByteOff, -- same as cml_ret_args + ret_off :: ByteOff, -- same as cml_ret_off + intrbl:: Bool -- whether or not the call is interruptible + } -> CmmNode O C + +{- Note [Foreign calls] +~~~~~~~~~~~~~~~~~~~~~~~ +A CmmUnsafeForeignCall is used for *unsafe* foreign calls; +a CmmForeignCall call is used for *safe* foreign calls. + +Unsafe ones are mostly easy: think of them as a "fat machine +instruction". In particular, they do *not* kill all live registers, +just the registers they return to (there was a bit of code in GHC that +conservatively assumed otherwise.) However, see [Register parameter passing]. + +Safe ones are trickier. 
A safe foreign call + r = f(x) +ultimately expands to + push "return address" -- Never used to return to; + -- just points an info table + save registers into TSO + call suspendThread + r = f(x) -- Make the call + call resumeThread + restore registers + pop "return address" +We cannot "lower" a safe foreign call to this sequence of Cmms, because +after we've saved Sp all the Cmm optimiser's assumptions are broken. + +Note that a safe foreign call needs an info table. + +So Safe Foreign Calls must remain as last nodes until the stack is +made manifest in GHC.Cmm.LayoutStack, where they are lowered into the above +sequence. +-} + +{- Note [Unsafe foreign calls clobber caller-save registers] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A foreign call is defined to clobber any GlobalRegs that are mapped to +caller-saves machine registers (according to the prevailing C ABI). +GHC.StgToCmm.Utils.callerSaves tells you which GlobalRegs are caller-saves. + +This is a design choice that makes it easier to generate code later. +We could instead choose to say that foreign calls do *not* clobber +caller-saves regs, but then we would have to figure out which regs +were live across the call later and insert some saves/restores. + +Furthermore when we generate code we never have any GlobalRegs live +across a call, because they are always copied-in to LocalRegs and +copied-out again before making a call/jump. So all we have to do is +avoid any code motion that would make a caller-saves GlobalReg live +across a foreign call during subsequent optimisations. +-} + +{- Note [Register parameter passing] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +On certain architectures, some registers are utilized for parameter +passing in the C calling convention. For example, in x86-64 Linux +convention, rdi, rsi, rdx and rcx (as well as r8 and r9) may be used for +argument passing. These are registers R3-R6, which our generated +code may also be using; as a result, it's necessary to save these +values before doing a foreign call. This is done during initial +code generation in callerSaveVolatileRegs in GHC.StgToCmm.Utils. However, +one result of doing this is that the contents of these registers +may mysteriously change if referenced inside the arguments. This +is dangerous, so you'll need to disable inlining much in the same +way is done in GHC.Cmm.Opt currently. We should fix this! +-} + +--------------------------------------------- +-- Eq instance of CmmNode + +deriving instance Eq (CmmNode e x) + +---------------------------------------------- +-- Hoopl instances of CmmNode + +instance NonLocal CmmNode where + entryLabel (CmmEntry l _) = l + + successors (CmmBranch l) = [l] + successors (CmmCondBranch {cml_true=t, cml_false=f}) = [f, t] -- meets layout constraint + successors (CmmSwitch _ ids) = switchTargetsToList ids + successors (CmmCall {cml_cont=l}) = maybeToList l + successors (CmmForeignCall {succ=l}) = [l] + + +-------------------------------------------------- +-- Various helper types + +type CmmActual = CmmExpr +type CmmFormal = LocalReg + +type UpdFrameOffset = ByteOff + +-- | A convention maps a list of values (function arguments or return +-- values) to registers or stack locations. +data Convention + = NativeDirectCall + -- ^ top-level Haskell functions use @NativeDirectCall@, which + -- maps arguments to registers starting with R2, according to + -- how many registers are available on the platform. 
This + -- convention ignores R1, because for a top-level function call + -- the function closure is implicit, and doesn't need to be passed. + | NativeNodeCall + -- ^ non-top-level Haskell functions, which pass the address of + -- the function closure in R1 (regardless of whether R1 is a + -- real register or not), and the rest of the arguments in + -- registers or on the stack. + | NativeReturn + -- ^ a native return. The convention for returns depends on + -- how many values are returned: for just one value returned, + -- the appropriate register is used (R1, F1, etc.). regardless + -- of whether it is a real register or not. For multiple + -- values returned, they are mapped to registers or the stack. + | Slow + -- ^ Slow entry points: all args pushed on the stack + | GC + -- ^ Entry to the garbage collector: uses the node reg! + -- (TODO: I don't think we need this --SDM) + deriving( Eq ) + +data ForeignConvention + = ForeignConvention + CCallConv -- Which foreign-call convention + [ForeignHint] -- Extra info about the args + [ForeignHint] -- Extra info about the result + CmmReturnInfo + deriving Eq + +data CmmReturnInfo + = CmmMayReturn + | CmmNeverReturns + deriving ( Eq ) + +data ForeignTarget -- The target of a foreign call + = ForeignTarget -- A foreign procedure + CmmExpr -- Its address + ForeignConvention -- Its calling convention + | PrimTarget -- A possibly-side-effecting machine operation + CallishMachOp -- Which one + deriving Eq + +foreignTargetHints :: ForeignTarget -> ([ForeignHint], [ForeignHint]) +foreignTargetHints target + = ( res_hints ++ repeat NoHint + , arg_hints ++ repeat NoHint ) + where + (res_hints, arg_hints) = + case target of + PrimTarget op -> callishMachOpHints op + ForeignTarget _ (ForeignConvention _ arg_hints res_hints _) -> + (res_hints, arg_hints) + +-------------------------------------------------- +-- Instances of register and slot users / definers + +instance UserOfRegs LocalReg (CmmNode e x) where + foldRegsUsed dflags f !z n = case n of + CmmAssign _ expr -> fold f z expr + CmmStore addr rval -> fold f (fold f z addr) rval + CmmUnsafeForeignCall t _ args -> fold f (fold f z t) args + CmmCondBranch expr _ _ _ -> fold f z expr + CmmSwitch expr _ -> fold f z expr + CmmCall {cml_target=tgt} -> fold f z tgt + CmmForeignCall {tgt=tgt, args=args} -> fold f (fold f z tgt) args + _ -> z + where fold :: forall a b. UserOfRegs LocalReg a + => (b -> LocalReg -> b) -> b -> a -> b + fold f z n = foldRegsUsed dflags f z n + +instance UserOfRegs GlobalReg (CmmNode e x) where + foldRegsUsed dflags f !z n = case n of + CmmAssign _ expr -> fold f z expr + CmmStore addr rval -> fold f (fold f z addr) rval + CmmUnsafeForeignCall t _ args -> fold f (fold f z t) args + CmmCondBranch expr _ _ _ -> fold f z expr + CmmSwitch expr _ -> fold f z expr + CmmCall {cml_target=tgt, cml_args_regs=args} -> fold f (fold f z args) tgt + CmmForeignCall {tgt=tgt, args=args} -> fold f (fold f z tgt) args + _ -> z + where fold :: forall a b. 
UserOfRegs GlobalReg a + => (b -> GlobalReg -> b) -> b -> a -> b + fold f z n = foldRegsUsed dflags f z n + +instance (Ord r, UserOfRegs r CmmReg) => UserOfRegs r ForeignTarget where + -- The (Ord r) in the context is necessary here + -- See Note [Recursive superclasses] in TcInstDcls + foldRegsUsed _ _ !z (PrimTarget _) = z + foldRegsUsed dflags f !z (ForeignTarget e _) = foldRegsUsed dflags f z e + +instance DefinerOfRegs LocalReg (CmmNode e x) where + foldRegsDefd dflags f !z n = case n of + CmmAssign lhs _ -> fold f z lhs + CmmUnsafeForeignCall _ fs _ -> fold f z fs + CmmForeignCall {res=res} -> fold f z res + _ -> z + where fold :: forall a b. DefinerOfRegs LocalReg a + => (b -> LocalReg -> b) -> b -> a -> b + fold f z n = foldRegsDefd dflags f z n + +instance DefinerOfRegs GlobalReg (CmmNode e x) where + foldRegsDefd dflags f !z n = case n of + CmmAssign lhs _ -> fold f z lhs + CmmUnsafeForeignCall tgt _ _ -> fold f z (foreignTargetRegs tgt) + CmmCall {} -> fold f z activeRegs + CmmForeignCall {} -> fold f z activeRegs + -- See Note [Safe foreign calls clobber STG registers] + _ -> z + where fold :: forall a b. DefinerOfRegs GlobalReg a + => (b -> GlobalReg -> b) -> b -> a -> b + fold f z n = foldRegsDefd dflags f z n + + platform = targetPlatform dflags + activeRegs = activeStgRegs platform + activeCallerSavesRegs = filter (callerSaves platform) activeRegs + + foreignTargetRegs (ForeignTarget _ (ForeignConvention _ _ _ CmmNeverReturns)) = [] + foreignTargetRegs _ = activeCallerSavesRegs + +-- Note [Safe foreign calls clobber STG registers] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- +-- During stack layout phase every safe foreign call is expanded into a block +-- that contains unsafe foreign call (instead of safe foreign call) and ends +-- with a normal call (See Note [Foreign calls]). This means that we must +-- treat safe foreign call as if it was a normal call (because eventually it +-- will be). This is important if we try to run sinking pass before stack +-- layout phase. Consider this example of what might go wrong (this is cmm +-- code from stablename001 test). Here is code after common block elimination +-- (before stack layout): +-- +-- c1q6: +-- _s1pf::P64 = R1; +-- _c1q8::I64 = performMajorGC; +-- I64[(young + 8)] = c1q9; +-- foreign call "ccall" arg hints: [] result hints: [] (_c1q8::I64)(...) +-- returns to c1q9 args: ([]) ress: ([])ret_args: 8ret_off: 8; +-- c1q9: +-- I64[(young + 8)] = c1qb; +-- R1 = _s1pc::P64; +-- call stg_makeStableName#(R1) returns to c1qb, args: 8, res: 8, upd: 8; +-- +-- If we run sinking pass now (still before stack layout) we will get this: +-- +-- c1q6: +-- I64[(young + 8)] = c1q9; +-- foreign call "ccall" arg hints: [] result hints: [] performMajorGC(...) +-- returns to c1q9 args: ([]) ress: ([])ret_args: 8ret_off: 8; +-- c1q9: +-- I64[(young + 8)] = c1qb; +-- _s1pf::P64 = R1; <------ _s1pf sunk past safe foreign call +-- R1 = _s1pc::P64; +-- call stg_makeStableName#(R1) returns to c1qb, args: 8, res: 8, upd: 8; +-- +-- Notice that _s1pf was sunk past a foreign call. 
When we run stack layout +-- safe call to performMajorGC will be turned into: +-- +-- c1q6: +-- _s1pc::P64 = P64[Sp + 8]; +-- I64[Sp - 8] = c1q9; +-- Sp = Sp - 8; +-- I64[I64[CurrentTSO + 24] + 16] = Sp; +-- P64[CurrentNursery + 8] = Hp + 8; +-- (_u1qI::I64) = call "ccall" arg hints: [PtrHint,] +-- result hints: [PtrHint] suspendThread(BaseReg, 0); +-- call "ccall" arg hints: [] result hints: [] performMajorGC(); +-- (_u1qJ::I64) = call "ccall" arg hints: [PtrHint] +-- result hints: [PtrHint] resumeThread(_u1qI::I64); +-- BaseReg = _u1qJ::I64; +-- _u1qK::P64 = CurrentTSO; +-- _u1qL::P64 = I64[_u1qK::P64 + 24]; +-- Sp = I64[_u1qL::P64 + 16]; +-- SpLim = _u1qL::P64 + 192; +-- HpAlloc = 0; +-- Hp = I64[CurrentNursery + 8] - 8; +-- HpLim = I64[CurrentNursery] + (%MO_SS_Conv_W32_W64(I32[CurrentNursery + 48]) * 4096 - 1); +-- call (I64[Sp])() returns to c1q9, args: 8, res: 8, upd: 8; +-- c1q9: +-- I64[(young + 8)] = c1qb; +-- _s1pf::P64 = R1; <------ INCORRECT! +-- R1 = _s1pc::P64; +-- call stg_makeStableName#(R1) returns to c1qb, args: 8, res: 8, upd: 8; +-- +-- Notice that c1q6 now ends with a call. Sinking _s1pf::P64 = R1 past that +-- call is clearly incorrect. This is what would happen if we assumed that +-- safe foreign call has the same semantics as unsafe foreign call. To prevent +-- this we need to treat safe foreign call as if was normal call. + +----------------------------------- +-- mapping Expr in GHC.Cmm.Node + +mapForeignTarget :: (CmmExpr -> CmmExpr) -> ForeignTarget -> ForeignTarget +mapForeignTarget exp (ForeignTarget e c) = ForeignTarget (exp e) c +mapForeignTarget _ m@(PrimTarget _) = m + +wrapRecExp :: (CmmExpr -> CmmExpr) -> CmmExpr -> CmmExpr +-- Take a transformer on expressions and apply it recursively. +-- (wrapRecExp f e) first recursively applies itself to sub-expressions of e +-- then uses f to rewrite the resulting expression +wrapRecExp f (CmmMachOp op es) = f (CmmMachOp op $ map (wrapRecExp f) es) +wrapRecExp f (CmmLoad addr ty) = f (CmmLoad (wrapRecExp f addr) ty) +wrapRecExp f e = f e + +mapExp :: (CmmExpr -> CmmExpr) -> CmmNode e x -> CmmNode e x +mapExp _ f@(CmmEntry{}) = f +mapExp _ m@(CmmComment _) = m +mapExp _ m@(CmmTick _) = m +mapExp f (CmmUnwind regs) = CmmUnwind (map (fmap (fmap f)) regs) +mapExp f (CmmAssign r e) = CmmAssign r (f e) +mapExp f (CmmStore addr e) = CmmStore (f addr) (f e) +mapExp f (CmmUnsafeForeignCall tgt fs as) = CmmUnsafeForeignCall (mapForeignTarget f tgt) fs (map f as) +mapExp _ l@(CmmBranch _) = l +mapExp f (CmmCondBranch e ti fi l) = CmmCondBranch (f e) ti fi l +mapExp f (CmmSwitch e ids) = CmmSwitch (f e) ids +mapExp f n@CmmCall {cml_target=tgt} = n{cml_target = f tgt} +mapExp f (CmmForeignCall tgt fs as succ ret_args updfr intrbl) = CmmForeignCall (mapForeignTarget f tgt) fs (map f as) succ ret_args updfr intrbl + +mapExpDeep :: (CmmExpr -> CmmExpr) -> CmmNode e x -> CmmNode e x +mapExpDeep f = mapExp $ wrapRecExp f + +------------------------------------------------------------------------ +-- mapping Expr in GHC.Cmm.Node, but not performing allocation if no changes + +mapForeignTargetM :: (CmmExpr -> Maybe CmmExpr) -> ForeignTarget -> Maybe ForeignTarget +mapForeignTargetM f (ForeignTarget e c) = (\x -> ForeignTarget x c) `fmap` f e +mapForeignTargetM _ (PrimTarget _) = Nothing + +wrapRecExpM :: (CmmExpr -> Maybe CmmExpr) -> (CmmExpr -> Maybe CmmExpr) +-- (wrapRecExpM f e) first recursively applies itself to sub-expressions of e +-- then gives f a chance to rewrite the resulting expression +wrapRecExpM f n@(CmmMachOp op es) 
= maybe (f n) (f . CmmMachOp op) (mapListM (wrapRecExpM f) es) +wrapRecExpM f n@(CmmLoad addr ty) = maybe (f n) (f . flip CmmLoad ty) (wrapRecExpM f addr) +wrapRecExpM f e = f e + +mapExpM :: (CmmExpr -> Maybe CmmExpr) -> CmmNode e x -> Maybe (CmmNode e x) +mapExpM _ (CmmEntry{}) = Nothing +mapExpM _ (CmmComment _) = Nothing +mapExpM _ (CmmTick _) = Nothing +mapExpM f (CmmUnwind regs) = CmmUnwind `fmap` mapM (\(r,e) -> mapM f e >>= \e' -> pure (r,e')) regs +mapExpM f (CmmAssign r e) = CmmAssign r `fmap` f e +mapExpM f (CmmStore addr e) = (\[addr', e'] -> CmmStore addr' e') `fmap` mapListM f [addr, e] +mapExpM _ (CmmBranch _) = Nothing +mapExpM f (CmmCondBranch e ti fi l) = (\x -> CmmCondBranch x ti fi l) `fmap` f e +mapExpM f (CmmSwitch e tbl) = (\x -> CmmSwitch x tbl) `fmap` f e +mapExpM f (CmmCall tgt mb_id r o i s) = (\x -> CmmCall x mb_id r o i s) `fmap` f tgt +mapExpM f (CmmUnsafeForeignCall tgt fs as) + = case mapForeignTargetM f tgt of + Just tgt' -> Just (CmmUnsafeForeignCall tgt' fs (mapListJ f as)) + Nothing -> (\xs -> CmmUnsafeForeignCall tgt fs xs) `fmap` mapListM f as +mapExpM f (CmmForeignCall tgt fs as succ ret_args updfr intrbl) + = case mapForeignTargetM f tgt of + Just tgt' -> Just (CmmForeignCall tgt' fs (mapListJ f as) succ ret_args updfr intrbl) + Nothing -> (\xs -> CmmForeignCall tgt fs xs succ ret_args updfr intrbl) `fmap` mapListM f as + +-- share as much as possible +mapListM :: (a -> Maybe a) -> [a] -> Maybe [a] +mapListM f xs = let (b, r) = mapListT f xs + in if b then Just r else Nothing + +mapListJ :: (a -> Maybe a) -> [a] -> [a] +mapListJ f xs = snd (mapListT f xs) + +mapListT :: (a -> Maybe a) -> [a] -> (Bool, [a]) +mapListT f xs = foldr g (False, []) (zip3 (tails xs) xs (map f xs)) + where g (_, y, Nothing) (True, ys) = (True, y:ys) + g (_, _, Just y) (True, ys) = (True, y:ys) + g (ys', _, Nothing) (False, _) = (False, ys') + g (_, _, Just y) (False, ys) = (True, y:ys) + +mapExpDeepM :: (CmmExpr -> Maybe CmmExpr) -> CmmNode e x -> Maybe (CmmNode e x) +mapExpDeepM f = mapExpM $ wrapRecExpM f + +----------------------------------- +-- folding Expr in GHC.Cmm.Node + +foldExpForeignTarget :: (CmmExpr -> z -> z) -> ForeignTarget -> z -> z +foldExpForeignTarget exp (ForeignTarget e _) z = exp e z +foldExpForeignTarget _ (PrimTarget _) z = z + +-- Take a folder on expressions and apply it recursively. +-- Specifically (wrapRecExpf f e z) deals with CmmMachOp and CmmLoad +-- itself, delegating all the other CmmExpr forms to 'f'. 
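+-- For example (an illustrative sketch, not used elsewhere): counting the
+-- loads in an expression, including loads nested inside other loads or
+-- inside MachOps:
+--
+--   countLoads :: CmmExpr -> Int
+--   countLoads e = wrapRecExpf incr e 0
+--     where incr (CmmLoad {}) n = n + 1
+--           incr _            n = n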
+wrapRecExpf :: (CmmExpr -> z -> z) -> CmmExpr -> z -> z +wrapRecExpf f e@(CmmMachOp _ es) z = foldr (wrapRecExpf f) (f e z) es +wrapRecExpf f e@(CmmLoad addr _) z = wrapRecExpf f addr (f e z) +wrapRecExpf f e z = f e z + +foldExp :: (CmmExpr -> z -> z) -> CmmNode e x -> z -> z +foldExp _ (CmmEntry {}) z = z +foldExp _ (CmmComment {}) z = z +foldExp _ (CmmTick {}) z = z +foldExp f (CmmUnwind xs) z = foldr (maybe id f) z (map snd xs) +foldExp f (CmmAssign _ e) z = f e z +foldExp f (CmmStore addr e) z = f addr $ f e z +foldExp f (CmmUnsafeForeignCall t _ as) z = foldr f (foldExpForeignTarget f t z) as +foldExp _ (CmmBranch _) z = z +foldExp f (CmmCondBranch e _ _ _) z = f e z +foldExp f (CmmSwitch e _) z = f e z +foldExp f (CmmCall {cml_target=tgt}) z = f tgt z +foldExp f (CmmForeignCall {tgt=tgt, args=args}) z = foldr f (foldExpForeignTarget f tgt z) args + +foldExpDeep :: (CmmExpr -> z -> z) -> CmmNode e x -> z -> z +foldExpDeep f = foldExp (wrapRecExpf f) + +-- ----------------------------------------------------------------------------- + +mapSuccessors :: (Label -> Label) -> CmmNode O C -> CmmNode O C +mapSuccessors f (CmmBranch bid) = CmmBranch (f bid) +mapSuccessors f (CmmCondBranch p y n l) = CmmCondBranch p (f y) (f n) l +mapSuccessors f (CmmSwitch e ids) = CmmSwitch e (mapSwitchTargets f ids) +mapSuccessors _ n = n + +mapCollectSuccessors :: forall a. (Label -> (Label,a)) -> CmmNode O C + -> (CmmNode O C, [a]) +mapCollectSuccessors f (CmmBranch bid) + = let (bid', acc) = f bid in (CmmBranch bid', [acc]) +mapCollectSuccessors f (CmmCondBranch p y n l) + = let (bidt, acct) = f y + (bidf, accf) = f n + in (CmmCondBranch p bidt bidf l, [accf, acct]) +mapCollectSuccessors f (CmmSwitch e ids) + = let lbls = switchTargetsToList ids :: [Label] + lblMap = mapFromList $ zip lbls (map f lbls) :: LabelMap (Label, a) + in ( CmmSwitch e + (mapSwitchTargets + (\l -> fst $ mapFindWithDefault (error "impossible") l lblMap) ids) + , map snd (mapElems lblMap) + ) +mapCollectSuccessors _ n = (n, []) + +-- ----------------------------------------------------------------------------- + +-- | Tickish in Cmm context (annotations only) +type CmmTickish = Tickish () + +-- | Tick scope identifier, allowing us to reason about what +-- annotations in a Cmm block should scope over. We especially take +-- care to allow optimisations to reorganise blocks without losing +-- tick association in the process. +data CmmTickScope + = GlobalScope + -- ^ The global scope is the "root" of the scope graph. Every + -- scope is a sub-scope of the global scope. It doesn't make sense + -- to add ticks to this scope. On the other hand, this means that + -- setting this scope on a block means no ticks apply to it. + + | SubScope !U.Unique CmmTickScope + -- ^ Constructs a new sub-scope to an existing scope. This allows + -- us to translate Core-style scoping rules (see @tickishScoped@) + -- into the Cmm world. Suppose the following code: + -- + -- tick<1> case ... of + -- A -> tick<2> ... + -- B -> tick<3> ... + -- + -- We want the top-level tick annotation to apply to blocks + -- generated for the A and B alternatives. We can achieve that by + -- generating tick<1> into a block with scope a, while the code + -- for alternatives A and B gets generated into sub-scopes a/b and + -- a/c respectively. + + | CombinedScope CmmTickScope CmmTickScope + -- ^ A combined scope scopes over everything that the two given + -- scopes cover. It is therefore a sub-scope of either scope. This + -- is required for optimisations. 
Consider common block elimination: + -- + -- A -> tick<2> case ... of + -- C -> [common] + -- B -> tick<3> case ... of + -- D -> [common] + -- + -- We will generate code for the C and D alternatives, and figure + -- out afterwards that it's actually common code. Scoping rules + -- dictate that the resulting common block needs to be covered by + -- both tick<2> and tick<3>, therefore we need to construct a + -- scope that is a child to *both* scope. Now we can do that - if + -- we assign the scopes a/c and b/d to the common-ed up blocks, + -- the new block could have a combined tick scope a/c+b/d, which + -- both tick<2> and tick<3> apply to. + +-- Note [CmmTick scoping details]: +-- +-- The scope of a @CmmTick@ is given by the @CmmEntry@ node of the +-- same block. Note that as a result of this, optimisations making +-- tick scopes more specific can *reduce* the amount of code a tick +-- scopes over. Fixing this would require a separate @CmmTickScope@ +-- field for @CmmTick@. Right now we do not do this simply because I +-- couldn't find an example where it actually mattered -- multiple +-- blocks within the same scope generally jump to each other, which +-- prevents common block elimination from happening in the first +-- place. But this is no strong reason, so if Cmm optimisations become +-- more involved in future this might have to be revisited. + +-- | Output all scope paths. +scopeToPaths :: CmmTickScope -> [[U.Unique]] +scopeToPaths GlobalScope = [[]] +scopeToPaths (SubScope u s) = map (u:) (scopeToPaths s) +scopeToPaths (CombinedScope s1 s2) = scopeToPaths s1 ++ scopeToPaths s2 + +-- | Returns the head uniques of the scopes. This is based on the +-- assumption that the @Unique@ of @SubScope@ identifies the +-- underlying super-scope. Used for efficient equality and comparison, +-- see below. +scopeUniques :: CmmTickScope -> [U.Unique] +scopeUniques GlobalScope = [] +scopeUniques (SubScope u _) = [u] +scopeUniques (CombinedScope s1 s2) = scopeUniques s1 ++ scopeUniques s2 + +-- Equality and order is based on the head uniques defined above. We +-- take care to short-cut the (extremely) common cases. +instance Eq CmmTickScope where + GlobalScope == GlobalScope = True + GlobalScope == _ = False + _ == GlobalScope = False + (SubScope u _) == (SubScope u' _) = u == u' + (SubScope _ _) == _ = False + _ == (SubScope _ _) = False + scope == scope' = + sortBy nonDetCmpUnique (scopeUniques scope) == + sortBy nonDetCmpUnique (scopeUniques scope') + -- This is still deterministic because + -- the order is the same for equal lists + +-- This is non-deterministic but we do not currently support deterministic +-- code-generation. See Note [Unique Determinism and code generation] +-- See Note [No Ord for Unique] +instance Ord CmmTickScope where + compare GlobalScope GlobalScope = EQ + compare GlobalScope _ = LT + compare _ GlobalScope = GT + compare (SubScope u _) (SubScope u' _) = nonDetCmpUnique u u' + compare scope scope' = cmpList nonDetCmpUnique + (sortBy nonDetCmpUnique $ scopeUniques scope) + (sortBy nonDetCmpUnique $ scopeUniques scope') + +instance Outputable CmmTickScope where + ppr GlobalScope = text "global" + ppr (SubScope us GlobalScope) + = ppr us + ppr (SubScope us s) = ppr s <> char '/' <> ppr us + ppr combined = parens $ hcat $ punctuate (char '+') $ + map (hcat . punctuate (char '/') . map ppr . reverse) $ + scopeToPaths combined + +-- | Checks whether two tick scopes are sub-scopes of each other. True +-- if the two scopes are equal. 
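+-- For example (a sketch; u1 and u2 stand for two distinct Uniques):
+--
+--   let a  = SubScope u1 GlobalScope        -- scope "a"
+--       ab = SubScope u2 a                  -- scope "a/b", nested under "a"
+--   in ( ab `isTickSubScope` a              -- True:  a/b lies inside a
+--      , a  `isTickSubScope` ab             -- False: a does not lie inside a/b
+--      , combineTickScopes a ab )           -- ab: no CombinedScope needed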
+isTickSubScope :: CmmTickScope -> CmmTickScope -> Bool +isTickSubScope = cmp + where cmp _ GlobalScope = True + cmp GlobalScope _ = False + cmp (CombinedScope s1 s2) s' = cmp s1 s' && cmp s2 s' + cmp s (CombinedScope s1' s2') = cmp s s1' || cmp s s2' + cmp (SubScope u s) s'@(SubScope u' _) = u == u' || cmp s s' + +-- | Combine two tick scopes. The new scope should be sub-scope of +-- both parameters. We simplify automatically if one tick scope is a +-- sub-scope of the other already. +combineTickScopes :: CmmTickScope -> CmmTickScope -> CmmTickScope +combineTickScopes s1 s2 + | s1 `isTickSubScope` s2 = s1 + | s2 `isTickSubScope` s1 = s2 + | otherwise = CombinedScope s1 s2 diff --git a/compiler/GHC/Cmm/Opt.hs b/compiler/GHC/Cmm/Opt.hs new file mode 100644 index 0000000000..1db37ae58c --- /dev/null +++ b/compiler/GHC/Cmm/Opt.hs @@ -0,0 +1,423 @@ +----------------------------------------------------------------------------- +-- +-- Cmm optimisation +-- +-- (c) The University of Glasgow 2006 +-- +----------------------------------------------------------------------------- + +module GHC.Cmm.Opt ( + constantFoldNode, + constantFoldExpr, + cmmMachOpFold, + cmmMachOpFoldM + ) where + +import GhcPrelude + +import GHC.Cmm.Utils +import GHC.Cmm +import DynFlags +import Util + +import Outputable +import GHC.Platform + +import Data.Bits +import Data.Maybe + + +constantFoldNode :: DynFlags -> CmmNode e x -> CmmNode e x +constantFoldNode dflags = mapExp (constantFoldExpr dflags) + +constantFoldExpr :: DynFlags -> CmmExpr -> CmmExpr +constantFoldExpr dflags = wrapRecExp f + where f (CmmMachOp op args) = cmmMachOpFold dflags op args + f (CmmRegOff r 0) = CmmReg r + f e = e + +-- ----------------------------------------------------------------------------- +-- MachOp constant folder + +-- Now, try to constant-fold the MachOps. The arguments have already +-- been optimized and folded. + +cmmMachOpFold + :: DynFlags + -> MachOp -- The operation from an CmmMachOp + -> [CmmExpr] -- The optimized arguments + -> CmmExpr + +cmmMachOpFold dflags op args = fromMaybe (CmmMachOp op args) (cmmMachOpFoldM dflags op args) + +-- Returns Nothing if no changes, useful for Hoopl, also reduces +-- allocation! +cmmMachOpFoldM + :: DynFlags + -> MachOp + -> [CmmExpr] + -> Maybe CmmExpr + +cmmMachOpFoldM _ op [CmmLit (CmmInt x rep)] + = Just $ case op of + MO_S_Neg _ -> CmmLit (CmmInt (-x) rep) + MO_Not _ -> CmmLit (CmmInt (complement x) rep) + + -- these are interesting: we must first narrow to the + -- "from" type, in order to truncate to the correct size. + -- The final narrow/widen to the destination type + -- is implicit in the CmmLit. 
+ MO_SF_Conv _from to -> CmmLit (CmmFloat (fromInteger x) to) + MO_SS_Conv from to -> CmmLit (CmmInt (narrowS from x) to) + MO_UU_Conv from to -> CmmLit (CmmInt (narrowU from x) to) + + _ -> panic $ "cmmMachOpFoldM: unknown unary op: " ++ show op + + +-- Eliminate conversion NOPs +cmmMachOpFoldM _ (MO_SS_Conv rep1 rep2) [x] | rep1 == rep2 = Just x +cmmMachOpFoldM _ (MO_UU_Conv rep1 rep2) [x] | rep1 == rep2 = Just x + +-- Eliminate nested conversions where possible +cmmMachOpFoldM dflags conv_outer [CmmMachOp conv_inner [x]] + | Just (rep1,rep2,signed1) <- isIntConversion conv_inner, + Just (_, rep3,signed2) <- isIntConversion conv_outer + = case () of + -- widen then narrow to the same size is a nop + _ | rep1 < rep2 && rep1 == rep3 -> Just x + -- Widen then narrow to different size: collapse to single conversion + -- but remember to use the signedness from the widening, just in case + -- the final conversion is a widen. + | rep1 < rep2 && rep2 > rep3 -> + Just $ cmmMachOpFold dflags (intconv signed1 rep1 rep3) [x] + -- Nested widenings: collapse if the signedness is the same + | rep1 < rep2 && rep2 < rep3 && signed1 == signed2 -> + Just $ cmmMachOpFold dflags (intconv signed1 rep1 rep3) [x] + -- Nested narrowings: collapse + | rep1 > rep2 && rep2 > rep3 -> + Just $ cmmMachOpFold dflags (MO_UU_Conv rep1 rep3) [x] + | otherwise -> + Nothing + where + isIntConversion (MO_UU_Conv rep1 rep2) + = Just (rep1,rep2,False) + isIntConversion (MO_SS_Conv rep1 rep2) + = Just (rep1,rep2,True) + isIntConversion _ = Nothing + + intconv True = MO_SS_Conv + intconv False = MO_UU_Conv + +-- ToDo: a narrow of a load can be collapsed into a narrow load, right? +-- but what if the architecture only supports word-sized loads, should +-- we do the transformation anyway? + +cmmMachOpFoldM dflags mop [CmmLit (CmmInt x xrep), CmmLit (CmmInt y _)] + = case mop of + -- for comparisons: don't forget to narrow the arguments before + -- comparing, since they might be out of range. + MO_Eq _ -> Just $ CmmLit (CmmInt (if x_u == y_u then 1 else 0) (wordWidth dflags)) + MO_Ne _ -> Just $ CmmLit (CmmInt (if x_u /= y_u then 1 else 0) (wordWidth dflags)) + + MO_U_Gt _ -> Just $ CmmLit (CmmInt (if x_u > y_u then 1 else 0) (wordWidth dflags)) + MO_U_Ge _ -> Just $ CmmLit (CmmInt (if x_u >= y_u then 1 else 0) (wordWidth dflags)) + MO_U_Lt _ -> Just $ CmmLit (CmmInt (if x_u < y_u then 1 else 0) (wordWidth dflags)) + MO_U_Le _ -> Just $ CmmLit (CmmInt (if x_u <= y_u then 1 else 0) (wordWidth dflags)) + + MO_S_Gt _ -> Just $ CmmLit (CmmInt (if x_s > y_s then 1 else 0) (wordWidth dflags)) + MO_S_Ge _ -> Just $ CmmLit (CmmInt (if x_s >= y_s then 1 else 0) (wordWidth dflags)) + MO_S_Lt _ -> Just $ CmmLit (CmmInt (if x_s < y_s then 1 else 0) (wordWidth dflags)) + MO_S_Le _ -> Just $ CmmLit (CmmInt (if x_s <= y_s then 1 else 0) (wordWidth dflags)) + + MO_Add r -> Just $ CmmLit (CmmInt (x + y) r) + MO_Sub r -> Just $ CmmLit (CmmInt (x - y) r) + MO_Mul r -> Just $ CmmLit (CmmInt (x * y) r) + MO_U_Quot r | y /= 0 -> Just $ CmmLit (CmmInt (x_u `quot` y_u) r) + MO_U_Rem r | y /= 0 -> Just $ CmmLit (CmmInt (x_u `rem` y_u) r) + MO_S_Quot r | y /= 0 -> Just $ CmmLit (CmmInt (x `quot` y) r) + MO_S_Rem r | y /= 0 -> Just $ CmmLit (CmmInt (x `rem` y) r) + + MO_And r -> Just $ CmmLit (CmmInt (x .&. y) r) + MO_Or r -> Just $ CmmLit (CmmInt (x .|. 
y) r) + MO_Xor r -> Just $ CmmLit (CmmInt (x `xor` y) r) + + MO_Shl r -> Just $ CmmLit (CmmInt (x `shiftL` fromIntegral y) r) + MO_U_Shr r -> Just $ CmmLit (CmmInt (x_u `shiftR` fromIntegral y) r) + MO_S_Shr r -> Just $ CmmLit (CmmInt (x `shiftR` fromIntegral y) r) + + _ -> Nothing + + where + x_u = narrowU xrep x + y_u = narrowU xrep y + x_s = narrowS xrep x + y_s = narrowS xrep y + + +-- When possible, shift the constants to the right-hand side, so that we +-- can match for strength reductions. Note that the code generator will +-- also assume that constants have been shifted to the right when +-- possible. + +cmmMachOpFoldM dflags op [x@(CmmLit _), y] + | not (isLit y) && isCommutableMachOp op + = Just (cmmMachOpFold dflags op [y, x]) + +-- Turn (a+b)+c into a+(b+c) where possible. Because literals are +-- moved to the right, it is more likely that we will find +-- opportunities for constant folding when the expression is +-- right-associated. +-- +-- ToDo: this appears to introduce a quadratic behaviour due to the +-- nested cmmMachOpFold. Can we fix this? +-- +-- Why do we check isLit arg1? If arg1 is a lit, it means that arg2 +-- is also a lit (otherwise arg1 would be on the right). If we +-- put arg1 on the left of the rearranged expression, we'll get into a +-- loop: (x1+x2)+x3 => x1+(x2+x3) => (x2+x3)+x1 => x2+(x3+x1) ... +-- +-- Also don't do it if arg1 is PicBaseReg, so that we don't separate the +-- PicBaseReg from the corresponding label (or label difference). +-- +cmmMachOpFoldM dflags mop1 [CmmMachOp mop2 [arg1,arg2], arg3] + | mop2 `associates_with` mop1 + && not (isLit arg1) && not (isPicReg arg1) + = Just (cmmMachOpFold dflags mop2 [arg1, cmmMachOpFold dflags mop1 [arg2,arg3]]) + where + MO_Add{} `associates_with` MO_Sub{} = True + mop1 `associates_with` mop2 = + mop1 == mop2 && isAssociativeMachOp mop1 + +-- special case: (a - b) + c ==> a + (c - b) +cmmMachOpFoldM dflags mop1@(MO_Add{}) [CmmMachOp mop2@(MO_Sub{}) [arg1,arg2], arg3] + | not (isLit arg1) && not (isPicReg arg1) + = Just (cmmMachOpFold dflags mop1 [arg1, cmmMachOpFold dflags mop2 [arg3,arg2]]) + +-- special case: (PicBaseReg + lit) + N ==> PicBaseReg + (lit+N) +-- +-- this is better because lit+N is a single link-time constant (e.g. a +-- CmmLabelOff), so the right-hand expression needs only one +-- instruction, whereas the left needs two. This happens when pointer +-- tagging gives us label+offset, and PIC turns the label into +-- PicBaseReg + label. 
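+-- For instance (a sketch, assuming a 64-bit word width and some CLabel
+-- 'lbl'; 'dflags' stands for whatever DynFlags the caller has in hand):
+--
+--   cmmMachOpFold dflags (MO_Add W64)
+--     [ CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg), CmmLit (CmmLabel lbl)]
+--     , CmmLit (CmmInt 8 W64) ]
+--
+-- folds to
+--
+--   CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg), CmmLit (CmmLabelOff lbl 8)]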
+-- +cmmMachOpFoldM _ MO_Add{} [ CmmMachOp op@MO_Add{} [pic, CmmLit lit] + , CmmLit (CmmInt n rep) ] + | isPicReg pic + = Just $ CmmMachOp op [pic, CmmLit $ cmmOffsetLit lit off ] + where off = fromIntegral (narrowS rep n) + +-- Make a RegOff if we can +cmmMachOpFoldM _ (MO_Add _) [CmmReg reg, CmmLit (CmmInt n rep)] + = Just $ cmmRegOff reg (fromIntegral (narrowS rep n)) +cmmMachOpFoldM _ (MO_Add _) [CmmRegOff reg off, CmmLit (CmmInt n rep)] + = Just $ cmmRegOff reg (off + fromIntegral (narrowS rep n)) +cmmMachOpFoldM _ (MO_Sub _) [CmmReg reg, CmmLit (CmmInt n rep)] + = Just $ cmmRegOff reg (- fromIntegral (narrowS rep n)) +cmmMachOpFoldM _ (MO_Sub _) [CmmRegOff reg off, CmmLit (CmmInt n rep)] + = Just $ cmmRegOff reg (off - fromIntegral (narrowS rep n)) + +-- Fold label(+/-)offset into a CmmLit where possible + +cmmMachOpFoldM _ (MO_Add _) [CmmLit lit, CmmLit (CmmInt i rep)] + = Just $ CmmLit (cmmOffsetLit lit (fromIntegral (narrowU rep i))) +cmmMachOpFoldM _ (MO_Add _) [CmmLit (CmmInt i rep), CmmLit lit] + = Just $ CmmLit (cmmOffsetLit lit (fromIntegral (narrowU rep i))) +cmmMachOpFoldM _ (MO_Sub _) [CmmLit lit, CmmLit (CmmInt i rep)] + = Just $ CmmLit (cmmOffsetLit lit (fromIntegral (negate (narrowU rep i)))) + + +-- Comparison of literal with widened operand: perform the comparison +-- at the smaller width, as long as the literal is within range. + +-- We can't do the reverse trick, when the operand is narrowed: +-- narrowing throws away bits from the operand, there's no way to do +-- the same comparison at the larger size. + +cmmMachOpFoldM dflags cmp [CmmMachOp conv [x], CmmLit (CmmInt i _)] + | -- powerPC NCG has a TODO for I8/I16 comparisons, so don't try + platformArch (targetPlatform dflags) `elem` [ArchX86, ArchX86_64], + -- if the operand is widened: + Just (rep, signed, narrow_fn) <- maybe_conversion conv, + -- and this is a comparison operation: + Just narrow_cmp <- maybe_comparison cmp rep signed, + -- and the literal fits in the smaller size: + i == narrow_fn rep i + -- then we can do the comparison at the smaller size + = Just (cmmMachOpFold dflags narrow_cmp [x, CmmLit (CmmInt i rep)]) + where + maybe_conversion (MO_UU_Conv from to) + | to > from + = Just (from, False, narrowU) + maybe_conversion (MO_SS_Conv from to) + | to > from + = Just (from, True, narrowS) + + -- don't attempt to apply this optimisation when the source + -- is a float; see #1916 + maybe_conversion _ = Nothing + + -- careful (#2080): if the original comparison was signed, but + -- we were doing an unsigned widen, then we must do an + -- unsigned comparison at the smaller size. + maybe_comparison (MO_U_Gt _) rep _ = Just (MO_U_Gt rep) + maybe_comparison (MO_U_Ge _) rep _ = Just (MO_U_Ge rep) + maybe_comparison (MO_U_Lt _) rep _ = Just (MO_U_Lt rep) + maybe_comparison (MO_U_Le _) rep _ = Just (MO_U_Le rep) + maybe_comparison (MO_Eq _) rep _ = Just (MO_Eq rep) + maybe_comparison (MO_S_Gt _) rep True = Just (MO_S_Gt rep) + maybe_comparison (MO_S_Ge _) rep True = Just (MO_S_Ge rep) + maybe_comparison (MO_S_Lt _) rep True = Just (MO_S_Lt rep) + maybe_comparison (MO_S_Le _) rep True = Just (MO_S_Le rep) + maybe_comparison (MO_S_Gt _) rep False = Just (MO_U_Gt rep) + maybe_comparison (MO_S_Ge _) rep False = Just (MO_U_Ge rep) + maybe_comparison (MO_S_Lt _) rep False = Just (MO_U_Lt rep) + maybe_comparison (MO_S_Le _) rep False = Just (MO_U_Le rep) + maybe_comparison _ _ _ = Nothing + +-- We can often do something with constants of 0 and 1 ... 
+-- See Note [Comparison operators] + +cmmMachOpFoldM dflags mop [x, y@(CmmLit (CmmInt 0 _))] + = case mop of + -- Arithmetic + MO_Add _ -> Just x -- x + 0 = x + MO_Sub _ -> Just x -- x - 0 = x + MO_Mul _ -> Just y -- x * 0 = 0 + + -- Logical operations + MO_And _ -> Just y -- x & 0 = 0 + MO_Or _ -> Just x -- x | 0 = x + MO_Xor _ -> Just x -- x `xor` 0 = x + + -- Shifts + MO_Shl _ -> Just x -- x << 0 = x + MO_S_Shr _ -> Just x -- ditto shift-right + MO_U_Shr _ -> Just x + + -- Comparisons; these ones are trickier + -- See Note [Comparison operators] + MO_Ne _ | isComparisonExpr x -> Just x -- (x > y) != 0 = x > y + MO_Eq _ | Just x' <- maybeInvertCmmExpr x -> Just x' -- (x > y) == 0 = x <= y + MO_U_Gt _ | isComparisonExpr x -> Just x -- (x > y) > 0 = x > y + MO_S_Gt _ | isComparisonExpr x -> Just x -- ditto + MO_U_Lt _ | isComparisonExpr x -> Just zero -- (x > y) < 0 = 0 + MO_S_Lt _ | isComparisonExpr x -> Just zero + MO_U_Ge _ | isComparisonExpr x -> Just one -- (x > y) >= 0 = 1 + MO_S_Ge _ | isComparisonExpr x -> Just one + + MO_U_Le _ | Just x' <- maybeInvertCmmExpr x -> Just x' -- (x > y) <= 0 = x <= y + MO_S_Le _ | Just x' <- maybeInvertCmmExpr x -> Just x' + _ -> Nothing + where + zero = CmmLit (CmmInt 0 (wordWidth dflags)) + one = CmmLit (CmmInt 1 (wordWidth dflags)) + +cmmMachOpFoldM dflags mop [x, (CmmLit (CmmInt 1 rep))] + = case mop of + -- Arithmetic: x*1 = x, etc + MO_Mul _ -> Just x + MO_S_Quot _ -> Just x + MO_U_Quot _ -> Just x + MO_S_Rem _ -> Just $ CmmLit (CmmInt 0 rep) + MO_U_Rem _ -> Just $ CmmLit (CmmInt 0 rep) + + -- Comparisons; trickier + -- See Note [Comparison operators] + MO_Ne _ | Just x' <- maybeInvertCmmExpr x -> Just x' -- (x>y) != 1 = x<=y + MO_Eq _ | isComparisonExpr x -> Just x -- (x>y) == 1 = x>y + MO_U_Lt _ | Just x' <- maybeInvertCmmExpr x -> Just x' -- (x>y) < 1 = x<=y + MO_S_Lt _ | Just x' <- maybeInvertCmmExpr x -> Just x' -- ditto + MO_U_Gt _ | isComparisonExpr x -> Just zero -- (x>y) > 1 = 0 + MO_S_Gt _ | isComparisonExpr x -> Just zero + MO_U_Le _ | isComparisonExpr x -> Just one -- (x>y) <= 1 = 1 + MO_S_Le _ | isComparisonExpr x -> Just one + MO_U_Ge _ | isComparisonExpr x -> Just x -- (x>y) >= 1 = x>y + MO_S_Ge _ | isComparisonExpr x -> Just x + _ -> Nothing + where + zero = CmmLit (CmmInt 0 (wordWidth dflags)) + one = CmmLit (CmmInt 1 (wordWidth dflags)) + +-- Now look for multiplication/division by powers of 2 (integers). + +cmmMachOpFoldM dflags mop [x, (CmmLit (CmmInt n _))] + = case mop of + MO_Mul rep + | Just p <- exactLog2 n -> + Just (cmmMachOpFold dflags (MO_Shl rep) [x, CmmLit (CmmInt p rep)]) + MO_U_Quot rep + | Just p <- exactLog2 n -> + Just (cmmMachOpFold dflags (MO_U_Shr rep) [x, CmmLit (CmmInt p rep)]) + MO_U_Rem rep + | Just _ <- exactLog2 n -> + Just (cmmMachOpFold dflags (MO_And rep) [x, CmmLit (CmmInt (n - 1) rep)]) + MO_S_Quot rep + | Just p <- exactLog2 n, + CmmReg _ <- x -> -- We duplicate x in signedQuotRemHelper, hence require + -- it is a reg. FIXME: remove this restriction. + Just (cmmMachOpFold dflags (MO_S_Shr rep) + [signedQuotRemHelper rep p, CmmLit (CmmInt p rep)]) + MO_S_Rem rep + | Just p <- exactLog2 n, + CmmReg _ <- x -> -- We duplicate x in signedQuotRemHelper, hence require + -- it is a reg. FIXME: remove this restriction. + -- We replace (x `rem` 2^p) by (x - (x `quot` 2^p) * 2^p). + -- Moreover, we fuse MO_S_Shr (last operation of MO_S_Quot) + -- and MO_S_Shl (multiplication by 2^p) into a single MO_And operation. 
+ Just (cmmMachOpFold dflags (MO_Sub rep) + [x, cmmMachOpFold dflags (MO_And rep) + [signedQuotRemHelper rep p, CmmLit (CmmInt (- n) rep)]]) + _ -> Nothing + where + -- In contrast with unsigned integers, for signed ones + -- shift right is not the same as quot, because it rounds + -- to minus infinity, whereas quot rounds toward zero. + -- To fix this up, we add one less than the divisor to the + -- dividend if it is a negative number. + -- + -- to avoid a test/jump, we use the following sequence: + -- x1 = x >> word_size-1 (all 1s if -ve, all 0s if +ve) + -- x2 = y & (divisor-1) + -- result = x + x2 + -- this could be done a bit more simply using conditional moves, + -- but we're processor independent here. + -- + -- we optimise the divide by 2 case slightly, generating + -- x1 = x >> word_size-1 (unsigned) + -- return = x + x1 + signedQuotRemHelper :: Width -> Integer -> CmmExpr + signedQuotRemHelper rep p = CmmMachOp (MO_Add rep) [x, x2] + where + bits = fromIntegral (widthInBits rep) - 1 + shr = if p == 1 then MO_U_Shr rep else MO_S_Shr rep + x1 = CmmMachOp shr [x, CmmLit (CmmInt bits rep)] + x2 = if p == 1 then x1 else + CmmMachOp (MO_And rep) [x1, CmmLit (CmmInt (n-1) rep)] + +-- ToDo (#7116): optimise floating-point multiplication, e.g. x*2.0 -> x+x +-- Unfortunately this needs a unique supply because x might not be a +-- register. See #2253 (program 6) for an example. + + +-- Anything else is just too hard. + +cmmMachOpFoldM _ _ _ = Nothing + +{- Note [Comparison operators] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +If we have + CmmCondBranch ((x>#y) == 1) t f +we really want to convert to + CmmCondBranch (x>#y) t f + +That's what the constant-folding operations on comparison operators do above. +-} + + +-- ----------------------------------------------------------------------------- +-- Utils + +isPicReg :: CmmExpr -> Bool +isPicReg (CmmReg (CmmGlobal PicBaseReg)) = True +isPicReg _ = False diff --git a/compiler/GHC/Cmm/Parser.y b/compiler/GHC/Cmm/Parser.y new file mode 100644 index 0000000000..d7235d0167 --- /dev/null +++ b/compiler/GHC/Cmm/Parser.y @@ -0,0 +1,1442 @@ +----------------------------------------------------------------------------- +-- +-- (c) The University of Glasgow, 2004-2012 +-- +-- Parser for concrete Cmm. +-- +----------------------------------------------------------------------------- + +{- ----------------------------------------------------------------------------- +Note [Syntax of .cmm files] + +NOTE: You are very much on your own in .cmm. There is very little +error checking at all: + + * Type errors are detected by the (optional) -dcmm-lint pass, if you + don't turn this on then a type error will likely result in a panic + from the native code generator. + + * Passing the wrong number of arguments or arguments of the wrong + type is not detected. + +There are two ways to write .cmm code: + + (1) High-level Cmm code delegates the stack handling to GHC, and + never explicitly mentions Sp or registers. + + (2) Low-level Cmm manages the stack itself, and must know about + calling conventions. + +Whether you want high-level or low-level Cmm is indicated by the +presence of an argument list on a procedure. 
For example: + +foo ( gcptr a, bits32 b ) +{ + // this is high-level cmm code + + if (b > 0) { + // we can make tail calls passing arguments: + jump stg_ap_0_fast(a); + } + + push (stg_upd_frame_info, a) { + // stack frames can be explicitly pushed + + (x,y) = call wibble(a,b,3,4); + // calls pass arguments and return results using the native + // Haskell calling convention. The code generator will automatically + // construct a stack frame and an info table for the continuation. + + return (x,y); + // we can return multiple values from the current proc + } +} + +bar +{ + // this is low-level cmm code, indicated by the fact that we did not + // put an argument list on bar. + + x = R1; // the calling convention is explicit: better be careful + // that this works on all platforms! + + jump %ENTRY_CODE(Sp(0)) +} + +Here is a list of rules for high-level and low-level code. If you +break the rules, you get a panic (for using a high-level construct in +a low-level proc), or wrong code (when using low-level code in a +high-level proc). This stuff isn't checked! (TODO!) + +High-level only: + + - tail-calls with arguments, e.g. + jump stg_fun (arg1, arg2); + + - function calls: + (ret1,ret2) = call stg_fun (arg1, arg2); + + This makes a call with the NativeNodeCall convention, and the + values are returned to the following code using the NativeReturn + convention. + + - returning: + return (ret1, ret2) + + These use the NativeReturn convention to return zero or more + results to the caller. + + - pushing stack frames: + push (info_ptr, field1, ..., fieldN) { ... statements ... } + + - reserving temporary stack space: + + reserve N = x { ... } + + this reserves an area of size N (words) on the top of the stack, + and binds its address to x (a local register). Typically this is + used for allocating temporary storage for passing to foreign + functions. + + Note that if you make any native calls or invoke the GC in the + scope of the reserve block, you are responsible for ensuring that + the stack you reserved is laid out correctly with an info table. + +Low-level only: + + - References to Sp, R1-R8, F1-F4 etc. + + NB. foreign calls may clobber the argument registers R1-R8, F1-F4 + etc., so ensure they are saved into variables around foreign + calls. + + - SAVE_THREAD_STATE() and LOAD_THREAD_STATE(), which modify Sp + directly. + +Both high-level and low-level code can use a raw tail-call: + + jump stg_fun [R1,R2] + +NB. you *must* specify the list of GlobalRegs that are passed via a +jump, otherwise the register allocator will assume that all the +GlobalRegs are dead at the jump. + + +Calling Conventions +------------------- + +High-level procedures use the NativeNode calling convention, or the +NativeReturn convention if the 'return' keyword is used (see Stack +Frames below). + +Low-level procedures implement their own calling convention, so it can +be anything at all. + +If a low-level procedure implements the NativeNode calling convention, +then it can be called by high-level code using an ordinary function +call. In general this is hard to arrange because the calling +convention depends on the number of physical registers available for +parameter passing, but there are two cases where the calling +convention is platform-independent: + + - Zero arguments. + + - One argument of pointer or non-pointer word type; this is always + passed in R1 according to the NativeNode convention. + + - Returning a single value; these conventions are fixed and platform + independent. 
+ + +Stack Frames +------------ + +A stack frame is written like this: + +INFO_TABLE_RET ( label, FRAME_TYPE, info_ptr, field1, ..., fieldN ) + return ( arg1, ..., argM ) +{ + ... code ... +} + +where field1 ... fieldN are the fields of the stack frame (with types) +arg1...argN are the values returned to the stack frame (with types). +The return values are assumed to be passed according to the +NativeReturn convention. + +On entry to the code, the stack frame looks like: + + |----------| + | fieldN | + | ... | + | field1 | + |----------| + | info_ptr | + |----------| + | argN | + | ... | <- Sp + +and some of the args may be in registers. + +We prepend the code by a copyIn of the args, and assign all the stack +frame fields to their formals. The initial "arg offset" for stack +layout purposes consists of the whole stack frame plus any args that +might be on the stack. + +A tail-call may pass a stack frame to the callee using the following +syntax: + +jump f (info_ptr, field1,..,fieldN) (arg1,..,argN) + +where info_ptr and field1..fieldN describe the stack frame, and +arg1..argN are the arguments passed to f using the NativeNodeCall +convention. Note if a field is longer than a word (e.g. a D_ on +a 32-bit machine) then the call will push as many words as +necessary to the stack to accommodate it (e.g. 2). + + +----------------------------------------------------------------------------- -} + +{ +{-# LANGUAGE TupleSections #-} + +module GHC.Cmm.Parser ( parseCmmFile ) where + +import GhcPrelude + +import GHC.StgToCmm.ExtCode +import GHC.Cmm.CallConv +import GHC.StgToCmm.Prof +import GHC.StgToCmm.Heap +import GHC.StgToCmm.Monad hiding ( getCode, getCodeR, getCodeScoped, emitLabel, emit + , emitStore, emitAssign, emitOutOfLine, withUpdFrameOff + , getUpdFrameOff ) +import qualified GHC.StgToCmm.Monad as F +import GHC.StgToCmm.Utils +import GHC.StgToCmm.Foreign +import GHC.StgToCmm.Expr +import GHC.StgToCmm.Closure +import GHC.StgToCmm.Layout hiding (ArgRep(..)) +import GHC.StgToCmm.Ticky +import GHC.StgToCmm.Bind ( emitBlackHoleCode, emitUpdateFrame ) +import CoreSyn ( Tickish(SourceNote) ) + +import GHC.Cmm.Opt +import GHC.Cmm.Graph +import GHC.Cmm +import GHC.Cmm.Utils +import GHC.Cmm.Switch ( mkSwitchTargets ) +import GHC.Cmm.Info +import GHC.Cmm.BlockId +import GHC.Cmm.Lexer +import GHC.Cmm.CLabel +import GHC.Cmm.Monad +import GHC.Runtime.Layout +import Lexer + +import CostCentre +import ForeignCall +import Module +import GHC.Platform +import Literal +import Unique +import UniqFM +import SrcLoc +import DynFlags +import ErrUtils +import StringBuffer +import FastString +import Panic +import Constants +import Outputable +import BasicTypes +import Bag ( emptyBag, unitBag ) +import Var + +import Control.Monad +import Data.Array +import Data.Char ( ord ) +import System.Exit +import Data.Maybe +import qualified Data.Map as M +import qualified Data.ByteString.Char8 as BS8 + +#include "HsVersions.h" +} + +%expect 0 + +%token + ':' { L _ (CmmT_SpecChar ':') } + ';' { L _ (CmmT_SpecChar ';') } + '{' { L _ (CmmT_SpecChar '{') } + '}' { L _ (CmmT_SpecChar '}') } + '[' { L _ (CmmT_SpecChar '[') } + ']' { L _ (CmmT_SpecChar ']') } + '(' { L _ (CmmT_SpecChar '(') } + ')' { L _ (CmmT_SpecChar ')') } + '=' { L _ (CmmT_SpecChar '=') } + '`' { L _ (CmmT_SpecChar '`') } + '~' { L _ (CmmT_SpecChar '~') } + '/' { L _ (CmmT_SpecChar '/') } + '*' { L _ (CmmT_SpecChar '*') } + '%' { L _ (CmmT_SpecChar '%') } + '-' { L _ (CmmT_SpecChar '-') } + '+' { L _ (CmmT_SpecChar '+') } + '&' { L _ (CmmT_SpecChar '&') } + '^' 
{ L _ (CmmT_SpecChar '^') } + '|' { L _ (CmmT_SpecChar '|') } + '>' { L _ (CmmT_SpecChar '>') } + '<' { L _ (CmmT_SpecChar '<') } + ',' { L _ (CmmT_SpecChar ',') } + '!' { L _ (CmmT_SpecChar '!') } + + '..' { L _ (CmmT_DotDot) } + '::' { L _ (CmmT_DoubleColon) } + '>>' { L _ (CmmT_Shr) } + '<<' { L _ (CmmT_Shl) } + '>=' { L _ (CmmT_Ge) } + '<=' { L _ (CmmT_Le) } + '==' { L _ (CmmT_Eq) } + '!=' { L _ (CmmT_Ne) } + '&&' { L _ (CmmT_BoolAnd) } + '||' { L _ (CmmT_BoolOr) } + + 'True' { L _ (CmmT_True ) } + 'False' { L _ (CmmT_False) } + 'likely'{ L _ (CmmT_likely)} + + 'CLOSURE' { L _ (CmmT_CLOSURE) } + 'INFO_TABLE' { L _ (CmmT_INFO_TABLE) } + 'INFO_TABLE_RET'{ L _ (CmmT_INFO_TABLE_RET) } + 'INFO_TABLE_FUN'{ L _ (CmmT_INFO_TABLE_FUN) } + 'INFO_TABLE_CONSTR'{ L _ (CmmT_INFO_TABLE_CONSTR) } + 'INFO_TABLE_SELECTOR'{ L _ (CmmT_INFO_TABLE_SELECTOR) } + 'else' { L _ (CmmT_else) } + 'export' { L _ (CmmT_export) } + 'section' { L _ (CmmT_section) } + 'goto' { L _ (CmmT_goto) } + 'if' { L _ (CmmT_if) } + 'call' { L _ (CmmT_call) } + 'jump' { L _ (CmmT_jump) } + 'foreign' { L _ (CmmT_foreign) } + 'never' { L _ (CmmT_never) } + 'prim' { L _ (CmmT_prim) } + 'reserve' { L _ (CmmT_reserve) } + 'return' { L _ (CmmT_return) } + 'returns' { L _ (CmmT_returns) } + 'import' { L _ (CmmT_import) } + 'switch' { L _ (CmmT_switch) } + 'case' { L _ (CmmT_case) } + 'default' { L _ (CmmT_default) } + 'push' { L _ (CmmT_push) } + 'unwind' { L _ (CmmT_unwind) } + 'bits8' { L _ (CmmT_bits8) } + 'bits16' { L _ (CmmT_bits16) } + 'bits32' { L _ (CmmT_bits32) } + 'bits64' { L _ (CmmT_bits64) } + 'bits128' { L _ (CmmT_bits128) } + 'bits256' { L _ (CmmT_bits256) } + 'bits512' { L _ (CmmT_bits512) } + 'float32' { L _ (CmmT_float32) } + 'float64' { L _ (CmmT_float64) } + 'gcptr' { L _ (CmmT_gcptr) } + + GLOBALREG { L _ (CmmT_GlobalReg $$) } + NAME { L _ (CmmT_Name $$) } + STRING { L _ (CmmT_String $$) } + INT { L _ (CmmT_Int $$) } + FLOAT { L _ (CmmT_Float $$) } + +%monad { PD } { >>= } { return } +%lexer { cmmlex } { L _ CmmT_EOF } +%name cmmParse cmm +%tokentype { Located CmmToken } + +-- C-- operator precedences, taken from the C-- spec +%right '||' -- non-std extension, called %disjoin in C-- +%right '&&' -- non-std extension, called %conjoin in C-- +%right '!' +%nonassoc '>=' '>' '<=' '<' '!=' '==' +%left '|' +%left '^' +%left '&' +%left '>>' '<<' +%left '-' '+' +%left '/' '*' '%' +%right '~' + +%% + +cmm :: { CmmParse () } + : {- empty -} { return () } + | cmmtop cmm { do $1; $2 } + +cmmtop :: { CmmParse () } + : cmmproc { $1 } + | cmmdata { $1 } + | decl { $1 } + | 'CLOSURE' '(' NAME ',' NAME lits ')' ';' + {% liftP . withThisPackage $ \pkg -> + do lits <- sequence $6; + staticClosure pkg $3 $5 (map getLit lits) } + +-- The only static closures in the RTS are dummy closures like +-- stg_END_TSO_QUEUE_closure and stg_dummy_ret. We don't need +-- to provide the full generality of static closures here. +-- In particular: +-- * CCS can always be CCS_DONT_CARE +-- * closure is always extern +-- * payload is always empty +-- * we can derive closure and info table labels from a single NAME + +cmmdata :: { CmmParse () } + : 'section' STRING '{' data_label statics '}' + { do lbl <- $4; + ss <- sequence $5; + code (emitDecl (CmmData (Section (section $2) lbl) (Statics lbl $ concat ss))) } + +data_label :: { CmmParse CLabel } + : NAME ':' + {% liftP . 
withThisPackage $ \pkg -> + return (mkCmmDataLabel pkg $1) } + +statics :: { [CmmParse [CmmStatic]] } + : {- empty -} { [] } + | static statics { $1 : $2 } + +static :: { CmmParse [CmmStatic] } + : type expr ';' { do e <- $2; + return [CmmStaticLit (getLit e)] } + | type ';' { return [CmmUninitialised + (widthInBytes (typeWidth $1))] } + | 'bits8' '[' ']' STRING ';' { return [mkString $4] } + | 'bits8' '[' INT ']' ';' { return [CmmUninitialised + (fromIntegral $3)] } + | typenot8 '[' INT ']' ';' { return [CmmUninitialised + (widthInBytes (typeWidth $1) * + fromIntegral $3)] } + | 'CLOSURE' '(' NAME lits ')' + { do { lits <- sequence $4 + ; dflags <- getDynFlags + ; return $ map CmmStaticLit $ + mkStaticClosure dflags (mkForeignLabel $3 Nothing ForeignLabelInExternalPackage IsData) + -- mkForeignLabel because these are only used + -- for CHARLIKE and INTLIKE closures in the RTS. + dontCareCCS (map getLit lits) [] [] [] } } + -- arrays of closures required for the CHARLIKE & INTLIKE arrays + +lits :: { [CmmParse CmmExpr] } + : {- empty -} { [] } + | ',' expr lits { $2 : $3 } + +cmmproc :: { CmmParse () } + : info maybe_conv maybe_formals maybe_body + { do ((entry_ret_label, info, stk_formals, formals), agraph) <- + getCodeScoped $ loopDecls $ do { + (entry_ret_label, info, stk_formals) <- $1; + dflags <- getDynFlags; + formals <- sequence (fromMaybe [] $3); + withName (showSDoc dflags (ppr entry_ret_label)) + $4; + return (entry_ret_label, info, stk_formals, formals) } + let do_layout = isJust $3 + code (emitProcWithStackFrame $2 info + entry_ret_label stk_formals formals agraph + do_layout ) } + +maybe_conv :: { Convention } + : {- empty -} { NativeNodeCall } + | 'return' { NativeReturn } + +maybe_body :: { CmmParse () } + : ';' { return () } + | '{' body '}' { withSourceNote $1 $3 $2 } + +info :: { CmmParse (CLabel, Maybe CmmInfoTable, [LocalReg]) } + : NAME + {% liftP . withThisPackage $ \pkg -> + do newFunctionName $1 pkg + return (mkCmmCodeLabel pkg $1, Nothing, []) } + + + | 'INFO_TABLE' '(' NAME ',' INT ',' INT ',' INT ',' STRING ',' STRING ')' + -- ptrs, nptrs, closure type, description, type + {% liftP . withThisPackage $ \pkg -> + do dflags <- getDynFlags + let prof = profilingInfo dflags $11 $13 + rep = mkRTSRep (fromIntegral $9) $ + mkHeapRep dflags False (fromIntegral $5) + (fromIntegral $7) Thunk + -- not really Thunk, but that makes the info table + -- we want. + return (mkCmmEntryLabel pkg $3, + Just $ CmmInfoTable { cit_lbl = mkCmmInfoLabel pkg $3 + , cit_rep = rep + , cit_prof = prof, cit_srt = Nothing, cit_clo = Nothing }, + []) } + + | 'INFO_TABLE_FUN' '(' NAME ',' INT ',' INT ',' INT ',' STRING ',' STRING ',' INT ')' + -- ptrs, nptrs, closure type, description, type, fun type + {% liftP . withThisPackage $ \pkg -> + do dflags <- getDynFlags + let prof = profilingInfo dflags $11 $13 + ty = Fun 0 (ArgSpec (fromIntegral $15)) + -- Arity zero, arg_type $15 + rep = mkRTSRep (fromIntegral $9) $ + mkHeapRep dflags False (fromIntegral $5) + (fromIntegral $7) ty + return (mkCmmEntryLabel pkg $3, + Just $ CmmInfoTable { cit_lbl = mkCmmInfoLabel pkg $3 + , cit_rep = rep + , cit_prof = prof, cit_srt = Nothing, cit_clo = Nothing }, + []) } + -- we leave most of the fields zero here. This is only used + -- to generate the BCO info table in the RTS at the moment. + + | 'INFO_TABLE_CONSTR' '(' NAME ',' INT ',' INT ',' INT ',' INT ',' STRING ',' STRING ')' + -- ptrs, nptrs, tag, closure type, description, type + {% liftP . 
withThisPackage $ \pkg -> + do dflags <- getDynFlags + let prof = profilingInfo dflags $13 $15 + ty = Constr (fromIntegral $9) -- Tag + (BS8.pack $13) + rep = mkRTSRep (fromIntegral $11) $ + mkHeapRep dflags False (fromIntegral $5) + (fromIntegral $7) ty + return (mkCmmEntryLabel pkg $3, + Just $ CmmInfoTable { cit_lbl = mkCmmInfoLabel pkg $3 + , cit_rep = rep + , cit_prof = prof, cit_srt = Nothing,cit_clo = Nothing }, + []) } + + -- If profiling is on, this string gets duplicated, + -- but that's the way the old code did it we can fix it some other time. + + | 'INFO_TABLE_SELECTOR' '(' NAME ',' INT ',' INT ',' STRING ',' STRING ')' + -- selector, closure type, description, type + {% liftP . withThisPackage $ \pkg -> + do dflags <- getDynFlags + let prof = profilingInfo dflags $9 $11 + ty = ThunkSelector (fromIntegral $5) + rep = mkRTSRep (fromIntegral $7) $ + mkHeapRep dflags False 0 0 ty + return (mkCmmEntryLabel pkg $3, + Just $ CmmInfoTable { cit_lbl = mkCmmInfoLabel pkg $3 + , cit_rep = rep + , cit_prof = prof, cit_srt = Nothing, cit_clo = Nothing }, + []) } + + | 'INFO_TABLE_RET' '(' NAME ',' INT ')' + -- closure type (no live regs) + {% liftP . withThisPackage $ \pkg -> + do let prof = NoProfilingInfo + rep = mkRTSRep (fromIntegral $5) $ mkStackRep [] + return (mkCmmRetLabel pkg $3, + Just $ CmmInfoTable { cit_lbl = mkCmmRetInfoLabel pkg $3 + , cit_rep = rep + , cit_prof = prof, cit_srt = Nothing, cit_clo = Nothing }, + []) } + + | 'INFO_TABLE_RET' '(' NAME ',' INT ',' formals0 ')' + -- closure type, live regs + {% liftP . withThisPackage $ \pkg -> + do dflags <- getDynFlags + live <- sequence $7 + let prof = NoProfilingInfo + -- drop one for the info pointer + bitmap = mkLiveness dflags (drop 1 live) + rep = mkRTSRep (fromIntegral $5) $ mkStackRep bitmap + return (mkCmmRetLabel pkg $3, + Just $ CmmInfoTable { cit_lbl = mkCmmRetInfoLabel pkg $3 + , cit_rep = rep + , cit_prof = prof, cit_srt = Nothing, cit_clo = Nothing }, + live) } + +body :: { CmmParse () } + : {- empty -} { return () } + | decl body { do $1; $2 } + | stmt body { do $1; $2 } + +decl :: { CmmParse () } + : type names ';' { mapM_ (newLocal $1) $2 } + | 'import' importNames ';' { mapM_ newImport $2 } + | 'export' names ';' { return () } -- ignore exports + + +-- an imported function name, with optional packageId +importNames + :: { [(FastString, CLabel)] } + : importName { [$1] } + | importName ',' importNames { $1 : $3 } + +importName + :: { (FastString, CLabel) } + + -- A label imported without an explicit packageId. + -- These are taken to come from some foreign, unnamed package. + : NAME + { ($1, mkForeignLabel $1 Nothing ForeignLabelInExternalPackage IsFunction) } + + -- as previous 'NAME', but 'IsData' + | 'CLOSURE' NAME + { ($2, mkForeignLabel $2 Nothing ForeignLabelInExternalPackage IsData) } + + -- A label imported with an explicit packageId. + | STRING NAME + { ($2, mkCmmCodeLabel (fsToUnitId (mkFastString $1)) $2) } + + +names :: { [FastString] } + : NAME { [$1] } + | NAME ',' names { $1 : $3 } + +stmt :: { CmmParse () } + : ';' { return () } + + | NAME ':' + { do l <- newLabel $1; emitLabel l } + + + + | lreg '=' expr ';' + { do reg <- $1; e <- $3; withSourceNote $2 $4 (emitAssign reg e) } + | type '[' expr ']' '=' expr ';' + { withSourceNote $2 $7 (doStore $1 $3 $6) } + + -- Gah! We really want to say "foreign_results" but that causes + -- a shift/reduce conflict with assignment. We either + -- we expand out the no-result and single result cases or + -- we tweak the syntax to avoid the conflict. 
The later + -- option is taken here because the other way would require + -- multiple levels of expanding and get unwieldy. + | foreign_results 'foreign' STRING foreignLabel '(' cmm_hint_exprs0 ')' safety opt_never_returns ';' + {% foreignCall $3 $1 $4 $6 $8 $9 } + | foreign_results 'prim' '%' NAME '(' exprs0 ')' ';' + {% primCall $1 $4 $6 } + -- stmt-level macros, stealing syntax from ordinary C-- function calls. + -- Perhaps we ought to use the %%-form? + | NAME '(' exprs0 ')' ';' + {% stmtMacro $1 $3 } + | 'switch' maybe_range expr '{' arms default '}' + { do as <- sequence $5; doSwitch $2 $3 as $6 } + | 'goto' NAME ';' + { do l <- lookupLabel $2; emit (mkBranch l) } + | 'return' '(' exprs0 ')' ';' + { doReturn $3 } + | 'jump' expr vols ';' + { doRawJump $2 $3 } + | 'jump' expr '(' exprs0 ')' ';' + { doJumpWithStack $2 [] $4 } + | 'jump' expr '(' exprs0 ')' '(' exprs0 ')' ';' + { doJumpWithStack $2 $4 $7 } + | 'call' expr '(' exprs0 ')' ';' + { doCall $2 [] $4 } + | '(' formals ')' '=' 'call' expr '(' exprs0 ')' ';' + { doCall $6 $2 $8 } + | 'if' bool_expr cond_likely 'goto' NAME + { do l <- lookupLabel $5; cmmRawIf $2 l $3 } + | 'if' bool_expr cond_likely '{' body '}' else + { cmmIfThenElse $2 (withSourceNote $4 $6 $5) $7 $3 } + | 'push' '(' exprs0 ')' maybe_body + { pushStackFrame $3 $5 } + | 'reserve' expr '=' lreg maybe_body + { reserveStackFrame $2 $4 $5 } + | 'unwind' unwind_regs ';' + { $2 >>= code . emitUnwind } + +unwind_regs + :: { CmmParse [(GlobalReg, Maybe CmmExpr)] } + : GLOBALREG '=' expr_or_unknown ',' unwind_regs + { do e <- $3; rest <- $5; return (($1, e) : rest) } + | GLOBALREG '=' expr_or_unknown + { do e <- $3; return [($1, e)] } + +-- | Used by unwind to indicate unknown unwinding values. +expr_or_unknown + :: { CmmParse (Maybe CmmExpr) } + : 'return' + { do return Nothing } + | expr + { do e <- $1; return (Just e) } + +foreignLabel :: { CmmParse CmmExpr } + : NAME { return (CmmLit (CmmLabel (mkForeignLabel $1 Nothing ForeignLabelInThisPackage IsFunction))) } + +opt_never_returns :: { CmmReturnInfo } + : { CmmMayReturn } + | 'never' 'returns' { CmmNeverReturns } + +bool_expr :: { CmmParse BoolExpr } + : bool_op { $1 } + | expr { do e <- $1; return (BoolTest e) } + +bool_op :: { CmmParse BoolExpr } + : bool_expr '&&' bool_expr { do e1 <- $1; e2 <- $3; + return (BoolAnd e1 e2) } + | bool_expr '||' bool_expr { do e1 <- $1; e2 <- $3; + return (BoolOr e1 e2) } + | '!' bool_expr { do e <- $2; return (BoolNot e) } + | '(' bool_op ')' { $2 } + +safety :: { Safety } + : {- empty -} { PlayRisky } + | STRING {% parseSafety $1 } + +vols :: { [GlobalReg] } + : '[' ']' { [] } + | '[' '*' ']' {% do df <- getDynFlags + ; return (realArgRegsCover df) } + -- All of them. See comment attached + -- to realArgRegsCover + | '[' globals ']' { $2 } + +globals :: { [GlobalReg] } + : GLOBALREG { [$1] } + | GLOBALREG ',' globals { $1 : $3 } + +maybe_range :: { Maybe (Integer,Integer) } + : '[' INT '..' 
INT ']' { Just ($2, $4) } + | {- empty -} { Nothing } + +arms :: { [CmmParse ([Integer],Either BlockId (CmmParse ()))] } + : {- empty -} { [] } + | arm arms { $1 : $2 } + +arm :: { CmmParse ([Integer],Either BlockId (CmmParse ())) } + : 'case' ints ':' arm_body { do b <- $4; return ($2, b) } + +arm_body :: { CmmParse (Either BlockId (CmmParse ())) } + : '{' body '}' { return (Right (withSourceNote $1 $3 $2)) } + | 'goto' NAME ';' { do l <- lookupLabel $2; return (Left l) } + +ints :: { [Integer] } + : INT { [ $1 ] } + | INT ',' ints { $1 : $3 } + +default :: { Maybe (CmmParse ()) } + : 'default' ':' '{' body '}' { Just (withSourceNote $3 $5 $4) } + -- taking a few liberties with the C-- syntax here; C-- doesn't have + -- 'default' branches + | {- empty -} { Nothing } + +-- Note: OldCmm doesn't support a first class 'else' statement, though +-- CmmNode does. +else :: { CmmParse () } + : {- empty -} { return () } + | 'else' '{' body '}' { withSourceNote $2 $4 $3 } + +cond_likely :: { Maybe Bool } + : '(' 'likely' ':' 'True' ')' { Just True } + | '(' 'likely' ':' 'False' ')' { Just False } + | {- empty -} { Nothing } + + +-- we have to write this out longhand so that Happy's precedence rules +-- can kick in. +expr :: { CmmParse CmmExpr } + : expr '/' expr { mkMachOp MO_U_Quot [$1,$3] } + | expr '*' expr { mkMachOp MO_Mul [$1,$3] } + | expr '%' expr { mkMachOp MO_U_Rem [$1,$3] } + | expr '-' expr { mkMachOp MO_Sub [$1,$3] } + | expr '+' expr { mkMachOp MO_Add [$1,$3] } + | expr '>>' expr { mkMachOp MO_U_Shr [$1,$3] } + | expr '<<' expr { mkMachOp MO_Shl [$1,$3] } + | expr '&' expr { mkMachOp MO_And [$1,$3] } + | expr '^' expr { mkMachOp MO_Xor [$1,$3] } + | expr '|' expr { mkMachOp MO_Or [$1,$3] } + | expr '>=' expr { mkMachOp MO_U_Ge [$1,$3] } + | expr '>' expr { mkMachOp MO_U_Gt [$1,$3] } + | expr '<=' expr { mkMachOp MO_U_Le [$1,$3] } + | expr '<' expr { mkMachOp MO_U_Lt [$1,$3] } + | expr '!=' expr { mkMachOp MO_Ne [$1,$3] } + | expr '==' expr { mkMachOp MO_Eq [$1,$3] } + | '~' expr { mkMachOp MO_Not [$2] } + | '-' expr { mkMachOp MO_S_Neg [$2] } + | expr0 '`' NAME '`' expr0 {% do { mo <- nameToMachOp $3 ; + return (mkMachOp mo [$1,$5]) } } + | expr0 { $1 } + +expr0 :: { CmmParse CmmExpr } + : INT maybe_ty { return (CmmLit (CmmInt $1 (typeWidth $2))) } + | FLOAT maybe_ty { return (CmmLit (CmmFloat $1 (typeWidth $2))) } + | STRING { do s <- code (newStringCLit $1); + return (CmmLit s) } + | reg { $1 } + | type '[' expr ']' { do e <- $3; return (CmmLoad e $1) } + | '%' NAME '(' exprs0 ')' {% exprOp $2 $4 } + | '(' expr ')' { $2 } + + +-- leaving out the type of a literal gives you the native word size in C-- +maybe_ty :: { CmmType } + : {- empty -} {% do dflags <- getDynFlags; return $ bWord dflags } + | '::' type { $2 } + +cmm_hint_exprs0 :: { [CmmParse (CmmExpr, ForeignHint)] } + : {- empty -} { [] } + | cmm_hint_exprs { $1 } + +cmm_hint_exprs :: { [CmmParse (CmmExpr, ForeignHint)] } + : cmm_hint_expr { [$1] } + | cmm_hint_expr ',' cmm_hint_exprs { $1 : $3 } + +cmm_hint_expr :: { CmmParse (CmmExpr, ForeignHint) } + : expr { do e <- $1; + return (e, inferCmmHint e) } + | expr STRING {% do h <- parseCmmHint $2; + return $ do + e <- $1; return (e, h) } + +exprs0 :: { [CmmParse CmmExpr] } + : {- empty -} { [] } + | exprs { $1 } + +exprs :: { [CmmParse CmmExpr] } + : expr { [ $1 ] } + | expr ',' exprs { $1 : $3 } + +reg :: { CmmParse CmmExpr } + : NAME { lookupName $1 } + | GLOBALREG { return (CmmReg (CmmGlobal $1)) } + +foreign_results :: { [CmmParse (LocalReg, ForeignHint)] } + : {- empty -} 
{ [] } + | '(' foreign_formals ')' '=' { $2 } + +foreign_formals :: { [CmmParse (LocalReg, ForeignHint)] } + : foreign_formal { [$1] } + | foreign_formal ',' { [$1] } + | foreign_formal ',' foreign_formals { $1 : $3 } + +foreign_formal :: { CmmParse (LocalReg, ForeignHint) } + : local_lreg { do e <- $1; return (e, inferCmmHint (CmmReg (CmmLocal e))) } + | STRING local_lreg {% do h <- parseCmmHint $1; + return $ do + e <- $2; return (e,h) } + +local_lreg :: { CmmParse LocalReg } + : NAME { do e <- lookupName $1; + return $ + case e of + CmmReg (CmmLocal r) -> r + other -> pprPanic "CmmParse:" (ftext $1 <> text " not a local register") } + +lreg :: { CmmParse CmmReg } + : NAME { do e <- lookupName $1; + return $ + case e of + CmmReg r -> r + other -> pprPanic "CmmParse:" (ftext $1 <> text " not a register") } + | GLOBALREG { return (CmmGlobal $1) } + +maybe_formals :: { Maybe [CmmParse LocalReg] } + : {- empty -} { Nothing } + | '(' formals0 ')' { Just $2 } + +formals0 :: { [CmmParse LocalReg] } + : {- empty -} { [] } + | formals { $1 } + +formals :: { [CmmParse LocalReg] } + : formal ',' { [$1] } + | formal { [$1] } + | formal ',' formals { $1 : $3 } + +formal :: { CmmParse LocalReg } + : type NAME { newLocal $1 $2 } + +type :: { CmmType } + : 'bits8' { b8 } + | typenot8 { $1 } + +typenot8 :: { CmmType } + : 'bits16' { b16 } + | 'bits32' { b32 } + | 'bits64' { b64 } + | 'bits128' { b128 } + | 'bits256' { b256 } + | 'bits512' { b512 } + | 'float32' { f32 } + | 'float64' { f64 } + | 'gcptr' {% do dflags <- getDynFlags; return $ gcWord dflags } + +{ +section :: String -> SectionType +section "text" = Text +section "data" = Data +section "rodata" = ReadOnlyData +section "relrodata" = RelocatableReadOnlyData +section "bss" = UninitialisedData +section s = OtherSection s + +mkString :: String -> CmmStatic +mkString s = CmmString (BS8.pack s) + +-- | +-- Given an info table, decide what the entry convention for the proc +-- is. That is, for an INFO_TABLE_RET we want the return convention, +-- otherwise it is a NativeNodeCall. +-- +infoConv :: Maybe CmmInfoTable -> Convention +infoConv Nothing = NativeNodeCall +infoConv (Just info) + | isStackRep (cit_rep info) = NativeReturn + | otherwise = NativeNodeCall + +-- mkMachOp infers the type of the MachOp from the type of its first +-- argument. We assume that this is correct: for MachOps that don't have +-- symmetrical args (e.g. shift ops), the first arg determines the type of +-- the op. 
+mkMachOp :: (Width -> MachOp) -> [CmmParse CmmExpr] -> CmmParse CmmExpr +mkMachOp fn args = do + dflags <- getDynFlags + arg_exprs <- sequence args + return (CmmMachOp (fn (typeWidth (cmmExprType dflags (head arg_exprs)))) arg_exprs) + +getLit :: CmmExpr -> CmmLit +getLit (CmmLit l) = l +getLit (CmmMachOp (MO_S_Neg _) [CmmLit (CmmInt i r)]) = CmmInt (negate i) r +getLit _ = panic "invalid literal" -- TODO messy failure + +nameToMachOp :: FastString -> PD (Width -> MachOp) +nameToMachOp name = + case lookupUFM machOps name of + Nothing -> fail ("unknown primitive " ++ unpackFS name) + Just m -> return m + +exprOp :: FastString -> [CmmParse CmmExpr] -> PD (CmmParse CmmExpr) +exprOp name args_code = do + dflags <- getDynFlags + case lookupUFM (exprMacros dflags) name of + Just f -> return $ do + args <- sequence args_code + return (f args) + Nothing -> do + mo <- nameToMachOp name + return $ mkMachOp mo args_code + +exprMacros :: DynFlags -> UniqFM ([CmmExpr] -> CmmExpr) +exprMacros dflags = listToUFM [ + ( fsLit "ENTRY_CODE", \ [x] -> entryCode dflags x ), + ( fsLit "INFO_PTR", \ [x] -> closureInfoPtr dflags x ), + ( fsLit "STD_INFO", \ [x] -> infoTable dflags x ), + ( fsLit "FUN_INFO", \ [x] -> funInfoTable dflags x ), + ( fsLit "GET_ENTRY", \ [x] -> entryCode dflags (closureInfoPtr dflags x) ), + ( fsLit "GET_STD_INFO", \ [x] -> infoTable dflags (closureInfoPtr dflags x) ), + ( fsLit "GET_FUN_INFO", \ [x] -> funInfoTable dflags (closureInfoPtr dflags x) ), + ( fsLit "INFO_TYPE", \ [x] -> infoTableClosureType dflags x ), + ( fsLit "INFO_PTRS", \ [x] -> infoTablePtrs dflags x ), + ( fsLit "INFO_NPTRS", \ [x] -> infoTableNonPtrs dflags x ) + ] + +-- we understand a subset of C-- primitives: +machOps = listToUFM $ + map (\(x, y) -> (mkFastString x, y)) [ + ( "add", MO_Add ), + ( "sub", MO_Sub ), + ( "eq", MO_Eq ), + ( "ne", MO_Ne ), + ( "mul", MO_Mul ), + ( "neg", MO_S_Neg ), + ( "quot", MO_S_Quot ), + ( "rem", MO_S_Rem ), + ( "divu", MO_U_Quot ), + ( "modu", MO_U_Rem ), + + ( "ge", MO_S_Ge ), + ( "le", MO_S_Le ), + ( "gt", MO_S_Gt ), + ( "lt", MO_S_Lt ), + + ( "geu", MO_U_Ge ), + ( "leu", MO_U_Le ), + ( "gtu", MO_U_Gt ), + ( "ltu", MO_U_Lt ), + + ( "and", MO_And ), + ( "or", MO_Or ), + ( "xor", MO_Xor ), + ( "com", MO_Not ), + ( "shl", MO_Shl ), + ( "shrl", MO_U_Shr ), + ( "shra", MO_S_Shr ), + + ( "fadd", MO_F_Add ), + ( "fsub", MO_F_Sub ), + ( "fneg", MO_F_Neg ), + ( "fmul", MO_F_Mul ), + ( "fquot", MO_F_Quot ), + + ( "feq", MO_F_Eq ), + ( "fne", MO_F_Ne ), + ( "fge", MO_F_Ge ), + ( "fle", MO_F_Le ), + ( "fgt", MO_F_Gt ), + ( "flt", MO_F_Lt ), + + ( "lobits8", flip MO_UU_Conv W8 ), + ( "lobits16", flip MO_UU_Conv W16 ), + ( "lobits32", flip MO_UU_Conv W32 ), + ( "lobits64", flip MO_UU_Conv W64 ), + + ( "zx16", flip MO_UU_Conv W16 ), + ( "zx32", flip MO_UU_Conv W32 ), + ( "zx64", flip MO_UU_Conv W64 ), + + ( "sx16", flip MO_SS_Conv W16 ), + ( "sx32", flip MO_SS_Conv W32 ), + ( "sx64", flip MO_SS_Conv W64 ), + + ( "f2f32", flip MO_FF_Conv W32 ), -- TODO; rounding mode + ( "f2f64", flip MO_FF_Conv W64 ), -- TODO; rounding mode + ( "f2i8", flip MO_FS_Conv W8 ), + ( "f2i16", flip MO_FS_Conv W16 ), + ( "f2i32", flip MO_FS_Conv W32 ), + ( "f2i64", flip MO_FS_Conv W64 ), + ( "i2f32", flip MO_SF_Conv W32 ), + ( "i2f64", flip MO_SF_Conv W64 ) + ] + +callishMachOps :: UniqFM ([CmmExpr] -> (CallishMachOp, [CmmExpr])) +callishMachOps = listToUFM $ + map (\(x, y) -> (mkFastString x, y)) [ + ( "read_barrier", (MO_ReadBarrier,)), + ( "write_barrier", (MO_WriteBarrier,)), + ( "memcpy", memcpyLikeTweakArgs 
MO_Memcpy ), + ( "memset", memcpyLikeTweakArgs MO_Memset ), + ( "memmove", memcpyLikeTweakArgs MO_Memmove ), + ( "memcmp", memcpyLikeTweakArgs MO_Memcmp ), + + ("prefetch0", (MO_Prefetch_Data 0,)), + ("prefetch1", (MO_Prefetch_Data 1,)), + ("prefetch2", (MO_Prefetch_Data 2,)), + ("prefetch3", (MO_Prefetch_Data 3,)), + + ( "popcnt8", (MO_PopCnt W8,)), + ( "popcnt16", (MO_PopCnt W16,)), + ( "popcnt32", (MO_PopCnt W32,)), + ( "popcnt64", (MO_PopCnt W64,)), + + ( "pdep8", (MO_Pdep W8,)), + ( "pdep16", (MO_Pdep W16,)), + ( "pdep32", (MO_Pdep W32,)), + ( "pdep64", (MO_Pdep W64,)), + + ( "pext8", (MO_Pext W8,)), + ( "pext16", (MO_Pext W16,)), + ( "pext32", (MO_Pext W32,)), + ( "pext64", (MO_Pext W64,)), + + ( "cmpxchg8", (MO_Cmpxchg W8,)), + ( "cmpxchg16", (MO_Cmpxchg W16,)), + ( "cmpxchg32", (MO_Cmpxchg W32,)), + ( "cmpxchg64", (MO_Cmpxchg W64,)) + + -- ToDo: the rest, maybe + -- edit: which rest? + -- also: how do we tell CMM Lint how to type check callish macops? + ] + where + memcpyLikeTweakArgs :: (Int -> CallishMachOp) -> [CmmExpr] -> (CallishMachOp, [CmmExpr]) + memcpyLikeTweakArgs op [] = pgmError "memcpy-like function requires at least one argument" + memcpyLikeTweakArgs op args@(_:_) = + (op align, args') + where + args' = init args + align = case last args of + CmmLit (CmmInt alignInteger _) -> fromInteger alignInteger + e -> pprPgmError "Non-constant alignment in memcpy-like function:" (ppr e) + -- The alignment of memcpy-ish operations must be a + -- compile-time constant. We verify this here, passing it around + -- in the MO_* constructor. In order to do this, however, we + -- must intercept the arguments in primCall. + +parseSafety :: String -> PD Safety +parseSafety "safe" = return PlaySafe +parseSafety "unsafe" = return PlayRisky +parseSafety "interruptible" = return PlayInterruptible +parseSafety str = fail ("unrecognised safety: " ++ str) + +parseCmmHint :: String -> PD ForeignHint +parseCmmHint "ptr" = return AddrHint +parseCmmHint "signed" = return SignedHint +parseCmmHint str = fail ("unrecognised hint: " ++ str) + +-- labels are always pointers, so we might as well infer the hint +inferCmmHint :: CmmExpr -> ForeignHint +inferCmmHint (CmmLit (CmmLabel _)) = AddrHint +inferCmmHint (CmmReg (CmmGlobal g)) | isPtrGlobalReg g = AddrHint +inferCmmHint _ = NoHint + +isPtrGlobalReg Sp = True +isPtrGlobalReg SpLim = True +isPtrGlobalReg Hp = True +isPtrGlobalReg HpLim = True +isPtrGlobalReg CCCS = True +isPtrGlobalReg CurrentTSO = True +isPtrGlobalReg CurrentNursery = True +isPtrGlobalReg (VanillaReg _ VGcPtr) = True +isPtrGlobalReg _ = False + +happyError :: PD a +happyError = PD $ \_ s -> unP srcParseFail s + +-- ----------------------------------------------------------------------------- +-- Statement-level macros + +stmtMacro :: FastString -> [CmmParse CmmExpr] -> PD (CmmParse ()) +stmtMacro fun args_code = do + case lookupUFM stmtMacros fun of + Nothing -> fail ("unknown macro: " ++ unpackFS fun) + Just fcode -> return $ do + args <- sequence args_code + code (fcode args) + +stmtMacros :: UniqFM ([CmmExpr] -> FCode ()) +stmtMacros = listToUFM [ + ( fsLit "CCS_ALLOC", \[words,ccs] -> profAlloc words ccs ), + ( fsLit "ENTER_CCS_THUNK", \[e] -> enterCostCentreThunk e ), + + ( fsLit "CLOSE_NURSERY", \[] -> emitCloseNursery ), + ( fsLit "OPEN_NURSERY", \[] -> emitOpenNursery ), + + -- completely generic heap and stack checks, for use in high-level cmm. 
+ ( fsLit "HP_CHK_GEN", \[bytes] -> + heapStackCheckGen Nothing (Just bytes) ), + ( fsLit "STK_CHK_GEN", \[] -> + heapStackCheckGen (Just (CmmLit CmmHighStackMark)) Nothing ), + + -- A stack check for a fixed amount of stack. Sounds a bit strange, but + -- we use the stack for a bit of temporary storage in a couple of primops + ( fsLit "STK_CHK_GEN_N", \[bytes] -> + heapStackCheckGen (Just bytes) Nothing ), + + -- A stack check on entry to a thunk, where the argument is the thunk pointer. + ( fsLit "STK_CHK_NP" , \[node] -> entryHeapCheck' False node 0 [] (return ())), + + ( fsLit "LOAD_THREAD_STATE", \[] -> emitLoadThreadState ), + ( fsLit "SAVE_THREAD_STATE", \[] -> emitSaveThreadState ), + + ( fsLit "LDV_ENTER", \[e] -> ldvEnter e ), + ( fsLit "LDV_RECORD_CREATE", \[e] -> ldvRecordCreate e ), + + ( fsLit "PUSH_UPD_FRAME", \[sp,e] -> emitPushUpdateFrame sp e ), + ( fsLit "SET_HDR", \[ptr,info,ccs] -> + emitSetDynHdr ptr info ccs ), + ( fsLit "TICK_ALLOC_PRIM", \[hdr,goods,slop] -> + tickyAllocPrim hdr goods slop ), + ( fsLit "TICK_ALLOC_PAP", \[goods,slop] -> + tickyAllocPAP goods slop ), + ( fsLit "TICK_ALLOC_UP_THK", \[goods,slop] -> + tickyAllocThunk goods slop ), + ( fsLit "UPD_BH_UPDATABLE", \[reg] -> emitBlackHoleCode reg ) + ] + +emitPushUpdateFrame :: CmmExpr -> CmmExpr -> FCode () +emitPushUpdateFrame sp e = do + dflags <- getDynFlags + emitUpdateFrame dflags sp mkUpdInfoLabel e + +pushStackFrame :: [CmmParse CmmExpr] -> CmmParse () -> CmmParse () +pushStackFrame fields body = do + dflags <- getDynFlags + exprs <- sequence fields + updfr_off <- getUpdFrameOff + let (new_updfr_off, _, g) = copyOutOflow dflags NativeReturn Ret Old + [] updfr_off exprs + emit g + withUpdFrameOff new_updfr_off body + +reserveStackFrame + :: CmmParse CmmExpr + -> CmmParse CmmReg + -> CmmParse () + -> CmmParse () +reserveStackFrame psize preg body = do + dflags <- getDynFlags + old_updfr_off <- getUpdFrameOff + reg <- preg + esize <- psize + let size = case constantFoldExpr dflags esize of + CmmLit (CmmInt n _) -> n + _other -> pprPanic "CmmParse: not a compile-time integer: " + (ppr esize) + let frame = old_updfr_off + wORD_SIZE dflags * fromIntegral size + emitAssign reg (CmmStackSlot Old frame) + withUpdFrameOff frame body + +profilingInfo dflags desc_str ty_str + = if not (gopt Opt_SccProfilingOn dflags) + then NoProfilingInfo + else ProfilingInfo (BS8.pack desc_str) (BS8.pack ty_str) + +staticClosure :: UnitId -> FastString -> FastString -> [CmmLit] -> CmmParse () +staticClosure pkg cl_label info payload + = do dflags <- getDynFlags + let lits = mkStaticClosure dflags (mkCmmInfoLabel pkg info) dontCareCCS payload [] [] [] + code $ emitDataLits (mkCmmDataLabel pkg cl_label) lits + +foreignCall + :: String + -> [CmmParse (LocalReg, ForeignHint)] + -> CmmParse CmmExpr + -> [CmmParse (CmmExpr, ForeignHint)] + -> Safety + -> CmmReturnInfo + -> PD (CmmParse ()) +foreignCall conv_string results_code expr_code args_code safety ret + = do conv <- case conv_string of + "C" -> return CCallConv + "stdcall" -> return StdCallConv + _ -> fail ("unknown calling convention: " ++ conv_string) + return $ do + dflags <- getDynFlags + results <- sequence results_code + expr <- expr_code + args <- sequence args_code + let + expr' = adjCallTarget dflags conv expr args + (arg_exprs, arg_hints) = unzip args + (res_regs, res_hints) = unzip results + fc = ForeignConvention conv arg_hints res_hints ret + target = ForeignTarget expr' fc + _ <- code $ emitForeignCall safety res_regs target arg_exprs + return () + + +doReturn 
:: [CmmParse CmmExpr] -> CmmParse () +doReturn exprs_code = do + dflags <- getDynFlags + exprs <- sequence exprs_code + updfr_off <- getUpdFrameOff + emit (mkReturnSimple dflags exprs updfr_off) + +mkReturnSimple :: DynFlags -> [CmmActual] -> UpdFrameOffset -> CmmAGraph +mkReturnSimple dflags actuals updfr_off = + mkReturn dflags e actuals updfr_off + where e = entryCode dflags (CmmLoad (CmmStackSlot Old updfr_off) + (gcWord dflags)) + +doRawJump :: CmmParse CmmExpr -> [GlobalReg] -> CmmParse () +doRawJump expr_code vols = do + dflags <- getDynFlags + expr <- expr_code + updfr_off <- getUpdFrameOff + emit (mkRawJump dflags expr updfr_off vols) + +doJumpWithStack :: CmmParse CmmExpr -> [CmmParse CmmExpr] + -> [CmmParse CmmExpr] -> CmmParse () +doJumpWithStack expr_code stk_code args_code = do + dflags <- getDynFlags + expr <- expr_code + stk_args <- sequence stk_code + args <- sequence args_code + updfr_off <- getUpdFrameOff + emit (mkJumpExtra dflags NativeNodeCall expr args updfr_off stk_args) + +doCall :: CmmParse CmmExpr -> [CmmParse LocalReg] -> [CmmParse CmmExpr] + -> CmmParse () +doCall expr_code res_code args_code = do + dflags <- getDynFlags + expr <- expr_code + args <- sequence args_code + ress <- sequence res_code + updfr_off <- getUpdFrameOff + c <- code $ mkCall expr (NativeNodeCall,NativeReturn) ress args updfr_off [] + emit c + +adjCallTarget :: DynFlags -> CCallConv -> CmmExpr -> [(CmmExpr, ForeignHint) ] + -> CmmExpr +-- On Windows, we have to add the '@N' suffix to the label when making +-- a call with the stdcall calling convention. +adjCallTarget dflags StdCallConv (CmmLit (CmmLabel lbl)) args + | platformOS (targetPlatform dflags) == OSMinGW32 + = CmmLit (CmmLabel (addLabelSize lbl (sum (map size args)))) + where size (e, _) = max (wORD_SIZE dflags) (widthInBytes (typeWidth (cmmExprType dflags e))) + -- c.f. CgForeignCall.emitForeignCall +adjCallTarget _ _ expr _ + = expr + +primCall + :: [CmmParse (CmmFormal, ForeignHint)] + -> FastString + -> [CmmParse CmmExpr] + -> PD (CmmParse ()) +primCall results_code name args_code + = case lookupUFM callishMachOps name of + Nothing -> fail ("unknown primitive " ++ unpackFS name) + Just f -> return $ do + results <- sequence results_code + args <- sequence args_code + let (p, args') = f args + code (emitPrimCall (map fst results) p args') + +doStore :: CmmType -> CmmParse CmmExpr -> CmmParse CmmExpr -> CmmParse () +doStore rep addr_code val_code + = do dflags <- getDynFlags + addr <- addr_code + val <- val_code + -- if the specified store type does not match the type of the expr + -- on the rhs, then we insert a coercion that will cause the type + -- mismatch to be flagged by cmm-lint. If we don't do this, then + -- the store will happen at the wrong type, and the error will not + -- be noticed. + let val_width = typeWidth (cmmExprType dflags val) + rep_width = typeWidth rep + let coerce_val + | val_width /= rep_width = CmmMachOp (MO_UU_Conv val_width rep_width) [val] + | otherwise = val + emitStore addr coerce_val + +-- ----------------------------------------------------------------------------- +-- If-then-else and boolean expressions + +data BoolExpr + = BoolExpr `BoolAnd` BoolExpr + | BoolExpr `BoolOr` BoolExpr + | BoolNot BoolExpr + | BoolTest CmmExpr + +-- ToDo: smart constructors which simplify the boolean expression. 
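A possible shape for the smart constructors suggested by the ToDo above -- a sketch only, not part of this patch, and the name bNot is invented here. It folds away double negation and inverts comparisons up front, mirroring what emitCond below does at emission time:

    -- Hypothetical smart constructor (sketch, not in this patch): simplify a
    -- BoolExpr before emission rather than during it.
    bNot :: BoolExpr -> BoolExpr
    bNot (BoolNot e) = e                        -- double negation
    bNot (BoolTest (CmmMachOp op args))
      | Just op' <- maybeInvertComparison op    -- e.g. turn !(a < b) into a >= b
      = BoolTest (CmmMachOp op' args)
    bNot e = BoolNot e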
+ +cmmIfThenElse cond then_part else_part likely = do + then_id <- newBlockId + join_id <- newBlockId + c <- cond + emitCond c then_id likely + else_part + emit (mkBranch join_id) + emitLabel then_id + then_part + -- fall through to join + emitLabel join_id + +cmmRawIf cond then_id likely = do + c <- cond + emitCond c then_id likely + +-- 'emitCond cond true_id' emits code to test whether the cond is true, +-- branching to true_id if so, and falling through otherwise. +emitCond (BoolTest e) then_id likely = do + else_id <- newBlockId + emit (mkCbranch e then_id else_id likely) + emitLabel else_id +emitCond (BoolNot (BoolTest (CmmMachOp op args))) then_id likely + | Just op' <- maybeInvertComparison op + = emitCond (BoolTest (CmmMachOp op' args)) then_id (not <$> likely) +emitCond (BoolNot e) then_id likely = do + else_id <- newBlockId + emitCond e else_id likely + emit (mkBranch then_id) + emitLabel else_id +emitCond (e1 `BoolOr` e2) then_id likely = do + emitCond e1 then_id likely + emitCond e2 then_id likely +emitCond (e1 `BoolAnd` e2) then_id likely = do + -- we'd like to invert one of the conditionals here to avoid an + -- extra branch instruction, but we can't use maybeInvertComparison + -- here because we can't look too closely at the expression since + -- we're in a loop. + and_id <- newBlockId + else_id <- newBlockId + emitCond e1 and_id likely + emit (mkBranch else_id) + emitLabel and_id + emitCond e2 then_id likely + emitLabel else_id + +-- ----------------------------------------------------------------------------- +-- Source code notes + +-- | Generate a source note spanning from "a" to "b" (inclusive), then +-- proceed with parsing. This allows debugging tools to reason about +-- locations in Cmm code. +withSourceNote :: Located a -> Located b -> CmmParse c -> CmmParse c +withSourceNote a b parse = do + name <- getName + case combineSrcSpans (getLoc a) (getLoc b) of + RealSrcSpan span -> code (emitTick (SourceNote span name)) >> parse + _other -> parse + +-- ----------------------------------------------------------------------------- +-- Table jumps + +-- We use a simplified form of C-- switch statements for now. A +-- switch statement always compiles to a table jump. Each arm can +-- specify a list of values (not ranges), and there can be a single +-- default branch. The range of the table is given either by the +-- optional range on the switch (eg. switch [0..7] {...}), or by +-- the minimum/maximum values from the branches. 
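As a concrete illustration of the simplified form described above (a hypothetical snippet, not taken from the RTS), a switch like the following reaches doSwitch as a list of (values, target) arms, an optional default, and an optional range:

    -- Cmm source (illustrative):
    --     switch [0 .. 2] R1 {
    --         case 0, 1: goto fast;
    --         default: { goto slow; }
    --     }
    --
    -- parses into roughly:
    --     mb_range = Just (0, 2)
    --     arms     = [([0, 1], Left fast_id)]   -- fast_id is the looked-up BlockId
    --     deflt    = Just <inline code branching to slow>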
+ +doSwitch :: Maybe (Integer,Integer) + -> CmmParse CmmExpr + -> [([Integer],Either BlockId (CmmParse ()))] + -> Maybe (CmmParse ()) -> CmmParse () +doSwitch mb_range scrut arms deflt + = do + -- Compile code for the default branch + dflt_entry <- + case deflt of + Nothing -> return Nothing + Just e -> do b <- forkLabelledCode e; return (Just b) + + -- Compile each case branch + table_entries <- mapM emitArm arms + let table = M.fromList (concat table_entries) + + dflags <- getDynFlags + let range = fromMaybe (0, tARGET_MAX_WORD dflags) mb_range + + expr <- scrut + -- ToDo: check for out of range and jump to default if necessary + emit $ mkSwitch expr (mkSwitchTargets False range dflt_entry table) + where + emitArm :: ([Integer],Either BlockId (CmmParse ())) -> CmmParse [(Integer,BlockId)] + emitArm (ints,Left blockid) = return [ (i,blockid) | i <- ints ] + emitArm (ints,Right code) = do + blockid <- forkLabelledCode code + return [ (i,blockid) | i <- ints ] + +forkLabelledCode :: CmmParse () -> CmmParse BlockId +forkLabelledCode p = do + (_,ag) <- getCodeScoped p + l <- newBlockId + emitOutOfLine l ag + return l + +-- ----------------------------------------------------------------------------- +-- Putting it all together + +-- The initial environment: we define some constants that the compiler +-- knows about here. +initEnv :: DynFlags -> Env +initEnv dflags = listToUFM [ + ( fsLit "SIZEOF_StgHeader", + VarN (CmmLit (CmmInt (fromIntegral (fixedHdrSize dflags)) (wordWidth dflags)) )), + ( fsLit "SIZEOF_StgInfoTable", + VarN (CmmLit (CmmInt (fromIntegral (stdInfoTableSizeB dflags)) (wordWidth dflags)) )) + ] + +parseCmmFile :: DynFlags -> FilePath -> IO (Messages, Maybe CmmGroup) +parseCmmFile dflags filename = withTiming dflags (text "ParseCmm"<+>brackets (text filename)) (\_ -> ()) $ do + buf <- hGetStringBuffer filename + let + init_loc = mkRealSrcLoc (mkFastString filename) 1 1 + init_state = (mkPState dflags buf init_loc) { lex_state = [0] } + -- reset the lex_state: the Lexer monad leaves some stuff + -- in there we don't want. + case unPD cmmParse dflags init_state of + PFailed pst -> + return (getMessages pst dflags, Nothing) + POk pst code -> do + st <- initC + let fcode = getCmm $ unEC code "global" (initEnv dflags) [] >> return () + (cmm,_) = runC dflags no_module st fcode + let ms = getMessages pst dflags + if (errorsFound dflags ms) + then return (ms, Nothing) + else return (ms, Just cmm) + where + no_module = panic "parseCmmFile: no module" +} diff --git a/compiler/GHC/Cmm/Pipeline.hs b/compiler/GHC/Cmm/Pipeline.hs new file mode 100644 index 0000000000..6db9e23ee1 --- /dev/null +++ b/compiler/GHC/Cmm/Pipeline.hs @@ -0,0 +1,367 @@ +{-# LANGUAGE BangPatterns #-} + +module GHC.Cmm.Pipeline ( + -- | Converts C-- with an implicit stack and native C-- calls into + -- optimized, CPS converted and native-call-less C--. The latter + -- C-- can be used to generate assembly. 
+ cmmPipeline +) where + +import GhcPrelude + +import GHC.Cmm +import GHC.Cmm.Lint +import GHC.Cmm.Info.Build +import GHC.Cmm.CommonBlockElim +import GHC.Cmm.Switch.Implement +import GHC.Cmm.ProcPoint +import GHC.Cmm.ContFlowOpt +import GHC.Cmm.LayoutStack +import GHC.Cmm.Sink +import GHC.Cmm.Dataflow.Collections + +import UniqSupply +import DynFlags +import ErrUtils +import HscTypes +import Control.Monad +import Outputable +import GHC.Platform + +----------------------------------------------------------------------------- +-- | Top level driver for C-- pipeline +----------------------------------------------------------------------------- + +cmmPipeline + :: HscEnv -- Compilation env including + -- dynamic flags: -dcmm-lint -ddump-cmm-cps + -> ModuleSRTInfo -- Info about SRTs generated so far + -> CmmGroup -- Input C-- with Procedures + -> IO (ModuleSRTInfo, CmmGroup) -- Output CPS transformed C-- + +cmmPipeline hsc_env srtInfo prog = withTimingSilent dflags (text "Cmm pipeline") forceRes $ + do let dflags = hsc_dflags hsc_env + + tops <- {-# SCC "tops" #-} mapM (cpsTop hsc_env) prog + + (srtInfo, cmms) <- {-# SCC "doSRTs" #-} doSRTs dflags srtInfo tops + dumpWith dflags Opt_D_dump_cmm_cps "Post CPS Cmm" FormatCMM (ppr cmms) + + return (srtInfo, cmms) + + where forceRes (info, group) = + info `seq` foldr (\decl r -> decl `seq` r) () group + + dflags = hsc_dflags hsc_env + +cpsTop :: HscEnv -> CmmDecl -> IO (CAFEnv, [CmmDecl]) +cpsTop _ p@(CmmData {}) = return (mapEmpty, [p]) +cpsTop hsc_env proc = + do + ----------- Control-flow optimisations ---------------------------------- + + -- The first round of control-flow optimisation speeds up the + -- later passes by removing lots of empty blocks, so we do it + -- even when optimisation isn't turned on. 
+ -- + CmmProc h l v g <- {-# SCC "cmmCfgOpts(1)" #-} + return $ cmmCfgOptsProc splitting_proc_points proc + dump Opt_D_dump_cmm_cfg "Post control-flow optimisations" g + + let !TopInfo {stack_info=StackInfo { arg_space = entry_off + , do_layout = do_layout }} = h + + ----------- Eliminate common blocks ------------------------------------- + g <- {-# SCC "elimCommonBlocks" #-} + condPass Opt_CmmElimCommonBlocks elimCommonBlocks g + Opt_D_dump_cmm_cbe "Post common block elimination" + + -- Any work storing block Labels must be performed _after_ + -- elimCommonBlocks + + ----------- Implement switches ------------------------------------------ + g <- {-# SCC "createSwitchPlans" #-} + runUniqSM $ cmmImplementSwitchPlans dflags g + dump Opt_D_dump_cmm_switch "Post switch plan" g + + ----------- Proc points ------------------------------------------------- + let call_pps = {-# SCC "callProcPoints" #-} callProcPoints g + proc_points <- + if splitting_proc_points + then do + pp <- {-# SCC "minimalProcPointSet" #-} runUniqSM $ + minimalProcPointSet (targetPlatform dflags) call_pps g + dumpWith dflags Opt_D_dump_cmm_proc "Proc points" + FormatCMM (ppr l $$ ppr pp $$ ppr g) + return pp + else + return call_pps + + ----------- Layout the stack and manifest Sp ---------------------------- + (g, stackmaps) <- + {-# SCC "layoutStack" #-} + if do_layout + then runUniqSM $ cmmLayoutStack dflags proc_points entry_off g + else return (g, mapEmpty) + dump Opt_D_dump_cmm_sp "Layout Stack" g + + ----------- Sink and inline assignments -------------------------------- + g <- {-# SCC "sink" #-} -- See Note [Sinking after stack layout] + condPass Opt_CmmSink (cmmSink dflags) g + Opt_D_dump_cmm_sink "Sink assignments" + + ------------- CAF analysis ---------------------------------------------- + let cafEnv = {-# SCC "cafAnal" #-} cafAnal call_pps l g + dumpWith dflags Opt_D_dump_cmm_caf "CAFEnv" FormatText (ppr cafEnv) + + g <- if splitting_proc_points + then do + ------------- Split into separate procedures ----------------------- + let pp_map = {-# SCC "procPointAnalysis" #-} + procPointAnalysis proc_points g + dumpWith dflags Opt_D_dump_cmm_procmap "procpoint map" + FormatCMM (ppr pp_map) + g <- {-# SCC "splitAtProcPoints" #-} runUniqSM $ + splitAtProcPoints dflags l call_pps proc_points pp_map + (CmmProc h l v g) + dumps Opt_D_dump_cmm_split "Post splitting" g + return g + else do + -- attach info tables to return points + return $ [attachContInfoTables call_pps (CmmProc h l v g)] + + ------------- Populate info tables with stack info ----------------- + g <- {-# SCC "setInfoTableStackMap" #-} + return $ map (setInfoTableStackMap dflags stackmaps) g + dumps Opt_D_dump_cmm_info "after setInfoTableStackMap" g + + ----------- Control-flow optimisations ----------------------------- + g <- {-# SCC "cmmCfgOpts(2)" #-} + return $ if optLevel dflags >= 1 + then map (cmmCfgOptsProc splitting_proc_points) g + else g + g <- return (map removeUnreachableBlocksProc g) + -- See Note [unreachable blocks] + dumps Opt_D_dump_cmm_cfg "Post control-flow optimisations" g + + return (cafEnv, g) + + where dflags = hsc_dflags hsc_env + platform = targetPlatform dflags + dump = dumpGraph dflags + + dumps flag name + = mapM_ (dumpWith dflags flag name FormatCMM . ppr) + + condPass flag pass g dumpflag dumpname = + if gopt flag dflags + then do + g <- return $ pass g + dump dumpflag dumpname g + return g + else return g + + -- we don't need to split proc points for the NCG, unless + -- tablesNextToCode is off. 
The latter is because we have no + -- label to put on info tables for basic blocks that are not + -- the entry point. + splitting_proc_points = hscTarget dflags /= HscAsm + || not (tablesNextToCode dflags) + || -- Note [inconsistent-pic-reg] + usingInconsistentPicReg + usingInconsistentPicReg + = case (platformArch platform, platformOS platform, positionIndependent dflags) + of (ArchX86, OSDarwin, pic) -> pic + _ -> False + +-- Note [Sinking after stack layout] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- +-- In the past we considered running sinking pass also before stack +-- layout, but after making some measurements we realized that: +-- +-- a) running sinking only before stack layout produces slower +-- code than running sinking only before stack layout +-- +-- b) running sinking both before and after stack layout produces +-- code that has the same performance as when running sinking +-- only after stack layout. +-- +-- In other words sinking before stack layout doesn't buy as anything. +-- +-- An interesting question is "why is it better to run sinking after +-- stack layout"? It seems that the major reason are stores and loads +-- generated by stack layout. Consider this code before stack layout: +-- +-- c1E: +-- _c1C::P64 = R3; +-- _c1B::P64 = R2; +-- _c1A::P64 = R1; +-- I64[(young + 8)] = c1D; +-- call stg_gc_noregs() returns to c1D, args: 8, res: 8, upd: 8; +-- c1D: +-- R3 = _c1C::P64; +-- R2 = _c1B::P64; +-- R1 = _c1A::P64; +-- call (P64[(old + 8)])(R3, R2, R1) args: 8, res: 0, upd: 8; +-- +-- Stack layout pass will save all local variables live across a call +-- (_c1C, _c1B and _c1A in this example) on the stack just before +-- making a call and reload them from the stack after returning from a +-- call: +-- +-- c1E: +-- _c1C::P64 = R3; +-- _c1B::P64 = R2; +-- _c1A::P64 = R1; +-- I64[Sp - 32] = c1D; +-- P64[Sp - 24] = _c1A::P64; +-- P64[Sp - 16] = _c1B::P64; +-- P64[Sp - 8] = _c1C::P64; +-- Sp = Sp - 32; +-- call stg_gc_noregs() returns to c1D, args: 8, res: 8, upd: 8; +-- c1D: +-- _c1A::P64 = P64[Sp + 8]; +-- _c1B::P64 = P64[Sp + 16]; +-- _c1C::P64 = P64[Sp + 24]; +-- R3 = _c1C::P64; +-- R2 = _c1B::P64; +-- R1 = _c1A::P64; +-- Sp = Sp + 32; +-- call (P64[Sp])(R3, R2, R1) args: 8, res: 0, upd: 8; +-- +-- If we don't run sinking pass after stack layout we are basically +-- left with such code. However, running sinking on this code can lead +-- to significant improvements: +-- +-- c1E: +-- I64[Sp - 32] = c1D; +-- P64[Sp - 24] = R1; +-- P64[Sp - 16] = R2; +-- P64[Sp - 8] = R3; +-- Sp = Sp - 32; +-- call stg_gc_noregs() returns to c1D, args: 8, res: 8, upd: 8; +-- c1D: +-- R3 = P64[Sp + 24]; +-- R2 = P64[Sp + 16]; +-- R1 = P64[Sp + 8]; +-- Sp = Sp + 32; +-- call (P64[Sp])(R3, R2, R1) args: 8, res: 0, upd: 8; +-- +-- Now we only have 9 assignments instead of 15. +-- +-- There is one case when running sinking before stack layout could +-- be beneficial. Consider this: +-- +-- L1: +-- x = y +-- call f() returns L2 +-- L2: ...x...y... +-- +-- Since both x and y are live across a call to f, they will be stored +-- on the stack during stack layout and restored after the call: +-- +-- L1: +-- x = y +-- P64[Sp - 24] = L2 +-- P64[Sp - 16] = x +-- P64[Sp - 8] = y +-- Sp = Sp - 24 +-- call f() returns L2 +-- L2: +-- y = P64[Sp + 16] +-- x = P64[Sp + 8] +-- Sp = Sp + 24 +-- ...x...y... 
+-- +-- However, if we run sinking before stack layout we would propagate x +-- to its usage place (both x and y must be local register for this to +-- be possible - global registers cannot be floated past a call): +-- +-- L1: +-- x = y +-- call f() returns L2 +-- L2: ...y...y... +-- +-- Thus making x dead at the call to f(). If we ran stack layout now +-- we would generate less stores and loads: +-- +-- L1: +-- x = y +-- P64[Sp - 16] = L2 +-- P64[Sp - 8] = y +-- Sp = Sp - 16 +-- call f() returns L2 +-- L2: +-- y = P64[Sp + 8] +-- Sp = Sp + 16 +-- ...y...y... +-- +-- But since we don't see any benefits from running sinking before stack +-- layout, this situation probably doesn't arise too often in practice. +-- + +{- Note [inconsistent-pic-reg] + +On x86/Darwin, PIC is implemented by inserting a sequence like + + call 1f + 1: popl %reg + +at the proc entry point, and then referring to labels as offsets from +%reg. If we don't split proc points, then we could have many entry +points in a proc that would need this sequence, and each entry point +would then get a different value for %reg. If there are any join +points, then at the join point we don't have a consistent value for +%reg, so we don't know how to refer to labels. + +Hence, on x86/Darwin, we have to split proc points, and then each proc +point will get its own PIC initialisation sequence. + +This isn't an issue on x86/ELF, where the sequence is + + call 1f + 1: popl %reg + addl $_GLOBAL_OFFSET_TABLE_+(.-1b), %reg + +so %reg always has a consistent value: the address of +_GLOBAL_OFFSET_TABLE_, regardless of which entry point we arrived via. + +-} + +{- Note [unreachable blocks] + +The control-flow optimiser sometimes leaves unreachable blocks behind +containing junk code. These aren't necessarily a problem, but +removing them is good because it might save time in the native code +generator later. + +-} + +runUniqSM :: UniqSM a -> IO a +runUniqSM m = do + us <- mkSplitUniqSupply 'u' + return (initUs_ us m) + + +dumpGraph :: DynFlags -> DumpFlag -> String -> CmmGraph -> IO () +dumpGraph dflags flag name g = do + when (gopt Opt_DoCmmLinting dflags) $ do_lint g + dumpWith dflags flag name FormatCMM (ppr g) + where + do_lint g = case cmmLintGraph dflags g of + Just err -> do { fatalErrorMsg dflags err + ; ghcExit dflags 1 + } + Nothing -> return () + +dumpWith :: DynFlags -> DumpFlag -> String -> DumpFormat -> SDoc -> IO () +dumpWith dflags flag txt fmt sdoc = do + dumpIfSet_dyn dflags flag txt fmt sdoc + when (not (dopt flag dflags)) $ + -- If `-ddump-cmm-verbose -ddump-to-file` is specified, + -- dump each Cmm pipeline stage output to a separate file. 
#16930 + when (dopt Opt_D_dump_cmm_verbose dflags) + $ dumpAction dflags (mkDumpStyle dflags alwaysQualify) + (dumpOptionsFromFlag flag) txt fmt sdoc + dumpIfSet_dyn dflags Opt_D_dump_cmm_verbose_by_proc txt fmt sdoc diff --git a/compiler/GHC/Cmm/Ppr.hs b/compiler/GHC/Cmm/Ppr.hs new file mode 100644 index 0000000000..891cbd9c6d --- /dev/null +++ b/compiler/GHC/Cmm/Ppr.hs @@ -0,0 +1,309 @@ +{-# LANGUAGE GADTs, TypeFamilies, FlexibleContexts, FlexibleInstances #-} +{-# OPTIONS_GHC -fno-warn-orphans #-} + +---------------------------------------------------------------------------- +-- +-- Pretty-printing of Cmm as (a superset of) C-- +-- +-- (c) The University of Glasgow 2004-2006 +-- +----------------------------------------------------------------------------- +-- +-- This is where we walk over CmmNode emitting an external representation, +-- suitable for parsing, in a syntax strongly reminiscent of C--. This +-- is the "External Core" for the Cmm layer. +-- +-- As such, this should be a well-defined syntax: we want it to look nice. +-- Thus, we try wherever possible to use syntax defined in [1], +-- "The C-- Reference Manual", http://www.cs.tufts.edu/~nr/c--/index.html. We +-- differ slightly, in some cases. For one, we use I8 .. I64 for types, rather +-- than C--'s bits8 .. bits64. +-- +-- We try to ensure that all information available in the abstract +-- syntax is reproduced, or reproducible, in the concrete syntax. +-- Data that is not in printed out can be reconstructed according to +-- conventions used in the pretty printer. There are at least two such +-- cases: +-- 1) if a value has wordRep type, the type is not appended in the +-- output. +-- 2) MachOps that operate over wordRep type are printed in a +-- C-style, rather than as their internal MachRep name. +-- +-- These conventions produce much more readable Cmm output. 
+-- +-- A useful example pass over Cmm is in nativeGen/MachCodeGen.hs + +module GHC.Cmm.Ppr + ( module GHC.Cmm.Ppr.Decl + , module GHC.Cmm.Ppr.Expr + ) +where + +import GhcPrelude hiding (succ) + +import GHC.Cmm.CLabel +import GHC.Cmm +import GHC.Cmm.Utils +import GHC.Cmm.Switch +import DynFlags +import FastString +import Outputable +import GHC.Cmm.Ppr.Decl +import GHC.Cmm.Ppr.Expr +import Util + +import BasicTypes +import GHC.Cmm.Dataflow.Block +import GHC.Cmm.Dataflow.Graph + +------------------------------------------------- +-- Outputable instances + +instance Outputable CmmStackInfo where + ppr = pprStackInfo + +instance Outputable CmmTopInfo where + ppr = pprTopInfo + + +instance Outputable (CmmNode e x) where + ppr = pprNode + +instance Outputable Convention where + ppr = pprConvention + +instance Outputable ForeignConvention where + ppr = pprForeignConvention + +instance Outputable ForeignTarget where + ppr = pprForeignTarget + +instance Outputable CmmReturnInfo where + ppr = pprReturnInfo + +instance Outputable (Block CmmNode C C) where + ppr = pprBlock +instance Outputable (Block CmmNode C O) where + ppr = pprBlock +instance Outputable (Block CmmNode O C) where + ppr = pprBlock +instance Outputable (Block CmmNode O O) where + ppr = pprBlock + +instance Outputable (Graph CmmNode e x) where + ppr = pprGraph + +instance Outputable CmmGraph where + ppr = pprCmmGraph + +---------------------------------------------------------- +-- Outputting types Cmm contains + +pprStackInfo :: CmmStackInfo -> SDoc +pprStackInfo (StackInfo {arg_space=arg_space, updfr_space=updfr_space}) = + text "arg_space: " <> ppr arg_space <+> + text "updfr_space: " <> ppr updfr_space + +pprTopInfo :: CmmTopInfo -> SDoc +pprTopInfo (TopInfo {info_tbls=info_tbl, stack_info=stack_info}) = + vcat [text "info_tbls: " <> ppr info_tbl, + text "stack_info: " <> ppr stack_info] + +---------------------------------------------------------- +-- Outputting blocks and graphs + +pprBlock :: IndexedCO x SDoc SDoc ~ SDoc + => Block CmmNode e x -> IndexedCO e SDoc SDoc +pprBlock block + = foldBlockNodesB3 ( ($$) . ppr + , ($$) . (nest 4) . ppr + , ($$) . (nest 4) . ppr + ) + block + empty + +pprGraph :: Graph CmmNode e x -> SDoc +pprGraph GNil = empty +pprGraph (GUnit block) = ppr block +pprGraph (GMany entry body exit) + = text "{" + $$ nest 2 (pprMaybeO entry $$ (vcat $ map ppr $ bodyToBlockList body) $$ pprMaybeO exit) + $$ text "}" + where pprMaybeO :: Outputable (Block CmmNode e x) + => MaybeO ex (Block CmmNode e x) -> SDoc + pprMaybeO NothingO = empty + pprMaybeO (JustO block) = ppr block + +pprCmmGraph :: CmmGraph -> SDoc +pprCmmGraph g + = text "{" <> text "offset" + $$ nest 2 (vcat $ map ppr blocks) + $$ text "}" + where blocks = revPostorder g + -- revPostorder has the side-effect of discarding unreachable code, + -- so pretty-printed Cmm will omit any unreachable blocks. This can + -- sometimes be confusing. 
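These instances are what lets client code dump a graph with plain ppr; a minimal usage sketch along the lines of what the dump machinery in GHC.Cmm.Pipeline does (dumpCmmGraph is a made-up name, not part of this patch):

    -- Render a CmmGraph to a String via the Outputable instance above.
    -- Unreachable blocks are dropped, as noted for pprCmmGraph.
    dumpCmmGraph :: DynFlags -> CmmGraph -> String
    dumpCmmGraph dflags g = showSDoc dflags (ppr g)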
+ +--------------------------------------------- +-- Outputting CmmNode and types which it contains + +pprConvention :: Convention -> SDoc +pprConvention (NativeNodeCall {}) = text "" +pprConvention (NativeDirectCall {}) = text "" +pprConvention (NativeReturn {}) = text "" +pprConvention Slow = text "" +pprConvention GC = text "" + +pprForeignConvention :: ForeignConvention -> SDoc +pprForeignConvention (ForeignConvention c args res ret) = + doubleQuotes (ppr c) <+> text "arg hints: " <+> ppr args <+> text " result hints: " <+> ppr res <+> ppr ret + +pprReturnInfo :: CmmReturnInfo -> SDoc +pprReturnInfo CmmMayReturn = empty +pprReturnInfo CmmNeverReturns = text "never returns" + +pprForeignTarget :: ForeignTarget -> SDoc +pprForeignTarget (ForeignTarget fn c) = ppr c <+> ppr_target fn + where + ppr_target :: CmmExpr -> SDoc + ppr_target t@(CmmLit _) = ppr t + ppr_target fn' = parens (ppr fn') + +pprForeignTarget (PrimTarget op) + -- HACK: We're just using a ForeignLabel to get this printed, the label + -- might not really be foreign. + = ppr + (CmmLabel (mkForeignLabel + (mkFastString (show op)) + Nothing ForeignLabelInThisPackage IsFunction)) + +pprNode :: CmmNode e x -> SDoc +pprNode node = pp_node <+> pp_debug + where + pp_node :: SDoc + pp_node = sdocWithDynFlags $ \dflags -> case node of + -- label: + CmmEntry id tscope -> lbl <> colon <+> + (sdocWithDynFlags $ \dflags -> + ppUnless (gopt Opt_SuppressTicks dflags) (text "//" <+> ppr tscope)) + where + lbl = if gopt Opt_SuppressUniques dflags + then text "_lbl_" + else ppr id + + -- // text + CmmComment s -> text "//" <+> ftext s + + -- //tick bla<...> + CmmTick t -> ppUnless (gopt Opt_SuppressTicks dflags) $ + text "//tick" <+> ppr t + + -- unwind reg = expr; + CmmUnwind regs -> + text "unwind " + <> commafy (map (\(r,e) -> ppr r <+> char '=' <+> ppr e) regs) <> semi + + -- reg = expr; + CmmAssign reg expr -> ppr reg <+> equals <+> ppr expr <> semi + + -- rep[lv] = expr; + CmmStore lv expr -> rep <> brackets(ppr lv) <+> equals <+> ppr expr <> semi + where + rep = sdocWithDynFlags $ \dflags -> + ppr ( cmmExprType dflags expr ) + + -- call "ccall" foo(x, y)[r1, r2]; + -- ToDo ppr volatile + CmmUnsafeForeignCall target results args -> + hsep [ ppUnless (null results) $ + parens (commafy $ map ppr results) <+> equals, + text "call", + ppr target <> parens (commafy $ map ppr args) <> semi] + + -- goto label; + CmmBranch ident -> text "goto" <+> ppr ident <> semi + + -- if (expr) goto t; else goto f; + CmmCondBranch expr t f l -> + hsep [ text "if" + , parens(ppr expr) + , case l of + Nothing -> empty + Just b -> parens (text "likely:" <+> ppr b) + , text "goto" + , ppr t <> semi + , text "else goto" + , ppr f <> semi + ] + + CmmSwitch expr ids -> + hang (hsep [ text "switch" + , range + , if isTrivialCmmExpr expr + then ppr expr + else parens (ppr expr) + , text "{" + ]) + 4 (vcat (map ppCase cases) $$ def) $$ rbrace + where + (cases, mbdef) = switchTargetsFallThrough ids + ppCase (is,l) = hsep + [ text "case" + , commafy $ map integer is + , text ": goto" + , ppr l <> semi + ] + def | Just l <- mbdef = hsep + [ text "default:" + , braces (text "goto" <+> ppr l <> semi) + ] + | otherwise = empty + + range = brackets $ hsep [integer lo, text "..", integer hi] + where (lo,hi) = switchTargetsRange ids + + CmmCall tgt k regs out res updfr_off -> + hcat [ text "call", space + , pprFun tgt, parens (interpp'SP regs), space + , returns <+> + text "args: " <> ppr out <> comma <+> + text "res: " <> ppr res <> comma <+> + text "upd: " <> ppr 
updfr_off + , semi ] + where pprFun f@(CmmLit _) = ppr f + pprFun f = parens (ppr f) + + returns + | Just r <- k = text "returns to" <+> ppr r <> comma + | otherwise = empty + + CmmForeignCall {tgt=t, res=rs, args=as, succ=s, ret_args=a, ret_off=u, intrbl=i} -> + hcat $ if i then [text "interruptible", space] else [] ++ + [ text "foreign call", space + , ppr t, text "(...)", space + , text "returns to" <+> ppr s + <+> text "args:" <+> parens (ppr as) + <+> text "ress:" <+> parens (ppr rs) + , text "ret_args:" <+> ppr a + , text "ret_off:" <+> ppr u + , semi ] + + pp_debug :: SDoc + pp_debug = + if not debugIsOn then empty + else case node of + CmmEntry {} -> empty -- Looks terrible with text " // CmmEntry" + CmmComment {} -> empty -- Looks also terrible with text " // CmmComment" + CmmTick {} -> empty + CmmUnwind {} -> text " // CmmUnwind" + CmmAssign {} -> text " // CmmAssign" + CmmStore {} -> text " // CmmStore" + CmmUnsafeForeignCall {} -> text " // CmmUnsafeForeignCall" + CmmBranch {} -> text " // CmmBranch" + CmmCondBranch {} -> text " // CmmCondBranch" + CmmSwitch {} -> text " // CmmSwitch" + CmmCall {} -> text " // CmmCall" + CmmForeignCall {} -> text " // CmmForeignCall" + + commafy :: [SDoc] -> SDoc + commafy xs = hsep $ punctuate comma xs diff --git a/compiler/GHC/Cmm/Ppr/Decl.hs b/compiler/GHC/Cmm/Ppr/Decl.hs new file mode 100644 index 0000000000..2544e6a0d3 --- /dev/null +++ b/compiler/GHC/Cmm/Ppr/Decl.hs @@ -0,0 +1,169 @@ +---------------------------------------------------------------------------- +-- +-- Pretty-printing of common Cmm types +-- +-- (c) The University of Glasgow 2004-2006 +-- +----------------------------------------------------------------------------- + +-- +-- This is where we walk over Cmm emitting an external representation, +-- suitable for parsing, in a syntax strongly reminiscent of C--. This +-- is the "External Core" for the Cmm layer. +-- +-- As such, this should be a well-defined syntax: we want it to look nice. +-- Thus, we try wherever possible to use syntax defined in [1], +-- "The C-- Reference Manual", http://www.cs.tufts.edu/~nr/c--/index.html. We +-- differ slightly, in some cases. For one, we use I8 .. I64 for types, rather +-- than C--'s bits8 .. bits64. +-- +-- We try to ensure that all information available in the abstract +-- syntax is reproduced, or reproducible, in the concrete syntax. +-- Data that is not in printed out can be reconstructed according to +-- conventions used in the pretty printer. There are at least two such +-- cases: +-- 1) if a value has wordRep type, the type is not appended in the +-- output. +-- 2) MachOps that operate over wordRep type are printed in a +-- C-style, rather than as their internal MachRep name. +-- +-- These conventions produce much more readable Cmm output. 
+-- +-- A useful example pass over Cmm is in nativeGen/MachCodeGen.hs +-- + +{-# OPTIONS_GHC -fno-warn-orphans #-} +module GHC.Cmm.Ppr.Decl + ( writeCmms, pprCmms, pprCmmGroup, pprSection, pprStatic + ) +where + +import GhcPrelude + +import GHC.Cmm.Ppr.Expr +import GHC.Cmm + +import DynFlags +import Outputable +import FastString + +import Data.List +import System.IO + +import qualified Data.ByteString as BS + + +pprCmms :: (Outputable info, Outputable g) + => [GenCmmGroup CmmStatics info g] -> SDoc +pprCmms cmms = pprCode CStyle (vcat (intersperse separator $ map ppr cmms)) + where + separator = space $$ text "-------------------" $$ space + +writeCmms :: (Outputable info, Outputable g) + => DynFlags -> Handle -> [GenCmmGroup CmmStatics info g] -> IO () +writeCmms dflags handle cmms = printForC dflags handle (pprCmms cmms) + +----------------------------------------------------------------------------- + +instance (Outputable d, Outputable info, Outputable i) + => Outputable (GenCmmDecl d info i) where + ppr t = pprTop t + +instance Outputable CmmStatics where + ppr = pprStatics + +instance Outputable CmmStatic where + ppr = pprStatic + +instance Outputable CmmInfoTable where + ppr = pprInfoTable + + +----------------------------------------------------------------------------- + +pprCmmGroup :: (Outputable d, Outputable info, Outputable g) + => GenCmmGroup d info g -> SDoc +pprCmmGroup tops + = vcat $ intersperse blankLine $ map pprTop tops + +-- -------------------------------------------------------------------------- +-- Top level `procedure' blocks. +-- +pprTop :: (Outputable d, Outputable info, Outputable i) + => GenCmmDecl d info i -> SDoc + +pprTop (CmmProc info lbl live graph) + + = vcat [ ppr lbl <> lparen <> rparen <+> lbrace <+> text "// " <+> ppr live + , nest 8 $ lbrace <+> ppr info $$ rbrace + , nest 4 $ ppr graph + , rbrace ] + +-- -------------------------------------------------------------------------- +-- We follow [1], 4.5 +-- +-- section "data" { ... } +-- +pprTop (CmmData section ds) = + (hang (pprSection section <+> lbrace) 4 (ppr ds)) + $$ rbrace + +-- -------------------------------------------------------------------------- +-- Info tables. + +pprInfoTable :: CmmInfoTable -> SDoc +pprInfoTable (CmmInfoTable { cit_lbl = lbl, cit_rep = rep + , cit_prof = prof_info + , cit_srt = srt }) + = vcat [ text "label: " <> ppr lbl + , text "rep: " <> ppr rep + , case prof_info of + NoProfilingInfo -> empty + ProfilingInfo ct cd -> + vcat [ text "type: " <> text (show (BS.unpack ct)) + , text "desc: " <> text (show (BS.unpack cd)) ] + , text "srt: " <> ppr srt ] + +instance Outputable ForeignHint where + ppr NoHint = empty + ppr SignedHint = quotes(text "signed") +-- ppr AddrHint = quotes(text "address") +-- Temp Jan08 + ppr AddrHint = (text "PtrHint") + +-- -------------------------------------------------------------------------- +-- Static data. 
+-- Strings are printed as C strings, and we print them as I8[], +-- following C-- +-- +pprStatics :: CmmStatics -> SDoc +pprStatics (Statics lbl ds) = vcat ((ppr lbl <> colon) : map ppr ds) + +pprStatic :: CmmStatic -> SDoc +pprStatic s = case s of + CmmStaticLit lit -> nest 4 $ text "const" <+> pprLit lit <> semi + CmmUninitialised i -> nest 4 $ text "I8" <> brackets (int i) + CmmString s' -> nest 4 $ text "I8[]" <+> text (show s') + +-- -------------------------------------------------------------------------- +-- data sections +-- +pprSection :: Section -> SDoc +pprSection (Section t suffix) = + section <+> doubleQuotes (pprSectionType t <+> char '.' <+> ppr suffix) + where + section = text "section" + +pprSectionType :: SectionType -> SDoc +pprSectionType s = doubleQuotes (ptext t) + where + t = case s of + Text -> sLit "text" + Data -> sLit "data" + ReadOnlyData -> sLit "readonly" + ReadOnlyData16 -> sLit "readonly16" + RelocatableReadOnlyData + -> sLit "relreadonly" + UninitialisedData -> sLit "uninitialised" + CString -> sLit "cstring" + OtherSection s' -> sLit s' -- Not actually a literal though. diff --git a/compiler/GHC/Cmm/Ppr/Expr.hs b/compiler/GHC/Cmm/Ppr/Expr.hs new file mode 100644 index 0000000000..53a335e561 --- /dev/null +++ b/compiler/GHC/Cmm/Ppr/Expr.hs @@ -0,0 +1,286 @@ +---------------------------------------------------------------------------- +-- +-- Pretty-printing of common Cmm types +-- +-- (c) The University of Glasgow 2004-2006 +-- +----------------------------------------------------------------------------- + +-- +-- This is where we walk over Cmm emitting an external representation, +-- suitable for parsing, in a syntax strongly reminiscent of C--. This +-- is the "External Core" for the Cmm layer. +-- +-- As such, this should be a well-defined syntax: we want it to look nice. +-- Thus, we try wherever possible to use syntax defined in [1], +-- "The C-- Reference Manual", http://www.cs.tufts.edu/~nr/c--/index.html. We +-- differ slightly, in some cases. For one, we use I8 .. I64 for types, rather +-- than C--'s bits8 .. bits64. +-- +-- We try to ensure that all information available in the abstract +-- syntax is reproduced, or reproducible, in the concrete syntax. +-- Data that is not in printed out can be reconstructed according to +-- conventions used in the pretty printer. There are at least two such +-- cases: +-- 1) if a value has wordRep type, the type is not appended in the +-- output. +-- 2) MachOps that operate over wordRep type are printed in a +-- C-style, rather than as their internal MachRep name. +-- +-- These conventions produce much more readable Cmm output. 
+-- +-- A useful example pass over Cmm is in nativeGen/MachCodeGen.hs +-- + +{-# OPTIONS_GHC -fno-warn-orphans #-} +module GHC.Cmm.Ppr.Expr + ( pprExpr, pprLit + ) +where + +import GhcPrelude + +import GHC.Cmm.Expr + +import Outputable +import DynFlags + +import Data.Maybe +import Numeric ( fromRat ) + +----------------------------------------------------------------------------- + +instance Outputable CmmExpr where + ppr e = pprExpr e + +instance Outputable CmmReg where + ppr e = pprReg e + +instance Outputable CmmLit where + ppr l = pprLit l + +instance Outputable LocalReg where + ppr e = pprLocalReg e + +instance Outputable Area where + ppr e = pprArea e + +instance Outputable GlobalReg where + ppr e = pprGlobalReg e + +-- -------------------------------------------------------------------------- +-- Expressions +-- + +pprExpr :: CmmExpr -> SDoc +pprExpr e + = sdocWithDynFlags $ \dflags -> + case e of + CmmRegOff reg i -> + pprExpr (CmmMachOp (MO_Add rep) + [CmmReg reg, CmmLit (CmmInt (fromIntegral i) rep)]) + where rep = typeWidth (cmmRegType dflags reg) + CmmLit lit -> pprLit lit + _other -> pprExpr1 e + +-- Here's the precedence table from GHC.Cmm.Parser: +-- %nonassoc '>=' '>' '<=' '<' '!=' '==' +-- %left '|' +-- %left '^' +-- %left '&' +-- %left '>>' '<<' +-- %left '-' '+' +-- %left '/' '*' '%' +-- %right '~' + +-- We just cope with the common operators for now, the rest will get +-- a default conservative behaviour. + +-- %nonassoc '>=' '>' '<=' '<' '!=' '==' +pprExpr1, pprExpr7, pprExpr8 :: CmmExpr -> SDoc +pprExpr1 (CmmMachOp op [x,y]) | Just doc <- infixMachOp1 op + = pprExpr7 x <+> doc <+> pprExpr7 y +pprExpr1 e = pprExpr7 e + +infixMachOp1, infixMachOp7, infixMachOp8 :: MachOp -> Maybe SDoc + +infixMachOp1 (MO_Eq _) = Just (text "==") +infixMachOp1 (MO_Ne _) = Just (text "!=") +infixMachOp1 (MO_Shl _) = Just (text "<<") +infixMachOp1 (MO_U_Shr _) = Just (text ">>") +infixMachOp1 (MO_U_Ge _) = Just (text ">=") +infixMachOp1 (MO_U_Le _) = Just (text "<=") +infixMachOp1 (MO_U_Gt _) = Just (char '>') +infixMachOp1 (MO_U_Lt _) = Just (char '<') +infixMachOp1 _ = Nothing + +-- %left '-' '+' +pprExpr7 (CmmMachOp (MO_Add rep1) [x, CmmLit (CmmInt i rep2)]) | i < 0 + = pprExpr7 (CmmMachOp (MO_Sub rep1) [x, CmmLit (CmmInt (negate i) rep2)]) +pprExpr7 (CmmMachOp op [x,y]) | Just doc <- infixMachOp7 op + = pprExpr7 x <+> doc <+> pprExpr8 y +pprExpr7 e = pprExpr8 e + +infixMachOp7 (MO_Add _) = Just (char '+') +infixMachOp7 (MO_Sub _) = Just (char '-') +infixMachOp7 _ = Nothing + +-- %left '/' '*' '%' +pprExpr8 (CmmMachOp op [x,y]) | Just doc <- infixMachOp8 op + = pprExpr8 x <+> doc <+> pprExpr9 y +pprExpr8 e = pprExpr9 e + +infixMachOp8 (MO_U_Quot _) = Just (char '/') +infixMachOp8 (MO_Mul _) = Just (char '*') +infixMachOp8 (MO_U_Rem _) = Just (char '%') +infixMachOp8 _ = Nothing + +pprExpr9 :: CmmExpr -> SDoc +pprExpr9 e = + case e of + CmmLit lit -> pprLit1 lit + CmmLoad expr rep -> ppr rep <> brackets (ppr expr) + CmmReg reg -> ppr reg + CmmRegOff reg off -> parens (ppr reg <+> char '+' <+> int off) + CmmStackSlot a off -> parens (ppr a <+> char '+' <+> int off) + CmmMachOp mop args -> genMachOp mop args + +genMachOp :: MachOp -> [CmmExpr] -> SDoc +genMachOp mop args + | Just doc <- infixMachOp mop = case args of + -- dyadic + [x,y] -> pprExpr9 x <+> doc <+> pprExpr9 y + + -- unary + [x] -> doc <> pprExpr9 x + + _ -> pprTrace "GHC.Cmm.Ppr.Expr.genMachOp: machop with strange number of args" + (pprMachOp mop <+> + parens (hcat $ punctuate comma (map pprExpr args))) + empty + + | isJust 
(infixMachOp1 mop) + || isJust (infixMachOp7 mop) + || isJust (infixMachOp8 mop) = parens (pprExpr (CmmMachOp mop args)) + + | otherwise = char '%' <> ppr_op <> parens (commafy (map pprExpr args)) + where ppr_op = text (map (\c -> if c == ' ' then '_' else c) + (show mop)) + -- replace spaces in (show mop) with underscores, + +-- +-- Unsigned ops on the word size of the machine get nice symbols. +-- All else get dumped in their ugly format. +-- +infixMachOp :: MachOp -> Maybe SDoc +infixMachOp mop + = case mop of + MO_And _ -> Just $ char '&' + MO_Or _ -> Just $ char '|' + MO_Xor _ -> Just $ char '^' + MO_Not _ -> Just $ char '~' + MO_S_Neg _ -> Just $ char '-' -- there is no unsigned neg :) + _ -> Nothing + +-- -------------------------------------------------------------------------- +-- Literals. +-- To minimise line noise we adopt the convention that if the literal +-- has the natural machine word size, we do not append the type +-- +pprLit :: CmmLit -> SDoc +pprLit lit = sdocWithDynFlags $ \dflags -> + case lit of + CmmInt i rep -> + hcat [ (if i < 0 then parens else id)(integer i) + , ppUnless (rep == wordWidth dflags) $ + space <> dcolon <+> ppr rep ] + + CmmFloat f rep -> hsep [ double (fromRat f), dcolon, ppr rep ] + CmmVec lits -> char '<' <> commafy (map pprLit lits) <> char '>' + CmmLabel clbl -> ppr clbl + CmmLabelOff clbl i -> ppr clbl <> ppr_offset i + CmmLabelDiffOff clbl1 clbl2 i _ -> ppr clbl1 <> char '-' + <> ppr clbl2 <> ppr_offset i + CmmBlock id -> ppr id + CmmHighStackMark -> text "" + +pprLit1 :: CmmLit -> SDoc +pprLit1 lit@(CmmLabelOff {}) = parens (pprLit lit) +pprLit1 lit = pprLit lit + +ppr_offset :: Int -> SDoc +ppr_offset i + | i==0 = empty + | i>=0 = char '+' <> int i + | otherwise = char '-' <> int (-i) + +-- -------------------------------------------------------------------------- +-- Registers, whether local (temps) or global +-- +pprReg :: CmmReg -> SDoc +pprReg r + = case r of + CmmLocal local -> pprLocalReg local + CmmGlobal global -> pprGlobalReg global + +-- +-- We only print the type of the local reg if it isn't wordRep +-- +pprLocalReg :: LocalReg -> SDoc +pprLocalReg (LocalReg uniq rep) = sdocWithDynFlags $ \dflags -> +-- = ppr rep <> char '_' <> ppr uniq +-- Temp Jan08 + char '_' <> pprUnique dflags uniq <> + (if isWord32 rep -- && not (isGcPtrType rep) -- Temp Jan08 -- sigh + then dcolon <> ptr <> ppr rep + else dcolon <> ptr <> ppr rep) + where + pprUnique dflags unique = + if gopt Opt_SuppressUniques dflags + then text "_locVar_" + else ppr unique + ptr = empty + --if isGcPtrType rep + -- then doubleQuotes (text "ptr") + -- else empty + +-- Stack areas +pprArea :: Area -> SDoc +pprArea Old = text "old" +pprArea (Young id) = hcat [ text "young<", ppr id, text ">" ] + +-- needs to be kept in syn with CmmExpr.hs.GlobalReg +-- +pprGlobalReg :: GlobalReg -> SDoc +pprGlobalReg gr + = case gr of + VanillaReg n _ -> char 'R' <> int n +-- Temp Jan08 +-- VanillaReg n VNonGcPtr -> char 'R' <> int n +-- VanillaReg n VGcPtr -> char 'P' <> int n + FloatReg n -> char 'F' <> int n + DoubleReg n -> char 'D' <> int n + LongReg n -> char 'L' <> int n + XmmReg n -> text "XMM" <> int n + YmmReg n -> text "YMM" <> int n + ZmmReg n -> text "ZMM" <> int n + Sp -> text "Sp" + SpLim -> text "SpLim" + Hp -> text "Hp" + HpLim -> text "HpLim" + MachSp -> text "MachSp" + UnwindReturnReg-> text "UnwindReturnReg" + CCCS -> text "CCCS" + CurrentTSO -> text "CurrentTSO" + CurrentNursery -> text "CurrentNursery" + HpAlloc -> text "HpAlloc" + EagerBlackholeInfo -> text 
"stg_EAGER_BLACKHOLE_info" + GCEnter1 -> text "stg_gc_enter_1" + GCFun -> text "stg_gc_fun" + BaseReg -> text "BaseReg" + PicBaseReg -> text "PicBaseReg" + +----------------------------------------------------------------------------- + +commafy :: [SDoc] -> SDoc +commafy xs = fsep $ punctuate comma xs diff --git a/compiler/GHC/Cmm/ProcPoint.hs b/compiler/GHC/Cmm/ProcPoint.hs new file mode 100644 index 0000000000..00a7a73d89 --- /dev/null +++ b/compiler/GHC/Cmm/ProcPoint.hs @@ -0,0 +1,496 @@ +{-# LANGUAGE GADTs, DisambiguateRecordFields, BangPatterns #-} + +module GHC.Cmm.ProcPoint + ( ProcPointSet, Status(..) + , callProcPoints, minimalProcPointSet + , splitAtProcPoints, procPointAnalysis + , attachContInfoTables + ) +where + +import GhcPrelude hiding (last, unzip, succ, zip) + +import DynFlags +import GHC.Cmm.BlockId +import GHC.Cmm.CLabel +import GHC.Cmm +import GHC.Cmm.Ppr () -- For Outputable instances +import GHC.Cmm.Utils +import GHC.Cmm.Info +import GHC.Cmm.Liveness +import GHC.Cmm.Switch +import Data.List (sortBy) +import Maybes +import Control.Monad +import Outputable +import GHC.Platform +import UniqSupply +import GHC.Cmm.Dataflow.Block +import GHC.Cmm.Dataflow.Collections +import GHC.Cmm.Dataflow +import GHC.Cmm.Dataflow.Graph +import GHC.Cmm.Dataflow.Label + +-- Compute a minimal set of proc points for a control-flow graph. + +-- Determine a protocol for each proc point (which live variables will +-- be passed as arguments and which will be on the stack). + +{- +A proc point is a basic block that, after CPS transformation, will +start a new function. The entry block of the original function is a +proc point, as is the continuation of each function call. +A third kind of proc point arises if we want to avoid copying code. +Suppose we have code like the following: + + f() { + if (...) { ..1..; call foo(); ..2..} + else { ..3..; call bar(); ..4..} + x = y + z; + return x; + } + +The statement 'x = y + z' can be reached from two different proc +points: the continuations of foo() and bar(). We would prefer not to +put a copy in each continuation; instead we would like 'x = y + z' to +be the start of a new procedure to which the continuations can jump: + + f_cps () { + if (...) { ..1..; push k_foo; jump foo_cps(); } + else { ..3..; push k_bar; jump bar_cps(); } + } + k_foo() { ..2..; jump k_join(y, z); } + k_bar() { ..4..; jump k_join(y, z); } + k_join(y, z) { x = y + z; return x; } + +You might think then that a criterion to make a node a proc point is +that it is directly reached by two distinct proc points. (Note +[Direct reachability].) But this criterion is a bit too simple; for +example, 'return x' is also reached by two proc points, yet there is +no point in pulling it out of k_join. A good criterion would be to +say that a node should be made a proc point if it is reached by a set +of proc points that is different than its immediate dominator. NR +believes this criterion can be shown to produce a minimum set of proc +points, and given a dominator tree, the proc points can be chosen in +time linear in the number of blocks. Lacking a dominator analysis, +however, we turn instead to an iterative solution, starting with no +proc points and adding them according to these rules: + + 1. The entry block is a proc point. + 2. The continuation of a call is a proc point. + 3. A node is a proc point if it is directly reached by more proc + points than one of its predecessors. 
+ +Because we don't understand the problem very well, we apply rule 3 at +most once per iteration, then recompute the reachability information. +(See Note [No simple dataflow].) The choice of the new proc point is +arbitrary, and I don't know if the choice affects the final solution, +so I don't know if the number of proc points chosen is the +minimum---but the set will be minimal. + + + +Note [Proc-point analysis] +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Given a specified set of proc-points (a set of block-ids), "proc-point +analysis" figures out, for every block, which proc-point it belongs to. +All the blocks belonging to proc-point P will constitute a single +top-level C procedure. + +A non-proc-point block B "belongs to" a proc-point P iff B is +reachable from P without going through another proc-point. + +Invariant: a block B should belong to at most one proc-point; if it +belongs to two, that's a bug. + +Note [Non-existing proc-points] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +On some architectures it might happen that the list of proc-points +computed before stack layout pass will be invalidated by the stack +layout. This will happen if stack layout removes from the graph +blocks that were determined to be proc-points. Later on in the pipeline +we use list of proc-points to perform [Proc-point analysis], but +if a proc-point does not exist anymore then we will get compiler panic. +See #8205. +-} + +type ProcPointSet = LabelSet + +data Status + = ReachedBy ProcPointSet -- set of proc points that directly reach the block + | ProcPoint -- this block is itself a proc point + +instance Outputable Status where + ppr (ReachedBy ps) + | setNull ps = text "" + | otherwise = text "reached by" <+> + (hsep $ punctuate comma $ map ppr $ setElems ps) + ppr ProcPoint = text "" + +-------------------------------------------------- +-- Proc point analysis + +-- Once you know what the proc-points are, figure out +-- what proc-points each block is reachable from +-- See Note [Proc-point analysis] +procPointAnalysis :: ProcPointSet -> CmmGraph -> LabelMap Status +procPointAnalysis procPoints cmmGraph@(CmmGraph {g_graph = graph}) = + analyzeCmmFwd procPointLattice procPointTransfer cmmGraph initProcPoints + where + initProcPoints = + mkFactBase + procPointLattice + [ (id, ProcPoint) + | id <- setElems procPoints + -- See Note [Non-existing proc-points] + , id `setMember` labelsInGraph + ] + labelsInGraph = labelsDefined graph + +procPointTransfer :: TransferFun Status +procPointTransfer block facts = + let label = entryLabel block + !fact = case getFact procPointLattice label facts of + ProcPoint -> ReachedBy $! setSingleton label + f -> f + result = map (\id -> (id, fact)) (successors block) + in mkFactBase procPointLattice result + +procPointLattice :: DataflowLattice Status +procPointLattice = DataflowLattice unreached add_to + where + unreached = ReachedBy setEmpty + add_to (OldFact ProcPoint) _ = NotChanged ProcPoint + add_to _ (NewFact ProcPoint) = Changed ProcPoint -- because of previous case + add_to (OldFact (ReachedBy p)) (NewFact (ReachedBy p')) + | setSize union > setSize p = Changed (ReachedBy union) + | otherwise = NotChanged (ReachedBy p) + where + union = setUnion p' p + +---------------------------------------------------------------------- + +-- It is worth distinguishing two sets of proc points: those that are +-- induced by calls in the original graph and those that are +-- introduced because they're reachable from multiple proc points. 
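+--
+-- Roughly how the two sets are produced and consumed (a sketch only; the
+-- real driver lives in GHC.Cmm.Pipeline and the local names here are made
+-- up):
+--
+--      let call_pps = callProcPoints g            -- proc points forced by calls
+--      proc_pps    <- minimalProcPointSet platform call_pps g
+--                                                 -- plus the "rule 3" points
+--      let pp_map  = procPointAnalysis proc_pps g -- which proc points reach
+--                                                 -- each block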
+-- +-- Extract the set of Continuation BlockIds, see Note [Continuation BlockIds]. +callProcPoints :: CmmGraph -> ProcPointSet +callProcPoints g = foldlGraphBlocks add (setSingleton (g_entry g)) g + where add :: LabelSet -> CmmBlock -> LabelSet + add set b = case lastNode b of + CmmCall {cml_cont = Just k} -> setInsert k set + CmmForeignCall {succ=k} -> setInsert k set + _ -> set + +minimalProcPointSet :: Platform -> ProcPointSet -> CmmGraph + -> UniqSM ProcPointSet +-- Given the set of successors of calls (which must be proc-points) +-- figure out the minimal set of necessary proc-points +minimalProcPointSet platform callProcPoints g + = extendPPSet platform g (revPostorder g) callProcPoints + +extendPPSet + :: Platform -> CmmGraph -> [CmmBlock] -> ProcPointSet -> UniqSM ProcPointSet +extendPPSet platform g blocks procPoints = + let env = procPointAnalysis procPoints g + add pps block = let id = entryLabel block + in case mapLookup id env of + Just ProcPoint -> setInsert id pps + _ -> pps + procPoints' = foldlGraphBlocks add setEmpty g + newPoints = mapMaybe ppSuccessor blocks + newPoint = listToMaybe newPoints + ppSuccessor b = + let nreached id = case mapLookup id env `orElse` + pprPanic "no ppt" (ppr id <+> ppr b) of + ProcPoint -> 1 + ReachedBy ps -> setSize ps + block_procpoints = nreached (entryLabel b) + -- | Looking for a successor of b that is reached by + -- more proc points than b and is not already a proc + -- point. If found, it can become a proc point. + newId succ_id = not (setMember succ_id procPoints') && + nreached succ_id > block_procpoints + in listToMaybe $ filter newId $ successors b + + in case newPoint of + Just id -> + if setMember id procPoints' + then panic "added old proc pt" + else extendPPSet platform g blocks (setInsert id procPoints') + Nothing -> return procPoints' + + +-- At this point, we have found a set of procpoints, each of which should be +-- the entry point of a procedure. +-- Now, we create the procedure for each proc point, +-- which requires that we: +-- 1. build a map from proc points to the blocks reachable from the proc point +-- 2. turn each branch to a proc point into a jump +-- 3. turn calls and returns into jumps +-- 4. build info tables for the procedures -- and update the info table for +-- the SRTs in the entry procedure as well. +-- Input invariant: A block should only be reachable from a single ProcPoint. +-- ToDo: use the _ret naming convention that the old code generator +-- used. 
-- EZY +splitAtProcPoints :: DynFlags -> CLabel -> ProcPointSet-> ProcPointSet -> LabelMap Status -> + CmmDecl -> UniqSM [CmmDecl] +splitAtProcPoints dflags entry_label callPPs procPoints procMap + (CmmProc (TopInfo {info_tbls = info_tbls}) + top_l _ g@(CmmGraph {g_entry=entry})) = + do -- Build a map from procpoints to the blocks they reach + let add_block + :: LabelMap (LabelMap CmmBlock) + -> CmmBlock + -> LabelMap (LabelMap CmmBlock) + add_block graphEnv b = + case mapLookup bid procMap of + Just ProcPoint -> add graphEnv bid bid b + Just (ReachedBy set) -> + case setElems set of + [] -> graphEnv + [id] -> add graphEnv id bid b + _ -> panic "Each block should be reachable from only one ProcPoint" + Nothing -> graphEnv + where bid = entryLabel b + add graphEnv procId bid b = mapInsert procId graph' graphEnv + where graph = mapLookup procId graphEnv `orElse` mapEmpty + graph' = mapInsert bid b graph + + let liveness = cmmGlobalLiveness dflags g + let ppLiveness pp = filter isArgReg $ + regSetToList $ + expectJust "ppLiveness" $ mapLookup pp liveness + + graphEnv <- return $ foldlGraphBlocks add_block mapEmpty g + + -- Build a map from proc point BlockId to pairs of: + -- * Labels for their new procedures + -- * Labels for the info tables of their new procedures (only if + -- the proc point is a callPP) + -- Due to common blockification, we may overestimate the set of procpoints. + let add_label map pp = mapInsert pp lbls map + where lbls | pp == entry = (entry_label, fmap cit_lbl (mapLookup entry info_tbls)) + | otherwise = (block_lbl, guard (setMember pp callPPs) >> + Just info_table_lbl) + where block_lbl = blockLbl pp + info_table_lbl = infoTblLbl pp + + procLabels :: LabelMap (CLabel, Maybe CLabel) + procLabels = foldl' add_label mapEmpty + (filter (flip mapMember (toBlockMap g)) (setElems procPoints)) + + -- In each new graph, add blocks jumping off to the new procedures, + -- and replace branches to procpoints with branches to the jump-off blocks + let add_jump_block + :: (LabelMap Label, [CmmBlock]) + -> (Label, CLabel) + -> UniqSM (LabelMap Label, [CmmBlock]) + add_jump_block (env, bs) (pp, l) = + do bid <- liftM mkBlockId getUniqueM + let b = blockJoin (CmmEntry bid GlobalScope) emptyBlock jump + live = ppLiveness pp + jump = CmmCall (CmmLit (CmmLabel l)) Nothing live 0 0 0 + return (mapInsert pp bid env, b : bs) + + add_jumps + :: LabelMap CmmGraph + -> (Label, LabelMap CmmBlock) + -> UniqSM (LabelMap CmmGraph) + add_jumps newGraphEnv (ppId, blockEnv) = + do let needed_jumps = -- find which procpoints we currently branch to + mapFoldr add_if_branch_to_pp [] blockEnv + add_if_branch_to_pp :: CmmBlock -> [(BlockId, CLabel)] -> [(BlockId, CLabel)] + add_if_branch_to_pp block rst = + case lastNode block of + CmmBranch id -> add_if_pp id rst + CmmCondBranch _ ti fi _ -> add_if_pp ti (add_if_pp fi rst) + CmmSwitch _ ids -> foldr add_if_pp rst $ switchTargetsToList ids + _ -> rst + + -- when jumping to a PP that has an info table, if + -- tablesNextToCode is off we must jump to the entry + -- label instead. 
+ jump_label (Just info_lbl) _ + | tablesNextToCode dflags = info_lbl + | otherwise = toEntryLbl info_lbl + jump_label Nothing block_lbl = block_lbl + + add_if_pp id rst = case mapLookup id procLabels of + Just (lbl, mb_info_lbl) -> (id, jump_label mb_info_lbl lbl) : rst + Nothing -> rst + (jumpEnv, jumpBlocks) <- + foldM add_jump_block (mapEmpty, []) needed_jumps + -- update the entry block + let b = expectJust "block in env" $ mapLookup ppId blockEnv + blockEnv' = mapInsert ppId b blockEnv + -- replace branches to procpoints with branches to jumps + blockEnv'' = toBlockMap $ replaceBranches jumpEnv $ ofBlockMap ppId blockEnv' + -- add the jump blocks to the graph + blockEnv''' = foldl' (flip addBlock) blockEnv'' jumpBlocks + let g' = ofBlockMap ppId blockEnv''' + -- pprTrace "g' pre jumps" (ppr g') $ do + return (mapInsert ppId g' newGraphEnv) + + graphEnv <- foldM add_jumps mapEmpty $ mapToList graphEnv + + let to_proc (bid, g) + | bid == entry + = CmmProc (TopInfo {info_tbls = info_tbls, + stack_info = stack_info}) + top_l live g' + | otherwise + = case expectJust "pp label" $ mapLookup bid procLabels of + (lbl, Just info_lbl) + -> CmmProc (TopInfo { info_tbls = mapSingleton (g_entry g) (mkEmptyContInfoTable info_lbl) + , stack_info=stack_info}) + lbl live g' + (lbl, Nothing) + -> CmmProc (TopInfo {info_tbls = mapEmpty, stack_info=stack_info}) + lbl live g' + where + g' = replacePPIds g + live = ppLiveness (g_entry g') + stack_info = StackInfo { arg_space = 0 + , updfr_space = Nothing + , do_layout = True } + -- cannot use panic, this is printed by -ddump-cmm + + -- References to procpoint IDs can now be replaced with the + -- infotable's label + replacePPIds g = {-# SCC "replacePPIds" #-} + mapGraphNodes (id, mapExp repl, mapExp repl) g + where repl e@(CmmLit (CmmBlock bid)) = + case mapLookup bid procLabels of + Just (_, Just info_lbl) -> CmmLit (CmmLabel info_lbl) + _ -> e + repl e = e + + -- The C back end expects to see return continuations before the + -- call sites. Here, we sort them in reverse order -- it gets + -- reversed later. + let (_, block_order) = + foldl' add_block_num (0::Int, mapEmpty :: LabelMap Int) + (revPostorder g) + add_block_num (i, map) block = + (i + 1, mapInsert (entryLabel block) i map) + sort_fn (bid, _) (bid', _) = + compare (expectJust "block_order" $ mapLookup bid block_order) + (expectJust "block_order" $ mapLookup bid' block_order) + procs <- return $ map to_proc $ sortBy sort_fn $ mapToList graphEnv + return -- pprTrace "procLabels" (ppr procLabels) + -- pprTrace "splitting graphs" (ppr procs) + procs +splitAtProcPoints _ _ _ _ _ t@(CmmData _ _) = return [t] + +-- Only called from GHC.Cmm.ProcPoint.splitAtProcPoints. NB. does a +-- recursive lookup, see comment below. +replaceBranches :: LabelMap BlockId -> CmmGraph -> CmmGraph +replaceBranches env cmmg + = {-# SCC "replaceBranches" #-} + ofBlockMap (g_entry cmmg) $ mapMap f $ toBlockMap cmmg + where + f block = replaceLastNode block $ last (lastNode block) + + last :: CmmNode O C -> CmmNode O C + last (CmmBranch id) = CmmBranch (lookup id) + last (CmmCondBranch e ti fi l) = CmmCondBranch e (lookup ti) (lookup fi) l + last (CmmSwitch e ids) = CmmSwitch e (mapSwitchTargets lookup ids) + last l@(CmmCall {}) = l { cml_cont = Nothing } + -- NB. remove the continuation of a CmmCall, since this + -- label will now be in a different CmmProc. Not only + -- is this tidier, it stops CmmLint from complaining. 
+ last l@(CmmForeignCall {}) = l + lookup id = fmap lookup (mapLookup id env) `orElse` id + -- XXX: this is a recursive lookup, it follows chains + -- until the lookup returns Nothing, at which point we + -- return the last BlockId + +-- -------------------------------------------------------------- +-- Not splitting proc points: add info tables for continuations + +attachContInfoTables :: ProcPointSet -> CmmDecl -> CmmDecl +attachContInfoTables call_proc_points (CmmProc top_info top_l live g) + = CmmProc top_info{info_tbls = info_tbls'} top_l live g + where + info_tbls' = mapUnion (info_tbls top_info) $ + mapFromList [ (l, mkEmptyContInfoTable (infoTblLbl l)) + | l <- setElems call_proc_points + , l /= g_entry g ] +attachContInfoTables _ other_decl + = other_decl + +---------------------------------------------------------------- + +{- +Note [Direct reachability] + +Block B is directly reachable from proc point P iff control can flow +from P to B without passing through an intervening proc point. +-} + +---------------------------------------------------------------- + +{- +Note [No simple dataflow] + +Sadly, it seems impossible to compute the proc points using a single +dataflow pass. One might attempt to use this simple lattice: + + data Location = Unknown + | InProc BlockId -- node is in procedure headed by the named proc point + | ProcPoint -- node is itself a proc point + +At a join, a node in two different blocks becomes a proc point. +The difficulty is that the change of information during iterative +computation may promote a node prematurely. Here's a program that +illustrates the difficulty: + + f () { + entry: + .... + L1: + if (...) { ... } + else { ... } + + L2: if (...) { g(); goto L1; } + return x + y; + } + +The only proc-point needed (besides the entry) is L1. But in an +iterative analysis, consider what happens to L2. On the first pass +through, it rises from Unknown to 'InProc entry', but when L1 is +promoted to a proc point (because it's the successor of g()), L1's +successors will be promoted to 'InProc L1'. The problem hits when the +new fact 'InProc L1' flows into L2 which is already bound to 'InProc entry'. +The join operation makes it a proc point when in fact it needn't be, +because its immediate dominator L1 is already a proc point and there +are no other proc points that directly reach L2. +-} + + + +{- Note [Separate Adams optimization] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +It may be worthwhile to attempt the Adams optimization by rewriting +the graph before the assignment of proc-point protocols. Here are a +couple of rules: + + g() returns to k; g() returns to L; + k: CopyIn c ress; goto L: + ... ==> ... + L: // no CopyIn node here L: CopyIn c ress; + + +And when c == c' and ress == ress', this also: + + g() returns to k; g() returns to L; + k: CopyIn c ress; goto L: + ... ==> ... + L: CopyIn c' ress' L: CopyIn c' ress' ; + +In both cases the goal is to eliminate k. 
+-} diff --git a/compiler/GHC/Cmm/Sink.hs b/compiler/GHC/Cmm/Sink.hs new file mode 100644 index 0000000000..8e231df300 --- /dev/null +++ b/compiler/GHC/Cmm/Sink.hs @@ -0,0 +1,854 @@ +{-# LANGUAGE GADTs #-} +module GHC.Cmm.Sink ( + cmmSink + ) where + +import GhcPrelude + +import GHC.Cmm +import GHC.Cmm.Opt +import GHC.Cmm.Liveness +import GHC.Cmm.Utils +import GHC.Cmm.Dataflow.Block +import GHC.Cmm.Dataflow.Label +import GHC.Cmm.Dataflow.Collections +import GHC.Cmm.Dataflow.Graph +import GHC.Platform.Regs +import GHC.Platform (isARM, platformArch) + +import DynFlags +import Unique +import UniqFM + +import qualified Data.IntSet as IntSet +import Data.List (partition) +import qualified Data.Set as Set +import Data.Maybe + +-- Compact sets for membership tests of local variables. + +type LRegSet = IntSet.IntSet + +emptyLRegSet :: LRegSet +emptyLRegSet = IntSet.empty + +nullLRegSet :: LRegSet -> Bool +nullLRegSet = IntSet.null + +insertLRegSet :: LocalReg -> LRegSet -> LRegSet +insertLRegSet l = IntSet.insert (getKey (getUnique l)) + +elemLRegSet :: LocalReg -> LRegSet -> Bool +elemLRegSet l = IntSet.member (getKey (getUnique l)) + +-- ----------------------------------------------------------------------------- +-- Sinking and inlining + +-- This is an optimisation pass that +-- (a) moves assignments closer to their uses, to reduce register pressure +-- (b) pushes assignments into a single branch of a conditional if possible +-- (c) inlines assignments to registers that are mentioned only once +-- (d) discards dead assignments +-- +-- This tightens up lots of register-heavy code. It is particularly +-- helpful in the Cmm generated by the Stg->Cmm code generator, in +-- which every function starts with a copyIn sequence like: +-- +-- x1 = R1 +-- x2 = Sp[8] +-- x3 = Sp[16] +-- if (Sp - 32 < SpLim) then L1 else L2 +-- +-- we really want to push the x1..x3 assignments into the L2 branch. +-- +-- Algorithm: +-- +-- * Start by doing liveness analysis. +-- +-- * Keep a list of assignments A; earlier ones may refer to later ones. +-- Currently we only sink assignments to local registers, because we don't +-- have liveness information about global registers. +-- +-- * Walk forwards through the graph, look at each node N: +-- +-- * If it is a dead assignment, i.e. assignment to a register that is +-- not used after N, discard it. +-- +-- * Try to inline based on current list of assignments +-- * If any assignments in A (1) occur only once in N, and (2) are +-- not live after N, inline the assignment and remove it +-- from A. +-- +-- * If an assignment in A is cheap (RHS is local register), then +-- inline the assignment and keep it in A in case it is used afterwards. +-- +-- * Otherwise don't inline. +-- +-- * If N is assignment to a local register pick up the assignment +-- and add it to A. +-- +-- * If N is not an assignment to a local register: +-- * remove any assignments from A that conflict with N, and +-- place them before N in the current block. We call this +-- "dropping" the assignments. +-- +-- * An assignment conflicts with N if it: +-- - assigns to a register mentioned in N +-- - mentions a register assigned by N +-- - reads from memory written by N +-- * do this recursively, dropping dependent assignments +-- +-- * At an exit node: +-- * drop any assignments that are live on more than one successor +-- and are not trivial +-- * if any successor has more than one predecessor (a join-point), +-- drop everything live in that successor. 
Since we only propagate +-- assignments that are not dead at the successor, we will therefore +-- eliminate all assignments dead at this point. Thus analysis of a +-- join-point will always begin with an empty list of assignments. +-- +-- +-- As a result of above algorithm, sinking deletes some dead assignments +-- (transitively, even). This isn't as good as removeDeadAssignments, +-- but it's much cheaper. + +-- ----------------------------------------------------------------------------- +-- things that we aren't optimising very well yet. +-- +-- ----------- +-- (1) From GHC's FastString.hashStr: +-- +-- s2ay: +-- if ((_s2an::I64 == _s2ao::I64) >= 1) goto c2gn; else goto c2gp; +-- c2gn: +-- R1 = _s2au::I64; +-- call (I64[Sp])(R1) args: 8, res: 0, upd: 8; +-- c2gp: +-- _s2cO::I64 = %MO_S_Rem_W64(%MO_UU_Conv_W8_W64(I8[_s2aq::I64 + (_s2an::I64 << 0)]) + _s2au::I64 * 128, +-- 4091); +-- _s2an::I64 = _s2an::I64 + 1; +-- _s2au::I64 = _s2cO::I64; +-- goto s2ay; +-- +-- a nice loop, but we didn't eliminate the silly assignment at the end. +-- See Note [dependent assignments], which would probably fix this. +-- This is #8336. +-- +-- ----------- +-- (2) From stg_atomically_frame in PrimOps.cmm +-- +-- We have a diamond control flow: +-- +-- x = ... +-- | +-- / \ +-- A B +-- \ / +-- | +-- use of x +-- +-- Now x won't be sunk down to its use, because we won't push it into +-- both branches of the conditional. We certainly do have to check +-- that we can sink it past all the code in both A and B, but having +-- discovered that, we could sink it to its use. +-- + +-- ----------------------------------------------------------------------------- + +type Assignment = (LocalReg, CmmExpr, AbsMem) + -- Assignment caches AbsMem, an abstraction of the memory read by + -- the RHS of the assignment. + +type Assignments = [Assignment] + -- A sequence of assignments; kept in *reverse* order + -- So the list [ x=e1, y=e2 ] means the sequence of assignments + -- y = e2 + -- x = e1 + +cmmSink :: DynFlags -> CmmGraph -> CmmGraph +cmmSink dflags graph = ofBlockList (g_entry graph) $ sink mapEmpty $ blocks + where + liveness = cmmLocalLiveness dflags graph + getLive l = mapFindWithDefault Set.empty l liveness + + blocks = revPostorder graph + + join_pts = findJoinPoints blocks + + sink :: LabelMap Assignments -> [CmmBlock] -> [CmmBlock] + sink _ [] = [] + sink sunk (b:bs) = + -- pprTrace "sink" (ppr lbl) $ + blockJoin first final_middle final_last : sink sunk' bs + where + lbl = entryLabel b + (first, middle, last) = blockSplit b + + succs = successors last + + -- Annotate the middle nodes with the registers live *after* + -- the node. This will help us decide whether we can inline + -- an assignment in the current node or not. + live = Set.unions (map getLive succs) + live_middle = gen_kill dflags last live + ann_middles = annotate dflags live_middle (blockToList middle) + + -- Now sink and inline in this block + (middle', assigs) = walk dflags ann_middles (mapFindWithDefault [] lbl sunk) + fold_last = constantFoldNode dflags last + (final_last, assigs') = tryToInline dflags live fold_last assigs + + -- We cannot sink into join points (successors with more than + -- one predecessor), so identify the join points and the set + -- of registers live in them. + (joins, nonjoins) = partition (`mapMember` join_pts) succs + live_in_joins = Set.unions (map getLive joins) + + -- We do not want to sink an assignment into multiple branches, + -- so identify the set of registers live in multiple successors. 
+ -- This is made more complicated because when we sink an assignment + -- into one branch, this might change the set of registers that are + -- now live in multiple branches. + init_live_sets = map getLive nonjoins + live_in_multi live_sets r = + case filter (Set.member r) live_sets of + (_one:_two:_) -> True + _ -> False + + -- Now, drop any assignments that we will not sink any further. + (dropped_last, assigs'') = dropAssignments dflags drop_if init_live_sets assigs' + + drop_if a@(r,rhs,_) live_sets = (should_drop, live_sets') + where + should_drop = conflicts dflags a final_last + || not (isTrivial dflags rhs) && live_in_multi live_sets r + || r `Set.member` live_in_joins + + live_sets' | should_drop = live_sets + | otherwise = map upd live_sets + + upd set | r `Set.member` set = set `Set.union` live_rhs + | otherwise = set + + live_rhs = foldRegsUsed dflags extendRegSet emptyRegSet rhs + + final_middle = foldl' blockSnoc middle' dropped_last + + sunk' = mapUnion sunk $ + mapFromList [ (l, filterAssignments dflags (getLive l) assigs'') + | l <- succs ] + +{- TODO: enable this later, when we have some good tests in place to + measure the effect and tune it. + +-- small: an expression we don't mind duplicating +isSmall :: CmmExpr -> Bool +isSmall (CmmReg (CmmLocal _)) = True -- +isSmall (CmmLit _) = True +isSmall (CmmMachOp (MO_Add _) [x,y]) = isTrivial x && isTrivial y +isSmall (CmmRegOff (CmmLocal _) _) = True +isSmall _ = False +-} + +-- +-- We allow duplication of trivial expressions: registers (both local and +-- global) and literals. +-- +isTrivial :: DynFlags -> CmmExpr -> Bool +isTrivial _ (CmmReg (CmmLocal _)) = True +isTrivial dflags (CmmReg (CmmGlobal r)) = -- see Note [Inline GlobalRegs?] + if isARM (platformArch (targetPlatform dflags)) + then True -- CodeGen.Platform.ARM does not have globalRegMaybe + else isJust (globalRegMaybe (targetPlatform dflags) r) + -- GlobalRegs that are loads from BaseReg are not trivial +isTrivial _ (CmmLit _) = True +isTrivial _ _ = False + +-- +-- annotate each node with the set of registers live *after* the node +-- +annotate :: DynFlags -> LocalRegSet -> [CmmNode O O] -> [(LocalRegSet, CmmNode O O)] +annotate dflags live nodes = snd $ foldr ann (live,[]) nodes + where ann n (live,nodes) = (gen_kill dflags n live, (live,n) : nodes) + +-- +-- Find the blocks that have multiple successors (join points) +-- +findJoinPoints :: [CmmBlock] -> LabelMap Int +findJoinPoints blocks = mapFilter (>1) succ_counts + where + all_succs = concatMap successors blocks + + succ_counts :: LabelMap Int + succ_counts = foldr (\l -> mapInsertWith (+) l 1) mapEmpty all_succs + +-- +-- filter the list of assignments to remove any assignments that +-- are not live in a continuation. +-- +filterAssignments :: DynFlags -> LocalRegSet -> Assignments -> Assignments +filterAssignments dflags live assigs = reverse (go assigs []) + where go [] kept = kept + go (a@(r,_,_):as) kept | needed = go as (a:kept) + | otherwise = go as kept + where + needed = r `Set.member` live + || any (conflicts dflags a) (map toNode kept) + -- Note that we must keep assignments that are + -- referred to by other assignments we have + -- already kept. + +-- ----------------------------------------------------------------------------- +-- Walk through the nodes of a block, sinking and inlining assignments +-- as we go. +-- +-- On input we pass in a: +-- * list of nodes in the block +-- * a list of assignments that appeared *before* this block and +-- that are being sunk. 
+-- +-- On output we get: +-- * a new block +-- * a list of assignments that will be placed *after* that block. +-- + +walk :: DynFlags + -> [(LocalRegSet, CmmNode O O)] -- nodes of the block, annotated with + -- the set of registers live *after* + -- this node. + + -> Assignments -- The current list of + -- assignments we are sinking. + -- Earlier assignments may refer + -- to later ones. + + -> ( Block CmmNode O O -- The new block + , Assignments -- Assignments to sink further + ) + +walk dflags nodes assigs = go nodes emptyBlock assigs + where + go [] block as = (block, as) + go ((live,node):ns) block as + | shouldDiscard node live = go ns block as + -- discard dead assignment + | Just a <- shouldSink dflags node2 = go ns block (a : as1) + | otherwise = go ns block' as' + where + node1 = constantFoldNode dflags node + + (node2, as1) = tryToInline dflags live node1 as + + (dropped, as') = dropAssignmentsSimple dflags + (\a -> conflicts dflags a node2) as1 + + block' = foldl' blockSnoc block dropped `blockSnoc` node2 + + +-- +-- Heuristic to decide whether to pick up and sink an assignment +-- Currently we pick up all assignments to local registers. It might +-- be profitable to sink assignments to global regs too, but the +-- liveness analysis doesn't track those (yet) so we can't. +-- +shouldSink :: DynFlags -> CmmNode e x -> Maybe Assignment +shouldSink dflags (CmmAssign (CmmLocal r) e) | no_local_regs = Just (r, e, exprMem dflags e) + where no_local_regs = True -- foldRegsUsed (\_ _ -> False) True e +shouldSink _ _other = Nothing + +-- +-- discard dead assignments. This doesn't do as good a job as +-- removeDeadAssignments, because it would need multiple passes +-- to get all the dead code, but it catches the common case of +-- superfluous reloads from the stack that the stack allocator +-- leaves behind. +-- +-- Also we catch "r = r" here. You might think it would fall +-- out of inlining, but the inliner will see that r is live +-- after the instruction and choose not to inline r in the rhs. +-- +shouldDiscard :: CmmNode e x -> LocalRegSet -> Bool +shouldDiscard node live + = case node of + CmmAssign r (CmmReg r') | r == r' -> True + CmmAssign (CmmLocal r) _ -> not (r `Set.member` live) + _otherwise -> False + + +toNode :: Assignment -> CmmNode O O +toNode (r,rhs,_) = CmmAssign (CmmLocal r) rhs + +dropAssignmentsSimple :: DynFlags -> (Assignment -> Bool) -> Assignments + -> ([CmmNode O O], Assignments) +dropAssignmentsSimple dflags f = dropAssignments dflags (\a _ -> (f a, ())) () + +dropAssignments :: DynFlags -> (Assignment -> s -> (Bool, s)) -> s -> Assignments + -> ([CmmNode O O], Assignments) +dropAssignments dflags should_drop state assigs + = (dropped, reverse kept) + where + (dropped,kept) = go state assigs [] [] + + go _ [] dropped kept = (dropped, kept) + go state (assig : rest) dropped kept + | conflict = go state' rest (toNode assig : dropped) kept + | otherwise = go state' rest dropped (assig:kept) + where + (dropit, state') = should_drop assig state + conflict = dropit || any (conflicts dflags assig) dropped + + +-- ----------------------------------------------------------------------------- +-- Try to inline assignments into a node. +-- This also does constant folding for primpops, since +-- inlining opens up opportunities for doing so. + +tryToInline + :: DynFlags + -> LocalRegSet -- set of registers live after this + -- node. We cannot inline anything + -- that is live after the node, unless + -- it is small enough to duplicate. 
+ -> CmmNode O x -- The node to inline into + -> Assignments -- Assignments to inline + -> ( + CmmNode O x -- New node + , Assignments -- Remaining assignments + ) + +tryToInline dflags live node assigs = go usages node emptyLRegSet assigs + where + usages :: UniqFM Int -- Maps each LocalReg to a count of how often it is used + usages = foldLocalRegsUsed dflags addUsage emptyUFM node + + go _usages node _skipped [] = (node, []) + + go usages node skipped (a@(l,rhs,_) : rest) + | cannot_inline = dont_inline + | occurs_none = discard -- Note [discard during inlining] + | occurs_once = inline_and_discard + | isTrivial dflags rhs = inline_and_keep + | otherwise = dont_inline + where + inline_and_discard = go usages' inl_node skipped rest + where usages' = foldLocalRegsUsed dflags addUsage usages rhs + + discard = go usages node skipped rest + + dont_inline = keep node -- don't inline the assignment, keep it + inline_and_keep = keep inl_node -- inline the assignment, keep it + + keep node' = (final_node, a : rest') + where (final_node, rest') = go usages' node' (insertLRegSet l skipped) rest + usages' = foldLocalRegsUsed dflags (\m r -> addToUFM m r 2) + usages rhs + -- we must not inline anything that is mentioned in the RHS + -- of a binding that we have already skipped, so we set the + -- usages of the regs on the RHS to 2. + + cannot_inline = skipped `regsUsedIn` rhs -- Note [dependent assignments] + || l `elemLRegSet` skipped + || not (okToInline dflags rhs node) + + l_usages = lookupUFM usages l + l_live = l `elemRegSet` live + + occurs_once = not l_live && l_usages == Just 1 + occurs_none = not l_live && l_usages == Nothing + + inl_node = improveConditional (mapExpDeep inl_exp node) + + inl_exp :: CmmExpr -> CmmExpr + -- inl_exp is where the inlining actually takes place! + inl_exp (CmmReg (CmmLocal l')) | l == l' = rhs + inl_exp (CmmRegOff (CmmLocal l') off) | l == l' + = cmmOffset dflags rhs off + -- re-constant fold after inlining + inl_exp (CmmMachOp op args) = cmmMachOpFold dflags op args + inl_exp other = other + + +{- Note [improveConditional] + +cmmMachOpFold tries to simplify conditionals to turn things like + (a == b) != 1 +into + (a != b) +but there's one case it can't handle: when the comparison is over +floating-point values, we can't invert it, because floating-point +comparisons aren't invertible (because of NaNs). + +But we *can* optimise this conditional by swapping the true and false +branches. Given + CmmCondBranch ((a >## b) != 1) t f +we can turn it into + CmmCondBranch (a >## b) f t + +So here we catch conditionals that weren't optimised by cmmMachOpFold, +and apply above transformation to eliminate the comparison against 1. + +It's tempting to just turn every != into == and then let cmmMachOpFold +do its thing, but that risks changing a nice fall-through conditional +into one that requires two jumps. (see swapcond_last in +GHC.Cmm.ContFlowOpt), so instead we carefully look for just the cases where +we can eliminate a comparison. 
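+
+A concrete instance of why the floating-point case needs the branch swap:
+if x is a NaN then both (x >## y) and (x <=## y) are False, so rewriting
+"(x >## y) != 1" into "x <=## y" would change behaviour, whereas swapping
+the true and false branches of the conditional cannot.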
+-}
+improveConditional :: CmmNode O x -> CmmNode O x
+improveConditional
+ (CmmCondBranch (CmmMachOp mop [x, CmmLit (CmmInt 1 _)]) t f l)
+ | neLike mop, isComparisonExpr x
+ = CmmCondBranch x f t (fmap not l)
+ where
+ neLike (MO_Ne _) = True
+ neLike (MO_U_Lt _) = True -- (x<y) < 1 means (x<y) == 0
+ neLike (MO_S_Lt _) = True -- (x<y) < 1 means (x<y) == 0
+ neLike _ = False
+improveConditional other = other
+
+addUsage :: UniqFM Int -> LocalReg -> UniqFM Int
+addUsage m r = addToUFM_C (+) m r 1
+
+regsUsedIn :: LRegSet -> CmmExpr -> Bool
+regsUsedIn ls _ | nullLRegSet ls = False
+regsUsedIn ls e = wrapRecExpf f e False
+ where f (CmmReg (CmmLocal l)) _ | l `elemLRegSet` ls = True
+ f (CmmRegOff (CmmLocal l) _) _ | l `elemLRegSet` ls = True
+ f _ z = z
+
+-- we don't inline into CmmUnsafeForeignCall if the expression refers
+-- to global registers. This is a HACK to avoid global registers
+-- clashing with C argument-passing registers, really the back-end
+-- ought to be able to handle it properly, but currently neither PprC
+-- nor the NCG can do it. See Note [Register parameter passing]
+-- See also GHC.StgToCmm.Foreign.load_args_into_temps.
+okToInline :: DynFlags -> CmmExpr -> CmmNode e x -> Bool
+okToInline dflags expr node@(CmmUnsafeForeignCall{}) =
+ not (globalRegistersConflict dflags expr node)
+okToInline _ _ _ = True
+
+-- -----------------------------------------------------------------------------
+
+-- | @conflicts (r,e) node@ is @False@ if and only if the assignment
+-- @r = e@ can be safely commuted past statement @node@.
+conflicts :: DynFlags -> Assignment -> CmmNode O x -> Bool
+conflicts dflags (r, rhs, addr) node
+
+ -- (1) node defines registers used by rhs of assignment. This catches
+ -- assignments and all three kinds of calls. See Note [Sinking and calls]
+ | globalRegistersConflict dflags rhs node = True
+ | localRegistersConflict dflags rhs node = True
+
+ -- (2) node uses register defined by assignment
+ | foldRegsUsed dflags (\b r' -> r == r' || b) False node = True
+
+ -- (3) a store to an address conflicts with a read of the same memory
+ | CmmStore addr' e <- node
+ , memConflicts addr (loadAddr dflags addr' (cmmExprWidth dflags e)) = True
+
+ -- (4) an assignment to Hp/Sp conflicts with a heap/stack read respectively
+ | HeapMem <- addr, CmmAssign (CmmGlobal Hp) _ <- node = True
+ | StackMem <- addr, CmmAssign (CmmGlobal Sp) _ <- node = True
+ | SpMem{} <- addr, CmmAssign (CmmGlobal Sp) _ <- node = True
+
+ -- (5) foreign calls clobber heap: see Note [Foreign calls clobber heap]
+ | CmmUnsafeForeignCall{} <- node, memConflicts addr AnyMem = True
+
+ -- (6) native calls clobber any memory
+ | CmmCall{} <- node, memConflicts addr AnyMem = True
+
+ -- (7) otherwise, no conflict
+ | otherwise = False
+
+-- Returns True if node defines any global registers that are used in the
+-- Cmm expression
+globalRegistersConflict :: DynFlags -> CmmExpr -> CmmNode e x -> Bool
+globalRegistersConflict dflags expr node =
+ foldRegsDefd dflags (\b r -> b || regUsedIn dflags (CmmGlobal r) expr)
+ False node
+
+-- Returns True if node defines any local registers that are used in the
+-- Cmm expression
+localRegistersConflict :: DynFlags -> CmmExpr -> CmmNode e x -> Bool
+localRegistersConflict dflags expr node =
+ foldRegsDefd dflags (\b r -> b || regUsedIn dflags (CmmLocal r) expr)
+ False node
+
+-- Note [Sinking and calls]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- We have three kinds of calls: normal (CmmCall), safe foreign (CmmForeignCall)
+-- and unsafe foreign (CmmUnsafeForeignCall).
We perform sinking pass after +-- stack layout (see Note [Sinking after stack layout]) which leads to two +-- invariants related to calls: +-- +-- a) during stack layout phase all safe foreign calls are turned into +-- unsafe foreign calls (see Note [Lower safe foreign calls]). This +-- means that we will never encounter CmmForeignCall node when running +-- sinking after stack layout +-- +-- b) stack layout saves all variables live across a call on the stack +-- just before making a call (remember we are not sinking assignments to +-- stack): +-- +-- L1: +-- x = R1 +-- P64[Sp - 16] = L2 +-- P64[Sp - 8] = x +-- Sp = Sp - 16 +-- call f() returns L2 +-- L2: +-- +-- We will attempt to sink { x = R1 } but we will detect conflict with +-- { P64[Sp - 8] = x } and hence we will drop { x = R1 } without even +-- checking whether it conflicts with { call f() }. In this way we will +-- never need to check any assignment conflicts with CmmCall. Remember +-- that we still need to check for potential memory conflicts. +-- +-- So the result is that we only need to worry about CmmUnsafeForeignCall nodes +-- when checking conflicts (see Note [Unsafe foreign calls clobber caller-save registers]). +-- This assumption holds only when we do sinking after stack layout. If we run +-- it before stack layout we need to check for possible conflicts with all three +-- kinds of calls. Our `conflicts` function does that by using a generic +-- foldRegsDefd and foldRegsUsed functions defined in DefinerOfRegs and +-- UserOfRegs typeclasses. +-- + +-- An abstraction of memory read or written. +data AbsMem + = NoMem -- no memory accessed + | AnyMem -- arbitrary memory + | HeapMem -- definitely heap memory + | StackMem -- definitely stack memory + | SpMem -- [Sp+n] + {-# UNPACK #-} !Int + {-# UNPACK #-} !Int + +-- Having SpMem is important because it lets us float loads from Sp +-- past stores to Sp as long as they don't overlap, and this helps to +-- unravel some long sequences of +-- x1 = [Sp + 8] +-- x2 = [Sp + 16] +-- ... +-- [Sp + 8] = xi +-- [Sp + 16] = xj +-- +-- Note that SpMem is invalidated if Sp is changed, but the definition +-- of 'conflicts' above handles that. + +-- ToDo: this won't currently fix the following commonly occurring code: +-- x1 = [R1 + 8] +-- x2 = [R1 + 16] +-- .. +-- [Hp - 8] = x1 +-- [Hp - 16] = x2 +-- .. + +-- because [R1 + 8] and [Hp - 8] are both HeapMem. We know that +-- assignments to [Hp + n] do not conflict with any other heap memory, +-- but this is tricky to nail down. What if we had +-- +-- x = Hp + n +-- [x] = ... +-- +-- the store to [x] should be "new heap", not "old heap". +-- Furthermore, you could imagine that if we started inlining +-- functions in Cmm then there might well be reads of heap memory +-- that was written in the same basic block. To take advantage of +-- non-aliasing of heap memory we will have to be more clever. + +-- Note [Foreign calls clobber heap] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- +-- It is tempting to say that foreign calls clobber only +-- non-heap/stack memory, but unfortunately we break this invariant in +-- the RTS. For example, in stg_catch_retry_frame we call +-- stmCommitNestedTransaction() which modifies the contents of the +-- TRec it is passed (this actually caused incorrect code to be +-- generated). +-- +-- Since the invariant is true for the majority of foreign calls, +-- perhaps we ought to have a special annotation for calls that can +-- modify heap/stack memory. For now we just use the conservative +-- definition here. 
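+--
+-- Concretely, in 'conflicts' above an unsafe foreign call is checked
+-- against AnyMem, and (see memConflicts below) AnyMem conflicts with
+-- everything except NoMem, so e.g. an assignment whose right-hand side
+-- was loaded from [Sp + 8] is dropped in front of the call rather than
+-- sunk past it.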
+-- +-- Some CallishMachOp imply a memory barrier e.g. AtomicRMW and +-- therefore we should never float any memory operations across one of +-- these calls. + + +bothMems :: AbsMem -> AbsMem -> AbsMem +bothMems NoMem x = x +bothMems x NoMem = x +bothMems HeapMem HeapMem = HeapMem +bothMems StackMem StackMem = StackMem +bothMems (SpMem o1 w1) (SpMem o2 w2) + | o1 == o2 = SpMem o1 (max w1 w2) + | otherwise = StackMem +bothMems SpMem{} StackMem = StackMem +bothMems StackMem SpMem{} = StackMem +bothMems _ _ = AnyMem + +memConflicts :: AbsMem -> AbsMem -> Bool +memConflicts NoMem _ = False +memConflicts _ NoMem = False +memConflicts HeapMem StackMem = False +memConflicts StackMem HeapMem = False +memConflicts SpMem{} HeapMem = False +memConflicts HeapMem SpMem{} = False +memConflicts (SpMem o1 w1) (SpMem o2 w2) + | o1 < o2 = o1 + w1 > o2 + | otherwise = o2 + w2 > o1 +memConflicts _ _ = True + +exprMem :: DynFlags -> CmmExpr -> AbsMem +exprMem dflags (CmmLoad addr w) = bothMems (loadAddr dflags addr (typeWidth w)) (exprMem dflags addr) +exprMem dflags (CmmMachOp _ es) = foldr bothMems NoMem (map (exprMem dflags) es) +exprMem _ _ = NoMem + +loadAddr :: DynFlags -> CmmExpr -> Width -> AbsMem +loadAddr dflags e w = + case e of + CmmReg r -> regAddr dflags r 0 w + CmmRegOff r i -> regAddr dflags r i w + _other | regUsedIn dflags spReg e -> StackMem + | otherwise -> AnyMem + +regAddr :: DynFlags -> CmmReg -> Int -> Width -> AbsMem +regAddr _ (CmmGlobal Sp) i w = SpMem i (widthInBytes w) +regAddr _ (CmmGlobal Hp) _ _ = HeapMem +regAddr _ (CmmGlobal CurrentTSO) _ _ = HeapMem -- important for PrimOps +regAddr dflags r _ _ | isGcPtrType (cmmRegType dflags r) = HeapMem -- yay! GCPtr pays for itself +regAddr _ _ _ _ = AnyMem + +{- +Note [Inline GlobalRegs?] + +Should we freely inline GlobalRegs? + +Actually it doesn't make a huge amount of difference either way, so we +*do* currently treat GlobalRegs as "trivial" and inline them +everywhere, but for what it's worth, here is what I discovered when I +(SimonM) looked into this: + +Common sense says we should not inline GlobalRegs, because when we +have + + x = R1 + +the register allocator will coalesce this assignment, generating no +code, and simply record the fact that x is bound to $rbx (or +whatever). Furthermore, if we were to sink this assignment, then the +range of code over which R1 is live increases, and the range of code +over which x is live decreases. All things being equal, it is better +for x to be live than R1, because R1 is a fixed register whereas x can +live in any register. So we should neither sink nor inline 'x = R1'. + +However, not inlining GlobalRegs can have surprising +consequences. e.g. 
(cgrun020) + + c3EN: + _s3DB::P64 = R1; + _c3ES::P64 = _s3DB::P64 & 7; + if (_c3ES::P64 >= 2) goto c3EU; else goto c3EV; + c3EU: + _s3DD::P64 = P64[_s3DB::P64 + 6]; + _s3DE::P64 = P64[_s3DB::P64 + 14]; + I64[Sp - 8] = c3F0; + R1 = _s3DE::P64; + P64[Sp] = _s3DD::P64; + +inlining the GlobalReg gives: + + c3EN: + if (R1 & 7 >= 2) goto c3EU; else goto c3EV; + c3EU: + I64[Sp - 8] = c3F0; + _s3DD::P64 = P64[R1 + 6]; + R1 = P64[R1 + 14]; + P64[Sp] = _s3DD::P64; + +but if we don't inline the GlobalReg, instead we get: + + _s3DB::P64 = R1; + if (_s3DB::P64 & 7 >= 2) goto c3EU; else goto c3EV; + c3EU: + I64[Sp - 8] = c3F0; + R1 = P64[_s3DB::P64 + 14]; + P64[Sp] = P64[_s3DB::P64 + 6]; + +This looks better - we managed to inline _s3DD - but in fact it +generates an extra reg-reg move: + +.Lc3EU: + movq $c3F0_info,-8(%rbp) + movq %rbx,%rax + movq 14(%rbx),%rbx + movq 6(%rax),%rax + movq %rax,(%rbp) + +because _s3DB is now live across the R1 assignment, we lost the +benefit of coalescing. + +Who is at fault here? Perhaps if we knew that _s3DB was an alias for +R1, then we would not sink a reference to _s3DB past the R1 +assignment. Or perhaps we *should* do that - we might gain by sinking +it, despite losing the coalescing opportunity. + +Sometimes not inlining global registers wins by virtue of the rule +about not inlining into arguments of a foreign call, e.g. (T7163) this +is what happens when we inlined F1: + + _s3L2::F32 = F1; + _c3O3::F32 = %MO_F_Mul_W32(F1, 10.0 :: W32); + (_s3L7::F32) = call "ccall" arg hints: [] result hints: [] rintFloat(_c3O3::F32); + +but if we don't inline F1: + + (_s3L7::F32) = call "ccall" arg hints: [] result hints: [] rintFloat(%MO_F_Mul_W32(_s3L2::F32, + 10.0 :: W32)); +-} diff --git a/compiler/GHC/Cmm/Switch.hs b/compiler/GHC/Cmm/Switch.hs new file mode 100644 index 0000000000..e89fadfd2e --- /dev/null +++ b/compiler/GHC/Cmm/Switch.hs @@ -0,0 +1,502 @@ +{-# LANGUAGE GADTs #-} +module GHC.Cmm.Switch ( + SwitchTargets, + mkSwitchTargets, + switchTargetsCases, switchTargetsDefault, switchTargetsRange, switchTargetsSigned, + mapSwitchTargets, switchTargetsToTable, switchTargetsFallThrough, + switchTargetsToList, eqSwitchTargetWith, + + SwitchPlan(..), + targetSupportsSwitch, + createSwitchPlan, + ) where + +import GhcPrelude + +import Outputable +import DynFlags +import GHC.Cmm.Dataflow.Label (Label) + +import Data.Maybe +import Data.List (groupBy) +import Data.Function (on) +import qualified Data.Map as M + +-- Note [Cmm Switches, the general plan] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- +-- Compiling a high-level switch statement, as it comes out of a STG case +-- expression, for example, allows for a surprising amount of design decisions. +-- Therefore, we cleanly separated this from the Stg → Cmm transformation, as +-- well as from the actual code generation. +-- +-- The overall plan is: +-- * The Stg → Cmm transformation creates a single `SwitchTargets` in +-- emitSwitch and emitCmmLitSwitch in GHC.StgToCmm.Utils. +-- At this stage, they are unsuitable for code generation. +-- * A dedicated Cmm transformation (GHC.Cmm.Switch.Implement) replaces these +-- switch statements with code that is suitable for code generation, i.e. +-- a nice balanced tree of decisions with dense jump tables in the leafs. +-- The actual planning of this tree is performed in pure code in createSwitchPlan +-- in this module. See Note [createSwitchPlan]. +-- * The actual code generation will not do any further processing and +-- implement each CmmSwitch with a jump tables. 
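+--
+-- As a small, made-up example of the input to all of this (L1, L2, L3 and
+-- Ldef stand for whatever Labels are in scope; see the definitions below):
+--
+--      mkSwitchTargets False (0,7) (Just Ldef)
+--                      (M.fromList [(1,L1),(2,L2),(3,L3)])
+--
+-- i.e. a switch over an unsigned scrutinee known to be in 0..7, where 1, 2
+-- and 3 have their own targets and every other value goes to Ldef; the
+-- planning step then decides whether this becomes a jump table, a tree of
+-- conditionals, or a mixture of the two.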
+-- +-- When compiling to LLVM or C, GHC.Cmm.Switch.Implement leaves the switch +-- statements alone, as we can turn a SwitchTargets value into a nice +-- switch-statement in LLVM resp. C, and leave the rest to the compiler. +-- +-- See Note [GHC.Cmm.Switch vs. GHC.Cmm.Switch.Implement] why the two module are +-- separated. + +----------------------------------------------------------------------------- +-- Note [Magic Constants in GHC.Cmm.Switch] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- +-- There are a lot of heuristics here that depend on magic values where it is +-- hard to determine the "best" value (for whatever that means). These are the +-- magic values: + +-- | Number of consecutive default values allowed in a jump table. If there are +-- more of them, the jump tables are split. +-- +-- Currently 7, as it costs 7 words of additional code when a jump table is +-- split (at least on x64, determined experimentally). +maxJumpTableHole :: Integer +maxJumpTableHole = 7 + +-- | Minimum size of a jump table. If the number is smaller, the switch is +-- implemented using conditionals. +-- Currently 5, because an if-then-else tree of 4 values is nice and compact. +minJumpTableSize :: Int +minJumpTableSize = 5 + +-- | Minimum non-zero offset for a jump table. See Note [Jump Table Offset]. +minJumpTableOffset :: Integer +minJumpTableOffset = 2 + + +----------------------------------------------------------------------------- +-- Switch Targets + +-- Note [SwitchTargets] +-- ~~~~~~~~~~~~~~~~~~~~ +-- +-- The branches of a switch are stored in a SwitchTargets, which consists of an +-- (optional) default jump target, and a map from values to jump targets. +-- +-- If the default jump target is absent, the behaviour of the switch outside the +-- values of the map is undefined. +-- +-- We use an Integer for the keys the map so that it can be used in switches on +-- unsigned as well as signed integers. +-- +-- The map may be empty (we prune out-of-range branches here, so it could be us +-- emptying it). +-- +-- Before code generation, the table needs to be brought into a form where all +-- entries are non-negative, so that it can be compiled into a jump table. +-- See switchTargetsToTable. + + +-- | A value of type SwitchTargets contains the alternatives for a 'CmmSwitch' +-- value, and knows whether the value is signed, the possible range, an +-- optional default value and a map from values to jump labels. 
+data SwitchTargets = + SwitchTargets + Bool -- Signed values + (Integer, Integer) -- Range + (Maybe Label) -- Default value + (M.Map Integer Label) -- The branches + deriving (Show, Eq) + +-- | The smart constructor mkSwitchTargets normalises the map a bit: +-- * No entries outside the range +-- * No entries equal to the default +-- * No default if all elements have explicit values +mkSwitchTargets :: Bool -> (Integer, Integer) -> Maybe Label -> M.Map Integer Label -> SwitchTargets +mkSwitchTargets signed range@(lo,hi) mbdef ids + = SwitchTargets signed range mbdef' ids' + where + ids' = dropDefault $ restrict ids + mbdef' | defaultNeeded = mbdef + | otherwise = Nothing + + -- Drop entries outside the range, if there is a range + restrict = restrictMap (lo,hi) + + -- Drop entries that equal the default, if there is a default + dropDefault | Just l <- mbdef = M.filter (/= l) + | otherwise = id + + -- Check if the default is still needed + defaultNeeded = fromIntegral (M.size ids') /= hi-lo+1 + + +-- | Changes all labels mentioned in the SwitchTargets value +mapSwitchTargets :: (Label -> Label) -> SwitchTargets -> SwitchTargets +mapSwitchTargets f (SwitchTargets signed range mbdef branches) + = SwitchTargets signed range (fmap f mbdef) (fmap f branches) + +-- | Returns the list of non-default branches of the SwitchTargets value +switchTargetsCases :: SwitchTargets -> [(Integer, Label)] +switchTargetsCases (SwitchTargets _ _ _ branches) = M.toList branches + +-- | Return the default label of the SwitchTargets value +switchTargetsDefault :: SwitchTargets -> Maybe Label +switchTargetsDefault (SwitchTargets _ _ mbdef _) = mbdef + +-- | Return the range of the SwitchTargets value +switchTargetsRange :: SwitchTargets -> (Integer, Integer) +switchTargetsRange (SwitchTargets _ range _ _) = range + +-- | Return whether this is used for a signed value +switchTargetsSigned :: SwitchTargets -> Bool +switchTargetsSigned (SwitchTargets signed _ _ _) = signed + +-- | switchTargetsToTable creates a dense jump table, usable for code generation. +-- +-- Also returns an offset to add to the value; the list is 0-based on the +-- result of that addition. +-- +-- The conversion from Integer to Int is a bit of a wart, as the actual +-- scrutinee might be an unsigned word, but it just works, due to wrap-around +-- arithmetic (as verified by the CmmSwitchTest test case). +switchTargetsToTable :: SwitchTargets -> (Int, [Maybe Label]) +switchTargetsToTable (SwitchTargets _ (lo,hi) mbdef branches) + = (fromIntegral (-start), [ labelFor i | i <- [start..hi] ]) + where + labelFor i = case M.lookup i branches of Just l -> Just l + Nothing -> mbdef + start | lo >= 0 && lo < minJumpTableOffset = 0 -- See Note [Jump Table Offset] + | otherwise = lo + +-- Note [Jump Table Offset] +-- ~~~~~~~~~~~~~~~~~~~~~~~~ +-- +-- Usually, the code for a jump table starting at x will first subtract x from +-- the value, to avoid a large amount of empty entries. But if x is very small, +-- the extra entries are no worse than the subtraction in terms of code size, and +-- not having to do the subtraction is quicker. +-- +-- I.e. instead of +-- _u20N: +-- leaq -1(%r14),%rax +-- jmp *_n20R(,%rax,8) +-- _n20R: +-- .quad _c20p +-- .quad _c20q +-- do +-- _u20N: +-- jmp *_n20Q(,%r14,8) +-- +-- _n20Q: +-- .quad 0 +-- .quad _c20p +-- .quad _c20q +-- .quad _c20r + +-- | The list of all labels occurring in the SwitchTargets value. 
+switchTargetsToList :: SwitchTargets -> [Label] +switchTargetsToList (SwitchTargets _ _ mbdef branches) + = maybeToList mbdef ++ M.elems branches + +-- | Groups cases with equal targets, suitable for pretty-printing to a +-- c-like switch statement with fall-through semantics. +switchTargetsFallThrough :: SwitchTargets -> ([([Integer], Label)], Maybe Label) +switchTargetsFallThrough (SwitchTargets _ _ mbdef branches) = (groups, mbdef) + where + groups = map (\xs -> (map fst xs, snd (head xs))) $ + groupBy ((==) `on` snd) $ + M.toList branches + +-- | Custom equality helper, needed for "GHC.Cmm.CommonBlockElim" +eqSwitchTargetWith :: (Label -> Label -> Bool) -> SwitchTargets -> SwitchTargets -> Bool +eqSwitchTargetWith eq (SwitchTargets signed1 range1 mbdef1 ids1) (SwitchTargets signed2 range2 mbdef2 ids2) = + signed1 == signed2 && range1 == range2 && goMB mbdef1 mbdef2 && goList (M.toList ids1) (M.toList ids2) + where + goMB Nothing Nothing = True + goMB (Just l1) (Just l2) = l1 `eq` l2 + goMB _ _ = False + goList [] [] = True + goList ((i1,l1):ls1) ((i2,l2):ls2) = i1 == i2 && l1 `eq` l2 && goList ls1 ls2 + goList _ _ = False + +----------------------------------------------------------------------------- +-- Code generation for Switches + + +-- | A SwitchPlan abstractly describes how a Switch statement ought to be +-- implemented. See Note [createSwitchPlan] +data SwitchPlan + = Unconditionally Label + | IfEqual Integer Label SwitchPlan + | IfLT Bool Integer SwitchPlan SwitchPlan + | JumpTable SwitchTargets + deriving Show +-- +-- Note [createSwitchPlan] +-- ~~~~~~~~~~~~~~~~~~~~~~~ +-- +-- A SwitchPlan describes how a Switch statement is to be broken down into +-- smaller pieces suitable for code generation. +-- +-- createSwitchPlan creates such a switch plan, in these steps: +-- 1. It splits the switch statement at segments of non-default values that +-- are too large. See splitAtHoles and Note [Magic Constants in GHC.Cmm.Switch] +-- 2. Too small jump tables should be avoided, so we break up smaller pieces +-- in breakTooSmall. +-- 3. We fill in the segments between those pieces with a jump to the default +-- label (if there is one), returning a SeparatedList in mkFlatSwitchPlan +-- 4. We find and replace two less-than branches by a single equal-to-test in +-- findSingleValues +-- 5. The thus collected pieces are assembled to a balanced binary tree. + +{- + Note [Two alts + default] + ~~~~~~~~~~~~~~~~~~~~~~~~~ + +Discussion and a bit more info at #14644 + +When dealing with a switch of the form: +switch(e) { + case 1: goto l1; + case 3000: goto l2; + default: goto ldef; +} + +If we treat it as a sparse jump table we would generate: + +if (e > 3000) //Check if value is outside of the jump table. + goto ldef; +else { + if (e < 3000) { //Compare to upper value + if(e != 1) //Compare to remaining value + goto ldef; + else + goto l2; + } + else + goto l1; +} + +Instead we special case this to : + +if (e==1) goto l1; +else if (e==3000) goto l2; +else goto l3; + +This means we have: +* Less comparisons for: 1,<3000 +* Unchanged for 3000 +* One more for >3000 + +This improves code in a few ways: +* One comparison less means smaller code which helps with cache. +* It exchanges a taken jump for two jumps no taken in the >range case. + Jumps not taken are cheaper (See Agner guides) making this about as fast. +* For all other cases the first range check is removed making it faster. + +The end result is that the change is not measurably slower for the case +>3000 and faster for the other cases. 
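+
+In terms of the SwitchPlan type above, the special case produces (schematically,
+with l1, l2 and ldef being the labels from the example):
+
+  IfEqual 1 l1 (IfEqual 3000 l2 (Unconditionally ldef))
+
+instead of an IfLT tree with a range check at the root; see the corresponding
+two-alternatives-plus-default equation of createSwitchPlan below.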
+ +This makes running this kind of match in an inner loop cheaper by 10-20% +depending on the data. +In nofib this improves wheel-sieve1 by 4-9% depending on problem +size. + +We could also add a second conditional jump after the comparison to +keep the range check like this: + cmp 3000, rArgument + jg + je +While this is fairly cheap it made no big difference for the >3000 case +and slowed down all other cases making it not worthwhile. +-} + + +-- | Does the target support switch out of the box? Then leave this to the +-- target! +targetSupportsSwitch :: HscTarget -> Bool +targetSupportsSwitch HscC = True +targetSupportsSwitch HscLlvm = True +targetSupportsSwitch _ = False + +-- | This function creates a SwitchPlan from a SwitchTargets value, breaking it +-- down into smaller pieces suitable for code generation. +createSwitchPlan :: SwitchTargets -> SwitchPlan +-- Lets do the common case of a singleton map quickly and efficiently (#10677) +createSwitchPlan (SwitchTargets _signed _range (Just defLabel) m) + | [(x, l)] <- M.toList m + = IfEqual x l (Unconditionally defLabel) +-- And another common case, matching "booleans" +createSwitchPlan (SwitchTargets _signed (lo,hi) Nothing m) + | [(x1, l1), (_x2,l2)] <- M.toAscList m + --Checking If |range| = 2 is enough if we have two unique literals + , hi - lo == 1 + = IfEqual x1 l1 (Unconditionally l2) +-- See Note [Two alts + default] +createSwitchPlan (SwitchTargets _signed _range (Just defLabel) m) + | [(x1, l1), (x2,l2)] <- M.toAscList m + = IfEqual x1 l1 (IfEqual x2 l2 (Unconditionally defLabel)) +createSwitchPlan (SwitchTargets signed range mbdef m) = + -- pprTrace "createSwitchPlan" (text (show ids) $$ text (show (range,m)) $$ text (show pieces) $$ text (show flatPlan) $$ text (show plan)) $ + plan + where + pieces = concatMap breakTooSmall $ splitAtHoles maxJumpTableHole m + flatPlan = findSingleValues $ mkFlatSwitchPlan signed mbdef range pieces + plan = buildTree signed $ flatPlan + + +--- +--- Step 1: Splitting at large holes +--- +splitAtHoles :: Integer -> M.Map Integer a -> [M.Map Integer a] +splitAtHoles _ m | M.null m = [] +splitAtHoles holeSize m = map (\range -> restrictMap range m) nonHoles + where + holes = filter (\(l,h) -> h - l > holeSize) $ zip (M.keys m) (tail (M.keys m)) + nonHoles = reassocTuples lo holes hi + + (lo,_) = M.findMin m + (hi,_) = M.findMax m + +--- +--- Step 2: Avoid small jump tables +--- +-- We do not want jump tables below a certain size. This breaks them up +-- (into singleton maps, for now). +breakTooSmall :: M.Map Integer a -> [M.Map Integer a] +breakTooSmall m + | M.size m > minJumpTableSize = [m] + | otherwise = [M.singleton k v | (k,v) <- M.toList m] + +--- +--- Step 3: Fill in the blanks +--- + +-- | A FlatSwitchPlan is a list of SwitchPlans, with an integer inbetween every +-- two entries, dividing the range. +-- So if we have (abusing list syntax) [plan1,n,plan2], then we use plan1 if +-- the expression is < n, and plan2 otherwise. + +type FlatSwitchPlan = SeparatedList Integer SwitchPlan + +mkFlatSwitchPlan :: Bool -> Maybe Label -> (Integer, Integer) -> [M.Map Integer Label] -> FlatSwitchPlan + +-- If we have no default (i.e. 
undefined where there is no entry), we can +-- branch at the minimum of each map +mkFlatSwitchPlan _ Nothing _ [] = pprPanic "mkFlatSwitchPlan with nothing left to do" empty +mkFlatSwitchPlan signed Nothing _ (m:ms) + = (mkLeafPlan signed Nothing m , [ (fst (M.findMin m'), mkLeafPlan signed Nothing m') | m' <- ms ]) + +-- If we have a default, we have to interleave segments that jump +-- to the default between the maps +mkFlatSwitchPlan signed (Just l) r ms = let ((_,p1):ps) = go r ms in (p1, ps) + where + go (lo,hi) [] + | lo > hi = [] + | otherwise = [(lo, Unconditionally l)] + go (lo,hi) (m:ms) + | lo < min + = (lo, Unconditionally l) : go (min,hi) (m:ms) + | lo == min + = (lo, mkLeafPlan signed (Just l) m) : go (max+1,hi) ms + | otherwise + = pprPanic "mkFlatSwitchPlan" (integer lo <+> integer min) + where + min = fst (M.findMin m) + max = fst (M.findMax m) + + +mkLeafPlan :: Bool -> Maybe Label -> M.Map Integer Label -> SwitchPlan +mkLeafPlan signed mbdef m + | [(_,l)] <- M.toList m -- singleton map + = Unconditionally l + | otherwise + = JumpTable $ mkSwitchTargets signed (min,max) mbdef m + where + min = fst (M.findMin m) + max = fst (M.findMax m) + +--- +--- Step 4: Reduce the number of branches using == +--- + +-- A sequence of three unconditional jumps, with the outer two pointing to the +-- same value and the bounds off by exactly one can be improved +findSingleValues :: FlatSwitchPlan -> FlatSwitchPlan +findSingleValues (Unconditionally l, (i, Unconditionally l2) : (i', Unconditionally l3) : xs) + | l == l3 && i + 1 == i' + = findSingleValues (IfEqual i l2 (Unconditionally l), xs) +findSingleValues (p, (i,p'):xs) + = (p,i) `consSL` findSingleValues (p', xs) +findSingleValues (p, []) + = (p, []) + +--- +--- Step 5: Actually build the tree +--- + +-- Build a balanced tree from a separated list +buildTree :: Bool -> FlatSwitchPlan -> SwitchPlan +buildTree _ (p,[]) = p +buildTree signed sl = IfLT signed m (buildTree signed sl1) (buildTree signed sl2) + where + (sl1, m, sl2) = divideSL sl + + + +-- +-- Utility data type: Non-empty lists with extra markers in between each +-- element: +-- + +type SeparatedList b a = (a, [(b,a)]) + +consSL :: (a, b) -> SeparatedList b a -> SeparatedList b a +consSL (a, b) (a', xs) = (a, (b,a'):xs) + +divideSL :: SeparatedList b a -> (SeparatedList b a, b, SeparatedList b a) +divideSL (_,[]) = error "divideSL: Singleton SeparatedList" +divideSL (p,xs) = ((p, xs1), m, (p', xs2)) + where + (xs1, (m,p'):xs2) = splitAt (length xs `div` 2) xs + +-- +-- Other Utilities +-- + +restrictMap :: (Integer,Integer) -> M.Map Integer b -> M.Map Integer b +restrictMap (lo,hi) m = mid + where (_, mid_hi) = M.split (lo-1) m + (mid, _) = M.split (hi+1) mid_hi + +-- for example: reassocTuples a [(b,c),(d,e)] f == [(a,b),(c,d),(e,f)] +reassocTuples :: a -> [(a,a)] -> a -> [(a,a)] +reassocTuples initial [] last + = [(initial,last)] +reassocTuples initial ((a,b):tuples) last + = (initial,a) : reassocTuples b tuples last + +-- Note [GHC.Cmm.Switch vs. GHC.Cmm.Switch.Implement] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- I (Joachim) separated the two somewhat closely related modules +-- +-- - GHC.Cmm.Switch, which provides the CmmSwitchTargets type and contains the strategy +-- for implementing a Cmm switch (createSwitchPlan), and +-- - GHC.Cmm.Switch.Implement, which contains the actual Cmm graph modification, +-- +-- for these reasons: +-- +-- * GHC.Cmm.Switch is very low in the dependency tree, i.e. 
does not depend on any +-- GHC specific modules at all (with the exception of Output and +-- GHC.Cmm.Dataflow (Literal)). +-- * GHC.Cmm.Switch.Implement is the Cmm transformation and hence very high in +-- the dependency tree. +-- * GHC.Cmm.Switch provides the CmmSwitchTargets data type, which is abstract, but +-- used in GHC.Cmm.Node. +-- * Because GHC.Cmm.Switch is low in the dependency tree, the separation allows +-- for more parallelism when building GHC. +-- * The interaction between the modules is very explicit and easy to +-- understand, due to the small and simple interface. diff --git a/compiler/GHC/Cmm/Switch/Implement.hs b/compiler/GHC/Cmm/Switch/Implement.hs new file mode 100644 index 0000000000..dfac116764 --- /dev/null +++ b/compiler/GHC/Cmm/Switch/Implement.hs @@ -0,0 +1,116 @@ +{-# LANGUAGE GADTs #-} +module GHC.Cmm.Switch.Implement + ( cmmImplementSwitchPlans + ) +where + +import GhcPrelude + +import GHC.Cmm.Dataflow.Block +import GHC.Cmm.BlockId +import GHC.Cmm +import GHC.Cmm.Utils +import GHC.Cmm.Switch +import UniqSupply +import DynFlags + +-- +-- This module replaces Switch statements as generated by the Stg -> Cmm +-- transformation, which might be huge and sparse and hence unsuitable for +-- assembly code, by proper constructs (if-then-else trees, dense jump tables). +-- +-- The actual, abstract strategy is determined by createSwitchPlan in +-- GHC.Cmm.Switch and returned as a SwitchPlan; here is just the implementation in +-- terms of Cmm code. See Note [Cmm Switches, the general plan] in GHC.Cmm.Switch. +-- +-- This division into different modules is both to clearly separate concerns, +-- but also because createSwitchPlan needs access to the constructors of +-- SwitchTargets, a data type exported abstractly by GHC.Cmm.Switch. +-- + +-- | Traverses the 'CmmGraph', making sure that 'CmmSwitch' are suitable for +-- code generation. +cmmImplementSwitchPlans :: DynFlags -> CmmGraph -> UniqSM CmmGraph +cmmImplementSwitchPlans dflags g + -- Switch generation done by backend (LLVM/C) + | targetSupportsSwitch (hscTarget dflags) = return g + | otherwise = do + blocks' <- concat `fmap` mapM (visitSwitches dflags) (toBlockList g) + return $ ofBlockList (g_entry g) blocks' + +visitSwitches :: DynFlags -> CmmBlock -> UniqSM [CmmBlock] +visitSwitches dflags block + | (entry@(CmmEntry _ scope), middle, CmmSwitch vanillaExpr ids) <- blockSplit block + = do + let plan = createSwitchPlan ids + -- See Note [Floating switch expressions] + (assignSimple, simpleExpr) <- floatSwitchExpr dflags vanillaExpr + + (newTail, newBlocks) <- implementSwitchPlan dflags scope simpleExpr plan + + let block' = entry `blockJoinHead` middle `blockAppend` assignSimple `blockAppend` newTail + + return $ block' : newBlocks + + | otherwise + = return [block] + +-- Note [Floating switch expressions] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +-- When we translate a sparse switch into a search tree we would like +-- to compute the value we compare against only once. + +-- For this purpose we assign the switch expression to a local register +-- and then use this register when constructing the actual binary tree. + +-- This is important as the expression could contain expensive code like +-- memory loads or divisions which we REALLY don't want to duplicate. + +-- This happened in parts of the handwritten RTS Cmm code. 
See also #16933 + +-- See Note [Floating switch expressions] +floatSwitchExpr :: DynFlags -> CmmExpr -> UniqSM (Block CmmNode O O, CmmExpr) +floatSwitchExpr _ reg@(CmmReg {}) = return (emptyBlock, reg) +floatSwitchExpr dflags expr = do + (assign, expr') <- cmmMkAssign dflags expr <$> getUniqueM + return (BMiddle assign, expr') + + +-- Implementing a switch plan (returning a tail block) +implementSwitchPlan :: DynFlags -> CmmTickScope -> CmmExpr -> SwitchPlan -> UniqSM (Block CmmNode O C, [CmmBlock]) +implementSwitchPlan dflags scope expr = go + where + go (Unconditionally l) + = return (emptyBlock `blockJoinTail` CmmBranch l, []) + go (JumpTable ids) + = return (emptyBlock `blockJoinTail` CmmSwitch expr ids, []) + go (IfLT signed i ids1 ids2) + = do + (bid1, newBlocks1) <- go' ids1 + (bid2, newBlocks2) <- go' ids2 + + let lt | signed = cmmSLtWord + | otherwise = cmmULtWord + scrut = lt dflags expr $ CmmLit $ mkWordCLit dflags i + lastNode = CmmCondBranch scrut bid1 bid2 Nothing + lastBlock = emptyBlock `blockJoinTail` lastNode + return (lastBlock, newBlocks1++newBlocks2) + go (IfEqual i l ids2) + = do + (bid2, newBlocks2) <- go' ids2 + + let scrut = cmmNeWord dflags expr $ CmmLit $ mkWordCLit dflags i + lastNode = CmmCondBranch scrut bid2 l Nothing + lastBlock = emptyBlock `blockJoinTail` lastNode + return (lastBlock, newBlocks2) + + -- Same but returning a label to branch to + go' (Unconditionally l) + = return (l, []) + go' p + = do + bid <- mkBlockId `fmap` getUniqueM + (last, newBlocks) <- go p + let block = CmmEntry bid scope `blockJoinHead` last + return (bid, block: newBlocks) diff --git a/compiler/GHC/Cmm/Type.hs b/compiler/GHC/Cmm/Type.hs new file mode 100644 index 0000000000..867a260078 --- /dev/null +++ b/compiler/GHC/Cmm/Type.hs @@ -0,0 +1,432 @@ +module GHC.Cmm.Type + ( CmmType -- Abstract + , b8, b16, b32, b64, b128, b256, b512, f32, f64, bWord, bHalfWord, gcWord + , cInt + , cmmBits, cmmFloat + , typeWidth, cmmEqType, cmmEqType_ignoring_ptrhood + , isFloatType, isGcPtrType, isBitsType + , isWord32, isWord64, isFloat64, isFloat32 + + , Width(..) + , widthInBits, widthInBytes, widthInLog, widthFromBytes + , wordWidth, halfWordWidth, cIntWidth + , halfWordMask + , narrowU, narrowS + , rEP_CostCentreStack_mem_alloc + , rEP_CostCentreStack_scc_count + , rEP_StgEntCounter_allocs + , rEP_StgEntCounter_allocd + + , ForeignHint(..) + + , Length + , vec, vec2, vec4, vec8, vec16 + , vec2f64, vec2b64, vec4f32, vec4b32, vec8b16, vec16b8 + , cmmVec + , vecLength, vecElemType + , isVecType + ) +where + + +import GhcPrelude + +import DynFlags +import FastString +import Outputable + +import Data.Word +import Data.Int + +----------------------------------------------------------------------------- +-- CmmType +----------------------------------------------------------------------------- + + -- NOTE: CmmType is an abstract type, not exported from this + -- module so you can easily change its representation + -- + -- However Width is exported in a concrete way, + -- and is used extensively in pattern-matching + +data CmmType -- The important one! 
+ = CmmType CmmCat Width + +data CmmCat -- "Category" (not exported) + = GcPtrCat -- GC pointer + | BitsCat -- Non-pointer + | FloatCat -- Float + | VecCat Length CmmCat -- Vector + deriving( Eq ) + -- See Note [Signed vs unsigned] at the end + +instance Outputable CmmType where + ppr (CmmType cat wid) = ppr cat <> ppr (widthInBits wid) + +instance Outputable CmmCat where + ppr FloatCat = text "F" + ppr GcPtrCat = text "P" + ppr BitsCat = text "I" + ppr (VecCat n cat) = ppr cat <> text "x" <> ppr n <> text "V" + +-- Why is CmmType stratified? For native code generation, +-- most of the time you just want to know what sort of register +-- to put the thing in, and for this you need to know how +-- many bits thing has, and whether it goes in a floating-point +-- register. By contrast, the distinction between GcPtr and +-- GcNonPtr is of interest to only a few parts of the code generator. + +-------- Equality on CmmType -------------- +-- CmmType is *not* an instance of Eq; sometimes we care about the +-- Gc/NonGc distinction, and sometimes we don't +-- So we use an explicit function to force you to think about it +cmmEqType :: CmmType -> CmmType -> Bool -- Exact equality +cmmEqType (CmmType c1 w1) (CmmType c2 w2) = c1==c2 && w1==w2 + +cmmEqType_ignoring_ptrhood :: CmmType -> CmmType -> Bool + -- This equality is temporary; used in CmmLint + -- but the RTS files are not yet well-typed wrt pointers +cmmEqType_ignoring_ptrhood (CmmType c1 w1) (CmmType c2 w2) + = c1 `weak_eq` c2 && w1==w2 + where + weak_eq :: CmmCat -> CmmCat -> Bool + FloatCat `weak_eq` FloatCat = True + FloatCat `weak_eq` _other = False + _other `weak_eq` FloatCat = False + (VecCat l1 cat1) `weak_eq` (VecCat l2 cat2) = l1 == l2 + && cat1 `weak_eq` cat2 + (VecCat {}) `weak_eq` _other = False + _other `weak_eq` (VecCat {}) = False + _word1 `weak_eq` _word2 = True -- Ignores GcPtr + +--- Simple operations on CmmType ----- +typeWidth :: CmmType -> Width +typeWidth (CmmType _ w) = w + +cmmBits, cmmFloat :: Width -> CmmType +cmmBits = CmmType BitsCat +cmmFloat = CmmType FloatCat + +-------- Common CmmTypes ------------ +-- Floats and words of specific widths +b8, b16, b32, b64, b128, b256, b512, f32, f64 :: CmmType +b8 = cmmBits W8 +b16 = cmmBits W16 +b32 = cmmBits W32 +b64 = cmmBits W64 +b128 = cmmBits W128 +b256 = cmmBits W256 +b512 = cmmBits W512 +f32 = cmmFloat W32 +f64 = cmmFloat W64 + +-- CmmTypes of native word widths +bWord :: DynFlags -> CmmType +bWord dflags = cmmBits (wordWidth dflags) + +bHalfWord :: DynFlags -> CmmType +bHalfWord dflags = cmmBits (halfWordWidth dflags) + +gcWord :: DynFlags -> CmmType +gcWord dflags = CmmType GcPtrCat (wordWidth dflags) + +cInt :: DynFlags -> CmmType +cInt dflags = cmmBits (cIntWidth dflags) + +------------ Predicates ---------------- +isFloatType, isGcPtrType, isBitsType :: CmmType -> Bool +isFloatType (CmmType FloatCat _) = True +isFloatType _other = False + +isGcPtrType (CmmType GcPtrCat _) = True +isGcPtrType _other = False + +isBitsType (CmmType BitsCat _) = True +isBitsType _ = False + +isWord32, isWord64, isFloat32, isFloat64 :: CmmType -> Bool +-- isWord64 is true of 64-bit non-floats (both gc-ptrs and otherwise) +-- isFloat32 and 64 are obvious + +isWord64 (CmmType BitsCat W64) = True +isWord64 (CmmType GcPtrCat W64) = True +isWord64 _other = False + +isWord32 (CmmType BitsCat W32) = True +isWord32 (CmmType GcPtrCat W32) = True +isWord32 _other = False + +isFloat32 (CmmType FloatCat W32) = True +isFloat32 _other = False + +isFloat64 (CmmType FloatCat W64) = True +isFloat64 
_other = False + +----------------------------------------------------------------------------- +-- Width +----------------------------------------------------------------------------- + +data Width = W8 | W16 | W32 | W64 + | W128 + | W256 + | W512 + deriving (Eq, Ord, Show) + +instance Outputable Width where + ppr rep = ptext (mrStr rep) + +mrStr :: Width -> PtrString +mrStr = sLit . show + + +-------- Common Widths ------------ +wordWidth :: DynFlags -> Width +wordWidth dflags + | wORD_SIZE dflags == 4 = W32 + | wORD_SIZE dflags == 8 = W64 + | otherwise = panic "MachOp.wordRep: Unknown word size" + +halfWordWidth :: DynFlags -> Width +halfWordWidth dflags + | wORD_SIZE dflags == 4 = W16 + | wORD_SIZE dflags == 8 = W32 + | otherwise = panic "MachOp.halfWordRep: Unknown word size" + +halfWordMask :: DynFlags -> Integer +halfWordMask dflags + | wORD_SIZE dflags == 4 = 0xFFFF + | wORD_SIZE dflags == 8 = 0xFFFFFFFF + | otherwise = panic "MachOp.halfWordMask: Unknown word size" + +-- cIntRep is the Width for a C-language 'int' +cIntWidth :: DynFlags -> Width +cIntWidth dflags = case cINT_SIZE dflags of + 4 -> W32 + 8 -> W64 + s -> panic ("cIntWidth: Unknown cINT_SIZE: " ++ show s) + +widthInBits :: Width -> Int +widthInBits W8 = 8 +widthInBits W16 = 16 +widthInBits W32 = 32 +widthInBits W64 = 64 +widthInBits W128 = 128 +widthInBits W256 = 256 +widthInBits W512 = 512 + + +widthInBytes :: Width -> Int +widthInBytes W8 = 1 +widthInBytes W16 = 2 +widthInBytes W32 = 4 +widthInBytes W64 = 8 +widthInBytes W128 = 16 +widthInBytes W256 = 32 +widthInBytes W512 = 64 + + +widthFromBytes :: Int -> Width +widthFromBytes 1 = W8 +widthFromBytes 2 = W16 +widthFromBytes 4 = W32 +widthFromBytes 8 = W64 +widthFromBytes 16 = W128 +widthFromBytes 32 = W256 +widthFromBytes 64 = W512 + +widthFromBytes n = pprPanic "no width for given number of bytes" (ppr n) + +-- log_2 of the width in bytes, useful for generating shifts. 
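+-- For example, scaling an index by the width of each element can be done with
+-- a shift instead of a multiply: i * widthInBytes w == i `shiftL` widthInLog w.
+-- (This is how cmmIndexExpr in GHC.Cmm.Utils computes a byte offset.)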
+widthInLog :: Width -> Int +widthInLog W8 = 0 +widthInLog W16 = 1 +widthInLog W32 = 2 +widthInLog W64 = 3 +widthInLog W128 = 4 +widthInLog W256 = 5 +widthInLog W512 = 6 + + +-- widening / narrowing + +narrowU :: Width -> Integer -> Integer +narrowU W8 x = fromIntegral (fromIntegral x :: Word8) +narrowU W16 x = fromIntegral (fromIntegral x :: Word16) +narrowU W32 x = fromIntegral (fromIntegral x :: Word32) +narrowU W64 x = fromIntegral (fromIntegral x :: Word64) +narrowU _ _ = panic "narrowTo" + +narrowS :: Width -> Integer -> Integer +narrowS W8 x = fromIntegral (fromIntegral x :: Int8) +narrowS W16 x = fromIntegral (fromIntegral x :: Int16) +narrowS W32 x = fromIntegral (fromIntegral x :: Int32) +narrowS W64 x = fromIntegral (fromIntegral x :: Int64) +narrowS _ _ = panic "narrowTo" + +----------------------------------------------------------------------------- +-- SIMD +----------------------------------------------------------------------------- + +type Length = Int + +vec :: Length -> CmmType -> CmmType +vec l (CmmType cat w) = CmmType (VecCat l cat) vecw + where + vecw :: Width + vecw = widthFromBytes (l*widthInBytes w) + +vec2, vec4, vec8, vec16 :: CmmType -> CmmType +vec2 = vec 2 +vec4 = vec 4 +vec8 = vec 8 +vec16 = vec 16 + +vec2f64, vec2b64, vec4f32, vec4b32, vec8b16, vec16b8 :: CmmType +vec2f64 = vec 2 f64 +vec2b64 = vec 2 b64 +vec4f32 = vec 4 f32 +vec4b32 = vec 4 b32 +vec8b16 = vec 8 b16 +vec16b8 = vec 16 b8 + +cmmVec :: Int -> CmmType -> CmmType +cmmVec n (CmmType cat w) = + CmmType (VecCat n cat) (widthFromBytes (n*widthInBytes w)) + +vecLength :: CmmType -> Length +vecLength (CmmType (VecCat l _) _) = l +vecLength _ = panic "vecLength: not a vector" + +vecElemType :: CmmType -> CmmType +vecElemType (CmmType (VecCat l cat) w) = CmmType cat scalw + where + scalw :: Width + scalw = widthFromBytes (widthInBytes w `div` l) +vecElemType _ = panic "vecElemType: not a vector" + +isVecType :: CmmType -> Bool +isVecType (CmmType (VecCat {}) _) = True +isVecType _ = False + +------------------------------------------------------------------------- +-- Hints + +-- Hints are extra type information we attach to the arguments and +-- results of a foreign call, where more type information is sometimes +-- needed by the ABI to make the correct kind of call. + +data ForeignHint + = NoHint | AddrHint | SignedHint + deriving( Eq ) + -- Used to give extra per-argument or per-result + -- information needed by foreign calling conventions + +------------------------------------------------------------------------- + +-- These don't really belong here, but I don't know where is best to +-- put them. 
+ +rEP_CostCentreStack_mem_alloc :: DynFlags -> CmmType +rEP_CostCentreStack_mem_alloc dflags + = cmmBits (widthFromBytes (pc_REP_CostCentreStack_mem_alloc pc)) + where pc = platformConstants dflags + +rEP_CostCentreStack_scc_count :: DynFlags -> CmmType +rEP_CostCentreStack_scc_count dflags + = cmmBits (widthFromBytes (pc_REP_CostCentreStack_scc_count pc)) + where pc = platformConstants dflags + +rEP_StgEntCounter_allocs :: DynFlags -> CmmType +rEP_StgEntCounter_allocs dflags + = cmmBits (widthFromBytes (pc_REP_StgEntCounter_allocs pc)) + where pc = platformConstants dflags + +rEP_StgEntCounter_allocd :: DynFlags -> CmmType +rEP_StgEntCounter_allocd dflags + = cmmBits (widthFromBytes (pc_REP_StgEntCounter_allocd pc)) + where pc = platformConstants dflags + +------------------------------------------------------------------------- +{- Note [Signed vs unsigned] + ~~~~~~~~~~~~~~~~~~~~~~~~~ +Should a CmmType include a signed vs. unsigned distinction? + +This is very much like a "hint" in C-- terminology: it isn't necessary +in order to generate correct code, but it might be useful in that the +compiler can generate better code if it has access to higher-level +hints about data. This is important at call boundaries, because the +definition of a function is not visible at all of its call sites, so +the compiler cannot infer the hints. + +Here in Cmm, we're taking a slightly different approach. We include +the int vs. float hint in the CmmType, because (a) the majority of +platforms have a strong distinction between float and int registers, +and (b) we don't want to do any heavyweight hint-inference in the +native code backend in order to get good code. We're treating the +hint more like a type: our Cmm is always completely consistent with +respect to hints. All coercions between float and int are explicit. + +What about the signed vs. unsigned hint? This information might be +useful if we want to keep sub-word-sized values in word-size +registers, which we must do if we only have word-sized registers. + +On such a system, there are two straightforward conventions for +representing sub-word-sized values: + +(a) Leave the upper bits undefined. Comparison operations must + sign- or zero-extend both operands before comparing them, + depending on whether the comparison is signed or unsigned. + +(b) Always keep the values sign- or zero-extended as appropriate. + Arithmetic operations must narrow the result to the appropriate + size. + +A clever compiler might not use either (a) or (b) exclusively, instead +it would attempt to minimize the coercions by analysis: the same kind +of analysis that propagates hints around. In Cmm we don't want to +have to do this, so we plump for having richer types and keeping the +type information consistent. + +If signed/unsigned hints are missing from CmmType, then the only +choice we have is (a), because we don't know whether the result of an +operation should be sign- or zero-extended. + +Many architectures have extending load operations, which work well +with (b). To make use of them with (a), you need to know whether the +value is going to be sign- or zero-extended by an enclosing comparison +(for example), which involves knowing above the context. This is +doable but more complex. + +Further complicating the issue is foreign calls: a foreign calling +convention can specify that signed 8-bit quantities are passed as +sign-extended 32 bit quantities, for example (this is the case on the +PowerPC). So we *do* need sign information on foreign call arguments. 
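+
+As a small concrete illustration (using the narrowing helpers defined earlier in
+this module): the 8-bit pattern 0xE0 denotes 224 when zero-extended but -32 when
+sign-extended, i.e. narrowU W8 0xE0 == 224 while narrowS W8 0xE0 == -32. Which
+of the two an operation needs is exactly the signedness information discussed
+here.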
+ +Pros for adding signed vs. unsigned to CmmType: + + - It would let us use convention (b) above, and get easier + code generation for extending loads. + + - Less information required on foreign calls. + + - MachOp type would be simpler + +Cons: + + - More complexity + + - What is the CmmType for a VanillaReg? Currently it is + always wordRep, but now we have to decide whether it is + signed or unsigned. The same VanillaReg can thus have + different CmmType in different parts of the program. + + - Extra coercions cluttering up expressions. + +Currently for GHC, the foreign call point is moot, because we do our +own promotion of sub-word-sized values to word-sized values. The Int8 +type is represented by an Int# which is kept sign-extended at all times +(this is slightly naughty, because we're making assumptions about the +C calling convention rather early on in the compiler). However, given +this, the cons outweigh the pros. + +-} + diff --git a/compiler/GHC/Cmm/Utils.hs b/compiler/GHC/Cmm/Utils.hs new file mode 100644 index 0000000000..d879c7b82f --- /dev/null +++ b/compiler/GHC/Cmm/Utils.hs @@ -0,0 +1,607 @@ +{-# LANGUAGE GADTs, RankNTypes #-} +{-# LANGUAGE BangPatterns #-} + +----------------------------------------------------------------------------- +-- +-- Cmm utilities. +-- +-- (c) The University of Glasgow 2004-2006 +-- +----------------------------------------------------------------------------- + +module GHC.Cmm.Utils( + -- CmmType + primRepCmmType, slotCmmType, slotForeignHint, + typeCmmType, typeForeignHint, primRepForeignHint, + + -- CmmLit + zeroCLit, mkIntCLit, + mkWordCLit, packHalfWordsCLit, + mkByteStringCLit, + mkDataLits, mkRODataLits, + mkStgWordCLit, + + -- CmmExpr + mkIntExpr, zeroExpr, + mkLblExpr, + cmmRegOff, cmmOffset, cmmLabelOff, cmmOffsetLit, cmmOffsetExpr, + cmmRegOffB, cmmOffsetB, cmmLabelOffB, cmmOffsetLitB, cmmOffsetExprB, + cmmRegOffW, cmmOffsetW, cmmLabelOffW, cmmOffsetLitW, cmmOffsetExprW, + cmmIndex, cmmIndexExpr, cmmLoadIndex, cmmLoadIndexW, + cmmNegate, + cmmULtWord, cmmUGeWord, cmmUGtWord, cmmUShrWord, + cmmSLtWord, + cmmNeWord, cmmEqWord, + cmmOrWord, cmmAndWord, + cmmSubWord, cmmAddWord, cmmMulWord, cmmQuotWord, + cmmToWord, + + cmmMkAssign, + + isTrivialCmmExpr, hasNoGlobalRegs, isLit, isComparisonExpr, + + baseExpr, spExpr, hpExpr, spLimExpr, hpLimExpr, + currentTSOExpr, currentNurseryExpr, cccsExpr, + + -- Statics + blankWord, + + -- Tagging + cmmTagMask, cmmPointerMask, cmmUntag, cmmIsTagged, + cmmConstrTag1, + + -- Overlap and usage + regsOverlap, regUsedIn, + + -- Liveness and bitmaps + mkLiveness, + + -- * Operations that probably don't belong here + modifyGraph, + + ofBlockMap, toBlockMap, + ofBlockList, toBlockList, bodyToBlockList, + toBlockListEntryFirst, toBlockListEntryFirstFalseFallthrough, + foldlGraphBlocks, mapGraphNodes, revPostorder, mapGraphNodes1, + + -- * Ticks + blockTicks + ) where + +import GhcPrelude + +import TyCon ( PrimRep(..), PrimElemRep(..) 
) +import GHC.Types.RepType ( UnaryType, SlotTy (..), typePrimRep1 ) + +import GHC.Runtime.Layout +import GHC.Cmm +import GHC.Cmm.BlockId +import GHC.Cmm.CLabel +import Outputable +import DynFlags +import Unique +import GHC.Platform.Regs + +import Data.ByteString (ByteString) +import qualified Data.ByteString as BS +import Data.Bits +import GHC.Cmm.Dataflow.Graph +import GHC.Cmm.Dataflow.Label +import GHC.Cmm.Dataflow.Block +import GHC.Cmm.Dataflow.Collections + +--------------------------------------------------- +-- +-- CmmTypes +-- +--------------------------------------------------- + +primRepCmmType :: DynFlags -> PrimRep -> CmmType +primRepCmmType _ VoidRep = panic "primRepCmmType:VoidRep" +primRepCmmType dflags LiftedRep = gcWord dflags +primRepCmmType dflags UnliftedRep = gcWord dflags +primRepCmmType dflags IntRep = bWord dflags +primRepCmmType dflags WordRep = bWord dflags +primRepCmmType _ Int8Rep = b8 +primRepCmmType _ Word8Rep = b8 +primRepCmmType _ Int16Rep = b16 +primRepCmmType _ Word16Rep = b16 +primRepCmmType _ Int32Rep = b32 +primRepCmmType _ Word32Rep = b32 +primRepCmmType _ Int64Rep = b64 +primRepCmmType _ Word64Rep = b64 +primRepCmmType dflags AddrRep = bWord dflags +primRepCmmType _ FloatRep = f32 +primRepCmmType _ DoubleRep = f64 +primRepCmmType _ (VecRep len rep) = vec len (primElemRepCmmType rep) + +slotCmmType :: DynFlags -> SlotTy -> CmmType +slotCmmType dflags PtrSlot = gcWord dflags +slotCmmType dflags WordSlot = bWord dflags +slotCmmType _ Word64Slot = b64 +slotCmmType _ FloatSlot = f32 +slotCmmType _ DoubleSlot = f64 + +primElemRepCmmType :: PrimElemRep -> CmmType +primElemRepCmmType Int8ElemRep = b8 +primElemRepCmmType Int16ElemRep = b16 +primElemRepCmmType Int32ElemRep = b32 +primElemRepCmmType Int64ElemRep = b64 +primElemRepCmmType Word8ElemRep = b8 +primElemRepCmmType Word16ElemRep = b16 +primElemRepCmmType Word32ElemRep = b32 +primElemRepCmmType Word64ElemRep = b64 +primElemRepCmmType FloatElemRep = f32 +primElemRepCmmType DoubleElemRep = f64 + +typeCmmType :: DynFlags -> UnaryType -> CmmType +typeCmmType dflags ty = primRepCmmType dflags (typePrimRep1 ty) + +primRepForeignHint :: PrimRep -> ForeignHint +primRepForeignHint VoidRep = panic "primRepForeignHint:VoidRep" +primRepForeignHint LiftedRep = AddrHint +primRepForeignHint UnliftedRep = AddrHint +primRepForeignHint IntRep = SignedHint +primRepForeignHint Int8Rep = SignedHint +primRepForeignHint Int16Rep = SignedHint +primRepForeignHint Int32Rep = SignedHint +primRepForeignHint Int64Rep = SignedHint +primRepForeignHint WordRep = NoHint +primRepForeignHint Word8Rep = NoHint +primRepForeignHint Word16Rep = NoHint +primRepForeignHint Word32Rep = NoHint +primRepForeignHint Word64Rep = NoHint +primRepForeignHint AddrRep = AddrHint -- NB! AddrHint, but NonPtrArg +primRepForeignHint FloatRep = NoHint +primRepForeignHint DoubleRep = NoHint +primRepForeignHint (VecRep {}) = NoHint + +slotForeignHint :: SlotTy -> ForeignHint +slotForeignHint PtrSlot = AddrHint +slotForeignHint WordSlot = NoHint +slotForeignHint Word64Slot = NoHint +slotForeignHint FloatSlot = NoHint +slotForeignHint DoubleSlot = NoHint + +typeForeignHint :: UnaryType -> ForeignHint +typeForeignHint = primRepForeignHint . typePrimRep1 + +--------------------------------------------------- +-- +-- CmmLit +-- +--------------------------------------------------- + +-- XXX: should really be Integer, since Int doesn't necessarily cover +-- the full range of target Ints. 
+mkIntCLit :: DynFlags -> Int -> CmmLit +mkIntCLit dflags i = CmmInt (toInteger i) (wordWidth dflags) + +mkIntExpr :: DynFlags -> Int -> CmmExpr +mkIntExpr dflags i = CmmLit $! mkIntCLit dflags i + +zeroCLit :: DynFlags -> CmmLit +zeroCLit dflags = CmmInt 0 (wordWidth dflags) + +zeroExpr :: DynFlags -> CmmExpr +zeroExpr dflags = CmmLit (zeroCLit dflags) + +mkWordCLit :: DynFlags -> Integer -> CmmLit +mkWordCLit dflags wd = CmmInt wd (wordWidth dflags) + +mkByteStringCLit + :: CLabel -> ByteString -> (CmmLit, GenCmmDecl CmmStatics info stmt) +-- We have to make a top-level decl for the string, +-- and return a literal pointing to it +mkByteStringCLit lbl bytes + = (CmmLabel lbl, CmmData (Section sec lbl) $ Statics lbl [CmmString bytes]) + where + -- This can not happen for String literals (as there \NUL is replaced by + -- C0 80). However, it can happen with Addr# literals. + sec = if 0 `BS.elem` bytes then ReadOnlyData else CString + +mkDataLits :: Section -> CLabel -> [CmmLit] -> GenCmmDecl CmmStatics info stmt +-- Build a data-segment data block +mkDataLits section lbl lits + = CmmData section (Statics lbl $ map CmmStaticLit lits) + +mkRODataLits :: CLabel -> [CmmLit] -> GenCmmDecl CmmStatics info stmt +-- Build a read-only data block +mkRODataLits lbl lits + = mkDataLits section lbl lits + where + section | any needsRelocation lits = Section RelocatableReadOnlyData lbl + | otherwise = Section ReadOnlyData lbl + needsRelocation (CmmLabel _) = True + needsRelocation (CmmLabelOff _ _) = True + needsRelocation _ = False + +mkStgWordCLit :: DynFlags -> StgWord -> CmmLit +mkStgWordCLit dflags wd = CmmInt (fromStgWord wd) (wordWidth dflags) + +packHalfWordsCLit :: DynFlags -> StgHalfWord -> StgHalfWord -> CmmLit +-- Make a single word literal in which the lower_half_word is +-- at the lower address, and the upper_half_word is at the +-- higher address +-- ToDo: consider using half-word lits instead +-- but be careful: that's vulnerable when reversed +packHalfWordsCLit dflags lower_half_word upper_half_word + = if wORDS_BIGENDIAN dflags + then mkWordCLit dflags ((l `shiftL` halfWordSizeInBits dflags) .|. u) + else mkWordCLit dflags (l .|. 
(u `shiftL` halfWordSizeInBits dflags)) + where l = fromStgHalfWord lower_half_word + u = fromStgHalfWord upper_half_word + +--------------------------------------------------- +-- +-- CmmExpr +-- +--------------------------------------------------- + +mkLblExpr :: CLabel -> CmmExpr +mkLblExpr lbl = CmmLit (CmmLabel lbl) + +cmmOffsetExpr :: DynFlags -> CmmExpr -> CmmExpr -> CmmExpr +-- assumes base and offset have the same CmmType +cmmOffsetExpr dflags e (CmmLit (CmmInt n _)) = cmmOffset dflags e (fromInteger n) +cmmOffsetExpr dflags e byte_off = CmmMachOp (MO_Add (cmmExprWidth dflags e)) [e, byte_off] + +cmmOffset :: DynFlags -> CmmExpr -> Int -> CmmExpr +cmmOffset _ e 0 = e +cmmOffset _ (CmmReg reg) byte_off = cmmRegOff reg byte_off +cmmOffset _ (CmmRegOff reg m) byte_off = cmmRegOff reg (m+byte_off) +cmmOffset _ (CmmLit lit) byte_off = CmmLit (cmmOffsetLit lit byte_off) +cmmOffset _ (CmmStackSlot area off) byte_off + = CmmStackSlot area (off - byte_off) + -- note stack area offsets increase towards lower addresses +cmmOffset _ (CmmMachOp (MO_Add rep) [expr, CmmLit (CmmInt byte_off1 _rep)]) byte_off2 + = CmmMachOp (MO_Add rep) + [expr, CmmLit (CmmInt (byte_off1 + toInteger byte_off2) rep)] +cmmOffset dflags expr byte_off + = CmmMachOp (MO_Add width) [expr, CmmLit (CmmInt (toInteger byte_off) width)] + where + width = cmmExprWidth dflags expr + +-- Smart constructor for CmmRegOff. Same caveats as cmmOffset above. +cmmRegOff :: CmmReg -> Int -> CmmExpr +cmmRegOff reg 0 = CmmReg reg +cmmRegOff reg byte_off = CmmRegOff reg byte_off + +cmmOffsetLit :: CmmLit -> Int -> CmmLit +cmmOffsetLit (CmmLabel l) byte_off = cmmLabelOff l byte_off +cmmOffsetLit (CmmLabelOff l m) byte_off = cmmLabelOff l (m+byte_off) +cmmOffsetLit (CmmLabelDiffOff l1 l2 m w) byte_off + = CmmLabelDiffOff l1 l2 (m+byte_off) w +cmmOffsetLit (CmmInt m rep) byte_off = CmmInt (m + fromIntegral byte_off) rep +cmmOffsetLit _ byte_off = pprPanic "cmmOffsetLit" (ppr byte_off) + +cmmLabelOff :: CLabel -> Int -> CmmLit +-- Smart constructor for CmmLabelOff +cmmLabelOff lbl 0 = CmmLabel lbl +cmmLabelOff lbl byte_off = CmmLabelOff lbl byte_off + +-- | Useful for creating an index into an array, with a statically known offset. +-- The type is the element type; used for making the multiplier +cmmIndex :: DynFlags + -> Width -- Width w + -> CmmExpr -- Address of vector of items of width w + -> Int -- Which element of the vector (0 based) + -> CmmExpr -- Address of i'th element +cmmIndex dflags width base idx = cmmOffset dflags base (idx * widthInBytes width) + +-- | Useful for creating an index into an array, with an unknown offset. 
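+-- For example, addressing the i'th word of an array starting at 'base' is
+-- roughly 'cmmIndexExpr dflags (wordWidth dflags) base i', which scales the
+-- index with a shift by 'widthInLog' and adds it to the base.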
+cmmIndexExpr :: DynFlags + -> Width -- Width w + -> CmmExpr -- Address of vector of items of width w + -> CmmExpr -- Which element of the vector (0 based) + -> CmmExpr -- Address of i'th element +cmmIndexExpr dflags width base (CmmLit (CmmInt n _)) = cmmIndex dflags width base (fromInteger n) +cmmIndexExpr dflags width base idx = + cmmOffsetExpr dflags base byte_off + where + idx_w = cmmExprWidth dflags idx + byte_off = CmmMachOp (MO_Shl idx_w) [idx, mkIntExpr dflags (widthInLog width)] + +cmmLoadIndex :: DynFlags -> CmmType -> CmmExpr -> Int -> CmmExpr +cmmLoadIndex dflags ty expr ix = CmmLoad (cmmIndex dflags (typeWidth ty) expr ix) ty + +-- The "B" variants take byte offsets +cmmRegOffB :: CmmReg -> ByteOff -> CmmExpr +cmmRegOffB = cmmRegOff + +cmmOffsetB :: DynFlags -> CmmExpr -> ByteOff -> CmmExpr +cmmOffsetB = cmmOffset + +cmmOffsetExprB :: DynFlags -> CmmExpr -> CmmExpr -> CmmExpr +cmmOffsetExprB = cmmOffsetExpr + +cmmLabelOffB :: CLabel -> ByteOff -> CmmLit +cmmLabelOffB = cmmLabelOff + +cmmOffsetLitB :: CmmLit -> ByteOff -> CmmLit +cmmOffsetLitB = cmmOffsetLit + +----------------------- +-- The "W" variants take word offsets + +cmmOffsetExprW :: DynFlags -> CmmExpr -> CmmExpr -> CmmExpr +-- The second arg is a *word* offset; need to change it to bytes +cmmOffsetExprW dflags e (CmmLit (CmmInt n _)) = cmmOffsetW dflags e (fromInteger n) +cmmOffsetExprW dflags e wd_off = cmmIndexExpr dflags (wordWidth dflags) e wd_off + +cmmOffsetW :: DynFlags -> CmmExpr -> WordOff -> CmmExpr +cmmOffsetW dflags e n = cmmOffsetB dflags e (wordsToBytes dflags n) + +cmmRegOffW :: DynFlags -> CmmReg -> WordOff -> CmmExpr +cmmRegOffW dflags reg wd_off = cmmRegOffB reg (wordsToBytes dflags wd_off) + +cmmOffsetLitW :: DynFlags -> CmmLit -> WordOff -> CmmLit +cmmOffsetLitW dflags lit wd_off = cmmOffsetLitB lit (wordsToBytes dflags wd_off) + +cmmLabelOffW :: DynFlags -> CLabel -> WordOff -> CmmLit +cmmLabelOffW dflags lbl wd_off = cmmLabelOffB lbl (wordsToBytes dflags wd_off) + +cmmLoadIndexW :: DynFlags -> CmmExpr -> Int -> CmmType -> CmmExpr +cmmLoadIndexW dflags base off ty = CmmLoad (cmmOffsetW dflags base off) ty + +----------------------- +cmmULtWord, cmmUGeWord, cmmUGtWord, cmmUShrWord, + cmmSLtWord, + cmmNeWord, cmmEqWord, + cmmOrWord, cmmAndWord, + cmmSubWord, cmmAddWord, cmmMulWord, cmmQuotWord + :: DynFlags -> CmmExpr -> CmmExpr -> CmmExpr +cmmOrWord dflags e1 e2 = CmmMachOp (mo_wordOr dflags) [e1, e2] +cmmAndWord dflags e1 e2 = CmmMachOp (mo_wordAnd dflags) [e1, e2] +cmmNeWord dflags e1 e2 = CmmMachOp (mo_wordNe dflags) [e1, e2] +cmmEqWord dflags e1 e2 = CmmMachOp (mo_wordEq dflags) [e1, e2] +cmmULtWord dflags e1 e2 = CmmMachOp (mo_wordULt dflags) [e1, e2] +cmmUGeWord dflags e1 e2 = CmmMachOp (mo_wordUGe dflags) [e1, e2] +cmmUGtWord dflags e1 e2 = CmmMachOp (mo_wordUGt dflags) [e1, e2] +cmmSLtWord dflags e1 e2 = CmmMachOp (mo_wordSLt dflags) [e1, e2] +cmmUShrWord dflags e1 e2 = CmmMachOp (mo_wordUShr dflags) [e1, e2] +cmmAddWord dflags e1 e2 = CmmMachOp (mo_wordAdd dflags) [e1, e2] +cmmSubWord dflags e1 e2 = CmmMachOp (mo_wordSub dflags) [e1, e2] +cmmMulWord dflags e1 e2 = CmmMachOp (mo_wordMul dflags) [e1, e2] +cmmQuotWord dflags e1 e2 = CmmMachOp (mo_wordUQuot dflags) [e1, e2] + +cmmNegate :: DynFlags -> CmmExpr -> CmmExpr +cmmNegate _ (CmmLit (CmmInt n rep)) = CmmLit (CmmInt (-n) rep) +cmmNegate dflags e = CmmMachOp (MO_S_Neg (cmmExprWidth dflags e)) [e] + +blankWord :: DynFlags -> CmmStatic +blankWord dflags = CmmUninitialised (wORD_SIZE dflags) + +cmmToWord :: DynFlags -> CmmExpr -> CmmExpr 
+cmmToWord dflags e + | w == word = e + | otherwise = CmmMachOp (MO_UU_Conv w word) [e] + where + w = cmmExprWidth dflags e + word = wordWidth dflags + +cmmMkAssign :: DynFlags -> CmmExpr -> Unique -> (CmmNode O O, CmmExpr) +cmmMkAssign dflags expr uq = + let !ty = cmmExprType dflags expr + reg = (CmmLocal (LocalReg uq ty)) + in (CmmAssign reg expr, CmmReg reg) + + +--------------------------------------------------- +-- +-- CmmExpr predicates +-- +--------------------------------------------------- + +isTrivialCmmExpr :: CmmExpr -> Bool +isTrivialCmmExpr (CmmLoad _ _) = False +isTrivialCmmExpr (CmmMachOp _ _) = False +isTrivialCmmExpr (CmmLit _) = True +isTrivialCmmExpr (CmmReg _) = True +isTrivialCmmExpr (CmmRegOff _ _) = True +isTrivialCmmExpr (CmmStackSlot _ _) = panic "isTrivialCmmExpr CmmStackSlot" + +hasNoGlobalRegs :: CmmExpr -> Bool +hasNoGlobalRegs (CmmLoad e _) = hasNoGlobalRegs e +hasNoGlobalRegs (CmmMachOp _ es) = all hasNoGlobalRegs es +hasNoGlobalRegs (CmmLit _) = True +hasNoGlobalRegs (CmmReg (CmmLocal _)) = True +hasNoGlobalRegs (CmmRegOff (CmmLocal _) _) = True +hasNoGlobalRegs _ = False + +isLit :: CmmExpr -> Bool +isLit (CmmLit _) = True +isLit _ = False + +isComparisonExpr :: CmmExpr -> Bool +isComparisonExpr (CmmMachOp op _) = isComparisonMachOp op +isComparisonExpr _ = False + +--------------------------------------------------- +-- +-- Tagging +-- +--------------------------------------------------- + +-- Tag bits mask +cmmTagMask, cmmPointerMask :: DynFlags -> CmmExpr +cmmTagMask dflags = mkIntExpr dflags (tAG_MASK dflags) +cmmPointerMask dflags = mkIntExpr dflags (complement (tAG_MASK dflags)) + +-- Used to untag a possibly tagged pointer +-- A static label need not be untagged +cmmUntag, cmmIsTagged, cmmConstrTag1 :: DynFlags -> CmmExpr -> CmmExpr +cmmUntag _ e@(CmmLit (CmmLabel _)) = e +-- Default case +cmmUntag dflags e = cmmAndWord dflags e (cmmPointerMask dflags) + +-- Test if a closure pointer is untagged +cmmIsTagged dflags e = cmmNeWord dflags (cmmAndWord dflags e (cmmTagMask dflags)) (zeroExpr dflags) + +-- Get constructor tag, but one based. +cmmConstrTag1 dflags e = cmmAndWord dflags e (cmmTagMask dflags) + + +----------------------------------------------------------------------------- +-- Overlap and usage + +-- | Returns True if the two STG registers overlap on the specified +-- platform, in the sense that writing to one will clobber the +-- other. This includes the case that the two registers are the same +-- STG register. See Note [Overlapping global registers] for details. +regsOverlap :: DynFlags -> CmmReg -> CmmReg -> Bool +regsOverlap dflags (CmmGlobal g) (CmmGlobal g') + | Just real <- globalRegMaybe (targetPlatform dflags) g, + Just real' <- globalRegMaybe (targetPlatform dflags) g', + real == real' + = True +regsOverlap _ reg reg' = reg == reg' + +-- | Returns True if the STG register is used by the expression, in +-- the sense that a store to the register might affect the value of +-- the expression. +-- +-- We must check for overlapping registers and not just equal +-- registers here, otherwise CmmSink may incorrectly reorder +-- assignments that conflict due to overlap. See #10521 and Note +-- [Overlapping global registers]. 
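+-- (For instance, on some platforms a float register and a double register can
+-- be mapped to the same machine register, so two distinct CmmRegs may still
+-- clobber each other; see that Note for the details.)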
+regUsedIn :: DynFlags -> CmmReg -> CmmExpr -> Bool +regUsedIn dflags = regUsedIn_ where + _ `regUsedIn_` CmmLit _ = False + reg `regUsedIn_` CmmLoad e _ = reg `regUsedIn_` e + reg `regUsedIn_` CmmReg reg' = regsOverlap dflags reg reg' + reg `regUsedIn_` CmmRegOff reg' _ = regsOverlap dflags reg reg' + reg `regUsedIn_` CmmMachOp _ es = any (reg `regUsedIn_`) es + _ `regUsedIn_` CmmStackSlot _ _ = False + +-------------------------------------------- +-- +-- mkLiveness +-- +--------------------------------------------- + +mkLiveness :: DynFlags -> [LocalReg] -> Liveness +mkLiveness _ [] = [] +mkLiveness dflags (reg:regs) + = bits ++ mkLiveness dflags regs + where + sizeW = (widthInBytes (typeWidth (localRegType reg)) + wORD_SIZE dflags - 1) + `quot` wORD_SIZE dflags + -- number of words, rounded up + bits = replicate sizeW is_non_ptr -- True <=> Non Ptr + + is_non_ptr = not $ isGcPtrType (localRegType reg) + + +-- ============================================== - +-- ============================================== - +-- ============================================== - + +--------------------------------------------------- +-- +-- Manipulating CmmGraphs +-- +--------------------------------------------------- + +modifyGraph :: (Graph n C C -> Graph n' C C) -> GenCmmGraph n -> GenCmmGraph n' +modifyGraph f g = CmmGraph {g_entry=g_entry g, g_graph=f (g_graph g)} + +toBlockMap :: CmmGraph -> LabelMap CmmBlock +toBlockMap (CmmGraph {g_graph=GMany NothingO body NothingO}) = body + +ofBlockMap :: BlockId -> LabelMap CmmBlock -> CmmGraph +ofBlockMap entry bodyMap = CmmGraph {g_entry=entry, g_graph=GMany NothingO bodyMap NothingO} + +toBlockList :: CmmGraph -> [CmmBlock] +toBlockList g = mapElems $ toBlockMap g + +-- | like 'toBlockList', but the entry block always comes first +toBlockListEntryFirst :: CmmGraph -> [CmmBlock] +toBlockListEntryFirst g + | mapNull m = [] + | otherwise = entry_block : others + where + m = toBlockMap g + entry_id = g_entry g + Just entry_block = mapLookup entry_id m + others = filter ((/= entry_id) . entryLabel) (mapElems m) + +-- | Like 'toBlockListEntryFirst', but we strive to ensure that we order blocks +-- so that the false case of a conditional jumps to the next block in the output +-- list of blocks. This matches the way OldCmm blocks were output since in +-- OldCmm the false case was a fallthrough, whereas in Cmm conditional branches +-- have both true and false successors. Block ordering can make a big difference +-- in performance in the LLVM backend. Note that we rely crucially on the order +-- of successors returned for CmmCondBranch by the NonLocal instance for CmmNode +-- defined in cmm/CmmNode.hs. 
-GBM +toBlockListEntryFirstFalseFallthrough :: CmmGraph -> [CmmBlock] +toBlockListEntryFirstFalseFallthrough g + | mapNull m = [] + | otherwise = dfs setEmpty [entry_block] + where + m = toBlockMap g + entry_id = g_entry g + Just entry_block = mapLookup entry_id m + + dfs :: LabelSet -> [CmmBlock] -> [CmmBlock] + dfs _ [] = [] + dfs visited (block:bs) + | id `setMember` visited = dfs visited bs + | otherwise = block : dfs (setInsert id visited) bs' + where id = entryLabel block + bs' = foldr add_id bs (successors block) + add_id id bs = case mapLookup id m of + Just b -> b : bs + Nothing -> bs + +ofBlockList :: BlockId -> [CmmBlock] -> CmmGraph +ofBlockList entry blocks = CmmGraph { g_entry = entry + , g_graph = GMany NothingO body NothingO } + where body = foldr addBlock emptyBody blocks + +bodyToBlockList :: Body CmmNode -> [CmmBlock] +bodyToBlockList body = mapElems body + +mapGraphNodes :: ( CmmNode C O -> CmmNode C O + , CmmNode O O -> CmmNode O O + , CmmNode O C -> CmmNode O C) + -> CmmGraph -> CmmGraph +mapGraphNodes funs@(mf,_,_) g = + ofBlockMap (entryLabel $ mf $ CmmEntry (g_entry g) GlobalScope) $ + mapMap (mapBlock3' funs) $ toBlockMap g + +mapGraphNodes1 :: (forall e x. CmmNode e x -> CmmNode e x) -> CmmGraph -> CmmGraph +mapGraphNodes1 f = modifyGraph (mapGraph f) + + +foldlGraphBlocks :: (a -> CmmBlock -> a) -> a -> CmmGraph -> a +foldlGraphBlocks k z g = mapFoldl k z $ toBlockMap g + +revPostorder :: CmmGraph -> [CmmBlock] +revPostorder g = {-# SCC "revPostorder" #-} + revPostorderFrom (toBlockMap g) (g_entry g) + +------------------------------------------------- +-- Tick utilities + +-- | Extract all tick annotations from the given block +blockTicks :: Block CmmNode C C -> [CmmTickish] +blockTicks b = reverse $ foldBlockNodesF goStmt b [] + where goStmt :: CmmNode e x -> [CmmTickish] -> [CmmTickish] + goStmt (CmmTick t) ts = t:ts + goStmt _other ts = ts + + +-- ----------------------------------------------------------------------------- +-- Access to common global registers + +baseExpr, spExpr, hpExpr, currentTSOExpr, currentNurseryExpr, + spLimExpr, hpLimExpr, cccsExpr :: CmmExpr +baseExpr = CmmReg baseReg +spExpr = CmmReg spReg +spLimExpr = CmmReg spLimReg +hpExpr = CmmReg hpReg +hpLimExpr = CmmReg hpLimReg +currentTSOExpr = CmmReg currentTSOReg +currentNurseryExpr = CmmReg currentNurseryReg +cccsExpr = CmmReg cccsReg diff --git a/compiler/GHC/Cmm/cmm-notes b/compiler/GHC/Cmm/cmm-notes new file mode 100644 index 0000000000..d664a195b7 --- /dev/null +++ b/compiler/GHC/Cmm/cmm-notes @@ -0,0 +1,184 @@ +More notes (Aug 11) +~~~~~~~~~~~~~~~~~~ +* CmmInfo.cmmToRawCmm expands info tables to their representations + (needed for .cmm files as well as the code generators) + +* Why is FCode a lazy monad? That makes it inefficient. + We want laziness to get code out one procedure at a time, + but not at the instruction level. + UPDATE (31/5/2016): FCode is strict since 09afcc9b. + +Things we did + * Remove CmmCvt.graphToZgraph (Conversion from old to new Cmm reps) + * Remove HscMain.optionallyConvertAndOrCPS (converted old Cmm to + new, ran pipeline, and converted back) + * Remove CmmDecl. Put its types in Cmm. Import Cmm into OldCmm + so it can get those types. + + +More notes (June 11) +~~~~~~~~~~~~~~~~~~~~ + +* In CmmContFlowOpt.branchChainElim, can a single block be the + successor of two calls? 
+ +* Check in ClosureInfo: + -- NB: Results here should line up with the results of SMRep.rtsClosureType + +More notes (May 11) +~~~~~~~~~~~~~~~~~~~ +In CmmNode, consider splitting CmmCall into two: call and jump + +Notes on new codegen (Aug 10) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Things to do: + - Proc points pass all arguments on the stack, adding more code and + slowing down things a lot. We either need to fix this or even better + would be to get rid of proc points. + + - Sort out Label, LabelMap, LabelSet versus BlockId, BlockEnv, BlockSet + dichotomy. Mostly this means global replace, but we also need to make + Label an instance of Outputable (probably in the Outputable module). + + EZY: We should use Label, since that's the terminology Hoopl uses. + + - AsmCodeGen has a generic Cmm optimiser; move this into new pipeline + EZY (2011-04-16): The mini-inliner has been generalized and ported, + but the constant folding and other optimizations need to still be + ported. + + - AsmCodeGen has post-native-cg branch eliminator (shortCutBranches); + we ultimately want to share this with the Cmm branch eliminator. + + - At the moment, references to global registers like Hp are "lowered" + late (in CgUtils.fixStgRegisters). We should do this early, in the + new native codegen, much in the way that we lower calling conventions. + Might need to be a bit sophisticated about aliasing. + + - Move to new Cmm rep: + * Make native CG consume New Cmm; + * Convert Old Cmm->New Cmm to keep old path alive + * Produce New Cmm when reading in .cmm files + + - Top-level SRT threading is a bit ugly + + - See "CAFs" below; we want to totally refactor the way SRTs are calculated + + - Garbage-collect https://gitlab.haskell.org/ghc/ghc/wikis/commentary/compiler/cps + moving good stuff into + https://gitlab.haskell.org/ghc/ghc/wikis/commentary/compiler/new-code-gen-pipeline + + - Currently AsmCodeGen top level calls AsmCodeGen.cmmToCmm, which is a small + C-- optimiser. It has quite a lot of boilerplate folding code in AsmCodeGen + (cmmBlockConFold, cmmStmtConFold, cmmExprConFold), before calling out to + CmmOpt. ToDo: see what optimisations are being done; and do them before + AsmCodeGen. + + - If we stick CAF and stack liveness info on a LastCall node (not LastRet/Jump) + then all CAF and stack liveness stuff be completed before we split + into separate C procedures. + + Short term: + compute and attach liveness into LastCall + right at end, split, cvt to old rep + [must split before cvt, because old rep is not expressive enough] + + Longer term: + when old rep disappears, + move the whole splitting game into the C back end *only* + (guided by the procpoint set) + +---------------------------------------------------- + Proc-points +---------------------------------------------------- + +Consider this program, which has a diamond control flow, +with a call on one branch + fn(p,x) { + h() + if b then { ... f(x) ...; q=5; goto J } + else { ...; q=7; goto J } + J: ..p...q... + } +then the join point J is a "proc-point". So, is 'p' passed to J +as a parameter? Or, if 'p' was saved on the stack anyway, perhaps +to keep it alive across the call to h(), maybe 'p' gets communicated +to J that way. This is an awkward choice. (We think that we currently +never pass variables to join points via arguments.) + +Furthermore, there is *no way* to pass q to J in a register (other +than a parameter register). + +What we want is to do register allocation across the whole caboodle. 
+Then we could drop all the code that deals with the above awkward +decisions about spilling variables across proc-points. + +Note that J doesn't need an info table. + +What we really want is for each LastCall (not LastJump/Ret) +to have an info table. Note that ProcPoints that are not successors +of calls don't need an info table. + +Figuring out proc-points +~~~~~~~~~~~~~~~~~~~~~~~~ +Proc-points are identified by +GHC.Cmm.ProcPoint.minimalProcPointSet/extendPPSet Although there isn't +that much code, JD thinks that it could be done much more nicely using +a dominator analysis, using the Dataflow Engine. + +---------------------------------------------------- + CAFs +---------------------------------------------------- + +* The code for a procedure f may refer to either the *closure* + or the *entry point* of another top-level procedure g. + If f is live, then so is g. f's SRT must include g's closure. + +* The CLabel for the entry-point/closure reveals whether g is + a CAF (or refers to CAFs). See the IdLabel constructor of CLabel. + +* The CAF-ness of the original top-level definitions is figured out + (by GHC.Iface.Tidy) before we generate C--. This CafInfo is only set for + top-level Ids; nested bindings stay with MayHaveCafRefs. + +* Currently an SRT contains (only) pointers to (top-level) closures. + +* Consider this Core code + f = \x -> let g = \y -> ...x...y...h1... + in ...h2...g... + and suppose that h1, h2 have IdInfo of MayHaveCafRefs. + Therefore, so will f, But g will not (since it's nested). + + This generates C-- roughly like this: + f_closure: .word f_entry + f_entry() [info-tbl-for-f] { ...jump g_entry...jump h2... } + g_entry() [info-tbl-for-g] { ...jump h1... } + + Note that there is no top-level closure for g (only an info table). + This fact (whether or not there is a top-level closure) is recorded + in the InfoTable attached to the CmmProc for f, g + INVARIANT: + Any out-of-Group references to an IdLabel goes to + a Proc whose InfoTable says "I have a top-level closure". + Equivalently: + A CmmProc whose InfoTable says "I do not have a top-level + closure" is referred to only from its own Group. + +* So: info-tbl-for-f must have an SRT that keeps h1,h2 alive + info-tbl-for-g must have an SRT that keeps h1 (only) alive + + But if we just look for the free CAF refs, we get: + f h2 (only) + g h1 + + So we need to do a transitive closure thing to flesh out + f's keep-alive refs to include h1. + +* The SRT info is the C_SRT field of Cmm.ClosureTypeInfo in a + CmmInfoTable attached to each CmmProc. CmmPipeline.toTops actually does + the attaching, right at the end of the pipeline. The C_SRT part + gives offsets within a single, shared table of closure pointers. + +* DECIDED: we can generate SRTs based on the final Cmm program + without knowledge of how it is generated. diff --git a/compiler/GHC/CmmToC.hs b/compiler/GHC/CmmToC.hs new file mode 100644 index 0000000000..a413820e30 --- /dev/null +++ b/compiler/GHC/CmmToC.hs @@ -0,0 +1,1380 @@ +{-# LANGUAGE CPP, DeriveFunctor, GADTs, PatternSynonyms #-} + +----------------------------------------------------------------------------- +-- +-- Pretty-printing of Cmm as C, suitable for feeding gcc +-- +-- (c) The University of Glasgow 2004-2006 +-- +-- Print Cmm as real C, for -fvia-C +-- +-- See wiki:commentary/compiler/backends/ppr-c +-- +-- This is simpler than the old PprAbsC, because Cmm is "macro-expanded" +-- relative to the old AbstractC, and many oddities/decorations have +-- disappeared from the data type. 
+-- +-- This code generator is only supported in unregisterised mode. +-- +----------------------------------------------------------------------------- + +module GHC.CmmToC ( + writeC + ) where + +#include "HsVersions.h" + +-- Cmm stuff +import GhcPrelude + +import GHC.Cmm.BlockId +import GHC.Cmm.CLabel +import ForeignCall +import GHC.Cmm hiding (pprBBlock) +import GHC.Cmm.Ppr () -- For Outputable instances +import GHC.Cmm.Dataflow.Block +import GHC.Cmm.Dataflow.Collections +import GHC.Cmm.Dataflow.Graph +import GHC.Cmm.Utils +import GHC.Cmm.Switch + +-- Utils +import CPrim +import DynFlags +import FastString +import Outputable +import GHC.Platform +import UniqSet +import UniqFM +import Unique +import Util + +-- The rest +import Data.ByteString (ByteString) +import qualified Data.ByteString as BS +import Control.Monad.ST +import Data.Bits +import Data.Char +import Data.List +import Data.Map (Map) +import Data.Word +import System.IO +import qualified Data.Map as Map +import Control.Monad (ap) +import qualified Data.Array.Unsafe as U ( castSTUArray ) +import Data.Array.ST + +-- -------------------------------------------------------------------------- +-- Top level + +writeC :: DynFlags -> Handle -> RawCmmGroup -> IO () +writeC dflags handle cmm = printForC dflags handle (pprC cmm $$ blankLine) + +-- -------------------------------------------------------------------------- +-- Now do some real work +-- +-- for fun, we could call cmmToCmm over the tops... +-- + +pprC :: RawCmmGroup -> SDoc +pprC tops = vcat $ intersperse blankLine $ map pprTop tops + +-- +-- top level procs +-- +pprTop :: RawCmmDecl -> SDoc +pprTop (CmmProc infos clbl _in_live_regs graph) = + + (case mapLookup (g_entry graph) infos of + Nothing -> empty + Just (Statics info_clbl info_dat) -> + pprDataExterns info_dat $$ + pprWordArray info_is_in_rodata info_clbl info_dat) $$ + (vcat [ + blankLine, + extern_decls, + (if (externallyVisibleCLabel clbl) + then mkFN_ else mkIF_) (ppr clbl) <+> lbrace, + nest 8 temp_decls, + vcat (map pprBBlock blocks), + rbrace ] + ) + where + -- info tables are always in .rodata + info_is_in_rodata = True + blocks = toBlockListEntryFirst graph + (temp_decls, extern_decls) = pprTempAndExternDecls blocks + + +-- Chunks of static data. + +-- We only handle (a) arrays of word-sized things and (b) strings. + +pprTop (CmmData section (Statics lbl [CmmString str])) = + pprExternDecl lbl $$ + hcat [ + pprLocalness lbl, pprConstness (isSecConstant section), text "char ", ppr lbl, + text "[] = ", pprStringInCStyle str, semi + ] + +pprTop (CmmData section (Statics lbl [CmmUninitialised size])) = + pprExternDecl lbl $$ + hcat [ + pprLocalness lbl, pprConstness (isSecConstant section), text "char ", ppr lbl, + brackets (int size), semi + ] + +pprTop (CmmData section (Statics lbl lits)) = + pprDataExterns lits $$ + pprWordArray (isSecConstant section) lbl lits + +-- -------------------------------------------------------------------------- +-- BasicBlocks are self-contained entities: they always end in a jump. +-- +-- Like nativeGen/AsmCodeGen, we could probably reorder blocks to turn +-- as many jumps as possible into fall throughs. +-- + +pprBBlock :: CmmBlock -> SDoc +pprBBlock block = + nest 4 (pprBlockId (entryLabel block) <> colon) $$ + nest 8 (vcat (map pprStmt (blockToList nodes)) $$ pprStmt last) + where + (_, nodes, last) = blockSplit block + +-- -------------------------------------------------------------------------- +-- Info tables. Just arrays of words. 
+-- See codeGen/ClosureInfo, and nativeGen/PprMach + +pprWordArray :: Bool -> CLabel -> [CmmStatic] -> SDoc +pprWordArray is_ro lbl ds + = sdocWithDynFlags $ \dflags -> + -- TODO: align closures only + pprExternDecl lbl $$ + hcat [ pprLocalness lbl, pprConstness is_ro, text "StgWord" + , space, ppr lbl, text "[]" + -- See Note [StgWord alignment] + , pprAlignment (wordWidth dflags) + , text "= {" ] + $$ nest 8 (commafy (pprStatics dflags ds)) + $$ text "};" + +pprAlignment :: Width -> SDoc +pprAlignment words = + text "__attribute__((aligned(" <> int (widthInBytes words) <> text ")))" + +-- Note [StgWord alignment] +-- C codegen builds static closures as StgWord C arrays (pprWordArray). +-- Their real C type is 'StgClosure'. Macros like UNTAG_CLOSURE assume +-- pointers to 'StgClosure' are aligned at pointer size boundary: +-- 4 byte boundary on 32 systems +-- and 8 bytes on 64-bit systems +-- see TAG_MASK and TAG_BITS definition and usage. +-- +-- It's a reasonable assumption also known as natural alignment. +-- Although some architectures have different alignment rules. +-- One of known exceptions is m68k (#11395, comment:16) where: +-- __alignof__(StgWord) == 2, sizeof(StgWord) == 4 +-- +-- Thus we explicitly increase alignment by using +-- __attribute__((aligned(4))) +-- declaration. + +-- +-- has to be static, if it isn't globally visible +-- +pprLocalness :: CLabel -> SDoc +pprLocalness lbl | not $ externallyVisibleCLabel lbl = text "static " + | otherwise = empty + +pprConstness :: Bool -> SDoc +pprConstness is_ro | is_ro = text "const " + | otherwise = empty + +-- -------------------------------------------------------------------------- +-- Statements. +-- + +pprStmt :: CmmNode e x -> SDoc + +pprStmt stmt = + sdocWithDynFlags $ \dflags -> + case stmt of + CmmEntry{} -> empty + CmmComment _ -> empty -- (hang (text "/*") 3 (ftext s)) $$ ptext (sLit "*/") + -- XXX if the string contains "*/", we need to fix it + -- XXX we probably want to emit these comments when + -- some debugging option is on. They can get quite + -- large. + + CmmTick _ -> empty + CmmUnwind{} -> empty + + CmmAssign dest src -> pprAssign dflags dest src + + CmmStore dest src + | typeWidth rep == W64 && wordWidth dflags /= W64 + -> (if isFloatType rep then text "ASSIGN_DBL" + else ptext (sLit ("ASSIGN_Word64"))) <> + parens (mkP_ <> pprExpr1 dest <> comma <> pprExpr src) <> semi + + | otherwise + -> hsep [ pprExpr (CmmLoad dest rep), equals, pprExpr src <> semi ] + where + rep = cmmExprType dflags src + + CmmUnsafeForeignCall target@(ForeignTarget fn conv) results args -> + fnCall + where + (res_hints, arg_hints) = foreignTargetHints target + hresults = zip results res_hints + hargs = zip args arg_hints + + ForeignConvention cconv _ _ ret = conv + + cast_fn = parens (cCast (pprCFunType (char '*') cconv hresults hargs) fn) + + -- See wiki:commentary/compiler/backends/ppr-c#prototypes + fnCall = + case fn of + CmmLit (CmmLabel lbl) + | StdCallConv <- cconv -> + pprCall (ppr lbl) cconv hresults hargs + -- stdcall functions must be declared with + -- a function type, otherwise the C compiler + -- doesn't add the @n suffix to the label. We + -- can't add the @n suffix ourselves, because + -- it isn't valid C. + | CmmNeverReturns <- ret -> + pprCall cast_fn cconv hresults hargs <> semi + | not (isMathFun lbl) -> + pprForeignCall (ppr lbl) cconv hresults hargs + _ -> + pprCall cast_fn cconv hresults hargs <> semi + -- for a dynamic call, no declaration is necessary. 
+ + CmmUnsafeForeignCall (PrimTarget MO_Touch) _results _args -> empty + CmmUnsafeForeignCall (PrimTarget (MO_Prefetch_Data _)) _results _args -> empty + + CmmUnsafeForeignCall target@(PrimTarget op) results args -> + fn_call + where + cconv = CCallConv + fn = pprCallishMachOp_for_C op + + (res_hints, arg_hints) = foreignTargetHints target + hresults = zip results res_hints + hargs = zip args arg_hints + + fn_call + -- The mem primops carry an extra alignment arg. + -- We could maybe emit an alignment directive using this info. + -- We also need to cast mem primops to prevent conflicts with GCC + -- builtins (see bug #5967). + | Just _align <- machOpMemcpyishAlign op + = (text ";EFF_(" <> fn <> char ')' <> semi) $$ + pprForeignCall fn cconv hresults hargs + | otherwise + = pprCall fn cconv hresults hargs + + CmmBranch ident -> pprBranch ident + CmmCondBranch expr yes no _ -> pprCondBranch expr yes no + CmmCall { cml_target = expr } -> mkJMP_ (pprExpr expr) <> semi + CmmSwitch arg ids -> sdocWithDynFlags $ \dflags -> + pprSwitch dflags arg ids + + _other -> pprPanic "PprC.pprStmt" (ppr stmt) + +type Hinted a = (a, ForeignHint) + +pprForeignCall :: SDoc -> CCallConv -> [Hinted CmmFormal] -> [Hinted CmmActual] + -> SDoc +pprForeignCall fn cconv results args = fn_call + where + fn_call = braces ( + pprCFunType (char '*' <> text "ghcFunPtr") cconv results args <> semi + $$ text "ghcFunPtr" <+> equals <+> cast_fn <> semi + $$ pprCall (text "ghcFunPtr") cconv results args <> semi + ) + cast_fn = parens (parens (pprCFunType (char '*') cconv results args) <> fn) + +pprCFunType :: SDoc -> CCallConv -> [Hinted CmmFormal] -> [Hinted CmmActual] -> SDoc +pprCFunType ppr_fn cconv ress args + = sdocWithDynFlags $ \dflags -> + let res_type [] = text "void" + res_type [(one, hint)] = machRepHintCType (localRegType one) hint + res_type _ = panic "pprCFunType: only void or 1 return value supported" + + arg_type (expr, hint) = machRepHintCType (cmmExprType dflags expr) hint + in res_type ress <+> + parens (ccallConvAttribute cconv <> ppr_fn) <> + parens (commafy (map arg_type args)) + +-- --------------------------------------------------------------------- +-- unconditional branches +pprBranch :: BlockId -> SDoc +pprBranch ident = text "goto" <+> pprBlockId ident <> semi + + +-- --------------------------------------------------------------------- +-- conditional branches to local labels +pprCondBranch :: CmmExpr -> BlockId -> BlockId -> SDoc +pprCondBranch expr yes no + = hsep [ text "if" , parens(pprExpr expr) , + text "goto", pprBlockId yes <> semi, + text "else goto", pprBlockId no <> semi ] + +-- --------------------------------------------------------------------- +-- a local table branch +-- +-- we find the fall-through cases +-- +pprSwitch :: DynFlags -> CmmExpr -> SwitchTargets -> SDoc +pprSwitch dflags e ids + = (hang (text "switch" <+> parens ( pprExpr e ) <+> lbrace) + 4 (vcat ( map caseify pairs ) $$ def)) $$ rbrace + where + (pairs, mbdef) = switchTargetsFallThrough ids + + -- fall through case + caseify (ix:ixs, ident) = vcat (map do_fallthrough ixs) $$ final_branch ix + where + do_fallthrough ix = + hsep [ text "case" , pprHexVal ix (wordWidth dflags) <> colon , + text "/* fall through */" ] + + final_branch ix = + hsep [ text "case" , pprHexVal ix (wordWidth dflags) <> colon , + text "goto" , (pprBlockId ident) <> semi ] + + caseify (_ , _ ) = panic "pprSwitch: switch with no cases!" 
+ + def | Just l <- mbdef = text "default: goto" <+> pprBlockId l <> semi + | otherwise = empty + +-- --------------------------------------------------------------------- +-- Expressions. +-- + +-- C Types: the invariant is that the C expression generated by +-- +-- pprExpr e +-- +-- has a type in C which is also given by +-- +-- machRepCType (cmmExprType e) +-- +-- (similar invariants apply to the rest of the pretty printer). + +pprExpr :: CmmExpr -> SDoc +pprExpr e = case e of + CmmLit lit -> pprLit lit + + + CmmLoad e ty -> sdocWithDynFlags $ \dflags -> pprLoad dflags e ty + CmmReg reg -> pprCastReg reg + CmmRegOff reg 0 -> pprCastReg reg + + -- CmmRegOff is an alias of MO_Add + CmmRegOff reg i -> sdocWithDynFlags $ \dflags -> + pprCastReg reg <> char '+' <> + pprHexVal (fromIntegral i) (wordWidth dflags) + + CmmMachOp mop args -> pprMachOpApp mop args + + CmmStackSlot _ _ -> panic "pprExpr: CmmStackSlot not supported!" + + +pprLoad :: DynFlags -> CmmExpr -> CmmType -> SDoc +pprLoad dflags e ty + | width == W64, wordWidth dflags /= W64 + = (if isFloatType ty then text "PK_DBL" + else text "PK_Word64") + <> parens (mkP_ <> pprExpr1 e) + + | otherwise + = case e of + CmmReg r | isPtrReg r && width == wordWidth dflags && not (isFloatType ty) + -> char '*' <> pprAsPtrReg r + + CmmRegOff r 0 | isPtrReg r && width == wordWidth dflags && not (isFloatType ty) + -> char '*' <> pprAsPtrReg r + + CmmRegOff r off | isPtrReg r && width == wordWidth dflags + , off `rem` wORD_SIZE dflags == 0 && not (isFloatType ty) + -- ToDo: check that the offset is a word multiple? + -- (For tagging to work, I had to avoid unaligned loads. --ARY) + -> pprAsPtrReg r <> brackets (ppr (off `shiftR` wordShift dflags)) + + _other -> cLoad e ty + where + width = typeWidth ty + +pprExpr1 :: CmmExpr -> SDoc +pprExpr1 (CmmLit lit) = pprLit1 lit +pprExpr1 e@(CmmReg _reg) = pprExpr e +pprExpr1 other = parens (pprExpr other) + +-- -------------------------------------------------------------------------- +-- MachOp applications + +pprMachOpApp :: MachOp -> [CmmExpr] -> SDoc + +pprMachOpApp op args + | isMulMayOfloOp op + = text "mulIntMayOflo" <> parens (commafy (map pprExpr args)) + where isMulMayOfloOp (MO_U_MulMayOflo _) = True + isMulMayOfloOp (MO_S_MulMayOflo _) = True + isMulMayOfloOp _ = False + +pprMachOpApp mop args + | Just ty <- machOpNeedsCast mop + = ty <> parens (pprMachOpApp' mop args) + | otherwise + = pprMachOpApp' mop args + +-- Comparisons in C have type 'int', but we want type W_ (this is what +-- resultRepOfMachOp says). The other C operations inherit their type +-- from their operands, so no casting is required. 
+machOpNeedsCast :: MachOp -> Maybe SDoc +machOpNeedsCast mop + | isComparisonMachOp mop = Just mkW_ + | otherwise = Nothing + +pprMachOpApp' :: MachOp -> [CmmExpr] -> SDoc +pprMachOpApp' mop args + = case args of + -- dyadic + [x,y] -> pprArg x <+> pprMachOp_for_C mop <+> pprArg y + + -- unary + [x] -> pprMachOp_for_C mop <> parens (pprArg x) + + _ -> panic "PprC.pprMachOp : machop with wrong number of args" + + where + -- Cast needed for signed integer ops + pprArg e | signedOp mop = sdocWithDynFlags $ \dflags -> + cCast (machRep_S_CType (typeWidth (cmmExprType dflags e))) e + | needsFCasts mop = sdocWithDynFlags $ \dflags -> + cCast (machRep_F_CType (typeWidth (cmmExprType dflags e))) e + | otherwise = pprExpr1 e + needsFCasts (MO_F_Eq _) = False + needsFCasts (MO_F_Ne _) = False + needsFCasts (MO_F_Neg _) = True + needsFCasts (MO_F_Quot _) = True + needsFCasts mop = floatComparison mop + +-- -------------------------------------------------------------------------- +-- Literals + +pprLit :: CmmLit -> SDoc +pprLit lit = case lit of + CmmInt i rep -> pprHexVal i rep + + CmmFloat f w -> parens (machRep_F_CType w) <> str + where d = fromRational f :: Double + str | isInfinite d && d < 0 = text "-INFINITY" + | isInfinite d = text "INFINITY" + | isNaN d = text "NAN" + | otherwise = text (show d) + -- these constants come from + -- see #1861 + + CmmVec {} -> panic "PprC printing vector literal" + + CmmBlock bid -> mkW_ <> pprCLabelAddr (infoTblLbl bid) + CmmHighStackMark -> panic "PprC printing high stack mark" + CmmLabel clbl -> mkW_ <> pprCLabelAddr clbl + CmmLabelOff clbl i -> mkW_ <> pprCLabelAddr clbl <> char '+' <> int i + CmmLabelDiffOff clbl1 _ i _ -- non-word widths not supported via C + -- WARNING: + -- * the lit must occur in the info table clbl2 + -- * clbl1 must be an SRT, a slow entry point or a large bitmap + -> mkW_ <> pprCLabelAddr clbl1 <> char '+' <> int i + + where + pprCLabelAddr lbl = char '&' <> ppr lbl + +pprLit1 :: CmmLit -> SDoc +pprLit1 lit@(CmmLabelOff _ _) = parens (pprLit lit) +pprLit1 lit@(CmmLabelDiffOff _ _ _ _) = parens (pprLit lit) +pprLit1 lit@(CmmFloat _ _) = parens (pprLit lit) +pprLit1 other = pprLit other + +-- --------------------------------------------------------------------------- +-- Static data + +pprStatics :: DynFlags -> [CmmStatic] -> [SDoc] +pprStatics _ [] = [] +pprStatics dflags (CmmStaticLit (CmmFloat f W32) : rest) + -- odd numbers of floats are padded to a word by mkVirtHeapOffsetsWithPadding + | wORD_SIZE dflags == 8, CmmStaticLit (CmmInt 0 W32) : rest' <- rest + = pprLit1 (floatToWord dflags f) : pprStatics dflags rest' + -- adjacent floats aren't padded but combined into a single word + | wORD_SIZE dflags == 8, CmmStaticLit (CmmFloat g W32) : rest' <- rest + = pprLit1 (floatPairToWord dflags f g) : pprStatics dflags rest' + | wORD_SIZE dflags == 4 + = pprLit1 (floatToWord dflags f) : pprStatics dflags rest + | otherwise + = pprPanic "pprStatics: float" (vcat (map ppr' rest)) + where ppr' (CmmStaticLit l) = sdocWithDynFlags $ \dflags -> + ppr (cmmLitType dflags l) + ppr' _other = text "bad static!" 
+pprStatics dflags (CmmStaticLit (CmmFloat f W64) : rest) + = map pprLit1 (doubleToWords dflags f) ++ pprStatics dflags rest + +pprStatics dflags (CmmStaticLit (CmmInt i W64) : rest) + | wordWidth dflags == W32 + = if wORDS_BIGENDIAN dflags + then pprStatics dflags (CmmStaticLit (CmmInt q W32) : + CmmStaticLit (CmmInt r W32) : rest) + else pprStatics dflags (CmmStaticLit (CmmInt r W32) : + CmmStaticLit (CmmInt q W32) : rest) + where r = i .&. 0xffffffff + q = i `shiftR` 32 +pprStatics dflags (CmmStaticLit (CmmInt a W32) : + CmmStaticLit (CmmInt b W32) : rest) + | wordWidth dflags == W64 + = if wORDS_BIGENDIAN dflags + then pprStatics dflags (CmmStaticLit (CmmInt ((shiftL a 32) .|. b) W64) : + rest) + else pprStatics dflags (CmmStaticLit (CmmInt ((shiftL b 32) .|. a) W64) : + rest) +pprStatics dflags (CmmStaticLit (CmmInt a W16) : + CmmStaticLit (CmmInt b W16) : rest) + | wordWidth dflags == W32 + = if wORDS_BIGENDIAN dflags + then pprStatics dflags (CmmStaticLit (CmmInt ((shiftL a 16) .|. b) W32) : + rest) + else pprStatics dflags (CmmStaticLit (CmmInt ((shiftL b 16) .|. a) W32) : + rest) +pprStatics dflags (CmmStaticLit (CmmInt _ w) : _) + | w /= wordWidth dflags + = pprPanic "pprStatics: cannot emit a non-word-sized static literal" (ppr w) +pprStatics dflags (CmmStaticLit lit : rest) + = pprLit1 lit : pprStatics dflags rest +pprStatics _ (other : _) + = pprPanic "pprStatics: other" (pprStatic other) + +pprStatic :: CmmStatic -> SDoc +pprStatic s = case s of + + CmmStaticLit lit -> nest 4 (pprLit lit) + CmmUninitialised i -> nest 4 (mkC_ <> brackets (int i)) + + -- these should be inlined, like the old .hc + CmmString s' -> nest 4 (mkW_ <> parens(pprStringInCStyle s')) + + +-- --------------------------------------------------------------------------- +-- Block Ids + +pprBlockId :: BlockId -> SDoc +pprBlockId b = char '_' <> ppr (getUnique b) + +-- -------------------------------------------------------------------------- +-- Print a MachOp in a way suitable for emitting via C. +-- + +pprMachOp_for_C :: MachOp -> SDoc + +pprMachOp_for_C mop = case mop of + + -- Integer operations + MO_Add _ -> char '+' + MO_Sub _ -> char '-' + MO_Eq _ -> text "==" + MO_Ne _ -> text "!=" + MO_Mul _ -> char '*' + + MO_S_Quot _ -> char '/' + MO_S_Rem _ -> char '%' + MO_S_Neg _ -> char '-' + + MO_U_Quot _ -> char '/' + MO_U_Rem _ -> char '%' + + -- & Floating-point operations + MO_F_Add _ -> char '+' + MO_F_Sub _ -> char '-' + MO_F_Neg _ -> char '-' + MO_F_Mul _ -> char '*' + MO_F_Quot _ -> char '/' + + -- Signed comparisons + MO_S_Ge _ -> text ">=" + MO_S_Le _ -> text "<=" + MO_S_Gt _ -> char '>' + MO_S_Lt _ -> char '<' + + -- & Unsigned comparisons + MO_U_Ge _ -> text ">=" + MO_U_Le _ -> text "<=" + MO_U_Gt _ -> char '>' + MO_U_Lt _ -> char '<' + + -- & Floating-point comparisons + MO_F_Eq _ -> text "==" + MO_F_Ne _ -> text "!=" + MO_F_Ge _ -> text ">=" + MO_F_Le _ -> text "<=" + MO_F_Gt _ -> char '>' + MO_F_Lt _ -> char '<' + + -- Bitwise operations. Not all of these may be supported at all + -- sizes, and only integral MachReps are valid. + MO_And _ -> char '&' + MO_Or _ -> char '|' + MO_Xor _ -> char '^' + MO_Not _ -> char '~' + MO_Shl _ -> text "<<" + MO_U_Shr _ -> text ">>" -- unsigned shift right + MO_S_Shr _ -> text ">>" -- signed shift right + +-- Conversions. Some of these will be NOPs, but never those that convert +-- between ints and floats. +-- Floating-point conversions use the signed variant. 
+-- We won't know to generate (void*) casts here, but maybe from +-- context elsewhere + +-- noop casts + MO_UU_Conv from to | from == to -> empty + MO_UU_Conv _from to -> parens (machRep_U_CType to) + + MO_SS_Conv from to | from == to -> empty + MO_SS_Conv _from to -> parens (machRep_S_CType to) + + MO_XX_Conv from to | from == to -> empty + MO_XX_Conv _from to -> parens (machRep_U_CType to) + + MO_FF_Conv from to | from == to -> empty + MO_FF_Conv _from to -> parens (machRep_F_CType to) + + MO_SF_Conv _from to -> parens (machRep_F_CType to) + MO_FS_Conv _from to -> parens (machRep_S_CType to) + + MO_S_MulMayOflo _ -> pprTrace "offending mop:" + (text "MO_S_MulMayOflo") + (panic $ "PprC.pprMachOp_for_C: MO_S_MulMayOflo" + ++ " should have been handled earlier!") + MO_U_MulMayOflo _ -> pprTrace "offending mop:" + (text "MO_U_MulMayOflo") + (panic $ "PprC.pprMachOp_for_C: MO_U_MulMayOflo" + ++ " should have been handled earlier!") + + MO_V_Insert {} -> pprTrace "offending mop:" + (text "MO_V_Insert") + (panic $ "PprC.pprMachOp_for_C: MO_V_Insert" + ++ " should have been handled earlier!") + MO_V_Extract {} -> pprTrace "offending mop:" + (text "MO_V_Extract") + (panic $ "PprC.pprMachOp_for_C: MO_V_Extract" + ++ " should have been handled earlier!") + + MO_V_Add {} -> pprTrace "offending mop:" + (text "MO_V_Add") + (panic $ "PprC.pprMachOp_for_C: MO_V_Add" + ++ " should have been handled earlier!") + MO_V_Sub {} -> pprTrace "offending mop:" + (text "MO_V_Sub") + (panic $ "PprC.pprMachOp_for_C: MO_V_Sub" + ++ " should have been handled earlier!") + MO_V_Mul {} -> pprTrace "offending mop:" + (text "MO_V_Mul") + (panic $ "PprC.pprMachOp_for_C: MO_V_Mul" + ++ " should have been handled earlier!") + + MO_VS_Quot {} -> pprTrace "offending mop:" + (text "MO_VS_Quot") + (panic $ "PprC.pprMachOp_for_C: MO_VS_Quot" + ++ " should have been handled earlier!") + MO_VS_Rem {} -> pprTrace "offending mop:" + (text "MO_VS_Rem") + (panic $ "PprC.pprMachOp_for_C: MO_VS_Rem" + ++ " should have been handled earlier!") + MO_VS_Neg {} -> pprTrace "offending mop:" + (text "MO_VS_Neg") + (panic $ "PprC.pprMachOp_for_C: MO_VS_Neg" + ++ " should have been handled earlier!") + + MO_VU_Quot {} -> pprTrace "offending mop:" + (text "MO_VU_Quot") + (panic $ "PprC.pprMachOp_for_C: MO_VU_Quot" + ++ " should have been handled earlier!") + MO_VU_Rem {} -> pprTrace "offending mop:" + (text "MO_VU_Rem") + (panic $ "PprC.pprMachOp_for_C: MO_VU_Rem" + ++ " should have been handled earlier!") + + MO_VF_Insert {} -> pprTrace "offending mop:" + (text "MO_VF_Insert") + (panic $ "PprC.pprMachOp_for_C: MO_VF_Insert" + ++ " should have been handled earlier!") + MO_VF_Extract {} -> pprTrace "offending mop:" + (text "MO_VF_Extract") + (panic $ "PprC.pprMachOp_for_C: MO_VF_Extract" + ++ " should have been handled earlier!") + + MO_VF_Add {} -> pprTrace "offending mop:" + (text "MO_VF_Add") + (panic $ "PprC.pprMachOp_for_C: MO_VF_Add" + ++ " should have been handled earlier!") + MO_VF_Sub {} -> pprTrace "offending mop:" + (text "MO_VF_Sub") + (panic $ "PprC.pprMachOp_for_C: MO_VF_Sub" + ++ " should have been handled earlier!") + MO_VF_Neg {} -> pprTrace "offending mop:" + (text "MO_VF_Neg") + (panic $ "PprC.pprMachOp_for_C: MO_VF_Neg" + ++ " should have been handled earlier!") + MO_VF_Mul {} -> pprTrace "offending mop:" + (text "MO_VF_Mul") + (panic $ "PprC.pprMachOp_for_C: MO_VF_Mul" + ++ " should have been handled earlier!") + MO_VF_Quot {} -> pprTrace "offending mop:" + (text "MO_VF_Quot") + (panic $ "PprC.pprMachOp_for_C: MO_VF_Quot" + ++ 
" should have been handled earlier!") + + MO_AlignmentCheck {} -> panic "-falignment-santisation not supported by unregisterised backend" + +signedOp :: MachOp -> Bool -- Argument type(s) are signed ints +signedOp (MO_S_Quot _) = True +signedOp (MO_S_Rem _) = True +signedOp (MO_S_Neg _) = True +signedOp (MO_S_Ge _) = True +signedOp (MO_S_Le _) = True +signedOp (MO_S_Gt _) = True +signedOp (MO_S_Lt _) = True +signedOp (MO_S_Shr _) = True +signedOp (MO_SS_Conv _ _) = True +signedOp (MO_SF_Conv _ _) = True +signedOp _ = False + +floatComparison :: MachOp -> Bool -- comparison between float args +floatComparison (MO_F_Eq _) = True +floatComparison (MO_F_Ne _) = True +floatComparison (MO_F_Ge _) = True +floatComparison (MO_F_Le _) = True +floatComparison (MO_F_Gt _) = True +floatComparison (MO_F_Lt _) = True +floatComparison _ = False + +-- --------------------------------------------------------------------- +-- tend to be implemented by foreign calls + +pprCallishMachOp_for_C :: CallishMachOp -> SDoc + +pprCallishMachOp_for_C mop + = case mop of + MO_F64_Pwr -> text "pow" + MO_F64_Sin -> text "sin" + MO_F64_Cos -> text "cos" + MO_F64_Tan -> text "tan" + MO_F64_Sinh -> text "sinh" + MO_F64_Cosh -> text "cosh" + MO_F64_Tanh -> text "tanh" + MO_F64_Asin -> text "asin" + MO_F64_Acos -> text "acos" + MO_F64_Atanh -> text "atanh" + MO_F64_Asinh -> text "asinh" + MO_F64_Acosh -> text "acosh" + MO_F64_Atan -> text "atan" + MO_F64_Log -> text "log" + MO_F64_Log1P -> text "log1p" + MO_F64_Exp -> text "exp" + MO_F64_ExpM1 -> text "expm1" + MO_F64_Sqrt -> text "sqrt" + MO_F64_Fabs -> text "fabs" + MO_F32_Pwr -> text "powf" + MO_F32_Sin -> text "sinf" + MO_F32_Cos -> text "cosf" + MO_F32_Tan -> text "tanf" + MO_F32_Sinh -> text "sinhf" + MO_F32_Cosh -> text "coshf" + MO_F32_Tanh -> text "tanhf" + MO_F32_Asin -> text "asinf" + MO_F32_Acos -> text "acosf" + MO_F32_Atan -> text "atanf" + MO_F32_Asinh -> text "asinhf" + MO_F32_Acosh -> text "acoshf" + MO_F32_Atanh -> text "atanhf" + MO_F32_Log -> text "logf" + MO_F32_Log1P -> text "log1pf" + MO_F32_Exp -> text "expf" + MO_F32_ExpM1 -> text "expm1f" + MO_F32_Sqrt -> text "sqrtf" + MO_F32_Fabs -> text "fabsf" + MO_ReadBarrier -> text "load_load_barrier" + MO_WriteBarrier -> text "write_barrier" + MO_Memcpy _ -> text "memcpy" + MO_Memset _ -> text "memset" + MO_Memmove _ -> text "memmove" + MO_Memcmp _ -> text "memcmp" + (MO_BSwap w) -> ptext (sLit $ bSwapLabel w) + (MO_BRev w) -> ptext (sLit $ bRevLabel w) + (MO_PopCnt w) -> ptext (sLit $ popCntLabel w) + (MO_Pext w) -> ptext (sLit $ pextLabel w) + (MO_Pdep w) -> ptext (sLit $ pdepLabel w) + (MO_Clz w) -> ptext (sLit $ clzLabel w) + (MO_Ctz w) -> ptext (sLit $ ctzLabel w) + (MO_AtomicRMW w amop) -> ptext (sLit $ atomicRMWLabel w amop) + (MO_Cmpxchg w) -> ptext (sLit $ cmpxchgLabel w) + (MO_AtomicRead w) -> ptext (sLit $ atomicReadLabel w) + (MO_AtomicWrite w) -> ptext (sLit $ atomicWriteLabel w) + (MO_UF_Conv w) -> ptext (sLit $ word2FloatLabel w) + + MO_S_Mul2 {} -> unsupported + MO_S_QuotRem {} -> unsupported + MO_U_QuotRem {} -> unsupported + MO_U_QuotRem2 {} -> unsupported + MO_Add2 {} -> unsupported + MO_AddWordC {} -> unsupported + MO_SubWordC {} -> unsupported + MO_AddIntC {} -> unsupported + MO_SubIntC {} -> unsupported + MO_U_Mul2 {} -> unsupported + MO_Touch -> unsupported + (MO_Prefetch_Data _ ) -> unsupported + --- we could support prefetch via "__builtin_prefetch" + --- Not adding it for now + where unsupported = panic ("pprCallishMachOp_for_C: " ++ show mop + ++ " not supported!") + +-- 
--------------------------------------------------------------------- +-- Useful #defines +-- + +mkJMP_, mkFN_, mkIF_ :: SDoc -> SDoc + +mkJMP_ i = text "JMP_" <> parens i +mkFN_ i = text "FN_" <> parens i -- externally visible function +mkIF_ i = text "IF_" <> parens i -- locally visible + +-- from includes/Stg.h +-- +mkC_,mkW_,mkP_ :: SDoc + +mkC_ = text "(C_)" -- StgChar +mkW_ = text "(W_)" -- StgWord +mkP_ = text "(P_)" -- StgWord* + +-- --------------------------------------------------------------------- +-- +-- Assignments +-- +-- Generating assignments is what we're all about, here +-- +pprAssign :: DynFlags -> CmmReg -> CmmExpr -> SDoc + +-- dest is a reg, rhs is a reg +pprAssign _ r1 (CmmReg r2) + | isPtrReg r1 && isPtrReg r2 + = hcat [ pprAsPtrReg r1, equals, pprAsPtrReg r2, semi ] + +-- dest is a reg, rhs is a CmmRegOff +pprAssign dflags r1 (CmmRegOff r2 off) + | isPtrReg r1 && isPtrReg r2 && (off `rem` wORD_SIZE dflags == 0) + = hcat [ pprAsPtrReg r1, equals, pprAsPtrReg r2, op, int off', semi ] + where + off1 = off `shiftR` wordShift dflags + + (op,off') | off >= 0 = (char '+', off1) + | otherwise = (char '-', -off1) + +-- dest is a reg, rhs is anything. +-- We can't cast the lvalue, so we have to cast the rhs if necessary. Casting +-- the lvalue elicits a warning from new GCC versions (3.4+). +pprAssign _ r1 r2 + | isFixedPtrReg r1 = mkAssign (mkP_ <> pprExpr1 r2) + | Just ty <- strangeRegType r1 = mkAssign (parens ty <> pprExpr1 r2) + | otherwise = mkAssign (pprExpr r2) + where mkAssign x = if r1 == CmmGlobal BaseReg + then text "ASSIGN_BaseReg" <> parens x <> semi + else pprReg r1 <> text " = " <> x <> semi + +-- --------------------------------------------------------------------- +-- Registers + +pprCastReg :: CmmReg -> SDoc +pprCastReg reg + | isStrangeTypeReg reg = mkW_ <> pprReg reg + | otherwise = pprReg reg + +-- True if (pprReg reg) will give an expression with type StgPtr. We +-- need to take care with pointer arithmetic on registers with type +-- StgPtr. +isFixedPtrReg :: CmmReg -> Bool +isFixedPtrReg (CmmLocal _) = False +isFixedPtrReg (CmmGlobal r) = isFixedPtrGlobalReg r + +-- True if (pprAsPtrReg reg) will give an expression with type StgPtr +-- JD: THIS IS HORRIBLE AND SHOULD BE RENAMED, AT THE VERY LEAST. +-- THE GARBAGE WITH THE VNonGcPtr HELPS MATCH THE OLD CODE GENERATOR'S OUTPUT; +-- I'M NOT SURE IF IT SHOULD REALLY STAY THAT WAY. +isPtrReg :: CmmReg -> Bool +isPtrReg (CmmLocal _) = False +isPtrReg (CmmGlobal (VanillaReg _ VGcPtr)) = True -- if we print via pprAsPtrReg +isPtrReg (CmmGlobal (VanillaReg _ VNonGcPtr)) = False -- if we print via pprAsPtrReg +isPtrReg (CmmGlobal reg) = isFixedPtrGlobalReg reg + +-- True if this global reg has type StgPtr +isFixedPtrGlobalReg :: GlobalReg -> Bool +isFixedPtrGlobalReg Sp = True +isFixedPtrGlobalReg Hp = True +isFixedPtrGlobalReg HpLim = True +isFixedPtrGlobalReg SpLim = True +isFixedPtrGlobalReg _ = False + +-- True if in C this register doesn't have the type given by +-- (machRepCType (cmmRegType reg)), so it has to be cast. 
+isStrangeTypeReg :: CmmReg -> Bool +isStrangeTypeReg (CmmLocal _) = False +isStrangeTypeReg (CmmGlobal g) = isStrangeTypeGlobal g + +isStrangeTypeGlobal :: GlobalReg -> Bool +isStrangeTypeGlobal CCCS = True +isStrangeTypeGlobal CurrentTSO = True +isStrangeTypeGlobal CurrentNursery = True +isStrangeTypeGlobal BaseReg = True +isStrangeTypeGlobal r = isFixedPtrGlobalReg r + +strangeRegType :: CmmReg -> Maybe SDoc +strangeRegType (CmmGlobal CCCS) = Just (text "struct CostCentreStack_ *") +strangeRegType (CmmGlobal CurrentTSO) = Just (text "struct StgTSO_ *") +strangeRegType (CmmGlobal CurrentNursery) = Just (text "struct bdescr_ *") +strangeRegType (CmmGlobal BaseReg) = Just (text "struct StgRegTable_ *") +strangeRegType _ = Nothing + +-- pprReg just prints the register name. +-- +pprReg :: CmmReg -> SDoc +pprReg r = case r of + CmmLocal local -> pprLocalReg local + CmmGlobal global -> pprGlobalReg global + +pprAsPtrReg :: CmmReg -> SDoc +pprAsPtrReg (CmmGlobal (VanillaReg n gcp)) + = WARN( gcp /= VGcPtr, ppr n ) char 'R' <> int n <> text ".p" +pprAsPtrReg other_reg = pprReg other_reg + +pprGlobalReg :: GlobalReg -> SDoc +pprGlobalReg gr = case gr of + VanillaReg n _ -> char 'R' <> int n <> text ".w" + -- pprGlobalReg prints a VanillaReg as a .w regardless + -- Example: R1.w = R1.w & (-0x8UL); + -- JMP_(*R1.p); + FloatReg n -> char 'F' <> int n + DoubleReg n -> char 'D' <> int n + LongReg n -> char 'L' <> int n + Sp -> text "Sp" + SpLim -> text "SpLim" + Hp -> text "Hp" + HpLim -> text "HpLim" + CCCS -> text "CCCS" + CurrentTSO -> text "CurrentTSO" + CurrentNursery -> text "CurrentNursery" + HpAlloc -> text "HpAlloc" + BaseReg -> text "BaseReg" + EagerBlackholeInfo -> text "stg_EAGER_BLACKHOLE_info" + GCEnter1 -> text "stg_gc_enter_1" + GCFun -> text "stg_gc_fun" + other -> panic $ "pprGlobalReg: Unsupported register: " ++ show other + +pprLocalReg :: LocalReg -> SDoc +pprLocalReg (LocalReg uniq _) = char '_' <> ppr uniq + +-- ----------------------------------------------------------------------------- +-- Foreign Calls + +pprCall :: SDoc -> CCallConv -> [Hinted CmmFormal] -> [Hinted CmmActual] -> SDoc +pprCall ppr_fn cconv results args + | not (is_cishCC cconv) + = panic $ "pprCall: unknown calling convention" + + | otherwise + = + ppr_assign results (ppr_fn <> parens (commafy (map pprArg args))) <> semi + where + ppr_assign [] rhs = rhs + ppr_assign [(one,hint)] rhs + = pprLocalReg one <> text " = " + <> pprUnHint hint (localRegType one) <> rhs + ppr_assign _other _rhs = panic "pprCall: multiple results" + + pprArg (expr, AddrHint) + = cCast (text "void *") expr + -- see comment by machRepHintCType below + pprArg (expr, SignedHint) + = sdocWithDynFlags $ \dflags -> + cCast (machRep_S_CType $ typeWidth $ cmmExprType dflags expr) expr + pprArg (expr, _other) + = pprExpr expr + + pprUnHint AddrHint rep = parens (machRepCType rep) + pprUnHint SignedHint rep = parens (machRepCType rep) + pprUnHint _ _ = empty + +-- Currently we only have these two calling conventions, but this might +-- change in the future... +is_cishCC :: CCallConv -> Bool +is_cishCC CCallConv = True +is_cishCC CApiConv = True +is_cishCC StdCallConv = True +is_cishCC PrimCallConv = False +is_cishCC JavaScriptCallConv = False + +-- --------------------------------------------------------------------- +-- Find and print local and external declarations for a list of +-- Cmm statements. +-- +pprTempAndExternDecls :: [CmmBlock] -> (SDoc{-temps-}, SDoc{-externs-}) +pprTempAndExternDecls stmts + = (pprUFM (getUniqSet temps) (vcat . 
map pprTempDecl), + vcat (map pprExternDecl (Map.keys lbls))) + where (temps, lbls) = runTE (mapM_ te_BB stmts) + +pprDataExterns :: [CmmStatic] -> SDoc +pprDataExterns statics + = vcat (map pprExternDecl (Map.keys lbls)) + where (_, lbls) = runTE (mapM_ te_Static statics) + +pprTempDecl :: LocalReg -> SDoc +pprTempDecl l@(LocalReg _ rep) + = hcat [ machRepCType rep, space, pprLocalReg l, semi ] + +pprExternDecl :: CLabel -> SDoc +pprExternDecl lbl + -- do not print anything for "known external" things + | not (needsCDecl lbl) = empty + | Just sz <- foreignLabelStdcallInfo lbl = stdcall_decl sz + | otherwise = + hcat [ visibility, label_type lbl , lparen, ppr lbl, text ");" + -- occasionally useful to see label type + -- , text "/* ", pprDebugCLabel lbl, text " */" + ] + where + label_type lbl | isBytesLabel lbl = text "B_" + | isForeignLabel lbl && isCFunctionLabel lbl + = text "FF_" + | isCFunctionLabel lbl = text "F_" + | isStaticClosureLabel lbl = text "C_" + -- generic .rodata labels + | isSomeRODataLabel lbl = text "RO_" + -- generic .data labels (common case) + | otherwise = text "RW_" + + visibility + | externallyVisibleCLabel lbl = char 'E' + | otherwise = char 'I' + + -- If the label we want to refer to is a stdcall function (on Windows) then + -- we must generate an appropriate prototype for it, so that the C compiler will + -- add the @n suffix to the label (#2276) + stdcall_decl sz = sdocWithDynFlags $ \dflags -> + text "extern __attribute__((stdcall)) void " <> ppr lbl + <> parens (commafy (replicate (sz `quot` wORD_SIZE dflags) (machRep_U_CType (wordWidth dflags)))) + <> semi + +type TEState = (UniqSet LocalReg, Map CLabel ()) +newtype TE a = TE { unTE :: TEState -> (a, TEState) } deriving (Functor) + +instance Applicative TE where + pure a = TE $ \s -> (a, s) + (<*>) = ap + +instance Monad TE where + TE m >>= k = TE $ \s -> case m s of (a, s') -> unTE (k a) s' + +te_lbl :: CLabel -> TE () +te_lbl lbl = TE $ \(temps,lbls) -> ((), (temps, Map.insert lbl () lbls)) + +te_temp :: LocalReg -> TE () +te_temp r = TE $ \(temps,lbls) -> ((), (addOneToUniqSet temps r, lbls)) + +runTE :: TE () -> TEState +runTE (TE m) = snd (m (emptyUniqSet, Map.empty)) + +te_Static :: CmmStatic -> TE () +te_Static (CmmStaticLit lit) = te_Lit lit +te_Static _ = return () + +te_BB :: CmmBlock -> TE () +te_BB block = mapM_ te_Stmt (blockToList mid) >> te_Stmt last + where (_, mid, last) = blockSplit block + +te_Lit :: CmmLit -> TE () +te_Lit (CmmLabel l) = te_lbl l +te_Lit (CmmLabelOff l _) = te_lbl l +te_Lit (CmmLabelDiffOff l1 _ _ _) = te_lbl l1 +te_Lit _ = return () + +te_Stmt :: CmmNode e x -> TE () +te_Stmt (CmmAssign r e) = te_Reg r >> te_Expr e +te_Stmt (CmmStore l r) = te_Expr l >> te_Expr r +te_Stmt (CmmUnsafeForeignCall target rs es) + = do te_Target target + mapM_ te_temp rs + mapM_ te_Expr es +te_Stmt (CmmCondBranch e _ _ _) = te_Expr e +te_Stmt (CmmSwitch e _) = te_Expr e +te_Stmt (CmmCall { cml_target = e }) = te_Expr e +te_Stmt _ = return () + +te_Target :: ForeignTarget -> TE () +te_Target (ForeignTarget e _) = te_Expr e +te_Target (PrimTarget{}) = return () + +te_Expr :: CmmExpr -> TE () +te_Expr (CmmLit lit) = te_Lit lit +te_Expr (CmmLoad e _) = te_Expr e +te_Expr (CmmReg r) = te_Reg r +te_Expr (CmmMachOp _ es) = mapM_ te_Expr es +te_Expr (CmmRegOff r _) = te_Reg r +te_Expr (CmmStackSlot _ _) = panic "te_Expr: CmmStackSlot not supported!" 
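
The TE monad above is a hand-rolled state monad whose state pairs a UniqSet of
local registers with a Map of CLabels; te_Stmt/te_Expr add to that state and
runTE drains it for pprTempAndExternDecls. For orientation only, here is a
minimal, self-contained sketch of the same collect-then-declare pattern using
ordinary containers and the mtl State monad; the Expr type, collectExpr and
declarations below are invented for the illustration and are not part of this
patch:

  import Control.Monad.State (State, execState, modify)
  import qualified Data.Set as Set

  -- Toy expression type standing in for CmmExpr: labels, temporaries,
  -- and applications of some operator to sub-expressions.
  data Expr = Lbl String | Temp String | App String [Expr]

  -- State: the temporaries and the labels seen so far (cf. TEState).
  type Collect = State (Set.Set String, Set.Set String)

  collectExpr :: Expr -> Collect ()
  collectExpr (Lbl l)    = modify (\(ts, ls) -> (ts, Set.insert l ls))
  collectExpr (Temp t)   = modify (\(ts, ls) -> (Set.insert t ts, ls))
  collectExpr (App _ es) = mapM_ collectExpr es

  -- Run the traversal over a whole block and read off both sets,
  -- just as runTE does before the declarations are printed.
  declarations :: [Expr] -> (Set.Set String, Set.Set String)
  declarations es = execState (mapM_ collectExpr es) (Set.empty, Set.empty)

The real code uses a UniqSet and a Map CLabel () instead of Sets of Strings,
but the shape of the traversal is the same.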
+ +te_Reg :: CmmReg -> TE () +te_Reg (CmmLocal l) = te_temp l +te_Reg _ = return () + + +-- --------------------------------------------------------------------- +-- C types for MachReps + +cCast :: SDoc -> CmmExpr -> SDoc +cCast ty expr = parens ty <> pprExpr1 expr + +cLoad :: CmmExpr -> CmmType -> SDoc +cLoad expr rep + = sdocWithPlatform $ \platform -> + if bewareLoadStoreAlignment (platformArch platform) + then let decl = machRepCType rep <+> text "x" <> semi + struct = text "struct" <+> braces (decl) + packed_attr = text "__attribute__((packed))" + cast = parens (struct <+> packed_attr <> char '*') + in parens (cast <+> pprExpr1 expr) <> text "->x" + else char '*' <> parens (cCast (machRepPtrCType rep) expr) + where -- On these platforms, unaligned loads are known to cause problems + bewareLoadStoreAlignment ArchAlpha = True + bewareLoadStoreAlignment ArchMipseb = True + bewareLoadStoreAlignment ArchMipsel = True + bewareLoadStoreAlignment (ArchARM {}) = True + bewareLoadStoreAlignment ArchARM64 = True + bewareLoadStoreAlignment ArchSPARC = True + bewareLoadStoreAlignment ArchSPARC64 = True + -- Pessimistically assume that they will also cause problems + -- on unknown arches + bewareLoadStoreAlignment ArchUnknown = True + bewareLoadStoreAlignment _ = False + +isCmmWordType :: DynFlags -> CmmType -> Bool +-- True of GcPtrReg/NonGcReg of native word size +isCmmWordType dflags ty = not (isFloatType ty) + && typeWidth ty == wordWidth dflags + +-- This is for finding the types of foreign call arguments. For a pointer +-- argument, we always cast the argument to (void *), to avoid warnings from +-- the C compiler. +machRepHintCType :: CmmType -> ForeignHint -> SDoc +machRepHintCType _ AddrHint = text "void *" +machRepHintCType rep SignedHint = machRep_S_CType (typeWidth rep) +machRepHintCType rep _other = machRepCType rep + +machRepPtrCType :: CmmType -> SDoc +machRepPtrCType r + = sdocWithDynFlags $ \dflags -> + if isCmmWordType dflags r then text "P_" + else machRepCType r <> char '*' + +machRepCType :: CmmType -> SDoc +machRepCType ty | isFloatType ty = machRep_F_CType w + | otherwise = machRep_U_CType w + where + w = typeWidth ty + +machRep_F_CType :: Width -> SDoc +machRep_F_CType W32 = text "StgFloat" -- ToDo: correct? +machRep_F_CType W64 = text "StgDouble" +machRep_F_CType _ = panic "machRep_F_CType" + +machRep_U_CType :: Width -> SDoc +machRep_U_CType w + = sdocWithDynFlags $ \dflags -> + case w of + _ | w == wordWidth dflags -> text "W_" + W8 -> text "StgWord8" + W16 -> text "StgWord16" + W32 -> text "StgWord32" + W64 -> text "StgWord64" + _ -> panic "machRep_U_CType" + +machRep_S_CType :: Width -> SDoc +machRep_S_CType w + = sdocWithDynFlags $ \dflags -> + case w of + _ | w == wordWidth dflags -> text "I_" + W8 -> text "StgInt8" + W16 -> text "StgInt16" + W32 -> text "StgInt32" + W64 -> text "StgInt64" + _ -> panic "machRep_S_CType" + + +-- --------------------------------------------------------------------- +-- print strings as valid C strings + +pprStringInCStyle :: ByteString -> SDoc +pprStringInCStyle s = doubleQuotes (text (concatMap charToC (BS.unpack s))) + +-- --------------------------------------------------------------------------- +-- Initialising static objects with floating-point numbers. We can't +-- just emit the floating point number, because C will cast it to an int +-- by rounding it. We want the actual bit-representation of the float. 
+-- +-- Consider a concrete C example: +-- double d = 2.5e-10; +-- float f = 2.5e-10f; +-- +-- int * i2 = &d; printf ("i2: %08X %08X\n", i2[0], i2[1]); +-- long long * l = &d; printf (" l: %016llX\n", l[0]); +-- int * i = &f; printf (" i: %08X\n", i[0]); +-- Result on 64-bit LE (x86_64): +-- i2: E826D695 3DF12E0B +-- l: 3DF12E0BE826D695 +-- i: 2F89705F +-- Result on 32-bit BE (m68k): +-- i2: 3DF12E0B E826D695 +-- l: 3DF12E0BE826D695 +-- i: 2F89705F +-- +-- The trick here is to notice that binary representation does not +-- change much: only Word32 values get swapped on LE hosts / targets. + +-- This is a hack to turn the floating point numbers into ints that we +-- can safely initialise to static locations. + +castFloatToWord32Array :: STUArray s Int Float -> ST s (STUArray s Int Word32) +castFloatToWord32Array = U.castSTUArray + +castDoubleToWord64Array :: STUArray s Int Double -> ST s (STUArray s Int Word64) +castDoubleToWord64Array = U.castSTUArray + +floatToWord :: DynFlags -> Rational -> CmmLit +floatToWord dflags r + = runST (do + arr <- newArray_ ((0::Int),0) + writeArray arr 0 (fromRational r) + arr' <- castFloatToWord32Array arr + w32 <- readArray arr' 0 + return (CmmInt (toInteger w32 `shiftL` wo) (wordWidth dflags)) + ) + where wo | wordWidth dflags == W64 + , wORDS_BIGENDIAN dflags = 32 + | otherwise = 0 + +floatPairToWord :: DynFlags -> Rational -> Rational -> CmmLit +floatPairToWord dflags r1 r2 + = runST (do + arr <- newArray_ ((0::Int),1) + writeArray arr 0 (fromRational r1) + writeArray arr 1 (fromRational r2) + arr' <- castFloatToWord32Array arr + w32_1 <- readArray arr' 0 + w32_2 <- readArray arr' 1 + return (pprWord32Pair w32_1 w32_2) + ) + where pprWord32Pair w32_1 w32_2 + | wORDS_BIGENDIAN dflags = + CmmInt ((shiftL i1 32) .|. i2) W64 + | otherwise = + CmmInt ((shiftL i2 32) .|. i1) W64 + where i1 = toInteger w32_1 + i2 = toInteger w32_2 + +doubleToWords :: DynFlags -> Rational -> [CmmLit] +doubleToWords dflags r + = runST (do + arr <- newArray_ ((0::Int),1) + writeArray arr 0 (fromRational r) + arr' <- castDoubleToWord64Array arr + w64 <- readArray arr' 0 + return (pprWord64 w64) + ) + where targetWidth = wordWidth dflags + targetBE = wORDS_BIGENDIAN dflags + pprWord64 w64 + | targetWidth == W64 = + [ CmmInt (toInteger w64) targetWidth ] + | targetWidth == W32 = + [ CmmInt (toInteger targetW1) targetWidth + , CmmInt (toInteger targetW2) targetWidth + ] + | otherwise = panic "doubleToWords.pprWord64" + where (targetW1, targetW2) + | targetBE = (wHi, wLo) + | otherwise = (wLo, wHi) + wHi = w64 `shiftR` 32 + wLo = w64 .&. 0xFFFFffff + +-- --------------------------------------------------------------------------- +-- Utils + +wordShift :: DynFlags -> Int +wordShift dflags = widthInLog (wordWidth dflags) + +commafy :: [SDoc] -> SDoc +commafy xs = hsep $ punctuate comma xs + +-- Print in C hex format: 0x13fa +pprHexVal :: Integer -> Width -> SDoc +pprHexVal w rep + | w < 0 = parens (char '-' <> + text "0x" <> intToDoc (-w) <> repsuffix rep) + | otherwise = text "0x" <> intToDoc w <> repsuffix rep + where + -- type suffix for literals: + -- Integer literals are unsigned in Cmm/C. We explicitly cast to + -- signed values for doing signed operations, but at all other + -- times values are unsigned. This also helps eliminate occasional + -- warnings about integer overflow from gcc. 
+ + repsuffix W64 = sdocWithDynFlags $ \dflags -> + if cINT_SIZE dflags == 8 then char 'U' + else if cLONG_SIZE dflags == 8 then text "UL" + else if cLONG_LONG_SIZE dflags == 8 then text "ULL" + else panic "pprHexVal: Can't find a 64-bit type" + repsuffix _ = char 'U' + + intToDoc :: Integer -> SDoc + intToDoc i = case truncInt i of + 0 -> char '0' + v -> go v + + -- We need to truncate value as Cmm backend does not drop + -- redundant bits to ease handling of negative values. + -- Thus the following Cmm code on 64-bit arch, like amd64: + -- CInt v; + -- v = {something}; + -- if (v == %lobits32(-1)) { ... + -- leads to the following C code: + -- StgWord64 v = (StgWord32)({something}); + -- if (v == 0xFFFFffffFFFFffffU) { ... + -- Such code is incorrect as it promotes both operands to StgWord64 + -- and the whole condition is always false. + truncInt :: Integer -> Integer + truncInt i = + case rep of + W8 -> i `rem` (2^(8 :: Int)) + W16 -> i `rem` (2^(16 :: Int)) + W32 -> i `rem` (2^(32 :: Int)) + W64 -> i `rem` (2^(64 :: Int)) + _ -> panic ("pprHexVal/truncInt: C backend can't encode " + ++ show rep ++ " literals") + + go 0 = empty + go w' = go q <> dig + where + (q,r) = w' `quotRem` 16 + dig | r < 10 = char (chr (fromInteger r + ord '0')) + | otherwise = char (chr (fromInteger r - 10 + ord 'a')) diff --git a/compiler/GHC/Data/Bitmap.hs b/compiler/GHC/Data/Bitmap.hs new file mode 100644 index 0000000000..a8eba5e2e8 --- /dev/null +++ b/compiler/GHC/Data/Bitmap.hs @@ -0,0 +1,134 @@ +{-# LANGUAGE BangPatterns #-} + +-- +-- (c) The University of Glasgow 2003-2006 +-- + +-- Functions for constructing bitmaps, which are used in various +-- places in generated code (stack frame liveness masks, function +-- argument liveness masks, SRT bitmaps). + +module GHC.Data.Bitmap ( + Bitmap, mkBitmap, + intsToBitmap, intsToReverseBitmap, + mAX_SMALL_BITMAP_SIZE, + seqBitmap, + ) where + +import GhcPrelude + +import GHC.Runtime.Layout +import DynFlags +import Util + +import Data.Bits + +{-| +A bitmap represented by a sequence of 'StgWord's on the /target/ +architecture. These are used for bitmaps in info tables and other +generated code which need to be emitted as sequences of StgWords. +-} +type Bitmap = [StgWord] + +-- | Make a bitmap from a sequence of bits +mkBitmap :: DynFlags -> [Bool] -> Bitmap +mkBitmap _ [] = [] +mkBitmap dflags stuff = chunkToBitmap dflags chunk : mkBitmap dflags rest + where (chunk, rest) = splitAt (wORD_SIZE_IN_BITS dflags) stuff + +chunkToBitmap :: DynFlags -> [Bool] -> StgWord +chunkToBitmap dflags chunk = + foldl' (.|.) (toStgWord dflags 0) [ oneAt n | (True,n) <- zip chunk [0..] ] + where + oneAt :: Int -> StgWord + oneAt i = toStgWord dflags 1 `shiftL` i + +-- | Make a bitmap where the slots specified are the /ones/ in the bitmap. +-- eg. @[0,1,3], size 4 ==> 0xb@. +-- +-- The list of @Int@s /must/ be already sorted. +intsToBitmap :: DynFlags + -> Int -- ^ size in bits + -> [Int] -- ^ sorted indices of ones + -> Bitmap +intsToBitmap dflags size = go 0 + where + word_sz = wORD_SIZE_IN_BITS dflags + oneAt :: Int -> StgWord + oneAt i = toStgWord dflags 1 `shiftL` i + + -- It is important that we maintain strictness here. + -- See Note [Strictness when building Bitmaps]. + go :: Int -> [Int] -> Bitmap + go !pos slots + | size <= pos = [] + | otherwise = + (foldl' (.|.) 
(toStgWord dflags 0) (map (\i->oneAt (i - pos)) these)) : + go (pos + word_sz) rest + where + (these,rest) = span (< (pos + word_sz)) slots + +-- | Make a bitmap where the slots specified are the /zeros/ in the bitmap. +-- eg. @[0,1,3], size 4 ==> 0x4@ (we leave any bits outside the size as zero, +-- just to make the bitmap easier to read). +-- +-- The list of @Int@s /must/ be already sorted and duplicate-free. +intsToReverseBitmap :: DynFlags + -> Int -- ^ size in bits + -> [Int] -- ^ sorted indices of zeros free of duplicates + -> Bitmap +intsToReverseBitmap dflags size = go 0 + where + word_sz = wORD_SIZE_IN_BITS dflags + oneAt :: Int -> StgWord + oneAt i = toStgWord dflags 1 `shiftL` i + + -- It is important that we maintain strictness here. + -- See Note [Strictness when building Bitmaps]. + go :: Int -> [Int] -> Bitmap + go !pos slots + | size <= pos = [] + | otherwise = + (foldl' xor (toStgWord dflags init) (map (\i->oneAt (i - pos)) these)) : + go (pos + word_sz) rest + where + (these,rest) = span (< (pos + word_sz)) slots + remain = size - pos + init + | remain >= word_sz = -1 + | otherwise = (1 `shiftL` remain) - 1 + +{- + +Note [Strictness when building Bitmaps] +======================================== + +One of the places where @Bitmap@ is used is in in building Static Reference +Tables (SRTs) (in @GHC.Cmm.Info.Build.procpointSRT@). In #7450 it was noticed +that some test cases (particularly those whose C-- have large numbers of CAFs) +produced large quantities of allocations from this function. + +The source traced back to 'intsToBitmap', which was lazily subtracting the word +size from the elements of the tail of the @slots@ list and recursively invoking +itself with the result. This resulted in large numbers of subtraction thunks +being built up. Here we take care to avoid passing new thunks to the recursive +call. Instead we pass the unmodified tail along with an explicit position +accumulator, which get subtracted in the fold when we compute the Word. + +-} + +{- | +Magic number, must agree with @BITMAP_BITS_SHIFT@ in InfoTables.h. +Some kinds of bitmap pack a size\/bitmap into a single word if +possible, or fall back to an external pointer when the bitmap is too +large. This value represents the largest size of bitmap that can be +packed into a single word. 
+-} +mAX_SMALL_BITMAP_SIZE :: DynFlags -> Int +mAX_SMALL_BITMAP_SIZE dflags + | wORD_SIZE dflags == 4 = 27 + | otherwise = 58 + +seqBitmap :: Bitmap -> a -> a +seqBitmap = seqList + diff --git a/compiler/GHC/Platform/Regs.hs b/compiler/GHC/Platform/Regs.hs index c304d4f5ad..51f7658db2 100644 --- a/compiler/GHC/Platform/Regs.hs +++ b/compiler/GHC/Platform/Regs.hs @@ -5,7 +5,7 @@ module GHC.Platform.Regs import GhcPrelude -import CmmExpr +import GHC.Cmm.Expr import GHC.Platform import Reg diff --git a/compiler/GHC/Runtime/Layout.hs b/compiler/GHC/Runtime/Layout.hs new file mode 100644 index 0000000000..8f245479c1 --- /dev/null +++ b/compiler/GHC/Runtime/Layout.hs @@ -0,0 +1,563 @@ +-- (c) The University of Glasgow 2006 +-- (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 +-- +-- Storage manager representation of closures + +{-# LANGUAGE CPP,GeneralizedNewtypeDeriving #-} + +module GHC.Runtime.Layout ( + -- * Words and bytes + WordOff, ByteOff, + wordsToBytes, bytesToWordsRoundUp, + roundUpToWords, roundUpTo, + + StgWord, fromStgWord, toStgWord, + StgHalfWord, fromStgHalfWord, toStgHalfWord, + halfWordSize, halfWordSizeInBits, + + -- * Closure representation + SMRep(..), -- CmmInfo sees the rep; no one else does + IsStatic, + ClosureTypeInfo(..), ArgDescr(..), Liveness, + ConstrDescription, + + -- ** Construction + mkHeapRep, blackHoleRep, indStaticRep, mkStackRep, mkRTSRep, arrPtrsRep, + smallArrPtrsRep, arrWordsRep, + + -- ** Predicates + isStaticRep, isConRep, isThunkRep, isFunRep, isStaticNoCafCon, + isStackRep, + + -- ** Size-related things + heapClosureSizeW, + fixedHdrSizeW, arrWordsHdrSize, arrWordsHdrSizeW, arrPtrsHdrSize, + arrPtrsHdrSizeW, profHdrSize, thunkHdrSize, nonHdrSize, nonHdrSizeW, + smallArrPtrsHdrSize, smallArrPtrsHdrSizeW, hdrSize, hdrSizeW, + fixedHdrSize, + + -- ** RTS closure types + rtsClosureType, rET_SMALL, rET_BIG, + aRG_GEN, aRG_GEN_BIG, + + -- ** Arrays + card, cardRoundUp, cardTableSizeB, cardTableSizeW + ) where + +import GhcPrelude + +import BasicTypes( ConTagZ ) +import DynFlags +import Outputable +import GHC.Platform +import FastString + +import Data.Word +import Data.Bits +import Data.ByteString (ByteString) + +{- +************************************************************************ +* * + Words and bytes +* * +************************************************************************ +-} + +-- | Word offset, or word count +type WordOff = Int + +-- | Byte offset, or byte count +type ByteOff = Int + +-- | Round up the given byte count to the next byte count that's a +-- multiple of the machine's word size. +roundUpToWords :: DynFlags -> ByteOff -> ByteOff +roundUpToWords dflags n = roundUpTo n (wORD_SIZE dflags) + +-- | Round up @base@ to a multiple of @size@. +roundUpTo :: ByteOff -> ByteOff -> ByteOff +roundUpTo base size = (base + (size - 1)) .&. (complement (size - 1)) + +-- | Convert the given number of words to a number of bytes. +-- +-- This function morally has type @WordOff -> ByteOff@, but uses @Num +-- a@ to allow for overloading. +wordsToBytes :: Num a => DynFlags -> a -> a +wordsToBytes dflags n = fromIntegral (wORD_SIZE dflags) * n +{-# SPECIALIZE wordsToBytes :: DynFlags -> Int -> Int #-} +{-# SPECIALIZE wordsToBytes :: DynFlags -> Word -> Word #-} +{-# SPECIALIZE wordsToBytes :: DynFlags -> Integer -> Integer #-} + +-- | First round the given byte count up to a multiple of the +-- machine's word size and then convert the result to words. 
+bytesToWordsRoundUp :: DynFlags -> ByteOff -> WordOff +bytesToWordsRoundUp dflags n = (n + word_size - 1) `quot` word_size + where word_size = wORD_SIZE dflags +-- StgWord is a type representing an StgWord on the target platform. +-- A Word64 is large enough to hold a Word for either a 32bit or 64bit platform +newtype StgWord = StgWord Word64 + deriving (Eq, Bits) + +fromStgWord :: StgWord -> Integer +fromStgWord (StgWord i) = toInteger i + +toStgWord :: DynFlags -> Integer -> StgWord +toStgWord dflags i + = case platformWordSize (targetPlatform dflags) of + -- These conversions mean that things like toStgWord (-1) + -- do the right thing + PW4 -> StgWord (fromIntegral (fromInteger i :: Word32)) + PW8 -> StgWord (fromInteger i) + +instance Outputable StgWord where + ppr (StgWord i) = integer (toInteger i) + +-- + +-- A Word32 is large enough to hold half a Word for either a 32bit or +-- 64bit platform +newtype StgHalfWord = StgHalfWord Word32 + deriving Eq + +fromStgHalfWord :: StgHalfWord -> Integer +fromStgHalfWord (StgHalfWord w) = toInteger w + +toStgHalfWord :: DynFlags -> Integer -> StgHalfWord +toStgHalfWord dflags i + = case platformWordSize (targetPlatform dflags) of + -- These conversions mean that things like toStgHalfWord (-1) + -- do the right thing + PW4 -> StgHalfWord (fromIntegral (fromInteger i :: Word16)) + PW8 -> StgHalfWord (fromInteger i :: Word32) + +instance Outputable StgHalfWord where + ppr (StgHalfWord w) = integer (toInteger w) + +-- | Half word size in bytes +halfWordSize :: DynFlags -> ByteOff +halfWordSize dflags = platformWordSizeInBytes (targetPlatform dflags) `div` 2 + +halfWordSizeInBits :: DynFlags -> Int +halfWordSizeInBits dflags = platformWordSizeInBits (targetPlatform dflags) `div` 2 + +{- +************************************************************************ +* * +\subsubsection[SMRep-datatype]{@SMRep@---storage manager representation} +* * +************************************************************************ +-} + +-- | A description of the layout of a closure. Corresponds directly +-- to the closure types in includes/rts/storage/ClosureTypes.h. +data SMRep + = HeapRep -- GC routines consult sizes in info tbl + IsStatic + !WordOff -- # ptr words + !WordOff -- # non-ptr words INCLUDING SLOP (see mkHeapRep below) + ClosureTypeInfo -- type-specific info + + | ArrayPtrsRep + !WordOff -- # ptr words + !WordOff -- # card table words + + | SmallArrayPtrsRep + !WordOff -- # ptr words + + | ArrayWordsRep + !WordOff -- # bytes expressed in words, rounded up + + | StackRep -- Stack frame (RET_SMALL or RET_BIG) + Liveness + + | RTSRep -- The RTS needs to declare info tables with specific + Int -- type tags, so this form lets us override the default + SMRep -- tag for an SMRep. + +-- | True <=> This is a static closure. Affects how we garbage-collect it. +-- Static closure have an extra static link field at the end. +-- Constructors do not have a static variant; see Note [static constructors] +type IsStatic = Bool + +-- From an SMRep you can get to the closure type defined in +-- includes/rts/storage/ClosureTypes.h. Described by the function +-- rtsClosureType below. 
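For illustration only (not part of the patch): a minimal sketch of the word-size helpers introduced earlier in this file, with DynFlags replaced by a hard-coded 8-byte word; the primed names are hypothetical.

    import Data.Bits (complement, (.&.))
    import Data.Word (Word32, Word64)

    wordSize :: Int            -- assumed 64-bit target
    wordSize = 8

    -- Mirrors roundUpTo and bytesToWordsRoundUp above:
    --   roundUpTo' 13 8 == 16, bytesToWordsRoundUp' 13 == 2
    roundUpTo' :: Int -> Int -> Int
    roundUpTo' base size = (base + (size - 1)) .&. complement (size - 1)

    bytesToWordsRoundUp' :: Int -> Int
    bytesToWordsRoundUp' n = (n + wordSize - 1) `quot` wordSize

    -- Mirrors the PW4 branch of toStgWord: negative Integers wrap to
    -- their unsigned 32-bit value, e.g. toStgWord32 (-1) == 0xFFFFFFFF.
    toStgWord32 :: Integer -> Word64
    toStgWord32 i = fromIntegral (fromInteger i :: Word32)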
+ +data ClosureTypeInfo + = Constr ConTagZ ConstrDescription + | Fun FunArity ArgDescr + | Thunk + | ThunkSelector SelectorOffset + | BlackHole + | IndStatic + +type ConstrDescription = ByteString -- result of dataConIdentity +type FunArity = Int +type SelectorOffset = Int + +------------------------- +-- We represent liveness bitmaps as a Bitmap (whose internal +-- representation really is a bitmap). These are pinned onto case return +-- vectors to indicate the state of the stack for the garbage collector. +-- +-- In the compiled program, liveness bitmaps that fit inside a single +-- word (StgWord) are stored as a single word, while larger bitmaps are +-- stored as a pointer to an array of words. + +type Liveness = [Bool] -- One Bool per word; True <=> non-ptr or dead + -- False <=> ptr + +------------------------- +-- An ArgDescr describes the argument pattern of a function + +data ArgDescr + = ArgSpec -- Fits one of the standard patterns + !Int -- RTS type identifier ARG_P, ARG_N, ... + + | ArgGen -- General case + Liveness -- Details about the arguments + + +----------------------------------------------------------------------------- +-- Construction + +mkHeapRep :: DynFlags -> IsStatic -> WordOff -> WordOff -> ClosureTypeInfo + -> SMRep +mkHeapRep dflags is_static ptr_wds nonptr_wds cl_type_info + = HeapRep is_static + ptr_wds + (nonptr_wds + slop_wds) + cl_type_info + where + slop_wds + | is_static = 0 + | otherwise = max 0 (minClosureSize dflags - (hdr_size + payload_size)) + + hdr_size = closureTypeHdrSize dflags cl_type_info + payload_size = ptr_wds + nonptr_wds + +mkRTSRep :: Int -> SMRep -> SMRep +mkRTSRep = RTSRep + +mkStackRep :: [Bool] -> SMRep +mkStackRep liveness = StackRep liveness + +blackHoleRep :: SMRep +blackHoleRep = HeapRep False 0 0 BlackHole + +indStaticRep :: SMRep +indStaticRep = HeapRep True 1 0 IndStatic + +arrPtrsRep :: DynFlags -> WordOff -> SMRep +arrPtrsRep dflags elems = ArrayPtrsRep elems (cardTableSizeW dflags elems) + +smallArrPtrsRep :: WordOff -> SMRep +smallArrPtrsRep elems = SmallArrayPtrsRep elems + +arrWordsRep :: DynFlags -> ByteOff -> SMRep +arrWordsRep dflags bytes = ArrayWordsRep (bytesToWordsRoundUp dflags bytes) + +----------------------------------------------------------------------------- +-- Predicates + +isStaticRep :: SMRep -> IsStatic +isStaticRep (HeapRep is_static _ _ _) = is_static +isStaticRep (RTSRep _ rep) = isStaticRep rep +isStaticRep _ = False + +isStackRep :: SMRep -> Bool +isStackRep StackRep{} = True +isStackRep (RTSRep _ rep) = isStackRep rep +isStackRep _ = False + +isConRep :: SMRep -> Bool +isConRep (HeapRep _ _ _ Constr{}) = True +isConRep _ = False + +isThunkRep :: SMRep -> Bool +isThunkRep (HeapRep _ _ _ Thunk) = True +isThunkRep (HeapRep _ _ _ ThunkSelector{}) = True +isThunkRep (HeapRep _ _ _ BlackHole) = True +isThunkRep (HeapRep _ _ _ IndStatic) = True +isThunkRep _ = False + +isFunRep :: SMRep -> Bool +isFunRep (HeapRep _ _ _ Fun{}) = True +isFunRep _ = False + +isStaticNoCafCon :: SMRep -> Bool +-- This should line up exactly with CONSTR_NOCAF below +-- See Note [Static NoCaf constructors] +isStaticNoCafCon (HeapRep _ 0 _ Constr{}) = True +isStaticNoCafCon _ = False + + +----------------------------------------------------------------------------- +-- Size-related things + +fixedHdrSize :: DynFlags -> ByteOff +fixedHdrSize dflags = wordsToBytes dflags (fixedHdrSizeW dflags) + +-- | Size of a closure header (StgHeader in includes/rts/storage/Closures.h) +fixedHdrSizeW :: DynFlags -> WordOff +fixedHdrSizeW 
dflags = sTD_HDR_SIZE dflags + profHdrSize dflags + +-- | Size of the profiling part of a closure header +-- (StgProfHeader in includes/rts/storage/Closures.h) +profHdrSize :: DynFlags -> WordOff +profHdrSize dflags + | gopt Opt_SccProfilingOn dflags = pROF_HDR_SIZE dflags + | otherwise = 0 + +-- | The garbage collector requires that every closure is at least as +-- big as this. +minClosureSize :: DynFlags -> WordOff +minClosureSize dflags = fixedHdrSizeW dflags + mIN_PAYLOAD_SIZE dflags + +arrWordsHdrSize :: DynFlags -> ByteOff +arrWordsHdrSize dflags + = fixedHdrSize dflags + sIZEOF_StgArrBytes_NoHdr dflags + +arrWordsHdrSizeW :: DynFlags -> WordOff +arrWordsHdrSizeW dflags = + fixedHdrSizeW dflags + + (sIZEOF_StgArrBytes_NoHdr dflags `quot` wORD_SIZE dflags) + +arrPtrsHdrSize :: DynFlags -> ByteOff +arrPtrsHdrSize dflags + = fixedHdrSize dflags + sIZEOF_StgMutArrPtrs_NoHdr dflags + +arrPtrsHdrSizeW :: DynFlags -> WordOff +arrPtrsHdrSizeW dflags = + fixedHdrSizeW dflags + + (sIZEOF_StgMutArrPtrs_NoHdr dflags `quot` wORD_SIZE dflags) + +smallArrPtrsHdrSize :: DynFlags -> ByteOff +smallArrPtrsHdrSize dflags + = fixedHdrSize dflags + sIZEOF_StgSmallMutArrPtrs_NoHdr dflags + +smallArrPtrsHdrSizeW :: DynFlags -> WordOff +smallArrPtrsHdrSizeW dflags = + fixedHdrSizeW dflags + + (sIZEOF_StgSmallMutArrPtrs_NoHdr dflags `quot` wORD_SIZE dflags) + +-- Thunks have an extra header word on SMP, so the update doesn't +-- splat the payload. +thunkHdrSize :: DynFlags -> WordOff +thunkHdrSize dflags = fixedHdrSizeW dflags + smp_hdr + where smp_hdr = sIZEOF_StgSMPThunkHeader dflags `quot` wORD_SIZE dflags + +hdrSize :: DynFlags -> SMRep -> ByteOff +hdrSize dflags rep = wordsToBytes dflags (hdrSizeW dflags rep) + +hdrSizeW :: DynFlags -> SMRep -> WordOff +hdrSizeW dflags (HeapRep _ _ _ ty) = closureTypeHdrSize dflags ty +hdrSizeW dflags (ArrayPtrsRep _ _) = arrPtrsHdrSizeW dflags +hdrSizeW dflags (SmallArrayPtrsRep _) = smallArrPtrsHdrSizeW dflags +hdrSizeW dflags (ArrayWordsRep _) = arrWordsHdrSizeW dflags +hdrSizeW _ _ = panic "SMRep.hdrSizeW" + +nonHdrSize :: DynFlags -> SMRep -> ByteOff +nonHdrSize dflags rep = wordsToBytes dflags (nonHdrSizeW rep) + +nonHdrSizeW :: SMRep -> WordOff +nonHdrSizeW (HeapRep _ p np _) = p + np +nonHdrSizeW (ArrayPtrsRep elems ct) = elems + ct +nonHdrSizeW (SmallArrayPtrsRep elems) = elems +nonHdrSizeW (ArrayWordsRep words) = words +nonHdrSizeW (StackRep bs) = length bs +nonHdrSizeW (RTSRep _ rep) = nonHdrSizeW rep + +-- | The total size of the closure, in words. +heapClosureSizeW :: DynFlags -> SMRep -> WordOff +heapClosureSizeW dflags (HeapRep _ p np ty) + = closureTypeHdrSize dflags ty + p + np +heapClosureSizeW dflags (ArrayPtrsRep elems ct) + = arrPtrsHdrSizeW dflags + elems + ct +heapClosureSizeW dflags (SmallArrayPtrsRep elems) + = smallArrPtrsHdrSizeW dflags + elems +heapClosureSizeW dflags (ArrayWordsRep words) + = arrWordsHdrSizeW dflags + words +heapClosureSizeW _ _ = panic "SMRep.heapClosureSize" + +closureTypeHdrSize :: DynFlags -> ClosureTypeInfo -> WordOff +closureTypeHdrSize dflags ty = case ty of + Thunk -> thunkHdrSize dflags + ThunkSelector{} -> thunkHdrSize dflags + BlackHole -> thunkHdrSize dflags + IndStatic -> thunkHdrSize dflags + _ -> fixedHdrSizeW dflags + -- All thunks use thunkHdrSize, even if they are non-updatable. + -- this is because we don't have separate closure types for + -- updatable vs. non-updatable thunks, so the GC can't tell the + -- difference. 
If we ever have significant numbers of non- + -- updatable thunks, it might be worth fixing this. + +-- --------------------------------------------------------------------------- +-- Arrays + +-- | The byte offset into the card table of the card for a given element +card :: DynFlags -> Int -> Int +card dflags i = i `shiftR` mUT_ARR_PTRS_CARD_BITS dflags + +-- | Convert a number of elements to a number of cards, rounding up +cardRoundUp :: DynFlags -> Int -> Int +cardRoundUp dflags i = + card dflags (i + ((1 `shiftL` mUT_ARR_PTRS_CARD_BITS dflags) - 1)) + +-- | The size of a card table, in bytes +cardTableSizeB :: DynFlags -> Int -> ByteOff +cardTableSizeB dflags elems = cardRoundUp dflags elems + +-- | The size of a card table, in words +cardTableSizeW :: DynFlags -> Int -> WordOff +cardTableSizeW dflags elems = + bytesToWordsRoundUp dflags (cardTableSizeB dflags elems) + +----------------------------------------------------------------------------- +-- deriving the RTS closure type from an SMRep + +#include "../includes/rts/storage/ClosureTypes.h" +#include "../includes/rts/storage/FunTypes.h" +-- Defines CONSTR, CONSTR_1_0 etc + +-- | Derives the RTS closure type from an 'SMRep' +rtsClosureType :: SMRep -> Int +rtsClosureType rep + = case rep of + RTSRep ty _ -> ty + + -- See Note [static constructors] + HeapRep _ 1 0 Constr{} -> CONSTR_1_0 + HeapRep _ 0 1 Constr{} -> CONSTR_0_1 + HeapRep _ 2 0 Constr{} -> CONSTR_2_0 + HeapRep _ 1 1 Constr{} -> CONSTR_1_1 + HeapRep _ 0 2 Constr{} -> CONSTR_0_2 + HeapRep _ 0 _ Constr{} -> CONSTR_NOCAF + -- See Note [Static NoCaf constructors] + HeapRep _ _ _ Constr{} -> CONSTR + + HeapRep False 1 0 Fun{} -> FUN_1_0 + HeapRep False 0 1 Fun{} -> FUN_0_1 + HeapRep False 2 0 Fun{} -> FUN_2_0 + HeapRep False 1 1 Fun{} -> FUN_1_1 + HeapRep False 0 2 Fun{} -> FUN_0_2 + HeapRep False _ _ Fun{} -> FUN + + HeapRep False 1 0 Thunk -> THUNK_1_0 + HeapRep False 0 1 Thunk -> THUNK_0_1 + HeapRep False 2 0 Thunk -> THUNK_2_0 + HeapRep False 1 1 Thunk -> THUNK_1_1 + HeapRep False 0 2 Thunk -> THUNK_0_2 + HeapRep False _ _ Thunk -> THUNK + + HeapRep False _ _ ThunkSelector{} -> THUNK_SELECTOR + + HeapRep True _ _ Fun{} -> FUN_STATIC + HeapRep True _ _ Thunk -> THUNK_STATIC + HeapRep False _ _ BlackHole -> BLACKHOLE + HeapRep False _ _ IndStatic -> IND_STATIC + + _ -> panic "rtsClosureType" + +-- We export these ones +rET_SMALL, rET_BIG, aRG_GEN, aRG_GEN_BIG :: Int +rET_SMALL = RET_SMALL +rET_BIG = RET_BIG +aRG_GEN = ARG_GEN +aRG_GEN_BIG = ARG_GEN_BIG + +{- +Note [static constructors] +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +We used to have a CONSTR_STATIC closure type, and each constructor had +two info tables: one with CONSTR (or CONSTR_1_0 etc.), and one with +CONSTR_STATIC. + +This distinction was removed, because when copying a data structure +into a compact region, we must copy static constructors into the +compact region too. If we didn't do this, we would need to track the +references from the compact region out to the static constructors, +because they might (indirectly) refer to CAFs. + +Since static constructors will be copied to the heap, if we wanted to +use different info tables for static and dynamic constructors, we +would have to switch the info pointer when copying the constructor +into the compact region, which means we would need an extra field of +the static info table to point to the dynamic one. 
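For illustration only (not part of the patch): a worked example for the card-table helpers defined above. The card size of 2^7 = 128 elements (the value behind mUT_ARR_PTRS_CARD_BITS) is an assumption here, not something stated in this patch.

    import Data.Bits (shiftL, shiftR)

    cardBits :: Int      -- assumed MUT_ARR_PTRS_CARD_BITS
    cardBits = 7

    card' :: Int -> Int
    card' i = i `shiftR` cardBits

    cardRoundUp' :: Int -> Int
    cardRoundUp' i = card' (i + (1 `shiftL` cardBits) - 1)

    -- A 300-element array needs cardRoundUp' 300 == 3 cards, so a 3-byte
    -- card table, which bytesToWordsRoundUp pads to one word.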
+ +However, since the distinction between static and dynamic closure +types is never actually needed (other than for assertions), we can +just drop the distinction and use the same info table for both. + +The GC *does* need to distinguish between static and dynamic closures, +but it does this using the HEAP_ALLOCED() macro which checks whether +the address of the closure resides within the dynamic heap. +HEAP_ALLOCED() doesn't read the closure's info table. + +Note [Static NoCaf constructors] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +If we know that a top-level binding 'x' is not Caffy (ie no CAFs are +reachable from 'x'), then a statically allocated constructor (Just x) +is also not Caffy, and the garbage collector need not follow its +argument fields. Exploiting this would require two static info tables +for Just, for the two cases where the argument was Caffy or non-Caffy. + +Currently we don't do this; instead we treat nullary constructors +as non-Caffy, and the others as potentially Caffy. + + +************************************************************************ +* * + Pretty printing of SMRep and friends +* * +************************************************************************ +-} + +instance Outputable ClosureTypeInfo where + ppr = pprTypeInfo + +instance Outputable SMRep where + ppr (HeapRep static ps nps tyinfo) + = hang (header <+> lbrace) 2 (ppr tyinfo <+> rbrace) + where + header = text "HeapRep" + <+> if static then text "static" else empty + <+> pp_n "ptrs" ps <+> pp_n "nonptrs" nps + pp_n :: String -> Int -> SDoc + pp_n _ 0 = empty + pp_n s n = int n <+> text s + + ppr (ArrayPtrsRep size _) = text "ArrayPtrsRep" <+> ppr size + + ppr (SmallArrayPtrsRep size) = text "SmallArrayPtrsRep" <+> ppr size + + ppr (ArrayWordsRep words) = text "ArrayWordsRep" <+> ppr words + + ppr (StackRep bs) = text "StackRep" <+> ppr bs + + ppr (RTSRep ty rep) = text "tag:" <> ppr ty <+> ppr rep + +instance Outputable ArgDescr where + ppr (ArgSpec n) = text "ArgSpec" <+> ppr n + ppr (ArgGen ls) = text "ArgGen" <+> ppr ls + +pprTypeInfo :: ClosureTypeInfo -> SDoc +pprTypeInfo (Constr tag descr) + = text "Con" <+> + braces (sep [ text "tag:" <+> ppr tag + , text "descr:" <> text (show descr) ]) + +pprTypeInfo (Fun arity args) + = text "Fun" <+> + braces (sep [ text "arity:" <+> ppr arity + , ptext (sLit ("fun_type:")) <+> ppr args ]) + +pprTypeInfo (ThunkSelector offset) + = text "ThunkSel" <+> ppr offset + +pprTypeInfo Thunk = text "Thunk" +pprTypeInfo BlackHole = text "BlackHole" +pprTypeInfo IndStatic = text "IndStatic" diff --git a/compiler/GHC/Stg/Lift/Analysis.hs b/compiler/GHC/Stg/Lift/Analysis.hs index 02d439cef7..ccbad37210 100644 --- a/compiler/GHC/Stg/Lift/Analysis.hs +++ b/compiler/GHC/Stg/Lift/Analysis.hs @@ -26,7 +26,7 @@ import BasicTypes import Demand import DynFlags import Id -import SMRep ( WordOff ) +import GHC.Runtime.Layout ( WordOff ) import GHC.Stg.Syntax import qualified GHC.StgToCmm.ArgRep as StgToCmm.ArgRep import qualified GHC.StgToCmm.Closure as StgToCmm.Closure diff --git a/compiler/GHC/StgToCmm.hs b/compiler/GHC/StgToCmm.hs index 10a9dc2c6a..f489ce6456 100644 --- a/compiler/GHC/StgToCmm.hs +++ b/compiler/GHC/StgToCmm.hs @@ -26,9 +26,9 @@ import GHC.StgToCmm.Closure import GHC.StgToCmm.Hpc import GHC.StgToCmm.Ticky -import Cmm -import CmmUtils -import CLabel +import GHC.Cmm +import GHC.Cmm.Utils +import GHC.Cmm.CLabel import GHC.Stg.Syntax import DynFlags @@ -48,7 +48,7 @@ import BasicTypes import VarSet ( isEmptyDVarSet ) import OrdList -import MkGraph +import 
GHC.Cmm.Graph import Data.IORef import Control.Monad (when,void) diff --git a/compiler/GHC/StgToCmm/ArgRep.hs b/compiler/GHC/StgToCmm/ArgRep.hs index cc2fe8306a..347d908b44 100644 --- a/compiler/GHC/StgToCmm/ArgRep.hs +++ b/compiler/GHC/StgToCmm/ArgRep.hs @@ -19,7 +19,7 @@ import GhcPrelude import GHC.StgToCmm.Closure ( idPrimRep ) -import SMRep ( WordOff ) +import GHC.Runtime.Layout ( WordOff ) import Id ( Id ) import TyCon ( PrimRep(..), primElemRepSizeB ) import BasicTypes ( RepArity ) diff --git a/compiler/GHC/StgToCmm/Bind.hs b/compiler/GHC/StgToCmm/Bind.hs index b1cb34ace7..a78ab5cb41 100644 --- a/compiler/GHC/StgToCmm/Bind.hs +++ b/compiler/GHC/StgToCmm/Bind.hs @@ -28,14 +28,14 @@ import GHC.StgToCmm.Utils import GHC.StgToCmm.Closure import GHC.StgToCmm.Foreign (emitPrimCall) -import MkGraph +import GHC.Cmm.Graph import CoreSyn ( AltCon(..), tickishIsCode ) -import BlockId -import SMRep -import Cmm -import CmmInfo -import CmmUtils -import CLabel +import GHC.Cmm.BlockId +import GHC.Runtime.Layout +import GHC.Cmm +import GHC.Cmm.Info +import GHC.Cmm.Utils +import GHC.Cmm.CLabel import GHC.Stg.Syntax import CostCentre import Id @@ -105,7 +105,7 @@ cgTopRhsClosure dflags rec id ccs upd_flag args body = -- We don't generate the static closure here, because we might -- want to add references to static closures to it later. The - -- static closure is generated by CmmBuildInfoTables.updInfoSRTs, + -- static closure is generated by GHC.Cmm.Info.Build.updInfoSRTs, -- See Note [SRTs], specifically the [FUN] optimisation. ; let fv_details :: [(NonVoid Id, ByteOff)] @@ -622,7 +622,7 @@ emitBlackHoleCode node = do -- unconditionally disabled. -- krc 1/2007 -- Note the eager-blackholing check is here rather than in blackHoleOnEntry, - -- because emitBlackHoleCode is called from CmmParse. + -- because emitBlackHoleCode is called from GHC.Cmm.Parser. 
let eager_blackholing = not (gopt Opt_SccProfilingOn dflags) && gopt Opt_EagerBlackHoling dflags diff --git a/compiler/GHC/StgToCmm/CgUtils.hs b/compiler/GHC/StgToCmm/CgUtils.hs index f3dccd9745..58c46f8fa2 100644 --- a/compiler/GHC/StgToCmm/CgUtils.hs +++ b/compiler/GHC/StgToCmm/CgUtils.hs @@ -19,11 +19,11 @@ module GHC.StgToCmm.CgUtils ( import GhcPrelude import GHC.Platform.Regs -import Cmm -import Hoopl.Block -import Hoopl.Graph -import CmmUtils -import CLabel +import GHC.Cmm +import GHC.Cmm.Dataflow.Block +import GHC.Cmm.Dataflow.Graph +import GHC.Cmm.Utils +import GHC.Cmm.CLabel import DynFlags import Outputable diff --git a/compiler/GHC/StgToCmm/Closure.hs b/compiler/GHC/StgToCmm/Closure.hs index df8cb046c4..724ca6000a 100644 --- a/compiler/GHC/StgToCmm/Closure.hs +++ b/compiler/GHC/StgToCmm/Closure.hs @@ -67,13 +67,13 @@ module GHC.StgToCmm.Closure ( import GhcPrelude import GHC.Stg.Syntax -import SMRep -import Cmm -import PprCmmExpr() -- For Outputable instances +import GHC.Runtime.Layout +import GHC.Cmm +import GHC.Cmm.Ppr.Expr() -- For Outputable instances import CostCentre -import BlockId -import CLabel +import GHC.Cmm.BlockId +import GHC.Cmm.CLabel import Id import IdInfo import DataCon diff --git a/compiler/GHC/StgToCmm/DataCon.hs b/compiler/GHC/StgToCmm/DataCon.hs index 1e929663df..2bbeabace6 100644 --- a/compiler/GHC/StgToCmm/DataCon.hs +++ b/compiler/GHC/StgToCmm/DataCon.hs @@ -29,11 +29,11 @@ import GHC.StgToCmm.Layout import GHC.StgToCmm.Utils import GHC.StgToCmm.Closure -import CmmExpr -import CmmUtils -import CLabel -import MkGraph -import SMRep +import GHC.Cmm.Expr +import GHC.Cmm.Utils +import GHC.Cmm.CLabel +import GHC.Cmm.Graph +import GHC.Runtime.Layout import CostCentre import Module import DataCon diff --git a/compiler/GHC/StgToCmm/Env.hs b/compiler/GHC/StgToCmm/Env.hs index 45b09a3d26..b2c1371840 100644 --- a/compiler/GHC/StgToCmm/Env.hs +++ b/compiler/GHC/StgToCmm/Env.hs @@ -31,14 +31,14 @@ import GHC.StgToCmm.Monad import GHC.StgToCmm.Utils import GHC.StgToCmm.Closure -import CLabel +import GHC.Cmm.CLabel -import BlockId -import CmmExpr -import CmmUtils +import GHC.Cmm.BlockId +import GHC.Cmm.Expr +import GHC.Cmm.Utils import DynFlags import Id -import MkGraph +import GHC.Cmm.Graph import Name import Outputable import GHC.Stg.Syntax diff --git a/compiler/GHC/StgToCmm/Expr.hs b/compiler/GHC/StgToCmm/Expr.hs index 3836aa3d2a..0c2d9b8ae5 100644 --- a/compiler/GHC/StgToCmm/Expr.hs +++ b/compiler/GHC/StgToCmm/Expr.hs @@ -30,10 +30,10 @@ import GHC.StgToCmm.Closure import GHC.Stg.Syntax -import MkGraph -import BlockId -import Cmm hiding ( succ ) -import CmmInfo +import GHC.Cmm.Graph +import GHC.Cmm.BlockId +import GHC.Cmm hiding ( succ ) +import GHC.Cmm.Info import CoreSyn import DataCon import DynFlags ( mAX_PTR_TAG ) diff --git a/compiler/GHC/StgToCmm/ExtCode.hs b/compiler/GHC/StgToCmm/ExtCode.hs index 4a5225eec6..2679ce4992 100644 --- a/compiler/GHC/StgToCmm/ExtCode.hs +++ b/compiler/GHC/StgToCmm/ExtCode.hs @@ -42,11 +42,11 @@ import GhcPrelude import qualified GHC.StgToCmm.Monad as F import GHC.StgToCmm.Monad (FCode, newUnique) -import Cmm -import CLabel -import MkGraph +import GHC.Cmm +import GHC.Cmm.CLabel +import GHC.Cmm.Graph -import BlockId +import GHC.Cmm.BlockId import DynFlags import FastString import Module diff --git a/compiler/GHC/StgToCmm/Foreign.hs b/compiler/GHC/StgToCmm/Foreign.hs index 3ef0872c2e..62a948d13c 100644 --- a/compiler/GHC/StgToCmm/Foreign.hs +++ b/compiler/GHC/StgToCmm/Foreign.hs @@ -9,7 +9,7 @@ module GHC.StgToCmm.Foreign ( 
cgForeignCall, emitPrimCall, emitCCall, - emitForeignCall, -- For CmmParse + emitForeignCall, emitSaveThreadState, saveThreadState, emitLoadThreadState, @@ -28,14 +28,14 @@ import GHC.StgToCmm.Utils import GHC.StgToCmm.Closure import GHC.StgToCmm.Layout -import BlockId (newBlockId) -import Cmm -import CmmUtils -import MkGraph +import GHC.Cmm.BlockId (newBlockId) +import GHC.Cmm +import GHC.Cmm.Utils +import GHC.Cmm.Graph import Type import GHC.Types.RepType -import CLabel -import SMRep +import GHC.Cmm.CLabel +import GHC.Runtime.Layout import ForeignCall import DynFlags import Maybes @@ -202,7 +202,7 @@ emitPrimCall :: [CmmFormal] -> CallishMachOp -> [CmmActual] -> FCode () emitPrimCall res op args = void $ emitForeignCall PlayRisky res (PrimTarget op) args --- alternative entry point, used by CmmParse +-- alternative entry point, used by GHC.Cmm.Parser emitForeignCall :: Safety -> [CmmFormal] -- where to put the results @@ -257,9 +257,9 @@ load_target_into_temp other_target@(PrimTarget _) = -- Note [Register Parameter Passing]). -- -- However, we can't pattern-match on the expression here, because --- this is used in a loop by CmmParse, and testing the expression +-- this is used in a loop by GHC.Cmm.Parser, and testing the expression -- results in a black hole. So we always create a temporary, and rely --- on CmmSink to clean it up later. (Yuck, ToDo). The generated code +-- on GHC.Cmm.Sink to clean it up later. (Yuck, ToDo). The generated code -- ends up being the same, at least for the RTS .cmm code. -- maybe_assign_temp :: CmmExpr -> FCode CmmExpr diff --git a/compiler/GHC/StgToCmm/Heap.hs b/compiler/GHC/StgToCmm/Heap.hs index d36cad5788..492a4460f8 100644 --- a/compiler/GHC/StgToCmm/Heap.hs +++ b/compiler/GHC/StgToCmm/Heap.hs @@ -23,7 +23,7 @@ module GHC.StgToCmm.Heap ( import GhcPrelude hiding ((<*>)) import GHC.Stg.Syntax -import CLabel +import GHC.Cmm.CLabel import GHC.StgToCmm.Layout import GHC.StgToCmm.Utils import GHC.StgToCmm.Monad @@ -32,13 +32,13 @@ import GHC.StgToCmm.Ticky import GHC.StgToCmm.Closure import GHC.StgToCmm.Env -import MkGraph +import GHC.Cmm.Graph -import Hoopl.Label -import SMRep -import BlockId -import Cmm -import CmmUtils +import GHC.Cmm.Dataflow.Label +import GHC.Runtime.Layout +import GHC.Cmm.BlockId +import GHC.Cmm +import GHC.Cmm.Utils import CostCentre import IdInfo( CafInfo(..), mayHaveCafRefs ) import Id ( Id ) @@ -337,7 +337,7 @@ entryHeapCheck cl_info nodeSet arity args code Just (_, ArgGen _) -> False _otherwise -> True --- | lower-level version for CmmParse +-- | lower-level version for GHC.Cmm.Parser entryHeapCheck' :: Bool -- is a known function pattern -> CmmExpr -- expression for the closure pointer -> Int -- Arity -- not same as len args b/c of voids diff --git a/compiler/GHC/StgToCmm/Hpc.hs b/compiler/GHC/StgToCmm/Hpc.hs index e33d39245c..a3f4112206 100644 --- a/compiler/GHC/StgToCmm/Hpc.hs +++ b/compiler/GHC/StgToCmm/Hpc.hs @@ -12,11 +12,11 @@ import GhcPrelude import GHC.StgToCmm.Monad -import MkGraph -import CmmExpr -import CLabel +import GHC.Cmm.Graph +import GHC.Cmm.Expr +import GHC.Cmm.CLabel import Module -import CmmUtils +import GHC.Cmm.Utils import GHC.StgToCmm.Utils import HscTypes import DynFlags diff --git a/compiler/GHC/StgToCmm/Layout.hs b/compiler/GHC/StgToCmm/Layout.hs index 6d7825eb93..e78221de3a 100644 --- a/compiler/GHC/StgToCmm/Layout.hs +++ b/compiler/GHC/StgToCmm/Layout.hs @@ -41,13 +41,13 @@ import GHC.StgToCmm.Ticky import GHC.StgToCmm.Monad import GHC.StgToCmm.Utils -import MkGraph -import SMRep -import BlockId 
-import Cmm -import CmmUtils -import CmmInfo -import CLabel +import GHC.Cmm.Graph +import GHC.Runtime.Layout +import GHC.Cmm.BlockId +import GHC.Cmm +import GHC.Cmm.Utils +import GHC.Cmm.Info +import GHC.Cmm.CLabel import GHC.Stg.Syntax import Id import TyCon ( PrimRep(..), primRepSizeB ) diff --git a/compiler/GHC/StgToCmm/Monad.hs b/compiler/GHC/StgToCmm/Monad.hs index 716cbdab78..4f7d2e1220 100644 --- a/compiler/GHC/StgToCmm/Monad.hs +++ b/compiler/GHC/StgToCmm/Monad.hs @@ -61,14 +61,14 @@ module GHC.StgToCmm.Monad ( import GhcPrelude hiding( sequence, succ ) -import Cmm +import GHC.Cmm import GHC.StgToCmm.Closure import DynFlags -import Hoopl.Collections -import MkGraph -import BlockId -import CLabel -import SMRep +import GHC.Cmm.Dataflow.Collections +import GHC.Cmm.Graph as CmmGraph +import GHC.Cmm.BlockId +import GHC.Cmm.CLabel +import GHC.Runtime.Layout import Module import Id import VarEnv @@ -369,7 +369,7 @@ addCodeBlocksFrom :: CgState -> CgState -> CgState -- Add code blocks from the latter to the former -- (The cgs_stmts will often be empty, but not always; see codeOnly) s1 `addCodeBlocksFrom` s2 - = s1 { cgs_stmts = cgs_stmts s1 MkGraph.<*> cgs_stmts s2, + = s1 { cgs_stmts = cgs_stmts s1 CmmGraph.<*> cgs_stmts s2, cgs_tops = cgs_tops s1 `appOL` cgs_tops s2 } @@ -715,7 +715,7 @@ emitStore l r = emitCgStmt (CgStmt (CmmStore l r)) emit :: CmmAGraph -> FCode () emit ag = do { state <- getState - ; setState $ state { cgs_stmts = cgs_stmts state MkGraph.<*> ag } } + ; setState $ state { cgs_stmts = cgs_stmts state CmmGraph.<*> ag } } emitDecl :: CmmDecl -> FCode () emitDecl decl @@ -743,7 +743,7 @@ emitProcWithStackFrame conv mb_info lbl stk_args args (graph, tscope) True -- do layout = do { dflags <- getDynFlags ; let (offset, live, entry) = mkCallEntry dflags conv args stk_args - graph' = entry MkGraph.<*> graph + graph' = entry CmmGraph.<*> graph ; emitProc mb_info lbl live (graph', tscope) offset True } emitProcWithStackFrame _ _ _ _ _ _ _ = panic "emitProcWithStackFrame" diff --git a/compiler/GHC/StgToCmm/Prim.hs b/compiler/GHC/StgToCmm/Prim.hs index e469e15a5d..06264099df 100644 --- a/compiler/GHC/StgToCmm/Prim.hs +++ b/compiler/GHC/StgToCmm/Prim.hs @@ -36,17 +36,17 @@ import GHC.StgToCmm.Prof ( costCentreFrom ) import DynFlags import GHC.Platform import BasicTypes -import BlockId -import MkGraph +import GHC.Cmm.BlockId +import GHC.Cmm.Graph import GHC.Stg.Syntax -import Cmm +import GHC.Cmm import Module ( rtsUnitId ) import Type ( Type, tyConAppTyCon ) import TyCon -import CLabel -import CmmUtils +import GHC.Cmm.CLabel +import GHC.Cmm.Utils import PrimOp -import SMRep +import GHC.Runtime.Layout import FastString import Outputable import Util @@ -1525,7 +1525,7 @@ emitPrimOp dflags = \case -- `quot` and `rem` with constant divisor can be implemented with fast bit-ops -- (shift, .&.). -- - -- Currently we only support optimization (performed in CmmOpt) when the + -- Currently we only support optimization (performed in GHC.Cmm.Opt) when the -- constant is a power of 2. #9041 tracks the implementation of the general -- optimization. 
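For illustration only (not part of the patch): the bit-op rewrite described in the comment above, sketched for non-negative operands and a power-of-two divisor; the signed case needs extra care and is not shown.

    import Data.Bits ((.&.), shiftL, shiftR)

    -- For unsigned n and a divisor 2^k:
    --   n `quot` 2^k == n `shiftR` k
    --   n `rem`  2^k == n .&. (2^k - 1)
    quotRemPow2 :: Word -> Int -> (Word, Word)
    quotRemPow2 n k = (n `shiftR` k, n .&. ((1 `shiftL` k) - 1))

    -- quotRemPow2 29 3 == (3, 5), the same as 29 `quotRem` 8.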
-- diff --git a/compiler/GHC/StgToCmm/Prof.hs b/compiler/GHC/StgToCmm/Prof.hs index 4743b79622..cf5ce5acfb 100644 --- a/compiler/GHC/StgToCmm/Prof.hs +++ b/compiler/GHC/StgToCmm/Prof.hs @@ -28,12 +28,12 @@ import GhcPrelude import GHC.StgToCmm.Closure import GHC.StgToCmm.Utils import GHC.StgToCmm.Monad -import SMRep +import GHC.Runtime.Layout -import MkGraph -import Cmm -import CmmUtils -import CLabel +import GHC.Cmm.Graph +import GHC.Cmm +import GHC.Cmm.Utils +import GHC.Cmm.CLabel import CostCentre import DynFlags diff --git a/compiler/GHC/StgToCmm/Ticky.hs b/compiler/GHC/StgToCmm/Ticky.hs index 9eeb134cc9..6e2e2d3a6b 100644 --- a/compiler/GHC/StgToCmm/Ticky.hs +++ b/compiler/GHC/StgToCmm/Ticky.hs @@ -23,9 +23,9 @@ Some of the relevant source files: * some codeGen/ modules import this one - * this module imports cmm/CLabel.hs to manage labels + * this module imports GHC.Cmm.CLabel to manage labels - * cmm/CmmParse.y expands some macros using generators defined in + * GHC.Cmm.Parser expands some macros using generators defined in this module * includes/stg/Ticky.h declares all of the global counters @@ -112,11 +112,11 @@ import GHC.StgToCmm.Utils import GHC.StgToCmm.Monad import GHC.Stg.Syntax -import CmmExpr -import MkGraph -import CmmUtils -import CLabel -import SMRep +import GHC.Cmm.Expr +import GHC.Cmm.Graph +import GHC.Cmm.Utils +import GHC.Cmm.CLabel +import GHC.Runtime.Layout import Module import Name @@ -517,7 +517,7 @@ tickyAllocHeap genuine hp -------------------------------------------------------------------------------- --- these three are only called from CmmParse.y (ie ultimately from the RTS) +-- these three are only called from GHC.Cmm.Parser (ie ultimately from the RTS) -- the units are bytes diff --git a/compiler/GHC/StgToCmm/Utils.hs b/compiler/GHC/StgToCmm/Utils.hs index 34fb93468c..7a784ea85c 100644 --- a/compiler/GHC/StgToCmm/Utils.hs +++ b/compiler/GHC/StgToCmm/Utils.hs @@ -52,20 +52,20 @@ import GhcPrelude import GHC.StgToCmm.Monad import GHC.StgToCmm.Closure -import Cmm -import BlockId -import MkGraph +import GHC.Cmm +import GHC.Cmm.BlockId +import GHC.Cmm.Graph as CmmGraph import GHC.Platform.Regs -import CLabel -import CmmUtils -import CmmSwitch +import GHC.Cmm.CLabel +import GHC.Cmm.Utils +import GHC.Cmm.Switch import GHC.StgToCmm.CgUtils import ForeignCall import IdInfo import Type import TyCon -import SMRep +import GHC.Runtime.Layout import Module import Literal import Digraph @@ -458,8 +458,8 @@ mk_discrete_switch _ _tag_expr [(_tag,lbl)] Nothing _ -- In that situation we can be sure the (:) case -- can't happen, so no need to test --- SOMETHING MORE COMPLICATED: defer to CmmImplementSwitchPlans --- See Note [Cmm Switches, the general plan] in CmmSwitch +-- SOMETHING MORE COMPLICATED: defer to GHC.Cmm.Switch.Implement +-- See Note [Cmm Switches, the general plan] in GHC.Cmm.Switch mk_discrete_switch signed tag_expr branches mb_deflt range = mkSwitch tag_expr $ mkSwitchTargets signed range mb_deflt (M.fromList branches) @@ -568,7 +568,7 @@ label_code :: BlockId -> CmmAGraphScoped -> FCode BlockId -- and returns L label_code join_lbl (code,tsc) = do lbl <- newBlockId - emitOutOfLine lbl (code MkGraph.<*> mkBranch join_lbl, tsc) + emitOutOfLine lbl (code CmmGraph.<*> mkBranch join_lbl, tsc) return lbl -------------- diff --git a/compiler/basicTypes/Unique.hs b/compiler/basicTypes/Unique.hs index 4a646aa70a..f14f22d625 100644 --- a/compiler/basicTypes/Unique.hs +++ b/compiler/basicTypes/Unique.hs @@ -276,7 +276,7 @@ The alternatives are: is controlled. 
See Module.ModuleEnv 3) Change the algorithm to use nonDetCmpUnique and document why it's still deterministic - 4) Use TrieMap as done in CmmCommonBlockElim.groupByLabel + 4) Use TrieMap as done in GHC.Cmm.CommonBlockElim.groupByLabel -} instance Eq Unique where diff --git a/compiler/cmm/Bitmap.hs b/compiler/cmm/Bitmap.hs deleted file mode 100644 index 42acc5f3cd..0000000000 --- a/compiler/cmm/Bitmap.hs +++ /dev/null @@ -1,134 +0,0 @@ -{-# LANGUAGE BangPatterns #-} - --- --- (c) The University of Glasgow 2003-2006 --- - --- Functions for constructing bitmaps, which are used in various --- places in generated code (stack frame liveness masks, function --- argument liveness masks, SRT bitmaps). - -module Bitmap ( - Bitmap, mkBitmap, - intsToBitmap, intsToReverseBitmap, - mAX_SMALL_BITMAP_SIZE, - seqBitmap, - ) where - -import GhcPrelude - -import SMRep -import DynFlags -import Util - -import Data.Bits - -{-| -A bitmap represented by a sequence of 'StgWord's on the /target/ -architecture. These are used for bitmaps in info tables and other -generated code which need to be emitted as sequences of StgWords. --} -type Bitmap = [StgWord] - --- | Make a bitmap from a sequence of bits -mkBitmap :: DynFlags -> [Bool] -> Bitmap -mkBitmap _ [] = [] -mkBitmap dflags stuff = chunkToBitmap dflags chunk : mkBitmap dflags rest - where (chunk, rest) = splitAt (wORD_SIZE_IN_BITS dflags) stuff - -chunkToBitmap :: DynFlags -> [Bool] -> StgWord -chunkToBitmap dflags chunk = - foldl' (.|.) (toStgWord dflags 0) [ oneAt n | (True,n) <- zip chunk [0..] ] - where - oneAt :: Int -> StgWord - oneAt i = toStgWord dflags 1 `shiftL` i - --- | Make a bitmap where the slots specified are the /ones/ in the bitmap. --- eg. @[0,1,3], size 4 ==> 0xb@. --- --- The list of @Int@s /must/ be already sorted. -intsToBitmap :: DynFlags - -> Int -- ^ size in bits - -> [Int] -- ^ sorted indices of ones - -> Bitmap -intsToBitmap dflags size = go 0 - where - word_sz = wORD_SIZE_IN_BITS dflags - oneAt :: Int -> StgWord - oneAt i = toStgWord dflags 1 `shiftL` i - - -- It is important that we maintain strictness here. - -- See Note [Strictness when building Bitmaps]. - go :: Int -> [Int] -> Bitmap - go !pos slots - | size <= pos = [] - | otherwise = - (foldl' (.|.) (toStgWord dflags 0) (map (\i->oneAt (i - pos)) these)) : - go (pos + word_sz) rest - where - (these,rest) = span (< (pos + word_sz)) slots - --- | Make a bitmap where the slots specified are the /zeros/ in the bitmap. --- eg. @[0,1,3], size 4 ==> 0x4@ (we leave any bits outside the size as zero, --- just to make the bitmap easier to read). --- --- The list of @Int@s /must/ be already sorted and duplicate-free. -intsToReverseBitmap :: DynFlags - -> Int -- ^ size in bits - -> [Int] -- ^ sorted indices of zeros free of duplicates - -> Bitmap -intsToReverseBitmap dflags size = go 0 - where - word_sz = wORD_SIZE_IN_BITS dflags - oneAt :: Int -> StgWord - oneAt i = toStgWord dflags 1 `shiftL` i - - -- It is important that we maintain strictness here. - -- See Note [Strictness when building Bitmaps]. 
- go :: Int -> [Int] -> Bitmap - go !pos slots - | size <= pos = [] - | otherwise = - (foldl' xor (toStgWord dflags init) (map (\i->oneAt (i - pos)) these)) : - go (pos + word_sz) rest - where - (these,rest) = span (< (pos + word_sz)) slots - remain = size - pos - init - | remain >= word_sz = -1 - | otherwise = (1 `shiftL` remain) - 1 - -{- - -Note [Strictness when building Bitmaps] -======================================== - -One of the places where @Bitmap@ is used is in in building Static Reference -Tables (SRTs) (in @CmmBuildInfoTables.procpointSRT@). In #7450 it was noticed -that some test cases (particularly those whose C-- have large numbers of CAFs) -produced large quantities of allocations from this function. - -The source traced back to 'intsToBitmap', which was lazily subtracting the word -size from the elements of the tail of the @slots@ list and recursively invoking -itself with the result. This resulted in large numbers of subtraction thunks -being built up. Here we take care to avoid passing new thunks to the recursive -call. Instead we pass the unmodified tail along with an explicit position -accumulator, which get subtracted in the fold when we compute the Word. - --} - -{- | -Magic number, must agree with @BITMAP_BITS_SHIFT@ in InfoTables.h. -Some kinds of bitmap pack a size\/bitmap into a single word if -possible, or fall back to an external pointer when the bitmap is too -large. This value represents the largest size of bitmap that can be -packed into a single word. --} -mAX_SMALL_BITMAP_SIZE :: DynFlags -> Int -mAX_SMALL_BITMAP_SIZE dflags - | wORD_SIZE dflags == 4 = 27 - | otherwise = 58 - -seqBitmap :: Bitmap -> a -> a -seqBitmap = seqList - diff --git a/compiler/cmm/BlockId.hs b/compiler/cmm/BlockId.hs deleted file mode 100644 index 4f4e0e8c53..0000000000 --- a/compiler/cmm/BlockId.hs +++ /dev/null @@ -1,46 +0,0 @@ -{-# LANGUAGE TypeSynonymInstances #-} -{-# OPTIONS_GHC -fno-warn-orphans #-} - -{- BlockId module should probably go away completely, being superseded by Label -} -module BlockId - ( BlockId, mkBlockId -- ToDo: BlockId should be abstract, but it isn't yet - , newBlockId - , blockLbl, infoTblLbl - ) where - -import GhcPrelude - -import CLabel -import IdInfo -import Name -import Unique -import UniqSupply - -import Hoopl.Label (Label, mkHooplLabel) - ----------------------------------------------------------------- ---- Block Ids, their environments, and their sets - -{- Note [Unique BlockId] -~~~~~~~~~~~~~~~~~~~~~~~~ -Although a 'BlockId' is a local label, for reasons of implementation, -'BlockId's must be unique within an entire compilation unit. The reason -is that each local label is mapped to an assembly-language label, and in -most assembly languages allow, a label is visible throughout the entire -compilation unit in which it appears. 
--} - -type BlockId = Label - -mkBlockId :: Unique -> BlockId -mkBlockId unique = mkHooplLabel $ getKey unique - -newBlockId :: MonadUnique m => m BlockId -newBlockId = mkBlockId <$> getUniqueM - -blockLbl :: BlockId -> CLabel -blockLbl label = mkLocalBlockLabel (getUnique label) - -infoTblLbl :: BlockId -> CLabel -infoTblLbl label - = mkBlockInfoTableLabel (mkFCallName (getUnique label) "block") NoCafRefs diff --git a/compiler/cmm/BlockId.hs-boot b/compiler/cmm/BlockId.hs-boot deleted file mode 100644 index 3ad4141184..0000000000 --- a/compiler/cmm/BlockId.hs-boot +++ /dev/null @@ -1,8 +0,0 @@ -module BlockId (BlockId, mkBlockId) where - -import Hoopl.Label (Label) -import Unique (Unique) - -type BlockId = Label - -mkBlockId :: Unique -> BlockId diff --git a/compiler/cmm/CLabel.hs b/compiler/cmm/CLabel.hs deleted file mode 100644 index fb2f06716d..0000000000 --- a/compiler/cmm/CLabel.hs +++ /dev/null @@ -1,1571 +0,0 @@ ------------------------------------------------------------------------------ --- --- Object-file symbols (called CLabel for histerical raisins). --- --- (c) The University of Glasgow 2004-2006 --- ------------------------------------------------------------------------------ - -{-# LANGUAGE CPP #-} - -module CLabel ( - CLabel, -- abstract type - ForeignLabelSource(..), - pprDebugCLabel, - - mkClosureLabel, - mkSRTLabel, - mkInfoTableLabel, - mkEntryLabel, - mkRednCountsLabel, - mkConInfoTableLabel, - mkApEntryLabel, - mkApInfoTableLabel, - mkClosureTableLabel, - mkBytesLabel, - - mkLocalBlockLabel, - mkLocalClosureLabel, - mkLocalInfoTableLabel, - mkLocalClosureTableLabel, - - mkBlockInfoTableLabel, - - mkBitmapLabel, - mkStringLitLabel, - - mkAsmTempLabel, - mkAsmTempDerivedLabel, - mkAsmTempEndLabel, - mkAsmTempDieLabel, - - mkDirty_MUT_VAR_Label, - mkNonmovingWriteBarrierEnabledLabel, - mkUpdInfoLabel, - mkBHUpdInfoLabel, - mkIndStaticInfoLabel, - mkMainCapabilityLabel, - mkMAP_FROZEN_CLEAN_infoLabel, - mkMAP_FROZEN_DIRTY_infoLabel, - mkMAP_DIRTY_infoLabel, - mkSMAP_FROZEN_CLEAN_infoLabel, - mkSMAP_FROZEN_DIRTY_infoLabel, - mkSMAP_DIRTY_infoLabel, - mkBadAlignmentLabel, - mkArrWords_infoLabel, - mkSRTInfoLabel, - - mkTopTickyCtrLabel, - mkCAFBlackHoleInfoTableLabel, - mkRtsPrimOpLabel, - mkRtsSlowFastTickyCtrLabel, - - mkSelectorInfoLabel, - mkSelectorEntryLabel, - - mkCmmInfoLabel, - mkCmmEntryLabel, - mkCmmRetInfoLabel, - mkCmmRetLabel, - mkCmmCodeLabel, - mkCmmDataLabel, - mkCmmClosureLabel, - - mkRtsApFastLabel, - - mkPrimCallLabel, - - mkForeignLabel, - addLabelSize, - - foreignLabelStdcallInfo, - isBytesLabel, - isForeignLabel, - isSomeRODataLabel, - isStaticClosureLabel, - mkCCLabel, mkCCSLabel, - - DynamicLinkerLabelInfo(..), - mkDynamicLinkerLabel, - dynamicLinkerLabelInfo, - - mkPicBaseLabel, - mkDeadStripPreventer, - - mkHpcTicksLabel, - - -- * Predicates - hasCAF, - needsCDecl, maybeLocalBlockLabel, externallyVisibleCLabel, - isMathFun, - isCFunctionLabel, isGcPtrLabel, labelDynamic, - isLocalCLabel, mayRedirectTo, - - -- * Conversions - toClosureLbl, toSlowEntryLbl, toEntryLbl, toInfoLbl, hasHaskellName, - - pprCLabel, - isInfoTableLabel, - isConInfoTableLabel - ) where - -#include "HsVersions.h" - -import GhcPrelude - -import IdInfo -import BasicTypes -import {-# SOURCE #-} BlockId (BlockId, mkBlockId) -import Packages -import Module -import Name -import Unique -import PrimOp -import CostCentre -import Outputable -import FastString -import DynFlags -import GHC.Platform -import UniqSet -import Util -import PprCore ( {- instances -} ) - --- 
----------------------------------------------------------------------------- --- The CLabel type - -{- | - 'CLabel' is an abstract type that supports the following operations: - - - Pretty printing - - - In a C file, does it need to be declared before use? (i.e. is it - guaranteed to be already in scope in the places we need to refer to it?) - - - If it needs to be declared, what type (code or data) should it be - declared to have? - - - Is it visible outside this object file or not? - - - Is it "dynamic" (see details below) - - - Eq and Ord, so that we can make sets of CLabels (currently only - used in outputting C as far as I can tell, to avoid generating - more than one declaration for any given label). - - - Converting an info table label into an entry label. - - CLabel usage is a bit messy in GHC as they are used in a number of different - contexts: - - - By the C-- AST to identify labels - - - By the unregisterised C code generator ("PprC") for naming functions (hence - the name 'CLabel') - - - By the native and LLVM code generators to identify labels - - For extra fun, each of these uses a slightly different subset of constructors - (e.g. 'AsmTempLabel' and 'AsmTempDerivedLabel' are used only in the NCG and - LLVM backends). - - In general, we use 'IdLabel' to represent Haskell things early in the - pipeline. However, later optimization passes will often represent blocks they - create with 'LocalBlockLabel' where there is no obvious 'Name' to hang off the - label. --} - -data CLabel - = -- | A label related to the definition of a particular Id or Con in a .hs file. - IdLabel - Name - CafInfo - IdLabelInfo -- encodes the suffix of the label - - -- | A label from a .cmm file that is not associated with a .hs level Id. - | CmmLabel - UnitId -- what package the label belongs to. - FastString -- identifier giving the prefix of the label - CmmLabelInfo -- encodes the suffix of the label - - -- | A label with a baked-in \/ algorithmically generated name that definitely - -- comes from the RTS. The code for it must compile into libHSrts.a \/ libHSrts.so - -- If it doesn't have an algorithmically generated name then use a CmmLabel - -- instead and give it an appropriate UnitId argument. - | RtsLabel - RtsLabelInfo - - -- | A label associated with a block. These aren't visible outside of the - -- compilation unit in which they are defined. These are generally used to - -- name blocks produced by Cmm-to-Cmm passes and the native code generator, - -- where we don't have a 'Name' to associate the label to and therefore can't - -- use 'IdLabel'. - | LocalBlockLabel - {-# UNPACK #-} !Unique - - -- | A 'C' (or otherwise foreign) label. - -- - | ForeignLabel - FastString -- name of the imported label. - - (Maybe Int) -- possible '@n' suffix for stdcall functions - -- When generating C, the '@n' suffix is omitted, but when - -- generating assembler we must add it to the label. - - ForeignLabelSource -- what package the foreign label is in. - - FunctionOrData - - -- | Local temporary label used for native (or LLVM) code generation; must not - -- appear outside of these contexts. Use primarily for debug information - | AsmTempLabel - {-# UNPACK #-} !Unique - - -- | A label \"derived\" from another 'CLabel' by the addition of a suffix. - -- Must not occur outside of the NCG or LLVM code generators. 
- | AsmTempDerivedLabel - CLabel - FastString -- suffix - - | StringLitLabel - {-# UNPACK #-} !Unique - - | CC_Label CostCentre - | CCS_Label CostCentreStack - - - -- | These labels are generated and used inside the NCG only. - -- They are special variants of a label used for dynamic linking - -- see module PositionIndependentCode for details. - | DynamicLinkerLabel DynamicLinkerLabelInfo CLabel - - -- | This label is generated and used inside the NCG only. - -- It is used as a base for PIC calculations on some platforms. - -- It takes the form of a local numeric assembler label '1'; and - -- is pretty-printed as 1b, referring to the previous definition - -- of 1: in the assembler source file. - | PicBaseLabel - - -- | A label before an info table to prevent excessive dead-stripping on darwin - | DeadStripPreventer CLabel - - - -- | Per-module table of tick locations - | HpcTicksLabel Module - - -- | Static reference table - | SRTLabel - {-# UNPACK #-} !Unique - - -- | A bitmap (function or case return) - | LargeBitmapLabel - {-# UNPACK #-} !Unique - - deriving Eq - --- This is laborious, but necessary. We can't derive Ord because --- Unique doesn't have an Ord instance. Note nonDetCmpUnique in the --- implementation. See Note [No Ord for Unique] --- This is non-deterministic but we do not currently support deterministic --- code-generation. See Note [Unique Determinism and code generation] -instance Ord CLabel where - compare (IdLabel a1 b1 c1) (IdLabel a2 b2 c2) = - compare a1 a2 `thenCmp` - compare b1 b2 `thenCmp` - compare c1 c2 - compare (CmmLabel a1 b1 c1) (CmmLabel a2 b2 c2) = - compare a1 a2 `thenCmp` - compare b1 b2 `thenCmp` - compare c1 c2 - compare (RtsLabel a1) (RtsLabel a2) = compare a1 a2 - compare (LocalBlockLabel u1) (LocalBlockLabel u2) = nonDetCmpUnique u1 u2 - compare (ForeignLabel a1 b1 c1 d1) (ForeignLabel a2 b2 c2 d2) = - compare a1 a2 `thenCmp` - compare b1 b2 `thenCmp` - compare c1 c2 `thenCmp` - compare d1 d2 - compare (AsmTempLabel u1) (AsmTempLabel u2) = nonDetCmpUnique u1 u2 - compare (AsmTempDerivedLabel a1 b1) (AsmTempDerivedLabel a2 b2) = - compare a1 a2 `thenCmp` - compare b1 b2 - compare (StringLitLabel u1) (StringLitLabel u2) = - nonDetCmpUnique u1 u2 - compare (CC_Label a1) (CC_Label a2) = - compare a1 a2 - compare (CCS_Label a1) (CCS_Label a2) = - compare a1 a2 - compare (DynamicLinkerLabel a1 b1) (DynamicLinkerLabel a2 b2) = - compare a1 a2 `thenCmp` - compare b1 b2 - compare PicBaseLabel PicBaseLabel = EQ - compare (DeadStripPreventer a1) (DeadStripPreventer a2) = - compare a1 a2 - compare (HpcTicksLabel a1) (HpcTicksLabel a2) = - compare a1 a2 - compare (SRTLabel u1) (SRTLabel u2) = - nonDetCmpUnique u1 u2 - compare (LargeBitmapLabel u1) (LargeBitmapLabel u2) = - nonDetCmpUnique u1 u2 - compare IdLabel{} _ = LT - compare _ IdLabel{} = GT - compare CmmLabel{} _ = LT - compare _ CmmLabel{} = GT - compare RtsLabel{} _ = LT - compare _ RtsLabel{} = GT - compare LocalBlockLabel{} _ = LT - compare _ LocalBlockLabel{} = GT - compare ForeignLabel{} _ = LT - compare _ ForeignLabel{} = GT - compare AsmTempLabel{} _ = LT - compare _ AsmTempLabel{} = GT - compare AsmTempDerivedLabel{} _ = LT - compare _ AsmTempDerivedLabel{} = GT - compare StringLitLabel{} _ = LT - compare _ StringLitLabel{} = GT - compare CC_Label{} _ = LT - compare _ CC_Label{} = GT - compare CCS_Label{} _ = LT - compare _ CCS_Label{} = GT - compare DynamicLinkerLabel{} _ = LT - compare _ DynamicLinkerLabel{} = GT - compare PicBaseLabel{} _ = LT - compare _ PicBaseLabel{} = GT - compare 
DeadStripPreventer{} _ = LT - compare _ DeadStripPreventer{} = GT - compare HpcTicksLabel{} _ = LT - compare _ HpcTicksLabel{} = GT - compare SRTLabel{} _ = LT - compare _ SRTLabel{} = GT - --- | Record where a foreign label is stored. -data ForeignLabelSource - - -- | Label is in a named package - = ForeignLabelInPackage UnitId - - -- | Label is in some external, system package that doesn't also - -- contain compiled Haskell code, and is not associated with any .hi files. - -- We don't have to worry about Haskell code being inlined from - -- external packages. It is safe to treat the RTS package as "external". - | ForeignLabelInExternalPackage - - -- | Label is in the package currently being compiled. - -- This is only used for creating hacky tmp labels during code generation. - -- Don't use it in any code that might be inlined across a package boundary - -- (ie, core code) else the information will be wrong relative to the - -- destination module. - | ForeignLabelInThisPackage - - deriving (Eq, Ord) - - --- | For debugging problems with the CLabel representation. --- We can't make a Show instance for CLabel because lots of its components don't have instances. --- The regular Outputable instance only shows the label name, and not its other info. --- -pprDebugCLabel :: CLabel -> SDoc -pprDebugCLabel lbl - = case lbl of - IdLabel _ _ info-> ppr lbl <> (parens $ text "IdLabel" - <> whenPprDebug (text ":" <> text (show info))) - CmmLabel pkg _name _info - -> ppr lbl <> (parens $ text "CmmLabel" <+> ppr pkg) - - RtsLabel{} -> ppr lbl <> (parens $ text "RtsLabel") - - ForeignLabel _name mSuffix src funOrData - -> ppr lbl <> (parens $ text "ForeignLabel" - <+> ppr mSuffix - <+> ppr src - <+> ppr funOrData) - - _ -> ppr lbl <> (parens $ text "other CLabel") - - -data IdLabelInfo - = Closure -- ^ Label for closure - | InfoTable -- ^ Info tables for closures; always read-only - | Entry -- ^ Entry point - | Slow -- ^ Slow entry point - - | LocalInfoTable -- ^ Like InfoTable but not externally visible - | LocalEntry -- ^ Like Entry but not externally visible - - | RednCounts -- ^ Label of place to keep Ticky-ticky info for this Id - - | ConEntry -- ^ Constructor entry point - | ConInfoTable -- ^ Corresponding info table - - | ClosureTable -- ^ Table of closures for Enum tycons - - | Bytes -- ^ Content of a string literal. See - -- Note [Bytes label]. - | BlockInfoTable -- ^ Like LocalInfoTable but for a proc-point block - -- instead of a closure entry-point. - -- See Note [Proc-point local block entry-point]. - - deriving (Eq, Ord, Show) - - -data RtsLabelInfo - = RtsSelectorInfoTable Bool{-updatable-} Int{-offset-} -- ^ Selector thunks - | RtsSelectorEntry Bool{-updatable-} Int{-offset-} - - | RtsApInfoTable Bool{-updatable-} Int{-arity-} -- ^ AP thunks - | RtsApEntry Bool{-updatable-} Int{-arity-} - - | RtsPrimOp PrimOp - | RtsApFast FastString -- ^ _fast versions of generic apply - | RtsSlowFastTickyCtr String - - deriving (Eq, Ord) - -- NOTE: Eq on PtrString compares the pointer only, so this isn't - -- a real equality. - - --- | What type of Cmm label we're dealing with. --- Determines the suffix appended to the name when a CLabel.CmmLabel --- is pretty printed. 
-data CmmLabelInfo - = CmmInfo -- ^ misc rts info tables, suffix _info - | CmmEntry -- ^ misc rts entry points, suffix _entry - | CmmRetInfo -- ^ misc rts ret info tables, suffix _info - | CmmRet -- ^ misc rts return points, suffix _ret - | CmmData -- ^ misc rts data bits, eg CHARLIKE_closure - | CmmCode -- ^ misc rts code - | CmmClosure -- ^ closures eg CHARLIKE_closure - | CmmPrimCall -- ^ a prim call to some hand written Cmm code - deriving (Eq, Ord) - -data DynamicLinkerLabelInfo - = CodeStub -- MachO: Lfoo$stub, ELF: foo@plt - | SymbolPtr -- MachO: Lfoo$non_lazy_ptr, Windows: __imp_foo - | GotSymbolPtr -- ELF: foo@got - | GotSymbolOffset -- ELF: foo@gotoff - - deriving (Eq, Ord) - - --- ----------------------------------------------------------------------------- --- Constructing CLabels --- ----------------------------------------------------------------------------- - --- Constructing IdLabels --- These are always local: - -mkSRTLabel :: Unique -> CLabel -mkSRTLabel u = SRTLabel u - -mkRednCountsLabel :: Name -> CLabel -mkRednCountsLabel name = - IdLabel name NoCafRefs RednCounts -- Note [ticky for LNE] - --- These have local & (possibly) external variants: -mkLocalClosureLabel :: Name -> CafInfo -> CLabel -mkLocalInfoTableLabel :: Name -> CafInfo -> CLabel -mkLocalClosureTableLabel :: Name -> CafInfo -> CLabel -mkLocalClosureLabel name c = IdLabel name c Closure -mkLocalInfoTableLabel name c = IdLabel name c LocalInfoTable -mkLocalClosureTableLabel name c = IdLabel name c ClosureTable - -mkClosureLabel :: Name -> CafInfo -> CLabel -mkInfoTableLabel :: Name -> CafInfo -> CLabel -mkEntryLabel :: Name -> CafInfo -> CLabel -mkClosureTableLabel :: Name -> CafInfo -> CLabel -mkConInfoTableLabel :: Name -> CafInfo -> CLabel -mkBytesLabel :: Name -> CLabel -mkClosureLabel name c = IdLabel name c Closure -mkInfoTableLabel name c = IdLabel name c InfoTable -mkEntryLabel name c = IdLabel name c Entry -mkClosureTableLabel name c = IdLabel name c ClosureTable -mkConInfoTableLabel name c = IdLabel name c ConInfoTable -mkBytesLabel name = IdLabel name NoCafRefs Bytes - -mkBlockInfoTableLabel :: Name -> CafInfo -> CLabel -mkBlockInfoTableLabel name c = IdLabel name c BlockInfoTable - -- See Note [Proc-point local block entry-point]. 
- --- Constructing Cmm Labels -mkDirty_MUT_VAR_Label, - mkNonmovingWriteBarrierEnabledLabel, - mkUpdInfoLabel, - mkBHUpdInfoLabel, mkIndStaticInfoLabel, mkMainCapabilityLabel, - mkMAP_FROZEN_CLEAN_infoLabel, mkMAP_FROZEN_DIRTY_infoLabel, - mkMAP_DIRTY_infoLabel, - mkArrWords_infoLabel, - mkTopTickyCtrLabel, - mkCAFBlackHoleInfoTableLabel, - mkSMAP_FROZEN_CLEAN_infoLabel, mkSMAP_FROZEN_DIRTY_infoLabel, - mkSMAP_DIRTY_infoLabel, mkBadAlignmentLabel :: CLabel -mkDirty_MUT_VAR_Label = mkForeignLabel (fsLit "dirty_MUT_VAR") Nothing ForeignLabelInExternalPackage IsFunction -mkNonmovingWriteBarrierEnabledLabel - = CmmLabel rtsUnitId (fsLit "nonmoving_write_barrier_enabled") CmmData -mkUpdInfoLabel = CmmLabel rtsUnitId (fsLit "stg_upd_frame") CmmInfo -mkBHUpdInfoLabel = CmmLabel rtsUnitId (fsLit "stg_bh_upd_frame" ) CmmInfo -mkIndStaticInfoLabel = CmmLabel rtsUnitId (fsLit "stg_IND_STATIC") CmmInfo -mkMainCapabilityLabel = CmmLabel rtsUnitId (fsLit "MainCapability") CmmData -mkMAP_FROZEN_CLEAN_infoLabel = CmmLabel rtsUnitId (fsLit "stg_MUT_ARR_PTRS_FROZEN_CLEAN") CmmInfo -mkMAP_FROZEN_DIRTY_infoLabel = CmmLabel rtsUnitId (fsLit "stg_MUT_ARR_PTRS_FROZEN_DIRTY") CmmInfo -mkMAP_DIRTY_infoLabel = CmmLabel rtsUnitId (fsLit "stg_MUT_ARR_PTRS_DIRTY") CmmInfo -mkTopTickyCtrLabel = CmmLabel rtsUnitId (fsLit "top_ct") CmmData -mkCAFBlackHoleInfoTableLabel = CmmLabel rtsUnitId (fsLit "stg_CAF_BLACKHOLE") CmmInfo -mkArrWords_infoLabel = CmmLabel rtsUnitId (fsLit "stg_ARR_WORDS") CmmInfo -mkSMAP_FROZEN_CLEAN_infoLabel = CmmLabel rtsUnitId (fsLit "stg_SMALL_MUT_ARR_PTRS_FROZEN_CLEAN") CmmInfo -mkSMAP_FROZEN_DIRTY_infoLabel = CmmLabel rtsUnitId (fsLit "stg_SMALL_MUT_ARR_PTRS_FROZEN_DIRTY") CmmInfo -mkSMAP_DIRTY_infoLabel = CmmLabel rtsUnitId (fsLit "stg_SMALL_MUT_ARR_PTRS_DIRTY") CmmInfo -mkBadAlignmentLabel = CmmLabel rtsUnitId (fsLit "stg_badAlignment") CmmEntry - -mkSRTInfoLabel :: Int -> CLabel -mkSRTInfoLabel n = CmmLabel rtsUnitId lbl CmmInfo - where - lbl = - case n of - 1 -> fsLit "stg_SRT_1" - 2 -> fsLit "stg_SRT_2" - 3 -> fsLit "stg_SRT_3" - 4 -> fsLit "stg_SRT_4" - 5 -> fsLit "stg_SRT_5" - 6 -> fsLit "stg_SRT_6" - 7 -> fsLit "stg_SRT_7" - 8 -> fsLit "stg_SRT_8" - 9 -> fsLit "stg_SRT_9" - 10 -> fsLit "stg_SRT_10" - 11 -> fsLit "stg_SRT_11" - 12 -> fsLit "stg_SRT_12" - 13 -> fsLit "stg_SRT_13" - 14 -> fsLit "stg_SRT_14" - 15 -> fsLit "stg_SRT_15" - 16 -> fsLit "stg_SRT_16" - _ -> panic "mkSRTInfoLabel" - ------ -mkCmmInfoLabel, mkCmmEntryLabel, mkCmmRetInfoLabel, mkCmmRetLabel, - mkCmmCodeLabel, mkCmmDataLabel, mkCmmClosureLabel - :: UnitId -> FastString -> CLabel - -mkCmmInfoLabel pkg str = CmmLabel pkg str CmmInfo -mkCmmEntryLabel pkg str = CmmLabel pkg str CmmEntry -mkCmmRetInfoLabel pkg str = CmmLabel pkg str CmmRetInfo -mkCmmRetLabel pkg str = CmmLabel pkg str CmmRet -mkCmmCodeLabel pkg str = CmmLabel pkg str CmmCode -mkCmmDataLabel pkg str = CmmLabel pkg str CmmData -mkCmmClosureLabel pkg str = CmmLabel pkg str CmmClosure - -mkLocalBlockLabel :: Unique -> CLabel -mkLocalBlockLabel u = LocalBlockLabel u - --- Constructing RtsLabels -mkRtsPrimOpLabel :: PrimOp -> CLabel -mkRtsPrimOpLabel primop = RtsLabel (RtsPrimOp primop) - -mkSelectorInfoLabel :: Bool -> Int -> CLabel -mkSelectorEntryLabel :: Bool -> Int -> CLabel -mkSelectorInfoLabel upd off = RtsLabel (RtsSelectorInfoTable upd off) -mkSelectorEntryLabel upd off = RtsLabel (RtsSelectorEntry upd off) - -mkApInfoTableLabel :: Bool -> Int -> CLabel -mkApEntryLabel :: Bool -> Int -> CLabel -mkApInfoTableLabel upd off = RtsLabel (RtsApInfoTable upd 
off) -mkApEntryLabel upd off = RtsLabel (RtsApEntry upd off) - - --- A call to some primitive hand written Cmm code -mkPrimCallLabel :: PrimCall -> CLabel -mkPrimCallLabel (PrimCall str pkg) - = CmmLabel pkg str CmmPrimCall - - --- Constructing ForeignLabels - --- | Make a foreign label -mkForeignLabel - :: FastString -- name - -> Maybe Int -- size prefix - -> ForeignLabelSource -- what package it's in - -> FunctionOrData - -> CLabel - -mkForeignLabel = ForeignLabel - - --- | Update the label size field in a ForeignLabel -addLabelSize :: CLabel -> Int -> CLabel -addLabelSize (ForeignLabel str _ src fod) sz - = ForeignLabel str (Just sz) src fod -addLabelSize label _ - = label - --- | Whether label is a top-level string literal -isBytesLabel :: CLabel -> Bool -isBytesLabel (IdLabel _ _ Bytes) = True -isBytesLabel _lbl = False - --- | Whether label is a non-haskell label (defined in C code) -isForeignLabel :: CLabel -> Bool -isForeignLabel (ForeignLabel _ _ _ _) = True -isForeignLabel _lbl = False - --- | Whether label is a static closure label (can come from haskell or cmm) -isStaticClosureLabel :: CLabel -> Bool --- Closure defined in haskell (.hs) -isStaticClosureLabel (IdLabel _ _ Closure) = True --- Closure defined in cmm -isStaticClosureLabel (CmmLabel _ _ CmmClosure) = True -isStaticClosureLabel _lbl = False - --- | Whether label is a .rodata label -isSomeRODataLabel :: CLabel -> Bool --- info table defined in haskell (.hs) -isSomeRODataLabel (IdLabel _ _ ClosureTable) = True -isSomeRODataLabel (IdLabel _ _ ConInfoTable) = True -isSomeRODataLabel (IdLabel _ _ InfoTable) = True -isSomeRODataLabel (IdLabel _ _ LocalInfoTable) = True -isSomeRODataLabel (IdLabel _ _ BlockInfoTable) = True --- info table defined in cmm (.cmm) -isSomeRODataLabel (CmmLabel _ _ CmmInfo) = True -isSomeRODataLabel _lbl = False - --- | Whether label is points to some kind of info table -isInfoTableLabel :: CLabel -> Bool -isInfoTableLabel (IdLabel _ _ InfoTable) = True -isInfoTableLabel (IdLabel _ _ LocalInfoTable) = True -isInfoTableLabel (IdLabel _ _ ConInfoTable) = True -isInfoTableLabel (IdLabel _ _ BlockInfoTable) = True -isInfoTableLabel _ = False - --- | Whether label is points to constructor info table -isConInfoTableLabel :: CLabel -> Bool -isConInfoTableLabel (IdLabel _ _ ConInfoTable) = True -isConInfoTableLabel _ = False - --- | Get the label size field from a ForeignLabel -foreignLabelStdcallInfo :: CLabel -> Maybe Int -foreignLabelStdcallInfo (ForeignLabel _ info _ _) = info -foreignLabelStdcallInfo _lbl = Nothing - - --- Constructing Large*Labels -mkBitmapLabel :: Unique -> CLabel -mkBitmapLabel uniq = LargeBitmapLabel uniq - --- Constructing Cost Center Labels -mkCCLabel :: CostCentre -> CLabel -mkCCSLabel :: CostCentreStack -> CLabel -mkCCLabel cc = CC_Label cc -mkCCSLabel ccs = CCS_Label ccs - -mkRtsApFastLabel :: FastString -> CLabel -mkRtsApFastLabel str = RtsLabel (RtsApFast str) - -mkRtsSlowFastTickyCtrLabel :: String -> CLabel -mkRtsSlowFastTickyCtrLabel pat = RtsLabel (RtsSlowFastTickyCtr pat) - - --- Constructing Code Coverage Labels -mkHpcTicksLabel :: Module -> CLabel -mkHpcTicksLabel = HpcTicksLabel - - --- Constructing labels used for dynamic linking -mkDynamicLinkerLabel :: DynamicLinkerLabelInfo -> CLabel -> CLabel -mkDynamicLinkerLabel = DynamicLinkerLabel - -dynamicLinkerLabelInfo :: CLabel -> Maybe (DynamicLinkerLabelInfo, CLabel) -dynamicLinkerLabelInfo (DynamicLinkerLabel info lbl) = Just (info, lbl) -dynamicLinkerLabelInfo _ = Nothing - -mkPicBaseLabel :: CLabel 
-mkPicBaseLabel = PicBaseLabel - - --- Constructing miscellaneous other labels -mkDeadStripPreventer :: CLabel -> CLabel -mkDeadStripPreventer lbl = DeadStripPreventer lbl - -mkStringLitLabel :: Unique -> CLabel -mkStringLitLabel = StringLitLabel - -mkAsmTempLabel :: Uniquable a => a -> CLabel -mkAsmTempLabel a = AsmTempLabel (getUnique a) - -mkAsmTempDerivedLabel :: CLabel -> FastString -> CLabel -mkAsmTempDerivedLabel = AsmTempDerivedLabel - -mkAsmTempEndLabel :: CLabel -> CLabel -mkAsmTempEndLabel l = mkAsmTempDerivedLabel l (fsLit "_end") - --- | Construct a label for a DWARF Debug Information Entity (DIE) --- describing another symbol. -mkAsmTempDieLabel :: CLabel -> CLabel -mkAsmTempDieLabel l = mkAsmTempDerivedLabel l (fsLit "_die") - --- ----------------------------------------------------------------------------- --- Convert between different kinds of label - -toClosureLbl :: CLabel -> CLabel -toClosureLbl (IdLabel n c _) = IdLabel n c Closure -toClosureLbl (CmmLabel m str _) = CmmLabel m str CmmClosure -toClosureLbl l = pprPanic "toClosureLbl" (ppr l) - -toSlowEntryLbl :: CLabel -> CLabel -toSlowEntryLbl (IdLabel n _ BlockInfoTable) - = pprPanic "toSlowEntryLbl" (ppr n) -toSlowEntryLbl (IdLabel n c _) = IdLabel n c Slow -toSlowEntryLbl l = pprPanic "toSlowEntryLbl" (ppr l) - -toEntryLbl :: CLabel -> CLabel -toEntryLbl (IdLabel n c LocalInfoTable) = IdLabel n c LocalEntry -toEntryLbl (IdLabel n c ConInfoTable) = IdLabel n c ConEntry -toEntryLbl (IdLabel n _ BlockInfoTable) = mkLocalBlockLabel (nameUnique n) - -- See Note [Proc-point local block entry-point]. -toEntryLbl (IdLabel n c _) = IdLabel n c Entry -toEntryLbl (CmmLabel m str CmmInfo) = CmmLabel m str CmmEntry -toEntryLbl (CmmLabel m str CmmRetInfo) = CmmLabel m str CmmRet -toEntryLbl l = pprPanic "toEntryLbl" (ppr l) - -toInfoLbl :: CLabel -> CLabel -toInfoLbl (IdLabel n c LocalEntry) = IdLabel n c LocalInfoTable -toInfoLbl (IdLabel n c ConEntry) = IdLabel n c ConInfoTable -toInfoLbl (IdLabel n c _) = IdLabel n c InfoTable -toInfoLbl (CmmLabel m str CmmEntry) = CmmLabel m str CmmInfo -toInfoLbl (CmmLabel m str CmmRet) = CmmLabel m str CmmRetInfo -toInfoLbl l = pprPanic "CLabel.toInfoLbl" (ppr l) - -hasHaskellName :: CLabel -> Maybe Name -hasHaskellName (IdLabel n _ _) = Just n -hasHaskellName _ = Nothing - --- ----------------------------------------------------------------------------- --- Does a CLabel's referent itself refer to a CAF? -hasCAF :: CLabel -> Bool -hasCAF (IdLabel _ _ RednCounts) = False -- Note [ticky for LNE] -hasCAF (IdLabel _ MayHaveCafRefs _) = True -hasCAF _ = False - --- Note [ticky for LNE] --- ~~~~~~~~~~~~~~~~~~~~~ - --- Until 14 Feb 2013, every ticky counter was associated with a --- closure. Thus, ticky labels used IdLabel. It is odd that --- CmmBuildInfoTables.cafTransfers would consider such a ticky label --- reason to add the name to the CAFEnv (and thus eventually the SRT), --- but it was harmless because the ticky was only used if the closure --- was also. --- --- Since we now have ticky counters for LNEs, it is no longer the case --- that every ticky counter has an actual closure. So I changed the --- generation of ticky counters' CLabels to not result in their --- associated id ending up in the SRT. --- --- NB IdLabel is still appropriate for ticky ids (as opposed to --- CmmLabel) because the LNE's counter is still related to an .hs Id, --- that Id just isn't for a proper closure. 
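-- A few identities that follow directly from the definitions above, given
-- only as an illustration (the string "stg_upd_frame" and the name 'n' are
-- arbitrary examples):
--
--   toEntryLbl (mkCmmInfoLabel rtsUnitId (fsLit "stg_upd_frame"))
--       == mkCmmEntryLabel rtsUnitId (fsLit "stg_upd_frame")
--   toInfoLbl  (mkCmmEntryLabel rtsUnitId (fsLit "stg_upd_frame"))
--       == mkCmmInfoLabel rtsUnitId (fsLit "stg_upd_frame")
--
-- and, per Note [ticky for LNE], a ticky counter label never causes its Id
-- to be treated as having CAF references:
--
--   hasCAF (mkRednCountsLabel n) == False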
- --- ----------------------------------------------------------------------------- --- Does a CLabel need declaring before use or not? --- --- See wiki:commentary/compiler/backends/ppr-c#prototypes - -needsCDecl :: CLabel -> Bool - -- False <=> it's pre-declared; don't bother - -- don't bother declaring Bitmap labels, we always make sure - -- they are defined before use. -needsCDecl (SRTLabel _) = True -needsCDecl (LargeBitmapLabel _) = False -needsCDecl (IdLabel _ _ _) = True -needsCDecl (LocalBlockLabel _) = True - -needsCDecl (StringLitLabel _) = False -needsCDecl (AsmTempLabel _) = False -needsCDecl (AsmTempDerivedLabel _ _) = False -needsCDecl (RtsLabel _) = False - -needsCDecl (CmmLabel pkgId _ _) - -- Prototypes for labels defined in the runtime system are imported - -- into HC files via includes/Stg.h. - | pkgId == rtsUnitId = False - - -- For other labels we inline one into the HC file directly. - | otherwise = True - -needsCDecl l@(ForeignLabel{}) = not (isMathFun l) -needsCDecl (CC_Label _) = True -needsCDecl (CCS_Label _) = True -needsCDecl (HpcTicksLabel _) = True -needsCDecl (DynamicLinkerLabel {}) = panic "needsCDecl DynamicLinkerLabel" -needsCDecl PicBaseLabel = panic "needsCDecl PicBaseLabel" -needsCDecl (DeadStripPreventer {}) = panic "needsCDecl DeadStripPreventer" - --- | If a label is a local block label then return just its 'BlockId', otherwise --- 'Nothing'. -maybeLocalBlockLabel :: CLabel -> Maybe BlockId -maybeLocalBlockLabel (LocalBlockLabel uq) = Just $ mkBlockId uq -maybeLocalBlockLabel _ = Nothing - - --- | Check whether a label corresponds to a C function that has --- a prototype in a system header somewhere, or is built-in --- to the C compiler. For these labels we avoid generating our --- own C prototypes. -isMathFun :: CLabel -> Bool -isMathFun (ForeignLabel fs _ _ _) = fs `elementOfUniqSet` math_funs -isMathFun _ = False - -math_funs :: UniqSet FastString -math_funs = mkUniqSet [ - -- _ISOC99_SOURCE - (fsLit "acos"), (fsLit "acosf"), (fsLit "acosh"), - (fsLit "acoshf"), (fsLit "acoshl"), (fsLit "acosl"), - (fsLit "asin"), (fsLit "asinf"), (fsLit "asinl"), - (fsLit "asinh"), (fsLit "asinhf"), (fsLit "asinhl"), - (fsLit "atan"), (fsLit "atanf"), (fsLit "atanl"), - (fsLit "atan2"), (fsLit "atan2f"), (fsLit "atan2l"), - (fsLit "atanh"), (fsLit "atanhf"), (fsLit "atanhl"), - (fsLit "cbrt"), (fsLit "cbrtf"), (fsLit "cbrtl"), - (fsLit "ceil"), (fsLit "ceilf"), (fsLit "ceill"), - (fsLit "copysign"), (fsLit "copysignf"), (fsLit "copysignl"), - (fsLit "cos"), (fsLit "cosf"), (fsLit "cosl"), - (fsLit "cosh"), (fsLit "coshf"), (fsLit "coshl"), - (fsLit "erf"), (fsLit "erff"), (fsLit "erfl"), - (fsLit "erfc"), (fsLit "erfcf"), (fsLit "erfcl"), - (fsLit "exp"), (fsLit "expf"), (fsLit "expl"), - (fsLit "exp2"), (fsLit "exp2f"), (fsLit "exp2l"), - (fsLit "expm1"), (fsLit "expm1f"), (fsLit "expm1l"), - (fsLit "fabs"), (fsLit "fabsf"), (fsLit "fabsl"), - (fsLit "fdim"), (fsLit "fdimf"), (fsLit "fdiml"), - (fsLit "floor"), (fsLit "floorf"), (fsLit "floorl"), - (fsLit "fma"), (fsLit "fmaf"), (fsLit "fmal"), - (fsLit "fmax"), (fsLit "fmaxf"), (fsLit "fmaxl"), - (fsLit "fmin"), (fsLit "fminf"), (fsLit "fminl"), - (fsLit "fmod"), (fsLit "fmodf"), (fsLit "fmodl"), - (fsLit "frexp"), (fsLit "frexpf"), (fsLit "frexpl"), - (fsLit "hypot"), (fsLit "hypotf"), (fsLit "hypotl"), - (fsLit "ilogb"), (fsLit "ilogbf"), (fsLit "ilogbl"), - (fsLit "ldexp"), (fsLit "ldexpf"), (fsLit "ldexpl"), - (fsLit "lgamma"), (fsLit "lgammaf"), (fsLit "lgammal"), - (fsLit "llrint"), (fsLit "llrintf"), 
(fsLit "llrintl"), - (fsLit "llround"), (fsLit "llroundf"), (fsLit "llroundl"), - (fsLit "log"), (fsLit "logf"), (fsLit "logl"), - (fsLit "log10l"), (fsLit "log10"), (fsLit "log10f"), - (fsLit "log1pl"), (fsLit "log1p"), (fsLit "log1pf"), - (fsLit "log2"), (fsLit "log2f"), (fsLit "log2l"), - (fsLit "logb"), (fsLit "logbf"), (fsLit "logbl"), - (fsLit "lrint"), (fsLit "lrintf"), (fsLit "lrintl"), - (fsLit "lround"), (fsLit "lroundf"), (fsLit "lroundl"), - (fsLit "modf"), (fsLit "modff"), (fsLit "modfl"), - (fsLit "nan"), (fsLit "nanf"), (fsLit "nanl"), - (fsLit "nearbyint"), (fsLit "nearbyintf"), (fsLit "nearbyintl"), - (fsLit "nextafter"), (fsLit "nextafterf"), (fsLit "nextafterl"), - (fsLit "nexttoward"), (fsLit "nexttowardf"), (fsLit "nexttowardl"), - (fsLit "pow"), (fsLit "powf"), (fsLit "powl"), - (fsLit "remainder"), (fsLit "remainderf"), (fsLit "remainderl"), - (fsLit "remquo"), (fsLit "remquof"), (fsLit "remquol"), - (fsLit "rint"), (fsLit "rintf"), (fsLit "rintl"), - (fsLit "round"), (fsLit "roundf"), (fsLit "roundl"), - (fsLit "scalbln"), (fsLit "scalblnf"), (fsLit "scalblnl"), - (fsLit "scalbn"), (fsLit "scalbnf"), (fsLit "scalbnl"), - (fsLit "sin"), (fsLit "sinf"), (fsLit "sinl"), - (fsLit "sinh"), (fsLit "sinhf"), (fsLit "sinhl"), - (fsLit "sqrt"), (fsLit "sqrtf"), (fsLit "sqrtl"), - (fsLit "tan"), (fsLit "tanf"), (fsLit "tanl"), - (fsLit "tanh"), (fsLit "tanhf"), (fsLit "tanhl"), - (fsLit "tgamma"), (fsLit "tgammaf"), (fsLit "tgammal"), - (fsLit "trunc"), (fsLit "truncf"), (fsLit "truncl"), - -- ISO C 99 also defines these function-like macros in math.h: - -- fpclassify, isfinite, isinf, isnormal, signbit, isgreater, - -- isgreaterequal, isless, islessequal, islessgreater, isunordered - - -- additional symbols from _BSD_SOURCE - (fsLit "drem"), (fsLit "dremf"), (fsLit "dreml"), - (fsLit "finite"), (fsLit "finitef"), (fsLit "finitel"), - (fsLit "gamma"), (fsLit "gammaf"), (fsLit "gammal"), - (fsLit "isinf"), (fsLit "isinff"), (fsLit "isinfl"), - (fsLit "isnan"), (fsLit "isnanf"), (fsLit "isnanl"), - (fsLit "j0"), (fsLit "j0f"), (fsLit "j0l"), - (fsLit "j1"), (fsLit "j1f"), (fsLit "j1l"), - (fsLit "jn"), (fsLit "jnf"), (fsLit "jnl"), - (fsLit "lgamma_r"), (fsLit "lgammaf_r"), (fsLit "lgammal_r"), - (fsLit "scalb"), (fsLit "scalbf"), (fsLit "scalbl"), - (fsLit "significand"), (fsLit "significandf"), (fsLit "significandl"), - (fsLit "y0"), (fsLit "y0f"), (fsLit "y0l"), - (fsLit "y1"), (fsLit "y1f"), (fsLit "y1l"), - (fsLit "yn"), (fsLit "ynf"), (fsLit "ynl"), - - -- These functions are described in IEEE Std 754-2008 - - -- Standard for Floating-Point Arithmetic and ISO/IEC TS 18661 - (fsLit "nextup"), (fsLit "nextupf"), (fsLit "nextupl"), - (fsLit "nextdown"), (fsLit "nextdownf"), (fsLit "nextdownl") - ] - --- ----------------------------------------------------------------------------- --- | Is a CLabel visible outside this object file or not? --- From the point of view of the code generator, a name is --- externally visible if it has to be declared as exported --- in the .o file's symbol table; that is, made non-static. 
-externallyVisibleCLabel :: CLabel -> Bool -- not C "static" -externallyVisibleCLabel (StringLitLabel _) = False -externallyVisibleCLabel (AsmTempLabel _) = False -externallyVisibleCLabel (AsmTempDerivedLabel _ _)= False -externallyVisibleCLabel (RtsLabel _) = True -externallyVisibleCLabel (LocalBlockLabel _) = False -externallyVisibleCLabel (CmmLabel _ _ _) = True -externallyVisibleCLabel (ForeignLabel{}) = True -externallyVisibleCLabel (IdLabel name _ info) = isExternalName name && externallyVisibleIdLabel info -externallyVisibleCLabel (CC_Label _) = True -externallyVisibleCLabel (CCS_Label _) = True -externallyVisibleCLabel (DynamicLinkerLabel _ _) = False -externallyVisibleCLabel (HpcTicksLabel _) = True -externallyVisibleCLabel (LargeBitmapLabel _) = False -externallyVisibleCLabel (SRTLabel _) = False -externallyVisibleCLabel (PicBaseLabel {}) = panic "externallyVisibleCLabel PicBaseLabel" -externallyVisibleCLabel (DeadStripPreventer {}) = panic "externallyVisibleCLabel DeadStripPreventer" - -externallyVisibleIdLabel :: IdLabelInfo -> Bool -externallyVisibleIdLabel LocalInfoTable = False -externallyVisibleIdLabel LocalEntry = False -externallyVisibleIdLabel BlockInfoTable = False -externallyVisibleIdLabel _ = True - --- ----------------------------------------------------------------------------- --- Finding the "type" of a CLabel - --- For generating correct types in label declarations: - -data CLabelType - = CodeLabel -- Address of some executable instructions - | DataLabel -- Address of data, not a GC ptr - | GcPtrLabel -- Address of a (presumably static) GC object - -isCFunctionLabel :: CLabel -> Bool -isCFunctionLabel lbl = case labelType lbl of - CodeLabel -> True - _other -> False - -isGcPtrLabel :: CLabel -> Bool -isGcPtrLabel lbl = case labelType lbl of - GcPtrLabel -> True - _other -> False - - --- | Work out the general type of data at the address of this label --- whether it be code, data, or static GC object. -labelType :: CLabel -> CLabelType -labelType (IdLabel _ _ info) = idInfoLabelType info -labelType (CmmLabel _ _ CmmData) = DataLabel -labelType (CmmLabel _ _ CmmClosure) = GcPtrLabel -labelType (CmmLabel _ _ CmmCode) = CodeLabel -labelType (CmmLabel _ _ CmmInfo) = DataLabel -labelType (CmmLabel _ _ CmmEntry) = CodeLabel -labelType (CmmLabel _ _ CmmPrimCall) = CodeLabel -labelType (CmmLabel _ _ CmmRetInfo) = DataLabel -labelType (CmmLabel _ _ CmmRet) = CodeLabel -labelType (RtsLabel (RtsSelectorInfoTable _ _)) = DataLabel -labelType (RtsLabel (RtsApInfoTable _ _)) = DataLabel -labelType (RtsLabel (RtsApFast _)) = CodeLabel -labelType (RtsLabel _) = DataLabel -labelType (LocalBlockLabel _) = CodeLabel -labelType (SRTLabel _) = DataLabel -labelType (ForeignLabel _ _ _ IsFunction) = CodeLabel -labelType (ForeignLabel _ _ _ IsData) = DataLabel -labelType (AsmTempLabel _) = panic "labelType(AsmTempLabel)" -labelType (AsmTempDerivedLabel _ _) = panic "labelType(AsmTempDerivedLabel)" -labelType (StringLitLabel _) = DataLabel -labelType (CC_Label _) = DataLabel -labelType (CCS_Label _) = DataLabel -labelType (DynamicLinkerLabel _ _) = DataLabel -- Is this right? 
-labelType PicBaseLabel = DataLabel -labelType (DeadStripPreventer _) = DataLabel -labelType (HpcTicksLabel _) = DataLabel -labelType (LargeBitmapLabel _) = DataLabel - -idInfoLabelType :: IdLabelInfo -> CLabelType -idInfoLabelType info = - case info of - InfoTable -> DataLabel - LocalInfoTable -> DataLabel - BlockInfoTable -> DataLabel - Closure -> GcPtrLabel - ConInfoTable -> DataLabel - ClosureTable -> DataLabel - RednCounts -> DataLabel - Bytes -> DataLabel - _ -> CodeLabel - - --- ----------------------------------------------------------------------------- - --- | Is a 'CLabel' defined in the current module being compiled? --- --- Sometimes we can optimise references within a compilation unit in ways that --- we couldn't for inter-module references. This provides a conservative --- estimate of whether a 'CLabel' lives in the current module. -isLocalCLabel :: Module -> CLabel -> Bool -isLocalCLabel this_mod lbl = - case lbl of - IdLabel name _ _ - | isInternalName name -> True - | otherwise -> nameModule name == this_mod - LocalBlockLabel _ -> True - _ -> False - --- ----------------------------------------------------------------------------- - --- | Does a 'CLabel' need dynamic linkage? --- --- When referring to data in code, we need to know whether --- that data resides in a DLL or not. [Win32 only.] --- @labelDynamic@ returns @True@ if the label is located --- in a DLL, be it a data reference or not. -labelDynamic :: DynFlags -> Module -> CLabel -> Bool -labelDynamic dflags this_mod lbl = - case lbl of - -- is the RTS in a DLL or not? - RtsLabel _ -> - externalDynamicRefs && (this_pkg /= rtsUnitId) - - IdLabel n _ _ -> - isDllName dflags this_mod n - - -- When compiling in the "dyn" way, each package is to be linked into - -- its own shared library. - CmmLabel pkg _ _ - | os == OSMinGW32 -> - externalDynamicRefs && (this_pkg /= pkg) - | otherwise -> - gopt Opt_ExternalDynamicRefs dflags - - LocalBlockLabel _ -> False - - ForeignLabel _ _ source _ -> - if os == OSMinGW32 - then case source of - -- Foreign label is in some un-named foreign package (or DLL). - ForeignLabelInExternalPackage -> True - - -- Foreign label is linked into the same package as the - -- source file currently being compiled. - ForeignLabelInThisPackage -> False - - -- Foreign label is in some named package. - -- When compiling in the "dyn" way, each package is to be - -- linked into its own DLL. - ForeignLabelInPackage pkgId -> - externalDynamicRefs && (this_pkg /= pkgId) - - else -- On Mac OS X and on ELF platforms, false positives are OK, - -- so we claim that all foreign imports come from dynamic - -- libraries - True - - CC_Label cc -> - externalDynamicRefs && not (ccFromThisModule cc this_mod) - - -- CCS_Label always contains a CostCentre defined in the current module - CCS_Label _ -> False - - HpcTicksLabel m -> - externalDynamicRefs && this_mod /= m - - -- Note that DynamicLinkerLabels do NOT require dynamic linking themselves. - _ -> False - where - externalDynamicRefs = gopt Opt_ExternalDynamicRefs dflags - os = platformOS (targetPlatform dflags) - this_pkg = moduleUnitId this_mod - - ------------------------------------------------------------------------------ --- Printing out CLabels. - -{- -Convention: - - _ - -where is _ for external names and for -internal names. 
<type> is one of the following:
-
-         info                   Info table
-         srt                    Static reference table
-         entry                  Entry code (function, closure)
-         slow                   Slow entry code (if any)
-         ret                    Direct return address
-         vtbl                   Vector table
-         <n>_alt                Case alternative (tag n)
-         dflt                   Default case alternative
-         btm                    Large bitmap vector
-         closure                Static closure
-         con_entry              Dynamic Constructor entry code
-         con_info               Dynamic Constructor info table
-         static_entry           Static Constructor entry code
-         static_info            Static Constructor info table
-         sel_info               Selector info table
-         sel_entry              Selector entry code
-         cc                     Cost centre
-         ccs                    Cost centre stack
-
-Many of these distinctions are only for documentation reasons. For
-example, _ret is only distinguished from _entry to make it easy to
-tell whether a code fragment is a return point or a closure/function
-entry.
-
-Note [Closure and info labels]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-For a function 'foo, we have:
-  foo_info    : Points to the info table describing foo's closure
-                (and entry code for foo with tables next to code)
-  foo_closure : Static (no-free-var) closure only:
-                points to the statically-allocated closure
-
-For a data constructor (such as Just or Nothing), we have:
-    Just_con_info: Info table for the data constructor itself
-                   the first word of a heap-allocated Just
-    Just_info:     Info table for the *worker function*, an
-                   ordinary Haskell function of arity 1 that
-                   allocates a (Just x) box:
-                      Just = \x -> Just x
-    Just_closure:  The closure for this worker
-
-    Nothing_closure: a statically allocated closure for Nothing
-    Nothing_static_info: info table for Nothing_closure
-
-All these must be exported symbols, EXCEPT Just_info.  We don't need to
-export this because in other modules we either have
-  * A reference to 'Just'; use Just_closure
-  * A saturated call 'Just x'; allocate using Just_con_info
-Not exporting these Just_info labels reduces the number of symbols
-somewhat.
-
-Note [Bytes label]
-~~~~~~~~~~~~~~~~~~
-For a top-level string literal 'foo', we have just one symbol 'foo_bytes', which
-points to a static data block containing the content of the literal.
-
-Note [Proc-point local block entry-points]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-A label for a proc-point local block entry-point has no "_entry" suffix. With
-`infoTblLbl` we derive an info table label from a proc-point block ID. If
-we convert such an info table label into an entry label we must produce
-the label without an "_entry" suffix. So an info table label records
-the fact that it was derived from a block ID in `IdLabelInfo` as
-`BlockInfoTable`.
-
-The info table label and the local block label are both local labels
-and are not externally visible.
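To make the last point concrete in terms of the conversion functions defined
earlier in this module (this is just a restatement of the BlockInfoTable
clause of toEntryLbl):

    toEntryLbl (IdLabel n c BlockInfoTable) == mkLocalBlockLabel (nameUnique n)

i.e. the derived entry label is the bare local block label, with no "_entry"
suffix and no external visibility.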
--} - -instance Outputable CLabel where - ppr c = sdocWithDynFlags $ \dynFlags -> pprCLabel dynFlags c - -pprCLabel :: DynFlags -> CLabel -> SDoc - -pprCLabel _ (LocalBlockLabel u) - = tempLabelPrefixOrUnderscore <> pprUniqueAlways u - -pprCLabel dynFlags (AsmTempLabel u) - | not (platformUnregisterised $ targetPlatform dynFlags) - = tempLabelPrefixOrUnderscore <> pprUniqueAlways u - -pprCLabel dynFlags (AsmTempDerivedLabel l suf) - | platformMisc_ghcWithNativeCodeGen $ platformMisc dynFlags - = ptext (asmTempLabelPrefix $ targetPlatform dynFlags) - <> case l of AsmTempLabel u -> pprUniqueAlways u - LocalBlockLabel u -> pprUniqueAlways u - _other -> pprCLabel dynFlags l - <> ftext suf - -pprCLabel dynFlags (DynamicLinkerLabel info lbl) - | platformMisc_ghcWithNativeCodeGen $ platformMisc dynFlags - = pprDynamicLinkerAsmLabel (targetPlatform dynFlags) info lbl - -pprCLabel dynFlags PicBaseLabel - | platformMisc_ghcWithNativeCodeGen $ platformMisc dynFlags - = text "1b" - -pprCLabel dynFlags (DeadStripPreventer lbl) - | platformMisc_ghcWithNativeCodeGen $ platformMisc dynFlags - = - {- - `lbl` can be temp one but we need to ensure that dsp label will stay - in the final binary so we prepend non-temp prefix ("dsp_") and - optional `_` (underscore) because this is how you mark non-temp symbols - on some platforms (Darwin) - -} - maybe_underscore dynFlags $ text "dsp_" - <> pprCLabel dynFlags lbl <> text "_dsp" - -pprCLabel dynFlags (StringLitLabel u) - | platformMisc_ghcWithNativeCodeGen $ platformMisc dynFlags - = pprUniqueAlways u <> ptext (sLit "_str") - -pprCLabel dynFlags lbl - = getPprStyle $ \ sty -> - if platformMisc_ghcWithNativeCodeGen (platformMisc dynFlags) && asmStyle sty - then maybe_underscore dynFlags $ pprAsmCLbl (targetPlatform dynFlags) lbl - else pprCLbl lbl - -maybe_underscore :: DynFlags -> SDoc -> SDoc -maybe_underscore dynFlags doc = - if platformMisc_leadingUnderscore $ platformMisc dynFlags - then pp_cSEP <> doc - else doc - -pprAsmCLbl :: Platform -> CLabel -> SDoc -pprAsmCLbl platform (ForeignLabel fs (Just sz) _ _) - | platformOS platform == OSMinGW32 - -- In asm mode, we need to put the suffix on a stdcall ForeignLabel. - -- (The C compiler does this itself). - = ftext fs <> char '@' <> int sz -pprAsmCLbl _ lbl - = pprCLbl lbl - -pprCLbl :: CLabel -> SDoc -pprCLbl (StringLitLabel u) - = pprUniqueAlways u <> text "_str" - -pprCLbl (SRTLabel u) - = tempLabelPrefixOrUnderscore <> pprUniqueAlways u <> pp_cSEP <> text "srt" - -pprCLbl (LargeBitmapLabel u) = - tempLabelPrefixOrUnderscore - <> char 'b' <> pprUniqueAlways u <> pp_cSEP <> text "btm" --- Some bitsmaps for tuple constructors have a numeric tag (e.g. '7') --- until that gets resolved we'll just force them to start --- with a letter so the label will be legal assembly code. 
- - -pprCLbl (CmmLabel _ str CmmCode) = ftext str -pprCLbl (CmmLabel _ str CmmData) = ftext str -pprCLbl (CmmLabel _ str CmmPrimCall) = ftext str - -pprCLbl (LocalBlockLabel u) = - tempLabelPrefixOrUnderscore <> text "blk_" <> pprUniqueAlways u - -pprCLbl (RtsLabel (RtsApFast str)) = ftext str <> text "_fast" - -pprCLbl (RtsLabel (RtsSelectorInfoTable upd_reqd offset)) - = sdocWithDynFlags $ \dflags -> - ASSERT(offset >= 0 && offset <= mAX_SPEC_SELECTEE_SIZE dflags) - hcat [text "stg_sel_", text (show offset), - ptext (if upd_reqd - then (sLit "_upd_info") - else (sLit "_noupd_info")) - ] - -pprCLbl (RtsLabel (RtsSelectorEntry upd_reqd offset)) - = sdocWithDynFlags $ \dflags -> - ASSERT(offset >= 0 && offset <= mAX_SPEC_SELECTEE_SIZE dflags) - hcat [text "stg_sel_", text (show offset), - ptext (if upd_reqd - then (sLit "_upd_entry") - else (sLit "_noupd_entry")) - ] - -pprCLbl (RtsLabel (RtsApInfoTable upd_reqd arity)) - = sdocWithDynFlags $ \dflags -> - ASSERT(arity > 0 && arity <= mAX_SPEC_AP_SIZE dflags) - hcat [text "stg_ap_", text (show arity), - ptext (if upd_reqd - then (sLit "_upd_info") - else (sLit "_noupd_info")) - ] - -pprCLbl (RtsLabel (RtsApEntry upd_reqd arity)) - = sdocWithDynFlags $ \dflags -> - ASSERT(arity > 0 && arity <= mAX_SPEC_AP_SIZE dflags) - hcat [text "stg_ap_", text (show arity), - ptext (if upd_reqd - then (sLit "_upd_entry") - else (sLit "_noupd_entry")) - ] - -pprCLbl (CmmLabel _ fs CmmInfo) - = ftext fs <> text "_info" - -pprCLbl (CmmLabel _ fs CmmEntry) - = ftext fs <> text "_entry" - -pprCLbl (CmmLabel _ fs CmmRetInfo) - = ftext fs <> text "_info" - -pprCLbl (CmmLabel _ fs CmmRet) - = ftext fs <> text "_ret" - -pprCLbl (CmmLabel _ fs CmmClosure) - = ftext fs <> text "_closure" - -pprCLbl (RtsLabel (RtsPrimOp primop)) - = text "stg_" <> ppr primop - -pprCLbl (RtsLabel (RtsSlowFastTickyCtr pat)) - = text "SLOW_CALL_fast_" <> text pat <> ptext (sLit "_ctr") - -pprCLbl (ForeignLabel str _ _ _) - = ftext str - -pprCLbl (IdLabel name _cafs flavor) = - internalNamePrefix name <> ppr name <> ppIdFlavor flavor - -pprCLbl (CC_Label cc) = ppr cc -pprCLbl (CCS_Label ccs) = ppr ccs - -pprCLbl (HpcTicksLabel mod) - = text "_hpc_tickboxes_" <> ppr mod <> ptext (sLit "_hpc") - -pprCLbl (AsmTempLabel {}) = panic "pprCLbl AsmTempLabel" -pprCLbl (AsmTempDerivedLabel {})= panic "pprCLbl AsmTempDerivedLabel" -pprCLbl (DynamicLinkerLabel {}) = panic "pprCLbl DynamicLinkerLabel" -pprCLbl (PicBaseLabel {}) = panic "pprCLbl PicBaseLabel" -pprCLbl (DeadStripPreventer {}) = panic "pprCLbl DeadStripPreventer" - -ppIdFlavor :: IdLabelInfo -> SDoc -ppIdFlavor x = pp_cSEP <> text - (case x of - Closure -> "closure" - InfoTable -> "info" - LocalInfoTable -> "info" - Entry -> "entry" - LocalEntry -> "entry" - Slow -> "slow" - RednCounts -> "ct" - ConEntry -> "con_entry" - ConInfoTable -> "con_info" - ClosureTable -> "closure_tbl" - Bytes -> "bytes" - BlockInfoTable -> "info" - ) - - -pp_cSEP :: SDoc -pp_cSEP = char '_' - - -instance Outputable ForeignLabelSource where - ppr fs - = case fs of - ForeignLabelInPackage pkgId -> parens $ text "package: " <> ppr pkgId - ForeignLabelInThisPackage -> parens $ text "this package" - ForeignLabelInExternalPackage -> parens $ text "external package" - -internalNamePrefix :: Name -> SDoc -internalNamePrefix name = getPprStyle $ \ sty -> - if asmStyle sty && isRandomGenerated then - sdocWithPlatform $ \platform -> - ptext (asmTempLabelPrefix platform) - else - empty - where - isRandomGenerated = not $ isExternalName name - -tempLabelPrefixOrUnderscore 
:: SDoc -tempLabelPrefixOrUnderscore = sdocWithPlatform $ \platform -> - getPprStyle $ \ sty -> - if asmStyle sty then - ptext (asmTempLabelPrefix platform) - else - char '_' - --- ----------------------------------------------------------------------------- --- Machine-dependent knowledge about labels. - -asmTempLabelPrefix :: Platform -> PtrString -- for formatting labels -asmTempLabelPrefix platform = case platformOS platform of - OSDarwin -> sLit "L" - OSAIX -> sLit "__L" -- follow IBM XL C's convention - _ -> sLit ".L" - -pprDynamicLinkerAsmLabel :: Platform -> DynamicLinkerLabelInfo -> CLabel -> SDoc -pprDynamicLinkerAsmLabel platform dllInfo lbl = - case platformOS platform of - OSDarwin - | platformArch platform == ArchX86_64 -> - case dllInfo of - CodeStub -> char 'L' <> ppr lbl <> text "$stub" - SymbolPtr -> char 'L' <> ppr lbl <> text "$non_lazy_ptr" - GotSymbolPtr -> ppr lbl <> text "@GOTPCREL" - GotSymbolOffset -> ppr lbl - | otherwise -> - case dllInfo of - CodeStub -> char 'L' <> ppr lbl <> text "$stub" - SymbolPtr -> char 'L' <> ppr lbl <> text "$non_lazy_ptr" - _ -> panic "pprDynamicLinkerAsmLabel" - - OSAIX -> - case dllInfo of - SymbolPtr -> text "LC.." <> ppr lbl -- GCC's naming convention - _ -> panic "pprDynamicLinkerAsmLabel" - - _ | osElfTarget (platformOS platform) -> elfLabel - - OSMinGW32 -> - case dllInfo of - SymbolPtr -> text "__imp_" <> ppr lbl - _ -> panic "pprDynamicLinkerAsmLabel" - - _ -> panic "pprDynamicLinkerAsmLabel" - where - elfLabel - | platformArch platform == ArchPPC - = case dllInfo of - CodeStub -> -- See Note [.LCTOC1 in PPC PIC code] - ppr lbl <> text "+32768@plt" - SymbolPtr -> text ".LC_" <> ppr lbl - _ -> panic "pprDynamicLinkerAsmLabel" - - | platformArch platform == ArchX86_64 - = case dllInfo of - CodeStub -> ppr lbl <> text "@plt" - GotSymbolPtr -> ppr lbl <> text "@gotpcrel" - GotSymbolOffset -> ppr lbl - SymbolPtr -> text ".LC_" <> ppr lbl - - | platformArch platform == ArchPPC_64 ELF_V1 - || platformArch platform == ArchPPC_64 ELF_V2 - = case dllInfo of - GotSymbolPtr -> text ".LC_" <> ppr lbl - <> text "@toc" - GotSymbolOffset -> ppr lbl - SymbolPtr -> text ".LC_" <> ppr lbl - _ -> panic "pprDynamicLinkerAsmLabel" - - | otherwise - = case dllInfo of - CodeStub -> ppr lbl <> text "@plt" - SymbolPtr -> text ".LC_" <> ppr lbl - GotSymbolPtr -> ppr lbl <> text "@got" - GotSymbolOffset -> ppr lbl <> text "@gotoff" - --- Figure out whether `symbol` may serve as an alias --- to `target` within one compilation unit. --- --- This is true if any of these holds: --- * `target` is a module-internal haskell name. --- * `target` is an exported name, but comes from the same --- module as `symbol` --- --- These are sufficient conditions for establishing e.g. a --- GNU assembly alias ('.equiv' directive). Sadly, there is --- no such thing as an alias to an imported symbol (conf. --- http://blog.omega-prime.co.uk/2011/07/06/the-sad-state-of-symbol-aliases/) --- See note [emit-time elimination of static indirections]. --- --- Precondition is that both labels represent the --- same semantic value. 
- -mayRedirectTo :: CLabel -> CLabel -> Bool -mayRedirectTo symbol target - | Just nam <- haskellName - , staticClosureLabel - , isExternalName nam - , Just mod <- nameModule_maybe nam - , Just anam <- hasHaskellName symbol - , Just amod <- nameModule_maybe anam - = amod == mod - - | Just nam <- haskellName - , staticClosureLabel - , isInternalName nam - = True - - | otherwise = False - where staticClosureLabel = isStaticClosureLabel target - haskellName = hasHaskellName target - - -{- -Note [emit-time elimination of static indirections] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -As described in #15155, certain static values are representationally -equivalent, e.g. 'cast'ed values (when created by 'newtype' wrappers). - - newtype A = A Int - {-# NOINLINE a #-} - a = A 42 - -a1_rYB :: Int -[GblId, Caf=NoCafRefs, Unf=OtherCon []] -a1_rYB = GHC.Types.I# 42# - -a [InlPrag=NOINLINE] :: A -[GblId, Unf=OtherCon []] -a = a1_rYB `cast` (Sym (T15155.N:A[0]) :: Int ~R# A) - -Formerly we created static indirections for these (IND_STATIC), which -consist of a statically allocated forwarding closure that contains -the (possibly tagged) indirectee. (See CMM/assembly below.) -This approach is suboptimal for two reasons: - (a) they occupy extra space, - (b) they need to be entered in order to obtain the indirectee, - thus they cannot be tagged. - -Fortunately there is a common case where static indirections can be -eliminated while emitting assembly (native or LLVM), viz. when the -indirectee is in the same module (object file) as the symbol that -points to it. In this case an assembly-level identification can -be created ('.equiv' directive), and as such the same object will -be assigned two names in the symbol table. Any of the identified -symbols can be referenced by a tagged pointer. - -Currently the 'mayRedirectTo' predicate will -give a clue whether a label can be equated with another, already -emitted, label (which can in turn be an alias). The general mechanics -is that we identify data (IND_STATIC closures) that are amenable -to aliasing while pretty-printing of assembly output, and emit the -'.equiv' directive instead of static data in such a case. - -Here is a sketch how the output is massaged: - - Consider -newtype A = A Int -{-# NOINLINE a #-} -a = A 42 -- I# 42# is the indirectee - -- 'a' is exported - - results in STG - -a1_rXq :: GHC.Types.Int -[GblId, Caf=NoCafRefs, Unf=OtherCon []] = - CCS_DONT_CARE GHC.Types.I#! [42#]; - -T15155.a [InlPrag=NOINLINE] :: T15155.A -[GblId, Unf=OtherCon []] = - CAF_ccs \ u [] a1_rXq; - - and CMM - -[section ""data" . a1_rXq_closure" { - a1_rXq_closure: - const GHC.Types.I#_con_info; - const 42; - }] - -[section ""data" . T15155.a_closure" { - T15155.a_closure: - const stg_IND_STATIC_info; - const a1_rXq_closure+1; - const 0; - const 0; - }] - -The emitted assembly is - -#### INDIRECTEE -a1_rXq_closure: -- module local haskell value - .quad GHC.Types.I#_con_info -- an Int - .quad 42 - -#### BEFORE -.globl T15155.a_closure -- exported newtype wrapped value -T15155.a_closure: - .quad stg_IND_STATIC_info -- the closure info - .quad a1_rXq_closure+1 -- indirectee ('+1' being the tag) - .quad 0 - .quad 0 - -#### AFTER -.globl T15155.a_closure -- exported newtype wrapped value -.equiv a1_rXq_closure,T15155.a_closure -- both are shared - -The transformation is performed because - T15155.a_closure `mayRedirectTo` a1_rXq_closure+1 -returns True. 
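A sketch of how an emitter might consult this predicate just before emitting
the statics of such an IND_STATIC closure (simplified: 'pprIndStatic' and its
'fallback' argument are hypothetical, and the real emitter additionally has
to strip the tag from the indirectee):

    pprIndStatic :: CLabel -> CLabel -> SDoc -> SDoc
    pprIndStatic symbol indirectee fallback
      | symbol `mayRedirectTo` indirectee
      = text ".equiv" <+> ppr indirectee <> comma <> ppr symbol
          -- emit only the alias, as in the AFTER assembly above
      | otherwise
      = fallback      -- emit the IND_STATIC closure data as usual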
--} diff --git a/compiler/cmm/Cmm.hs b/compiler/cmm/Cmm.hs deleted file mode 100644 index e08b22fa9b..0000000000 --- a/compiler/cmm/Cmm.hs +++ /dev/null @@ -1,231 +0,0 @@ --- Cmm representations using Hoopl's Graph CmmNode e x. -{-# LANGUAGE GADTs #-} - -module Cmm ( - -- * Cmm top-level datatypes - CmmProgram, CmmGroup, GenCmmGroup, - CmmDecl, GenCmmDecl(..), - CmmGraph, GenCmmGraph(..), - CmmBlock, - RawCmmDecl, RawCmmGroup, - Section(..), SectionType(..), CmmStatics(..), CmmStatic(..), - isSecConstant, - - -- ** Blocks containing lists - GenBasicBlock(..), blockId, - ListGraph(..), pprBBlock, - - -- * Info Tables - CmmTopInfo(..), CmmStackInfo(..), CmmInfoTable(..), topInfoTable, - ClosureTypeInfo(..), - ProfilingInfo(..), ConstrDescription, - - -- * Statements, expressions and types - module CmmNode, - module CmmExpr, - ) where - -import GhcPrelude - -import Id -import CostCentre -import CLabel -import BlockId -import CmmNode -import SMRep -import CmmExpr -import Hoopl.Block -import Hoopl.Collections -import Hoopl.Graph -import Hoopl.Label -import Outputable -import Data.ByteString (ByteString) - ------------------------------------------------------------------------------ --- Cmm, GenCmm ------------------------------------------------------------------------------ - --- A CmmProgram is a list of CmmGroups --- A CmmGroup is a list of top-level declarations - --- When object-splitting is on, each group is compiled into a separate --- .o file. So typically we put closely related stuff in a CmmGroup. --- Section-splitting follows suit and makes one .text subsection for each --- CmmGroup. - -type CmmProgram = [CmmGroup] - -type GenCmmGroup d h g = [GenCmmDecl d h g] -type CmmGroup = GenCmmGroup CmmStatics CmmTopInfo CmmGraph -type RawCmmGroup = GenCmmGroup CmmStatics (LabelMap CmmStatics) CmmGraph - ------------------------------------------------------------------------------ --- CmmDecl, GenCmmDecl ------------------------------------------------------------------------------ - --- GenCmmDecl is abstracted over --- d, the type of static data elements in CmmData --- h, the static info preceding the code of a CmmProc --- g, the control-flow graph of a CmmProc --- --- We expect there to be two main instances of this type: --- (a) C--, i.e. populated with various C-- constructs --- (b) Native code, populated with data/instructions - --- | A top-level chunk, abstracted over the type of the contents of --- the basic blocks (Cmm or instructions are the likely instantiations). -data GenCmmDecl d h g - = CmmProc -- A procedure - h -- Extra header such as the info table - CLabel -- Entry label - [GlobalReg] -- Registers live on entry. Note that the set of live - -- registers will be correct in generated C-- code, but - -- not in hand-written C-- code. However, - -- splitAtProcPoints calculates correct liveness - -- information for CmmProcs. 
- g -- Control-flow graph for the procedure's code - - | CmmData -- Static data - Section - d - -type CmmDecl = GenCmmDecl CmmStatics CmmTopInfo CmmGraph - -type RawCmmDecl - = GenCmmDecl - CmmStatics - (LabelMap CmmStatics) - CmmGraph - ------------------------------------------------------------------------------ --- Graphs ------------------------------------------------------------------------------ - -type CmmGraph = GenCmmGraph CmmNode -data GenCmmGraph n = CmmGraph { g_entry :: BlockId, g_graph :: Graph n C C } -type CmmBlock = Block CmmNode C C - ------------------------------------------------------------------------------ --- Info Tables ------------------------------------------------------------------------------ - --- | CmmTopInfo is attached to each CmmDecl (see defn of CmmGroup), and contains --- the extra info (beyond the executable code) that belongs to that CmmDecl. -data CmmTopInfo = TopInfo { info_tbls :: LabelMap CmmInfoTable - , stack_info :: CmmStackInfo } - -topInfoTable :: GenCmmDecl a CmmTopInfo (GenCmmGraph n) -> Maybe CmmInfoTable -topInfoTable (CmmProc infos _ _ g) = mapLookup (g_entry g) (info_tbls infos) -topInfoTable _ = Nothing - -data CmmStackInfo - = StackInfo { - arg_space :: ByteOff, - -- number of bytes of arguments on the stack on entry to the - -- the proc. This is filled in by GHC.StgToCmm.codeGen, and - -- used by the stack allocator later. - updfr_space :: Maybe ByteOff, - -- XXX: this never contains anything useful, but it should. - -- See comment in CmmLayoutStack. - do_layout :: Bool - -- Do automatic stack layout for this proc. This is - -- True for all code generated by the code generator, - -- but is occasionally False for hand-written Cmm where - -- we want to do the stack manipulation manually. - } - --- | Info table as a haskell data type -data CmmInfoTable - = CmmInfoTable { - cit_lbl :: CLabel, -- Info table label - cit_rep :: SMRep, - cit_prof :: ProfilingInfo, - cit_srt :: Maybe CLabel, -- empty, or a closure address - cit_clo :: Maybe (Id, CostCentreStack) - -- Just (id,ccs) <=> build a static closure later - -- Nothing <=> don't build a static closure - -- - -- Static closures for FUNs and THUNKs are *not* generated by - -- the code generator, because we might want to add SRT - -- entries to them later (for FUNs at least; THUNKs are - -- treated the same for consistency). See Note [SRTs] in - -- CmmBuildInfoTables, in particular the [FUN] optimisation. - -- - -- This is strictly speaking not a part of the info table that - -- will be finally generated, but it's the only convenient - -- place to convey this information from the code generator to - -- where we build the static closures in - -- CmmBuildInfoTables.doSRTs. 
- } - -data ProfilingInfo - = NoProfilingInfo - | ProfilingInfo ByteString ByteString -- closure_type, closure_desc - ------------------------------------------------------------------------------ --- Static Data ------------------------------------------------------------------------------ - -data SectionType - = Text - | Data - | ReadOnlyData - | RelocatableReadOnlyData - | UninitialisedData - | ReadOnlyData16 -- .rodata.cst16 on x86_64, 16-byte aligned - | CString - | OtherSection String - deriving (Show) - --- | Should a data in this section be considered constant -isSecConstant :: Section -> Bool -isSecConstant (Section t _) = case t of - Text -> True - ReadOnlyData -> True - RelocatableReadOnlyData -> True - ReadOnlyData16 -> True - CString -> True - Data -> False - UninitialisedData -> False - (OtherSection _) -> False - -data Section = Section SectionType CLabel - -data CmmStatic - = CmmStaticLit CmmLit - -- a literal value, size given by cmmLitRep of the literal. - | CmmUninitialised Int - -- uninitialised data, N bytes long - | CmmString ByteString - -- string of 8-bit values only, not zero terminated. - -data CmmStatics - = Statics - CLabel -- Label of statics - [CmmStatic] -- The static data itself - --- ----------------------------------------------------------------------------- --- Basic blocks consisting of lists - --- These are used by the LLVM and NCG backends, when populating Cmm --- with lists of instructions. - -data GenBasicBlock i = BasicBlock BlockId [i] - --- | The branch block id is that of the first block in --- the branch, which is that branch's entry point -blockId :: GenBasicBlock i -> BlockId -blockId (BasicBlock blk_id _ ) = blk_id - -newtype ListGraph i = ListGraph [GenBasicBlock i] - -instance Outputable instr => Outputable (ListGraph instr) where - ppr (ListGraph blocks) = vcat (map ppr blocks) - -instance Outputable instr => Outputable (GenBasicBlock instr) where - ppr = pprBBlock - -pprBBlock :: Outputable stmt => GenBasicBlock stmt -> SDoc -pprBBlock (BasicBlock ident stmts) = - hang (ppr ident <> colon) 4 (vcat (map ppr stmts)) - diff --git a/compiler/cmm/CmmBuildInfoTables.hs b/compiler/cmm/CmmBuildInfoTables.hs deleted file mode 100644 index 81c86fdad5..0000000000 --- a/compiler/cmm/CmmBuildInfoTables.hs +++ /dev/null @@ -1,892 +0,0 @@ -{-# LANGUAGE GADTs, BangPatterns, RecordWildCards, - GeneralizedNewtypeDeriving, NondecreasingIndentation, TupleSections #-} - -module CmmBuildInfoTables - ( CAFSet, CAFEnv, cafAnal - , doSRTs, ModuleSRTInfo, emptySRT - ) where - -import GhcPrelude hiding (succ) - -import Id -import BlockId -import Hoopl.Block -import Hoopl.Graph -import Hoopl.Label -import Hoopl.Collections -import Hoopl.Dataflow -import Module -import GHC.Platform -import Digraph -import CLabel -import Cmm -import CmmUtils -import DynFlags -import Maybes -import Outputable -import SMRep -import UniqSupply -import CostCentre -import GHC.StgToCmm.Heap - -import Control.Monad -import Data.Map (Map) -import qualified Data.Map as Map -import Data.Set (Set) -import qualified Data.Set as Set -import Data.Tuple -import Control.Monad.Trans.State -import Control.Monad.Trans.Class - - -{- Note [SRTs] - -SRTs are the mechanism by which the garbage collector can determine -the live CAFs in the program. - -Representation -^^^^^^^^^^^^^^ - -+------+ -| info | -| | +-----+---+---+---+ -| -------->|SRT_2| | | | | 0 | -|------| +-----+-|-+-|-+---+ -| | | | -| code | | | -| | v v - -An SRT is simply an object in the program's data segment. 
It has the -same representation as a static constructor. There are 16 -pre-compiled SRT info tables: stg_SRT_1_info, .. stg_SRT_16_info, -representing SRT objects with 1-16 pointers, respectively. - -The entries of an SRT object point to static closures, which are either -- FUN_STATIC, THUNK_STATIC or CONSTR -- Another SRT (actually just a CONSTR) - -The final field of the SRT is the static link field, used by the -garbage collector to chain together static closures that it visits and -to determine whether a static closure has been visited or not. (see -Note [STATIC_LINK fields]) - -By traversing the transitive closure of an SRT, the GC will reach all -of the CAFs that are reachable from the code associated with this SRT. - -If we need to create an SRT with more than 16 entries, we build a -chain of SRT objects with all but the last having 16 entries. - -+-----+---+- -+---+---+ -|SRT16| | | | | | 0 | -+-----+-|-+- -+-|-+---+ - | | - v v - +----+---+---+---+ - |SRT2| | | | | 0 | - +----+-|-+-|-+---+ - | | - | | - v v - -Referring to an SRT from the info table -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -The following things have SRTs: - -- Static functions (FUN) -- Static thunks (THUNK), ie. CAFs -- Continuations (RET_SMALL, etc.) - -In each case, the info table points to the SRT. - -- info->srt is zero if there's no SRT, otherwise: -- info->srt == 1 and info->f.srt_offset points to the SRT - -e.g. for a FUN with an SRT: - -StgFunInfoTable +------+ - info->f.srt_offset | ------------> offset to SRT object -StgStdInfoTable +------+ - info->layout.ptrs | ... | - info->layout.nptrs | ... | - info->srt | 1 | - info->type | ... | - |------| - -On x86_64, we optimise the info table representation further. The -offset to the SRT can be stored in 32 bits (all code lives within a -2GB region in x86_64's small memory model), so we can save a word in -the info table by storing the srt_offset in the srt field, which is -half a word. - -On x86_64 with TABLES_NEXT_TO_CODE (except on MachO, due to #15169): - -- info->srt is zero if there's no SRT, otherwise: -- info->srt is an offset from the info pointer to the SRT object - -StgStdInfoTable +------+ - info->layout.ptrs | | - info->layout.nptrs | | - info->srt | ------------> offset to SRT object - |------| - - -EXAMPLE -^^^^^^^ - -f = \x. ... g ... - where - g = \y. ... h ... c1 ... - h = \z. ... c2 ... - -c1 & c2 are CAFs - -g and h are local functions, but they have no static closures. When -we generate code for f, we start with a CmmGroup of four CmmDecls: - - [ f_closure, f_entry, g_entry, h_entry ] - -we process each CmmDecl separately in cpsTop, giving us a list of -CmmDecls. e.g. for f_entry, we might end up with - - [ f_entry, f1_ret, f2_proc ] - -where f1_ret is a return point, and f2_proc is a proc-point. We have -a CAFSet for each of these CmmDecls, let's suppose they are - - [ f_entry{g_info}, f1_ret{g_info}, f2_proc{} ] - [ g_entry{h_info, c1_closure} ] - [ h_entry{c2_closure} ] - -Next, we make an SRT for each of these functions: - - f_srt : [g_info] - g_srt : [h_info, c1_closure] - h_srt : [c2_closure] - -Now, for g_info and h_info, we want to refer to the SRTs for g and h -respectively, which we'll label g_srt and h_srt: - - f_srt : [g_srt] - g_srt : [h_srt, c1_closure] - h_srt : [c2_closure] - -Now, when an SRT has a single entry, we don't actually generate an SRT -closure for it, instead we just replace references to it with its -single element. 
So, since h_srt == c2_closure, we have - - f_srt : [g_srt] - g_srt : [c2_closure, c1_closure] - h_srt : [c2_closure] - -and the only SRT closure we generate is - - g_srt = SRT_2 [c2_closure, c1_closure] - - -Optimisations -^^^^^^^^^^^^^ - -To reduce the code size overhead and the cost of traversing SRTs in -the GC, we want to simplify SRTs where possible. We therefore apply -the following optimisations. Each has a [keyword]; search for the -keyword in the code below to see where the optimisation is -implemented. - -1. [Inline] we never create an SRT with a single entry, instead we - point to the single entry directly from the info table. - - i.e. instead of - - +------+ - | info | - | | +-----+---+---+ - | -------->|SRT_1| | | 0 | - |------| +-----+-|-+---+ - | | | - | code | | - | | v - C - - we can point directly to the closure: - - +------+ - | info | - | | - | -------->C - |------| - | | - | code | - | | - - - Furthermore, the SRT for any code that refers to this info table - can point directly to C. - - The exception to this is when we're doing dynamic linking. In that - case, if the closure is not locally defined then we can't point to - it directly from the info table, because this is the text section - which cannot contain runtime relocations. In this case we skip this - optimisation and generate the singleton SRT, because SRTs are in the - data section and *can* have relocatable references. - -2. [FUN] A static function closure can also be an SRT, we simply put - the SRT entries as fields in the static closure. This makes a lot - of sense: the static references are just like the free variables of - the FUN closure. - - i.e. instead of - - f_closure: - +-----+---+ - | | | 0 | - +- |--+---+ - | +------+ - | | info | f_srt: - | | | +-----+---+---+---+ - | | -------->|SRT_2| | | | + 0 | - `----------->|------| +-----+-|-+-|-+---+ - | | | | - | code | | | - | | v v - - - We can generate: - - f_closure: - +-----+---+---+---+ - | | | | | | | 0 | - +- |--+-|-+-|-+---+ - | | | +------+ - | v v | info | - | | | - | | 0 | - `----------->|------| - | | - | code | - | | - - - (note: we can't do this for THUNKs, because the thunk gets - overwritten when it is entered, so we wouldn't be able to share - this SRT with other info tables that want to refer to it (see - [Common] below). FUNs are immutable so don't have this problem.) - -3. [Common] Identical SRTs can be commoned up. - -4. [Filter] If an SRT A refers to an SRT B and a closure C, and B also - refers to C (perhaps transitively), then we can omit the reference - to C from A. - - -Note that there are many other optimisations that we could do, but -aren't implemented. In general, we could omit any reference from an -SRT if everything reachable from it is also reachable from the other -fields in the SRT. Our [Filter] optimisation is a special case of -this. - -Another opportunity we don't exploit is this: - -A = {X,Y,Z} -B = {Y,Z} -C = {X,B} - -Here we could use C = {A} and therefore [Inline] C = A. --} - --- --------------------------------------------------------------------- -{- Note [Invalid optimisation: shortcutting] - -You might think that if we have something like - -A's SRT = {B} -B's SRT = {X} - -that we could replace the reference to B in A's SRT with X. - -A's SRT = {X} -B's SRT = {X} - -and thereby perhaps save a little work at runtime, because we don't -have to visit B. - -But this is NOT valid. - -Consider these cases: - -0. B can't be a constructor, because constructors don't have SRTs - -1. B is a CAF. This is the easy one. 
Obviously we want A's SRT to - point to B, so that it keeps B alive. - -2. B is a function. This is the tricky one. The reason we can't -shortcut in this case is that we aren't allowed to resurrect static -objects. - -== How does this cause a problem? == - -The particular case that cropped up when we tried this was #15544. -- A is a thunk -- B is a static function -- X is a CAF -- suppose we GC when A is alive, and B is not otherwise reachable. -- B is "collected", meaning that it doesn't make it onto the static - objects list during this GC, but nothing bad happens yet. -- Next, suppose we enter A, and then call B. (remember that A refers to B) - At the entry point to B, we GC. This puts B on the stack, as part of the - RET_FUN stack frame that gets pushed when we GC at a function entry point. -- This GC will now reach B -- But because B was previous "collected", it breaks the assumption - that static objects are never resurrected. See Note [STATIC_LINK - fields] in rts/sm/Storage.h for why this is bad. -- In practice, the GC thinks that B has already been visited, and so - doesn't visit X, and catastrophe ensues. - -== Isn't this caused by the RET_FUN business? == - -Maybe, but could you prove that RET_FUN is the only way that -resurrection can occur? - -So, no shortcutting. --} - --- --------------------------------------------------------------------- --- Label types - --- Labels that come from cafAnal can be: --- - _closure labels for static functions or CAFs --- - _info labels for dynamic functions, thunks, or continuations --- - _entry labels for functions or thunks --- --- Meanwhile the labels on top-level blocks are _entry labels. --- --- To put everything in the same namespace we convert all labels to --- closure labels using toClosureLbl. Note that some of these --- labels will not actually exist; that's ok because we're going to --- map them to SRTEntry later, which ranges over labels that do exist. --- -newtype CAFLabel = CAFLabel CLabel - deriving (Eq,Ord,Outputable) - -type CAFSet = Set CAFLabel -type CAFEnv = LabelMap CAFSet - -mkCAFLabel :: CLabel -> CAFLabel -mkCAFLabel lbl = CAFLabel (toClosureLbl lbl) - --- This is a label that we can put in an SRT. It *must* be a closure label, --- pointing to either a FUN_STATIC, THUNK_STATIC, or CONSTR. -newtype SRTEntry = SRTEntry CLabel - deriving (Eq, Ord, Outputable) - --- --------------------------------------------------------------------- --- CAF analysis - --- | --- For each code block: --- - collect the references reachable from this code block to FUN, --- THUNK or RET labels for which hasCAF == True --- --- This gives us a `CAFEnv`: a mapping from code block to sets of labels --- -cafAnal - :: LabelSet -- The blocks representing continuations, ie. those - -- that will get RET info tables. These labels will - -- get their own SRTs, so we don't aggregate CAFs from - -- references to these labels, we just use the label. - -> CLabel -- The top label of the proc - -> CmmGraph - -> CAFEnv -cafAnal contLbls topLbl cmmGraph = - analyzeCmmBwd cafLattice - (cafTransfers contLbls (g_entry cmmGraph) topLbl) cmmGraph mapEmpty - - -cafLattice :: DataflowLattice CAFSet -cafLattice = DataflowLattice Set.empty add - where - add (OldFact old) (NewFact new) = - let !new' = old `Set.union` new - in changedIf (Set.size new' > Set.size old) new' - - -cafTransfers :: LabelSet -> Label -> CLabel -> TransferFun CAFSet -cafTransfers contLbls entry topLbl - (BlockCC eNode middle xNode) fBase = - let joined = cafsInNode xNode $! 
live' - !result = foldNodesBwdOO cafsInNode middle joined - - facts = mapMaybe successorFact (successors xNode) - live' = joinFacts cafLattice facts - - successorFact s - -- If this is a loop back to the entry, we can refer to the - -- entry label. - | s == entry = Just (add topLbl Set.empty) - -- If this is a continuation, we want to refer to the - -- SRT for the continuation's info table - | s `setMember` contLbls - = Just (Set.singleton (mkCAFLabel (infoTblLbl s))) - -- Otherwise, takes the CAF references from the destination - | otherwise - = lookupFact s fBase - - cafsInNode :: CmmNode e x -> CAFSet -> CAFSet - cafsInNode node set = foldExpDeep addCaf node set - - addCaf expr !set = - case expr of - CmmLit (CmmLabel c) -> add c set - CmmLit (CmmLabelOff c _) -> add c set - CmmLit (CmmLabelDiffOff c1 c2 _ _) -> add c1 $! add c2 set - _ -> set - add l s | hasCAF l = Set.insert (mkCAFLabel l) s - | otherwise = s - - in mapSingleton (entryLabel eNode) result - - --- ----------------------------------------------------------------------------- --- ModuleSRTInfo - -data ModuleSRTInfo = ModuleSRTInfo - { thisModule :: Module - -- ^ Current module being compiled. Required for calling labelDynamic. - , dedupSRTs :: Map (Set SRTEntry) SRTEntry - -- ^ previous SRTs we've emitted, so we can de-duplicate. - -- Used to implement the [Common] optimisation. - , flatSRTs :: Map SRTEntry (Set SRTEntry) - -- ^ The reverse mapping, so that we can remove redundant - -- entries. e.g. if we have an SRT [a,b,c], and we know that b - -- points to [c,d], we can omit c and emit [a,b]. - -- Used to implement the [Filter] optimisation. - } -instance Outputable ModuleSRTInfo where - ppr ModuleSRTInfo{..} = - text "ModuleSRTInfo:" <+> ppr dedupSRTs <+> ppr flatSRTs - -emptySRT :: Module -> ModuleSRTInfo -emptySRT mod = - ModuleSRTInfo - { thisModule = mod - , dedupSRTs = Map.empty - , flatSRTs = Map.empty } - --- ----------------------------------------------------------------------------- --- Constructing SRTs - -{- Implementation notes - -- In each CmmDecl there is a mapping info_tbls from Label -> CmmInfoTable - -- The entry in info_tbls corresponding to g_entry is the closure info - table, the rest are continuations. - -- Each entry in info_tbls possibly needs an SRT. We need to make a - label for each of these. - -- We get the CAFSet for each entry from the CAFEnv - --} - --- | Return a (Label,CLabel) pair for each labelled block of a CmmDecl, --- where the label is --- - the info label for a continuation or dynamic closure --- - the closure label for a top-level function (not a CAF) -getLabelledBlocks :: CmmDecl -> [(Label, CAFLabel)] -getLabelledBlocks (CmmData _ _) = [] -getLabelledBlocks (CmmProc top_info _ _ _) = - [ (blockId, mkCAFLabel (cit_lbl info)) - | (blockId, info) <- mapToList (info_tbls top_info) - , let rep = cit_rep info - , not (isStaticRep rep) || not (isThunkRep rep) - ] - - --- | Put the labelled blocks that we will be annotating with SRTs into --- dependency order. This is so that we can process them one at a --- time, resolving references to earlier blocks to point to their --- SRTs. CAFs themselves are not included here; see getCAFs below. 
-depAnalSRTs - :: CAFEnv - -> [CmmDecl] - -> [SCC (Label, CAFLabel, Set CAFLabel)] -depAnalSRTs cafEnv decls = - srtTrace "depAnalSRTs" (ppr graph) graph - where - labelledBlocks = concatMap getLabelledBlocks decls - labelToBlock = Map.fromList (map swap labelledBlocks) - graph = stronglyConnCompFromEdgedVerticesOrd - [ let cafs' = Set.delete lbl cafs in - DigraphNode (l,lbl,cafs') l - (mapMaybe (flip Map.lookup labelToBlock) (Set.toList cafs')) - | (l, lbl) <- labelledBlocks - , Just cafs <- [mapLookup l cafEnv] ] - - --- | Get (Label, CAFLabel, Set CAFLabel) for each block that represents a CAF. --- These are treated differently from other labelled blocks: --- - we never shortcut a reference to a CAF to the contents of its --- SRT, since the point of SRTs is to keep CAFs alive. --- - CAFs therefore don't take part in the dependency analysis in depAnalSRTs. --- instead we generate their SRTs after everything else. -getCAFs :: CAFEnv -> [CmmDecl] -> [(Label, CAFLabel, Set CAFLabel)] -getCAFs cafEnv decls = - [ (g_entry g, mkCAFLabel topLbl, cafs) - | CmmProc top_info topLbl _ g <- decls - , Just info <- [mapLookup (g_entry g) (info_tbls top_info)] - , let rep = cit_rep info - , isStaticRep rep && isThunkRep rep - , Just cafs <- [mapLookup (g_entry g) cafEnv] - ] - - --- | Get the list of blocks that correspond to the entry points for --- FUN_STATIC closures. These are the blocks for which if we have an --- SRT we can merge it with the static closure. [FUN] -getStaticFuns :: [CmmDecl] -> [(BlockId, CLabel)] -getStaticFuns decls = - [ (g_entry g, lbl) - | CmmProc top_info _ _ g <- decls - , Just info <- [mapLookup (g_entry g) (info_tbls top_info)] - , Just (id, _) <- [cit_clo info] - , let rep = cit_rep info - , isStaticRep rep && isFunRep rep - , let lbl = mkLocalClosureLabel (idName id) (idCafInfo id) - ] - - --- | Maps labels from 'cafAnal' to the final CLabel that will appear --- in the SRT. --- - closures with singleton SRTs resolve to their single entry --- - closures with larger SRTs map to the label for that SRT --- - CAFs must not map to anything! --- - if a labels maps to Nothing, we found that this label's SRT --- is empty, so we don't need to refer to it from other SRTs. -type SRTMap = Map CAFLabel (Maybe SRTEntry) - --- | resolve a CAFLabel to its SRTEntry using the SRTMap -resolveCAF :: SRTMap -> CAFLabel -> Maybe SRTEntry -resolveCAF srtMap lbl@(CAFLabel l) = - Map.findWithDefault (Just (SRTEntry (toClosureLbl l))) lbl srtMap - - --- | Attach SRTs to all info tables in the CmmDecls, and add SRT --- declarations to the ModuleSRTInfo. --- -doSRTs - :: DynFlags - -> ModuleSRTInfo - -> [(CAFEnv, [CmmDecl])] - -> IO (ModuleSRTInfo, [CmmDecl]) - -doSRTs dflags moduleSRTInfo tops = do - us <- mkSplitUniqSupply 'u' - - -- Ignore the original grouping of decls, and combine all the - -- CAFEnvs into a single CAFEnv. - let (cafEnvs, declss) = unzip tops - cafEnv = mapUnions cafEnvs - decls = concat declss - staticFuns = mapFromList (getStaticFuns decls) - - -- Put the decls in dependency order. Why? So that we can implement - -- [Inline] and [Filter]. If we need to refer to an SRT that has - -- a single entry, we use the entry itself, which means that we - -- don't need to generate the singleton SRT in the first place. But - -- to do this we need to process blocks before things that depend on - -- them. 
- let - sccs = depAnalSRTs cafEnv decls - cafsWithSRTs = getCAFs cafEnv decls - - -- On each strongly-connected group of decls, construct the SRT - -- closures and the SRT fields for info tables. - let result :: - [ ( [CmmDecl] -- generated SRTs - , [(Label, CLabel)] -- SRT fields for info tables - , [(Label, [SRTEntry])] -- SRTs to attach to static functions - ) ] - ((result, _srtMap), moduleSRTInfo') = - initUs_ us $ - flip runStateT moduleSRTInfo $ - flip runStateT Map.empty $ do - nonCAFs <- mapM (doSCC dflags staticFuns) sccs - cAFs <- forM cafsWithSRTs $ \(l, cafLbl, cafs) -> - oneSRT dflags staticFuns [l] [cafLbl] True{-is a CAF-} cafs - return (nonCAFs ++ cAFs) - - (declss, pairs, funSRTs) = unzip3 result - - -- Next, update the info tables with the SRTs - let - srtFieldMap = mapFromList (concat pairs) - funSRTMap = mapFromList (concat funSRTs) - decls' = concatMap (updInfoSRTs dflags srtFieldMap funSRTMap) decls - - return (moduleSRTInfo', concat declss ++ decls') - - --- | Build the SRT for a strongly-connected component of blocks -doSCC - :: DynFlags - -> LabelMap CLabel -- which blocks are static function entry points - -> SCC (Label, CAFLabel, Set CAFLabel) - -> StateT SRTMap - (StateT ModuleSRTInfo UniqSM) - ( [CmmDecl] -- generated SRTs - , [(Label, CLabel)] -- SRT fields for info tables - , [(Label, [SRTEntry])] -- SRTs to attach to static functions - ) - -doSCC dflags staticFuns (AcyclicSCC (l, cafLbl, cafs)) = - oneSRT dflags staticFuns [l] [cafLbl] False cafs - -doSCC dflags staticFuns (CyclicSCC nodes) = do - -- build a single SRT for the whole cycle, see Note [recursive SRTs] - let (blockids, lbls, cafsets) = unzip3 nodes - cafs = Set.unions cafsets - oneSRT dflags staticFuns blockids lbls False cafs - - -{- Note [recursive SRTs] - -If the dependency analyser has found us a recursive group of -declarations, then we build a single SRT for the whole group, on the -grounds that everything in the group is reachable from everything -else, so we lose nothing by having a single SRT. - -However, there are a couple of wrinkles to be aware of. - -* The Set CAFLabel for this SRT will contain labels in the group -itself. The SRTMap will therefore not contain entries for these labels -yet, so we can't turn them into SRTEntries using resolveCAF. BUT we -can just remove recursive references from the Set CAFLabel before -generating the SRT - the SRT will still contain all the CAFLabels that -we need to refer to from this group's SRT. - -* That is, EXCEPT for static function closures. For the same reason -described in Note [Invalid optimisation: shortcutting], we cannot omit -references to static function closures. - - But, since we will merge the SRT with one of the static function - closures (see [FUN]), we can omit references to *that* static - function closure from the SRT. --} - --- | Build an SRT for a set of blocks -oneSRT - :: DynFlags - -> LabelMap CLabel -- which blocks are static function entry points - -> [Label] -- blocks in this set - -> [CAFLabel] -- labels for those blocks - -> Bool -- True <=> this SRT is for a CAF - -> Set CAFLabel -- SRT for this set - -> StateT SRTMap - (StateT ModuleSRTInfo UniqSM) - ( [CmmDecl] -- SRT objects we built - , [(Label, CLabel)] -- SRT fields for these blocks' itbls - , [(Label, [SRTEntry])] -- SRTs to attach to static functions - ) - -oneSRT dflags staticFuns blockids lbls isCAF cafs = do - srtMap <- get - topSRT <- lift get - let - -- Can we merge this SRT with a FUN_STATIC closure? 
- (maybeFunClosure, otherFunLabels) = - case [ (l,b) | b <- blockids, Just l <- [mapLookup b staticFuns] ] of - [] -> (Nothing, []) - ((l,b):xs) -> (Just (l,b), map (mkCAFLabel . fst) xs) - - -- Remove recursive references from the SRT, except for (all but - -- one of the) static functions. See Note [recursive SRTs]. - nonRec = cafs `Set.difference` - (Set.fromList lbls `Set.difference` Set.fromList otherFunLabels) - - -- First resolve all the CAFLabels to SRTEntries - -- Implements the [Inline] optimisation. - resolved = mapMaybe (resolveCAF srtMap) (Set.toList nonRec) - - -- The set of all SRTEntries in SRTs that we refer to from here. - allBelow = - Set.unions [ lbls | caf <- resolved - , Just lbls <- [Map.lookup caf (flatSRTs topSRT)] ] - - -- Remove SRTEntries that are also in an SRT that we refer to. - -- Implements the [Filter] optimisation. - filtered = Set.difference (Set.fromList resolved) allBelow - - srtTrace "oneSRT:" - (ppr cafs <+> ppr resolved <+> ppr allBelow <+> ppr filtered) $ return () - - let - isStaticFun = isJust maybeFunClosure - - -- For a label without a closure (e.g. a continuation), we must - -- update the SRTMap for the label to point to a closure. It's - -- important that we don't do this for static functions or CAFs, - -- see Note [Invalid optimisation: shortcutting]. - updateSRTMap srtEntry = - when (not isCAF && (not isStaticFun || isNothing srtEntry)) $ do - let newSRTMap = Map.fromList [(cafLbl, srtEntry) | cafLbl <- lbls] - put (Map.union newSRTMap srtMap) - - this_mod = thisModule topSRT - - case Set.toList filtered of - [] -> do - srtTrace "oneSRT: empty" (ppr lbls) $ return () - updateSRTMap Nothing - return ([], [], []) - - -- [Inline] - when we have only one entry there is no need to - -- build an SRT object at all, instead we put the singleton SRT - -- entry in the info table. - [one@(SRTEntry lbl)] - | -- Info tables refer to SRTs by offset (as noted in the section - -- "Referring to an SRT from the info table" of Note [SRTs]). However, - -- when dynamic linking is used we cannot guarantee that the offset - -- between the SRT and the info table will fit in the offset field. - -- Consequently we build a singleton SRT in in this case. - not (labelDynamic dflags this_mod lbl) - - -- MachO relocations can't express offsets between compilation units at - -- all, so we are always forced to build a singleton SRT in this case. - && (not (osMachOTarget $ platformOS $ targetPlatform dflags) - || isLocalCLabel this_mod lbl) -> do - - -- If we have a static function closure, then it becomes the - -- SRT object, and everything else points to it. (the only way - -- we could have multiple labels here is if this is a - -- recursive group, see Note [recursive SRTs]) - case maybeFunClosure of - Just (staticFunLbl,staticFunBlock) -> return ([], withLabels, []) - where - withLabels = - [ (b, if b == staticFunBlock then lbl else staticFunLbl) - | b <- blockids ] - Nothing -> do - updateSRTMap (Just one) - return ([], map (,lbl) blockids, []) - - cafList -> - -- Check whether an SRT with the same entries has been emitted already. - -- Implements the [Common] optimisation. 
- case Map.lookup filtered (dedupSRTs topSRT) of - Just srtEntry@(SRTEntry srtLbl) -> do - srtTrace "oneSRT [Common]" (ppr lbls <+> ppr srtLbl) $ return () - updateSRTMap (Just srtEntry) - return ([], map (,srtLbl) blockids, []) - Nothing -> do - -- No duplicates: we have to build a new SRT object - srtTrace "oneSRT: new" (ppr lbls <+> ppr filtered) $ return () - (decls, funSRTs, srtEntry) <- - case maybeFunClosure of - Just (fun,block) -> - return ( [], [(block, cafList)], SRTEntry fun ) - Nothing -> do - (decls, entry) <- lift . lift $ buildSRTChain dflags cafList - return (decls, [], entry) - updateSRTMap (Just srtEntry) - let allBelowThis = Set.union allBelow filtered - oldFlatSRTs = flatSRTs topSRT - newFlatSRTs = Map.insert srtEntry allBelowThis oldFlatSRTs - newDedupSRTs = Map.insert filtered srtEntry (dedupSRTs topSRT) - lift (put (topSRT { dedupSRTs = newDedupSRTs - , flatSRTs = newFlatSRTs })) - let SRTEntry lbl = srtEntry - return (decls, map (,lbl) blockids, funSRTs) - - --- | build a static SRT object (or a chain of objects) from a list of --- SRTEntries. -buildSRTChain - :: DynFlags - -> [SRTEntry] - -> UniqSM - ( [CmmDecl] -- The SRT object(s) - , SRTEntry -- label to use in the info table - ) -buildSRTChain _ [] = panic "buildSRT: empty" -buildSRTChain dflags cafSet = - case splitAt mAX_SRT_SIZE cafSet of - (these, []) -> do - (decl,lbl) <- buildSRT dflags these - return ([decl], lbl) - (these,those) -> do - (rest, rest_lbl) <- buildSRTChain dflags (head these : those) - (decl,lbl) <- buildSRT dflags (rest_lbl : tail these) - return (decl:rest, lbl) - where - mAX_SRT_SIZE = 16 - - -buildSRT :: DynFlags -> [SRTEntry] -> UniqSM (CmmDecl, SRTEntry) -buildSRT dflags refs = do - id <- getUniqueM - let - lbl = mkSRTLabel id - srt_n_info = mkSRTInfoLabel (length refs) - fields = - mkStaticClosure dflags srt_n_info dontCareCCS - [ CmmLabel lbl | SRTEntry lbl <- refs ] - [] -- no padding - [mkIntCLit dflags 0] -- link field - [] -- no saved info - return (mkDataLits (Section Data lbl) lbl fields, SRTEntry lbl) - - --- | Update info tables with references to their SRTs. Also generate --- static closures, splicing in SRT fields as necessary. -updInfoSRTs - :: DynFlags - -> LabelMap CLabel -- SRT labels for each block - -> LabelMap [SRTEntry] -- SRTs to merge into FUN_STATIC closures - -> CmmDecl - -> [CmmDecl] - -updInfoSRTs dflags srt_env funSRTEnv (CmmProc top_info top_l live g) - | Just (_,closure) <- maybeStaticClosure = [ proc, closure ] - | otherwise = [ proc ] - where - proc = CmmProc top_info { info_tbls = newTopInfo } top_l live g - newTopInfo = mapMapWithKey updInfoTbl (info_tbls top_info) - updInfoTbl l info_tbl - | l == g_entry g, Just (inf, _) <- maybeStaticClosure = inf - | otherwise = info_tbl { cit_srt = mapLookup l srt_env } - - -- Generate static closures [FUN]. Note that this also generates - -- static closures for thunks (CAFs), because it's easier to treat - -- them uniformly in the code generator. 
- maybeStaticClosure :: Maybe (CmmInfoTable, CmmDecl) - maybeStaticClosure - | Just info_tbl@CmmInfoTable{..} <- - mapLookup (g_entry g) (info_tbls top_info) - , Just (id, ccs) <- cit_clo - , isStaticRep cit_rep = - let - (newInfo, srtEntries) = case mapLookup (g_entry g) funSRTEnv of - Nothing -> - -- if we don't add SRT entries to this closure, then we - -- want to set the srt field in its info table as usual - (info_tbl { cit_srt = mapLookup (g_entry g) srt_env }, []) - Just srtEntries -> srtTrace "maybeStaticFun" (ppr res) - (info_tbl { cit_rep = new_rep }, res) - where res = [ CmmLabel lbl | SRTEntry lbl <- srtEntries ] - fields = mkStaticClosureFields dflags info_tbl ccs (idCafInfo id) - srtEntries - new_rep = case cit_rep of - HeapRep sta ptrs nptrs ty -> - HeapRep sta (ptrs + length srtEntries) nptrs ty - _other -> panic "maybeStaticFun" - lbl = mkLocalClosureLabel (idName id) (idCafInfo id) - in - Just (newInfo, mkDataLits (Section Data lbl) lbl fields) - | otherwise = Nothing - -updInfoSRTs _ _ _ t = [t] - - -srtTrace :: String -> SDoc -> b -> b --- srtTrace = pprTrace -srtTrace _ _ b = b diff --git a/compiler/cmm/CmmCallConv.hs b/compiler/cmm/CmmCallConv.hs deleted file mode 100644 index df1eaad005..0000000000 --- a/compiler/cmm/CmmCallConv.hs +++ /dev/null @@ -1,212 +0,0 @@ -module CmmCallConv ( - ParamLocation(..), - assignArgumentsPos, - assignStack, - realArgRegsCover -) where - -import GhcPrelude - -import CmmExpr -import SMRep -import Cmm (Convention(..)) -import PprCmm () -- For Outputable instances - -import DynFlags -import GHC.Platform -import Outputable - --- Calculate the 'GlobalReg' or stack locations for function call --- parameters as used by the Cmm calling convention. - -data ParamLocation - = RegisterParam GlobalReg - | StackParam ByteOff - -instance Outputable ParamLocation where - ppr (RegisterParam g) = ppr g - ppr (StackParam p) = ppr p - --- | --- Given a list of arguments, and a function that tells their types, --- return a list showing where each argument is passed --- -assignArgumentsPos :: DynFlags - -> ByteOff -- stack offset to start with - -> Convention - -> (a -> CmmType) -- how to get a type from an arg - -> [a] -- args - -> ( - ByteOff -- bytes of stack args - , [(a, ParamLocation)] -- args and locations - ) - -assignArgumentsPos dflags off conv arg_ty reps = (stk_off, assignments) - where - regs = case (reps, conv) of - (_, NativeNodeCall) -> getRegsWithNode dflags - (_, NativeDirectCall) -> getRegsWithoutNode dflags - ([_], NativeReturn) -> allRegs dflags - (_, NativeReturn) -> getRegsWithNode dflags - -- GC calling convention *must* put values in registers - (_, GC) -> allRegs dflags - (_, Slow) -> nodeOnly - -- The calling conventions first assign arguments to registers, - -- then switch to the stack when we first run out of registers - -- (even if there are still available registers for args of a - -- different type). When returning an unboxed tuple, we also - -- separate the stack arguments by pointerhood. 
- (reg_assts, stk_args) = assign_regs [] reps regs - (stk_off, stk_assts) = assignStack dflags off arg_ty stk_args - assignments = reg_assts ++ stk_assts - - assign_regs assts [] _ = (assts, []) - assign_regs assts (r:rs) regs | isVecType ty = vec - | isFloatType ty = float - | otherwise = int - where vec = case (w, regs) of - (W128, (vs, fs, ds, ls, s:ss)) - | passVectorInReg W128 dflags -> k (RegisterParam (XmmReg s), (vs, fs, ds, ls, ss)) - (W256, (vs, fs, ds, ls, s:ss)) - | passVectorInReg W256 dflags -> k (RegisterParam (YmmReg s), (vs, fs, ds, ls, ss)) - (W512, (vs, fs, ds, ls, s:ss)) - | passVectorInReg W512 dflags -> k (RegisterParam (ZmmReg s), (vs, fs, ds, ls, ss)) - _ -> (assts, (r:rs)) - float = case (w, regs) of - (W32, (vs, fs, ds, ls, s:ss)) - | passFloatInXmm -> k (RegisterParam (FloatReg s), (vs, fs, ds, ls, ss)) - (W32, (vs, f:fs, ds, ls, ss)) - | not passFloatInXmm -> k (RegisterParam f, (vs, fs, ds, ls, ss)) - (W64, (vs, fs, ds, ls, s:ss)) - | passFloatInXmm -> k (RegisterParam (DoubleReg s), (vs, fs, ds, ls, ss)) - (W64, (vs, fs, d:ds, ls, ss)) - | not passFloatInXmm -> k (RegisterParam d, (vs, fs, ds, ls, ss)) - _ -> (assts, (r:rs)) - int = case (w, regs) of - (W128, _) -> panic "W128 unsupported register type" - (_, (v:vs, fs, ds, ls, ss)) | widthInBits w <= widthInBits (wordWidth dflags) - -> k (RegisterParam (v gcp), (vs, fs, ds, ls, ss)) - (_, (vs, fs, ds, l:ls, ss)) | widthInBits w > widthInBits (wordWidth dflags) - -> k (RegisterParam l, (vs, fs, ds, ls, ss)) - _ -> (assts, (r:rs)) - k (asst, regs') = assign_regs ((r, asst) : assts) rs regs' - ty = arg_ty r - w = typeWidth ty - gcp | isGcPtrType ty = VGcPtr - | otherwise = VNonGcPtr - passFloatInXmm = passFloatArgsInXmm dflags - -passFloatArgsInXmm :: DynFlags -> Bool -passFloatArgsInXmm dflags = case platformArch (targetPlatform dflags) of - ArchX86_64 -> True - ArchX86 -> False - _ -> False - --- We used to spill vector registers to the stack since the LLVM backend didn't --- support vector registers in its calling convention. However, this has now --- been fixed. This function remains only as a convenient way to re-enable --- spilling when debugging code generation. -passVectorInReg :: Width -> DynFlags -> Bool -passVectorInReg _ _ = True - -assignStack :: DynFlags -> ByteOff -> (a -> CmmType) -> [a] - -> ( - ByteOff -- bytes of stack args - , [(a, ParamLocation)] -- args and locations - ) -assignStack dflags offset arg_ty args = assign_stk offset [] (reverse args) - where - assign_stk offset assts [] = (offset, assts) - assign_stk offset assts (r:rs) - = assign_stk off' ((r, StackParam off') : assts) rs - where w = typeWidth (arg_ty r) - off' = offset + size - -- Stack arguments always take a whole number of words, we never - -- pack them unlike constructor fields. - size = roundUpToWords dflags (widthInBytes w) - ------------------------------------------------------------------------------ --- Local information about the registers available - -type AvailRegs = ( [VGcPtr -> GlobalReg] -- available vanilla regs. - , [GlobalReg] -- floats - , [GlobalReg] -- doubles - , [GlobalReg] -- longs (int64 and word64) - , [Int] -- XMM (floats and doubles) - ) - --- Vanilla registers can contain pointers, Ints, Chars. --- Floats and doubles have separate register supplies. --- --- We take these register supplies from the *real* registers, i.e. those --- that are guaranteed to map to machine registers. 
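-- Editorial aside: a much-simplified model of the "registers first, then
-- stack" policy implemented by assign_regs/assignStack above.  It uses a
-- single register class and a fixed 8-byte slot size; the ToyLoc/toyAssign
-- names are illustrative and not part of the original code.
import Data.List (mapAccumL)

data ToyLoc = ToyReg Int | ToyStack Int deriving Show

toyAssign :: [Int]               -- available registers
          -> [String]            -- arguments
          -> [(String, ToyLoc)]  -- where each argument is passed
toyAssign regs args = snd (mapAccumL step (regs, 0) args)
  where
    step (r:rs, off) arg = ((rs, off),      (arg, ToyReg r))
    step ([],   off) arg = (([], off + 8),  (arg, ToyStack (off + 8)))

-- ghci> toyAssign [1,2] ["x","y","z"]
-- [("x",ToyReg 1),("y",ToyReg 2),("z",ToyStack 8)]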
- -getRegsWithoutNode, getRegsWithNode :: DynFlags -> AvailRegs -getRegsWithoutNode dflags = - ( filter (\r -> r VGcPtr /= node) (realVanillaRegs dflags) - , realFloatRegs dflags - , realDoubleRegs dflags - , realLongRegs dflags - , realXmmRegNos dflags) - --- getRegsWithNode uses R1/node even if it isn't a register -getRegsWithNode dflags = - ( if null (realVanillaRegs dflags) - then [VanillaReg 1] - else realVanillaRegs dflags - , realFloatRegs dflags - , realDoubleRegs dflags - , realLongRegs dflags - , realXmmRegNos dflags) - -allFloatRegs, allDoubleRegs, allLongRegs :: DynFlags -> [GlobalReg] -allVanillaRegs :: DynFlags -> [VGcPtr -> GlobalReg] -allXmmRegs :: DynFlags -> [Int] - -allVanillaRegs dflags = map VanillaReg $ regList (mAX_Vanilla_REG dflags) -allFloatRegs dflags = map FloatReg $ regList (mAX_Float_REG dflags) -allDoubleRegs dflags = map DoubleReg $ regList (mAX_Double_REG dflags) -allLongRegs dflags = map LongReg $ regList (mAX_Long_REG dflags) -allXmmRegs dflags = regList (mAX_XMM_REG dflags) - -realFloatRegs, realDoubleRegs, realLongRegs :: DynFlags -> [GlobalReg] -realVanillaRegs :: DynFlags -> [VGcPtr -> GlobalReg] -realXmmRegNos :: DynFlags -> [Int] - -realVanillaRegs dflags = map VanillaReg $ regList (mAX_Real_Vanilla_REG dflags) -realFloatRegs dflags = map FloatReg $ regList (mAX_Real_Float_REG dflags) -realDoubleRegs dflags = map DoubleReg $ regList (mAX_Real_Double_REG dflags) -realLongRegs dflags = map LongReg $ regList (mAX_Real_Long_REG dflags) - -realXmmRegNos dflags - | isSse2Enabled dflags = regList (mAX_Real_XMM_REG dflags) - | otherwise = [] - -regList :: Int -> [Int] -regList n = [1 .. n] - -allRegs :: DynFlags -> AvailRegs -allRegs dflags = (allVanillaRegs dflags, - allFloatRegs dflags, - allDoubleRegs dflags, - allLongRegs dflags, - allXmmRegs dflags) - -nodeOnly :: AvailRegs -nodeOnly = ([VanillaReg 1], [], [], [], []) - --- This returns the set of global registers that *cover* the machine registers --- used for argument passing. On platforms where registers can overlap---right --- now just x86-64, where Float and Double registers overlap---passing this set --- of registers is guaranteed to preserve the contents of all live registers. We --- only use this functionality in hand-written C-- code in the RTS. 
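-- Editorial aside: a small sketch of what "covering" overlapping registers
-- means, with hypothetical register names.  When two virtual registers map
-- to the same machine register (like Float and Double registers sharing an
-- XMM register on x86-64), keeping one representative per machine register
-- is enough to preserve them all.
import qualified Data.Map.Strict as M

toyCover :: [(String, String)]  -- (virtual register, machine register it lives in)
         -> [String]            -- one virtual register per machine register
toyCover regs = M.elems (M.fromList [ (mach, virt) | (virt, mach) <- regs ])

-- ghci> toyCover [("F1","xmm1"), ("D1","xmm1"), ("R1","rbx")]
-- ["R1","D1"]   -- saving R1 and D1 preserves F1 as well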
-realArgRegsCover :: DynFlags -> [GlobalReg] -realArgRegsCover dflags - | passFloatArgsInXmm dflags = map ($VGcPtr) (realVanillaRegs dflags) ++ - realLongRegs dflags ++ - map XmmReg (realXmmRegNos dflags) - | otherwise = map ($VGcPtr) (realVanillaRegs dflags) ++ - realFloatRegs dflags ++ - realDoubleRegs dflags ++ - realLongRegs dflags ++ - map XmmReg (realXmmRegNos dflags) diff --git a/compiler/cmm/CmmCommonBlockElim.hs b/compiler/cmm/CmmCommonBlockElim.hs deleted file mode 100644 index cbf7d83d36..0000000000 --- a/compiler/cmm/CmmCommonBlockElim.hs +++ /dev/null @@ -1,320 +0,0 @@ -{-# LANGUAGE GADTs, BangPatterns, ScopedTypeVariables #-} - -module CmmCommonBlockElim - ( elimCommonBlocks - ) -where - - -import GhcPrelude hiding (iterate, succ, unzip, zip) - -import BlockId -import Cmm -import CmmUtils -import CmmSwitch (eqSwitchTargetWith) -import CmmContFlowOpt - -import Hoopl.Block -import Hoopl.Graph -import Hoopl.Label -import Hoopl.Collections -import Data.Bits -import Data.Maybe (mapMaybe) -import qualified Data.List as List -import Data.Word -import qualified Data.Map as M -import Outputable -import qualified TrieMap as TM -import UniqFM -import Unique -import Control.Arrow (first, second) - --- ----------------------------------------------------------------------------- --- Eliminate common blocks - --- If two blocks are identical except for the label on the first node, --- then we can eliminate one of the blocks. To ensure that the semantics --- of the program are preserved, we have to rewrite each predecessor of the --- eliminated block to proceed with the block we keep. - --- The algorithm iterates over the blocks in the graph, --- checking whether it has seen another block that is equal modulo labels. --- If so, then it adds an entry in a map indicating that the new block --- is made redundant by the old block. --- Otherwise, it is added to the useful blocks. - --- To avoid comparing every block with every other block repeatedly, we group --- them by --- * a hash of the block, ignoring labels (explained below) --- * the list of outgoing labels --- The hash is invariant under relabeling, so we only ever compare within --- the same group of blocks. --- --- The list of outgoing labels is updated as we merge blocks (that is why they --- are not included in the hash, which we want to calculate only once). --- --- All in all, two blocks should never be compared if they have different --- hashes, and at most once otherwise. Previously, we were slower, and people --- rightfully complained: #10397 - --- TODO: Use optimization fuel -elimCommonBlocks :: CmmGraph -> CmmGraph -elimCommonBlocks g = replaceLabels env $ copyTicks env g - where - env = iterate mapEmpty blocks_with_key - -- The order of blocks doesn't matter here. While we could use - -- revPostorder which drops unreachable blocks this is done in - -- ContFlowOpt already which runs before this pass. So we use - -- toBlockList since it is faster. - groups = groupByInt hash_block (toBlockList g) :: [[CmmBlock]] - blocks_with_key = [ [ (successors b, [b]) | b <- bs] | bs <- groups] - --- Invariant: The blocks in the list are pairwise distinct --- (so avoid comparing them again) -type DistinctBlocks = [CmmBlock] -type Key = [Label] -type Subst = LabelMap BlockId - --- The outer list groups by hash. We retain this grouping throughout. 
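-- Editorial aside: a self-contained model of the "hash first, compare only
-- within a bucket" strategy described above.  The hash and the
-- label-insensitive equality are passed in as parameters, and the
-- toyFindDuplicates name is illustrative rather than part of the module.
import qualified Data.Map.Strict as M

toyFindDuplicates :: (a -> Int)        -- hash, invariant under relabelling
                  -> (a -> a -> Bool)  -- equality modulo labels
                  -> [a]
                  -> [(a, a)]          -- (duplicate, earlier representative)
toyFindDuplicates hash eq xs =
  [ dup
  | bucket <- M.elems (M.fromListWith (++) [ (hash x, [x]) | x <- xs ])
  , dup <- dedup [] bucket
  ]
  where
    -- expensive comparison happens only against members of the same bucket
    dedup _    []     = []
    dedup seen (b:bs) = case filter (eq b) seen of
      (rep:_) -> (b, rep) : dedup seen bs
      []      -> dedup (b:seen) bs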
-iterate :: Subst -> [[(Key, DistinctBlocks)]] -> Subst -iterate subst blocks - | mapNull new_substs = subst - | otherwise = iterate subst' updated_blocks - where - grouped_blocks :: [[(Key, [DistinctBlocks])]] - grouped_blocks = map groupByLabel blocks - - merged_blocks :: [[(Key, DistinctBlocks)]] - (new_substs, merged_blocks) = List.mapAccumL (List.mapAccumL go) mapEmpty grouped_blocks - where - go !new_subst1 (k,dbs) = (new_subst1 `mapUnion` new_subst2, (k,db)) - where - (new_subst2, db) = mergeBlockList subst dbs - - subst' = subst `mapUnion` new_substs - updated_blocks = map (map (first (map (lookupBid subst')))) merged_blocks - --- Combine two lists of blocks. --- While they are internally distinct they can still share common blocks. -mergeBlocks :: Subst -> DistinctBlocks -> DistinctBlocks -> (Subst, DistinctBlocks) -mergeBlocks subst existing new = go new - where - go [] = (mapEmpty, existing) - go (b:bs) = case List.find (eqBlockBodyWith (eqBid subst) b) existing of - -- This block is a duplicate. Drop it, and add it to the substitution - Just b' -> first (mapInsert (entryLabel b) (entryLabel b')) $ go bs - -- This block is not a duplicate, keep it. - Nothing -> second (b:) $ go bs - -mergeBlockList :: Subst -> [DistinctBlocks] -> (Subst, DistinctBlocks) -mergeBlockList _ [] = pprPanic "mergeBlockList" empty -mergeBlockList subst (b:bs) = go mapEmpty b bs - where - go !new_subst1 b [] = (new_subst1, b) - go !new_subst1 b1 (b2:bs) = go new_subst b bs - where - (new_subst2, b) = mergeBlocks subst b1 b2 - new_subst = new_subst1 `mapUnion` new_subst2 - - --- ----------------------------------------------------------------------------- --- Hashing and equality on blocks - --- Below here is mostly boilerplate: hashing blocks ignoring labels, --- and comparing blocks modulo a label mapping. - --- To speed up comparisons, we hash each basic block modulo jump labels. --- The hashing is a bit arbitrary (the numbers are completely arbitrary), --- but it should be fast and good enough. - --- We want to get as many small buckets as possible, as comparing blocks is --- expensive. So include as much as possible in the hash. Ideally everything --- that is compared with (==) in eqBlockBodyWith. - -type HashCode = Int - -hash_block :: CmmBlock -> HashCode -hash_block block = - fromIntegral (foldBlockNodesB3 (hash_fst, hash_mid, hash_lst) block (0 :: Word32) .&. (0x7fffffff :: Word32)) - -- UniqFM doesn't like negative Ints - where hash_fst _ h = h - hash_mid m h = hash_node m + h `shiftL` 1 - hash_lst m h = hash_node m + h `shiftL` 1 - - hash_node :: CmmNode O x -> Word32 - hash_node n | dont_care n = 0 -- don't care - hash_node (CmmAssign r e) = hash_reg r + hash_e e - hash_node (CmmStore e e') = hash_e e + hash_e e' - hash_node (CmmUnsafeForeignCall t _ as) = hash_tgt t + hash_list hash_e as - hash_node (CmmBranch _) = 23 -- NB. ignore the label - hash_node (CmmCondBranch p _ _ _) = hash_e p - hash_node (CmmCall e _ _ _ _ _) = hash_e e - hash_node (CmmForeignCall t _ _ _ _ _ _) = hash_tgt t - hash_node (CmmSwitch e _) = hash_e e - hash_node _ = error "hash_node: unknown Cmm node!" 
- - hash_reg :: CmmReg -> Word32 - hash_reg (CmmLocal localReg) = hash_unique localReg -- important for performance, see #10397 - hash_reg (CmmGlobal _) = 19 - - hash_e :: CmmExpr -> Word32 - hash_e (CmmLit l) = hash_lit l - hash_e (CmmLoad e _) = 67 + hash_e e - hash_e (CmmReg r) = hash_reg r - hash_e (CmmMachOp _ es) = hash_list hash_e es -- pessimal - no operator check - hash_e (CmmRegOff r i) = hash_reg r + cvt i - hash_e (CmmStackSlot _ _) = 13 - - hash_lit :: CmmLit -> Word32 - hash_lit (CmmInt i _) = fromInteger i - hash_lit (CmmFloat r _) = truncate r - hash_lit (CmmVec ls) = hash_list hash_lit ls - hash_lit (CmmLabel _) = 119 -- ugh - hash_lit (CmmLabelOff _ i) = cvt $ 199 + i - hash_lit (CmmLabelDiffOff _ _ i _) = cvt $ 299 + i - hash_lit (CmmBlock _) = 191 -- ugh - hash_lit (CmmHighStackMark) = cvt 313 - - hash_tgt (ForeignTarget e _) = hash_e e - hash_tgt (PrimTarget _) = 31 -- lots of these - - hash_list f = foldl' (\z x -> f x + z) (0::Word32) - - cvt = fromInteger . toInteger - - hash_unique :: Uniquable a => a -> Word32 - hash_unique = cvt . getKey . getUnique - --- | Ignore these node types for equality -dont_care :: CmmNode O x -> Bool -dont_care CmmComment {} = True -dont_care CmmTick {} = True -dont_care CmmUnwind {} = True -dont_care _other = False - --- Utilities: equality and substitution on the graph. - --- Given a map ``subst'' from BlockID -> BlockID, we define equality. -eqBid :: LabelMap BlockId -> BlockId -> BlockId -> Bool -eqBid subst bid bid' = lookupBid subst bid == lookupBid subst bid' -lookupBid :: LabelMap BlockId -> BlockId -> BlockId -lookupBid subst bid = case mapLookup bid subst of - Just bid -> lookupBid subst bid - Nothing -> bid - --- Middle nodes and expressions can contain BlockIds, in particular in --- CmmStackSlot and CmmBlock, so we have to use a special equality for --- these. --- -eqMiddleWith :: (BlockId -> BlockId -> Bool) - -> CmmNode O O -> CmmNode O O -> Bool -eqMiddleWith eqBid (CmmAssign r1 e1) (CmmAssign r2 e2) - = r1 == r2 && eqExprWith eqBid e1 e2 -eqMiddleWith eqBid (CmmStore l1 r1) (CmmStore l2 r2) - = eqExprWith eqBid l1 l2 && eqExprWith eqBid r1 r2 -eqMiddleWith eqBid (CmmUnsafeForeignCall t1 r1 a1) - (CmmUnsafeForeignCall t2 r2 a2) - = t1 == t2 && r1 == r2 && eqListWith (eqExprWith eqBid) a1 a2 -eqMiddleWith _ _ _ = False - -eqExprWith :: (BlockId -> BlockId -> Bool) - -> CmmExpr -> CmmExpr -> Bool -eqExprWith eqBid = eq - where - CmmLit l1 `eq` CmmLit l2 = eqLit l1 l2 - CmmLoad e1 _ `eq` CmmLoad e2 _ = e1 `eq` e2 - CmmReg r1 `eq` CmmReg r2 = r1==r2 - CmmRegOff r1 i1 `eq` CmmRegOff r2 i2 = r1==r2 && i1==i2 - CmmMachOp op1 es1 `eq` CmmMachOp op2 es2 = op1==op2 && es1 `eqs` es2 - CmmStackSlot a1 i1 `eq` CmmStackSlot a2 i2 = eqArea a1 a2 && i1==i2 - _e1 `eq` _e2 = False - - xs `eqs` ys = eqListWith eq xs ys - - eqLit (CmmBlock id1) (CmmBlock id2) = eqBid id1 id2 - eqLit l1 l2 = l1 == l2 - - eqArea Old Old = True - eqArea (Young id1) (Young id2) = eqBid id1 id2 - eqArea _ _ = False - --- Equality on the body of a block, modulo a function mapping block --- IDs to block IDs. -eqBlockBodyWith :: (BlockId -> BlockId -> Bool) -> CmmBlock -> CmmBlock -> Bool -eqBlockBodyWith eqBid block block' - {- - | equal = pprTrace "equal" (vcat [ppr block, ppr block']) True - | otherwise = pprTrace "not equal" (vcat [ppr block, ppr block']) False - -} - = equal - where (_,m,l) = blockSplit block - nodes = filter (not . dont_care) (blockToList m) - (_,m',l') = blockSplit block' - nodes' = filter (not . 
dont_care) (blockToList m') - - equal = eqListWith (eqMiddleWith eqBid) nodes nodes' && - eqLastWith eqBid l l' - - -eqLastWith :: (BlockId -> BlockId -> Bool) -> CmmNode O C -> CmmNode O C -> Bool -eqLastWith eqBid (CmmBranch bid1) (CmmBranch bid2) = eqBid bid1 bid2 -eqLastWith eqBid (CmmCondBranch c1 t1 f1 l1) (CmmCondBranch c2 t2 f2 l2) = - c1 == c2 && l1 == l2 && eqBid t1 t2 && eqBid f1 f2 -eqLastWith eqBid (CmmCall t1 c1 g1 a1 r1 u1) (CmmCall t2 c2 g2 a2 r2 u2) = - t1 == t2 && eqMaybeWith eqBid c1 c2 && a1 == a2 && r1 == r2 && u1 == u2 && g1 == g2 -eqLastWith eqBid (CmmSwitch e1 ids1) (CmmSwitch e2 ids2) = - e1 == e2 && eqSwitchTargetWith eqBid ids1 ids2 -eqLastWith _ _ _ = False - -eqMaybeWith :: (a -> b -> Bool) -> Maybe a -> Maybe b -> Bool -eqMaybeWith eltEq (Just e) (Just e') = eltEq e e' -eqMaybeWith _ Nothing Nothing = True -eqMaybeWith _ _ _ = False - -eqListWith :: (a -> b -> Bool) -> [a] -> [b] -> Bool -eqListWith f (a : as) (b : bs) = f a b && eqListWith f as bs -eqListWith _ [] [] = True -eqListWith _ _ _ = False - --- | Given a block map, ensure that all "target" blocks are covered by --- the same ticks as the respective "source" blocks. This not only --- means copying ticks, but also adjusting tick scopes where --- necessary. -copyTicks :: LabelMap BlockId -> CmmGraph -> CmmGraph -copyTicks env g - | mapNull env = g - | otherwise = ofBlockMap (g_entry g) $ mapMap copyTo blockMap - where -- Reverse block merge map - blockMap = toBlockMap g - revEnv = mapFoldlWithKey insertRev M.empty env - insertRev m k x = M.insertWith (const (k:)) x [k] m - -- Copy ticks and scopes into the given block - copyTo block = case M.lookup (entryLabel block) revEnv of - Nothing -> block - Just ls -> foldr copy block $ mapMaybe (flip mapLookup blockMap) ls - copy from to = - let ticks = blockTicks from - CmmEntry _ scp0 = firstNode from - (CmmEntry lbl scp1, code) = blockSplitHead to - in CmmEntry lbl (combineTickScopes scp0 scp1) `blockJoinHead` - foldr blockCons code (map CmmTick ticks) - --- Group by [Label] --- See Note [Compressed TrieMap] in coreSyn/TrieMap about the usage of GenMap. -groupByLabel :: [(Key, DistinctBlocks)] -> [(Key, [DistinctBlocks])] -groupByLabel = - go (TM.emptyTM :: TM.ListMap (TM.GenMap LabelMap) (Key, [DistinctBlocks])) - where - go !m [] = TM.foldTM (:) m [] - go !m ((k,v) : entries) = go (TM.alterTM k adjust m) entries - where --k' = map (getKey . getUnique) k - adjust Nothing = Just (k,[v]) - adjust (Just (_,vs)) = Just (k,v:vs) - -groupByInt :: (a -> Int) -> [a] -> [[a]] -groupByInt f xs = nonDetEltsUFM $ List.foldl' go emptyUFM xs - -- See Note [Unique Determinism and code generation] - where - go m x = alterUFM addEntry m (f x) - where - addEntry xs = Just $! 
maybe [x] (x:) xs diff --git a/compiler/cmm/CmmContFlowOpt.hs b/compiler/cmm/CmmContFlowOpt.hs deleted file mode 100644 index 606da02969..0000000000 --- a/compiler/cmm/CmmContFlowOpt.hs +++ /dev/null @@ -1,451 +0,0 @@ -{-# LANGUAGE GADTs #-} -{-# LANGUAGE BangPatterns #-} -{-# OPTIONS_GHC -fno-warn-incomplete-patterns #-} -module CmmContFlowOpt - ( cmmCfgOpts - , cmmCfgOptsProc - , removeUnreachableBlocksProc - , replaceLabels - ) -where - -import GhcPrelude hiding (succ, unzip, zip) - -import Hoopl.Block -import Hoopl.Collections -import Hoopl.Graph -import Hoopl.Label -import BlockId -import Cmm -import CmmUtils -import CmmSwitch (mapSwitchTargets, switchTargetsToList) -import Maybes -import Panic -import Util - -import Control.Monad - - --- Note [What is shortcutting] --- ~~~~~~~~~~~~~~~~~~~~~~~~~~~ --- --- Consider this Cmm code: --- --- L1: ... --- goto L2; --- L2: goto L3; --- L3: ... --- --- Here L2 is an empty block and contains only an unconditional branch --- to L3. In this situation any block that jumps to L2 can jump --- directly to L3: --- --- L1: ... --- goto L3; --- L2: goto L3; --- L3: ... --- --- In this situation we say that we shortcut L2 to L3. One of --- consequences of shortcutting is that some blocks of code may become --- unreachable (in the example above this is true for L2). - - --- Note [Control-flow optimisations] --- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --- --- This optimisation does three things: --- --- - If a block finishes in an unconditional branch to another block --- and that is the only jump to that block we concatenate the --- destination block at the end of the current one. --- --- - If a block finishes in a call whose continuation block is a --- goto, then we can shortcut the destination, making the --- continuation block the destination of the goto - but see Note --- [Shortcut call returns]. --- --- - For any block that is not a call we try to shortcut the --- destination(s). Additionally, if a block ends with a --- conditional branch we try to invert the condition. --- --- Blocks are processed using postorder DFS traversal. A side effect --- of determining traversal order with a graph search is elimination --- of any blocks that are unreachable. --- --- Transformations are improved by working from the end of the graph --- towards the beginning, because we may be able to perform many --- shortcuts in one go. - - --- Note [Shortcut call returns] --- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --- --- We are going to maintain the "current" graph (LabelMap CmmBlock) as --- we go, and also a mapping from BlockId to BlockId, representing --- continuation labels that we have renamed. This latter mapping is --- important because we might shortcut a CmmCall continuation. For --- example: --- --- Sp[0] = L --- call g returns to L --- L: goto M --- M: ... --- --- So when we shortcut the L block, we need to replace not only --- the continuation of the call, but also references to L in the --- code (e.g. the assignment Sp[0] = L): --- --- Sp[0] = M --- call g returns to M --- M: ... --- --- So we keep track of which labels we have renamed and apply the mapping --- at the end with replaceLabels. - - --- Note [Shortcut call returns and proc-points] --- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --- --- Consider this code that you might get from a recursive --- let-no-escape: --- --- goto L1 --- L1: --- if (Hp > HpLim) then L2 else L3 --- L2: --- call stg_gc_noregs returns to L4 --- L4: --- goto L1 --- L3: --- ... --- goto L1 --- --- Then the control-flow optimiser shortcuts L4. 
But that turns L1 --- into the call-return proc point, and every iteration of the loop --- has to shuffle variables to and from the stack. So we must *not* --- shortcut L4. --- --- Moreover not shortcutting call returns is probably fine. If L4 can --- concat with its branch target then it will still do so. And we --- save some compile time because we don't have to traverse all the --- code in replaceLabels. --- --- However, we probably do want to do this if we are splitting proc --- points, because L1 will be a proc-point anyway, so merging it with --- L4 reduces the number of proc points. Unfortunately recursive --- let-no-escapes won't generate very good code with proc-point --- splitting on - we should probably compile them to explicitly use --- the native calling convention instead. - -cmmCfgOpts :: Bool -> CmmGraph -> CmmGraph -cmmCfgOpts split g = fst (blockConcat split g) - -cmmCfgOptsProc :: Bool -> CmmDecl -> CmmDecl -cmmCfgOptsProc split (CmmProc info lbl live g) = CmmProc info' lbl live g' - where (g', env) = blockConcat split g - info' = info{ info_tbls = new_info_tbls } - new_info_tbls = mapFromList (map upd_info (mapToList (info_tbls info))) - - -- If we changed any labels, then we have to update the info tables - -- too, except for the top-level info table because that might be - -- referred to by other procs. - upd_info (k,info) - | Just k' <- mapLookup k env - = (k', if k' == g_entry g' - then info - else info{ cit_lbl = infoTblLbl k' }) - | otherwise - = (k,info) -cmmCfgOptsProc _ top = top - - -blockConcat :: Bool -> CmmGraph -> (CmmGraph, LabelMap BlockId) -blockConcat splitting_procs g@CmmGraph { g_entry = entry_id } - = (replaceLabels shortcut_map $ ofBlockMap new_entry new_blocks, shortcut_map') - where - -- We might be able to shortcut the entry BlockId itself. - -- Remember to update the shortcut_map, since we also have to - -- update the info_tbls mapping now. - (new_entry, shortcut_map') - | Just entry_blk <- mapLookup entry_id new_blocks - , Just dest <- canShortcut entry_blk - = (dest, mapInsert entry_id dest shortcut_map) - | otherwise - = (entry_id, shortcut_map) - - -- blocks are sorted in reverse postorder, but we want to go from the exit - -- towards beginning, so we use foldr below. - blocks = revPostorder g - blockmap = foldl' (flip addBlock) emptyBody blocks - - -- Accumulator contains three components: - -- * map of blocks in a graph - -- * map of shortcut labels. See Note [Shortcut call returns] - -- * map containing number of predecessors for each block. We discard - -- it after we process all blocks. - (new_blocks, shortcut_map, _) = - foldr maybe_concat (blockmap, mapEmpty, initialBackEdges) blocks - - -- Map of predecessors for initial graph. We increase number of - -- predecessors for entry block by one to denote that it is - -- target of a jump, even if no block in the current graph jumps - -- to it. 
- initialBackEdges = incPreds entry_id (predMap blocks) - - maybe_concat :: CmmBlock - -> (LabelMap CmmBlock, LabelMap BlockId, LabelMap Int) - -> (LabelMap CmmBlock, LabelMap BlockId, LabelMap Int) - maybe_concat block (!blocks, !shortcut_map, !backEdges) - -- If: - -- (1) current block ends with unconditional branch to b' and - -- (2) it has exactly one predecessor (namely, current block) - -- - -- Then: - -- (1) append b' block at the end of current block - -- (2) remove b' from the map of blocks - -- (3) remove information about b' from predecessors map - -- - -- Since we know that the block has only one predecessor we call - -- mapDelete directly instead of calling decPreds. - -- - -- Note that we always maintain an up-to-date list of predecessors, so - -- we can ignore the contents of shortcut_map - | CmmBranch b' <- last - , hasOnePredecessor b' - , Just blk' <- mapLookup b' blocks - = let bid' = entryLabel blk' - in ( mapDelete bid' $ mapInsert bid (splice head blk') blocks - , shortcut_map - , mapDelete b' backEdges ) - - -- If: - -- (1) we are splitting proc points (see Note - -- [Shortcut call returns and proc-points]) and - -- (2) current block is a CmmCall or CmmForeignCall with - -- continuation b' and - -- (3) we can shortcut that continuation to dest - -- Then: - -- (1) we change continuation to point to b' - -- (2) create mapping from b' to dest - -- (3) increase number of predecessors of dest by 1 - -- (4) decrease number of predecessors of b' by 1 - -- - -- Later we will use replaceLabels to substitute all occurrences of b' - -- with dest. - | splitting_procs - , Just b' <- callContinuation_maybe last - , Just blk' <- mapLookup b' blocks - , Just dest <- canShortcut blk' - = ( mapInsert bid (blockJoinTail head (update_cont dest)) blocks - , mapInsert b' dest shortcut_map - , decPreds b' $ incPreds dest backEdges ) - - -- If: - -- (1) a block does not end with a call - -- Then: - -- (1) if it ends with a conditional attempt to invert the - -- conditional - -- (2) attempt to shortcut all destination blocks - -- (3) if new successors of a block are different from the old ones - -- update the of predecessors accordingly - -- - -- A special case of this is a situation when a block ends with an - -- unconditional jump to a block that can be shortcut. - | Nothing <- callContinuation_maybe last - = let oldSuccs = successors last - newSuccs = successors rewrite_last - in ( mapInsert bid (blockJoinTail head rewrite_last) blocks - , shortcut_map - , if oldSuccs == newSuccs - then backEdges - else foldr incPreds (foldr decPreds backEdges oldSuccs) newSuccs ) - - -- Otherwise don't do anything - | otherwise - = ( blocks, shortcut_map, backEdges ) - where - (head, last) = blockSplitTail block - bid = entryLabel block - - -- Changes continuation of a call to a specified label - update_cont dest = - case last of - CmmCall{} -> last { cml_cont = Just dest } - CmmForeignCall{} -> last { succ = dest } - _ -> panic "Can't shortcut continuation." - - -- Attempts to shortcut successors of last node - shortcut_last = mapSuccessors shortcut last - where - shortcut l = - case mapLookup l blocks of - Just b | Just dest <- canShortcut b -> dest - _otherwise -> l - - rewrite_last - -- Sometimes we can get rid of the conditional completely. 
- | CmmCondBranch _cond t f _l <- shortcut_last - , t == f - = CmmBranch t - - -- See Note [Invert Cmm conditionals] - | CmmCondBranch cond t f l <- shortcut_last - , hasOnePredecessor t -- inverting will make t a fallthrough - , likelyTrue l || (numPreds f > 1) - , Just cond' <- maybeInvertCmmExpr cond - = CmmCondBranch cond' f t (invertLikeliness l) - - -- If all jump destinations of a switch go to the - -- same target eliminate the switch. - | CmmSwitch _expr targets <- shortcut_last - , (t:ts) <- switchTargetsToList targets - , all (== t) ts - = CmmBranch t - - | otherwise - = shortcut_last - - likelyTrue (Just True) = True - likelyTrue _ = False - - invertLikeliness :: Maybe Bool -> Maybe Bool - invertLikeliness = fmap not - - -- Number of predecessors for a block - numPreds bid = mapLookup bid backEdges `orElse` 0 - - hasOnePredecessor b = numPreds b == 1 - -{- - Note [Invert Cmm conditionals] - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - The native code generator always produces jumps to the true branch. - Falling through to the false branch is however faster. So we try to - arrange for that to happen. - This means we invert the condition if: - * The likely path will become a fallthrough. - * We can't guarantee a fallthrough for the false branch but for the - true branch. - - In some cases it's faster to avoid inverting when the false branch is likely. - However determining when that is the case is neither easy nor cheap so for - now we always invert as this produces smaller binaries and code that is - equally fast on average. (On an i7-6700K) - - TODO: - There is also the edge case when both branches have multiple predecessors. - In this case we could assume that we will end up with a jump for BOTH - branches. In this case it might be best to put the likely path in the true - branch especially if there are large numbers of predecessors as this saves - us the jump thats not taken. However I haven't tested this and as of early - 2018 we almost never generate cmm where this would apply. --} - --- Functions for incrementing and decrementing number of predecessors. If --- decrementing would set the predecessor count to 0, we remove entry from the --- map. --- Invariant: if a block has no predecessors it should be dropped from the --- graph because it is unreachable. maybe_concat is constructed to maintain --- that invariant, but calling replaceLabels may introduce unreachable blocks. --- We rely on subsequent passes in the Cmm pipeline to remove unreachable --- blocks. -incPreds, decPreds :: BlockId -> LabelMap Int -> LabelMap Int -incPreds bid edges = mapInsertWith (+) bid 1 edges -decPreds bid edges = case mapLookup bid edges of - Just preds | preds > 1 -> mapInsert bid (preds - 1) edges - Just _ -> mapDelete bid edges - _ -> edges - - --- Checks if a block consists only of "goto dest". If it does than we return --- "Just dest" label. See Note [What is shortcutting] -canShortcut :: CmmBlock -> Maybe BlockId -canShortcut block - | (_, middle, CmmBranch dest) <- blockSplit block - , all dont_care $ blockToList middle - = Just dest - | otherwise - = Nothing - where dont_care CmmComment{} = True - dont_care CmmTick{} = True - dont_care _other = False - --- Concatenates two blocks. First one is assumed to be open on exit, the second --- is assumed to be closed on entry (i.e. it has a label attached to it, which --- the splice function removes by calling snd on result of blockSplitHead). 
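-- Editorial aside: chains of shortcuts arise when a "goto"-only block jumps
-- to another "goto"-only block.  Resolving a label through the accumulated
-- shortcut map can be modelled with Data.Map (the real code uses LabelMap);
-- toyShortcut is an illustrative name, and the map is assumed to be
-- acyclic, as the substitutions built by this pass are.
import qualified Data.Map.Strict as M

toyShortcut :: M.Map String String -> String -> String
toyShortcut env l = maybe l (toyShortcut env) (M.lookup l env)

-- ghci> toyShortcut (M.fromList [("L2","L3"), ("L3","L5")]) "L2"
-- "L5"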
-splice :: Block CmmNode C O -> CmmBlock -> CmmBlock
-splice head rest = entry `blockJoinHead` code0 `blockAppend` code1
-  where (CmmEntry lbl sc0, code0) = blockSplitHead head
-        (CmmEntry _ sc1, code1) = blockSplitHead rest
-        entry = CmmEntry lbl (combineTickScopes sc0 sc1)
-
--- If the node is a call with a continuation, return Just the label of
--- that continuation; otherwise return Nothing.
-callContinuation_maybe :: CmmNode O C -> Maybe BlockId
-callContinuation_maybe (CmmCall { cml_cont = Just b }) = Just b
-callContinuation_maybe (CmmForeignCall { succ = b }) = Just b
-callContinuation_maybe _ = Nothing
-
-
--- Map over the CmmGraph, replacing each label with its mapping in the
--- supplied LabelMap.
-replaceLabels :: LabelMap BlockId -> CmmGraph -> CmmGraph
-replaceLabels env g
-  | mapNull env = g
-  | otherwise   = replace_eid $ mapGraphNodes1 txnode g
-   where
-     replace_eid g = g {g_entry = lookup (g_entry g)}
-     lookup id = mapLookup id env `orElse` id
-
-     txnode :: CmmNode e x -> CmmNode e x
-     txnode (CmmBranch bid) = CmmBranch (lookup bid)
-     txnode (CmmCondBranch p t f l) =
-       mkCmmCondBranch (exp p) (lookup t) (lookup f) l
-     txnode (CmmSwitch e ids) =
-       CmmSwitch (exp e) (mapSwitchTargets lookup ids)
-     txnode (CmmCall t k rg a res r) =
-       CmmCall (exp t) (liftM lookup k) rg a res r
-     txnode fc@CmmForeignCall{} =
-       fc{ args = map exp (args fc), succ = lookup (succ fc) }
-     txnode other = mapExpDeep exp other
-
-     exp :: CmmExpr -> CmmExpr
-     exp (CmmLit (CmmBlock bid)) = CmmLit (CmmBlock (lookup bid))
-     exp (CmmStackSlot (Young id) i) = CmmStackSlot (Young (lookup id)) i
-     exp e = e
-
-mkCmmCondBranch :: CmmExpr -> Label -> Label -> Maybe Bool -> CmmNode O C
-mkCmmCondBranch p t f l =
-  if t == f then CmmBranch t else CmmCondBranch p t f l
-
--- Build a map from a block to its set of predecessors.
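-- Editorial aside: the predecessor bookkeeping used above (predMap together
-- with incPreds/decPreds) can be modelled with Data.Map in place of
-- LabelMap; the toy* names are illustrative only.
import qualified Data.Map.Strict as M

toyPredMap :: [(String, [String])]  -- (block label, its successors)
           -> M.Map String Int      -- how many blocks jump to each label
toyPredMap blocks =
  M.fromListWith (+) [ (s, 1) | (_, succs) <- blocks, s <- succs ]

-- Bump or drop a predecessor count, deleting entries that reach zero.
toyIncPred, toyDecPred :: String -> M.Map String Int -> M.Map String Int
toyIncPred l = M.insertWith (+) l 1
toyDecPred l = M.update (\n -> if n > 1 then Just (n - 1) else Nothing) l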
-predMap :: [CmmBlock] -> LabelMap Int -predMap blocks = foldr add_preds mapEmpty blocks - where - add_preds block env = foldr add env (successors block) - where add lbl env = mapInsertWith (+) lbl 1 env - --- Removing unreachable blocks -removeUnreachableBlocksProc :: CmmDecl -> CmmDecl -removeUnreachableBlocksProc proc@(CmmProc info lbl live g) - | used_blocks `lengthLessThan` mapSize (toBlockMap g) - = CmmProc info' lbl live g' - | otherwise - = proc - where - g' = ofBlockList (g_entry g) used_blocks - info' = info { info_tbls = keep_used (info_tbls info) } - -- Remove any info_tbls for unreachable - - keep_used :: LabelMap CmmInfoTable -> LabelMap CmmInfoTable - keep_used bs = mapFoldlWithKey keep mapEmpty bs - - keep :: LabelMap CmmInfoTable -> Label -> CmmInfoTable -> LabelMap CmmInfoTable - keep env l i | l `setMember` used_lbls = mapInsert l i env - | otherwise = env - - used_blocks :: [CmmBlock] - used_blocks = revPostorder g - - used_lbls :: LabelSet - used_lbls = setFromList $ map entryLabel used_blocks diff --git a/compiler/cmm/CmmExpr.hs b/compiler/cmm/CmmExpr.hs deleted file mode 100644 index 860ee1a7f5..0000000000 --- a/compiler/cmm/CmmExpr.hs +++ /dev/null @@ -1,619 +0,0 @@ -{-# LANGUAGE BangPatterns #-} -{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE FlexibleInstances #-} -{-# LANGUAGE MultiParamTypeClasses #-} -{-# LANGUAGE UndecidableInstances #-} - -module CmmExpr - ( CmmExpr(..), cmmExprType, cmmExprWidth, cmmExprAlignment, maybeInvertCmmExpr - , CmmReg(..), cmmRegType, cmmRegWidth - , CmmLit(..), cmmLitType - , LocalReg(..), localRegType - , GlobalReg(..), isArgReg, globalRegType - , spReg, hpReg, spLimReg, hpLimReg, nodeReg - , currentTSOReg, currentNurseryReg, hpAllocReg, cccsReg - , node, baseReg - , VGcPtr(..) - - , DefinerOfRegs, UserOfRegs - , foldRegsDefd, foldRegsUsed - , foldLocalRegsDefd, foldLocalRegsUsed - - , RegSet, LocalRegSet, GlobalRegSet - , emptyRegSet, elemRegSet, extendRegSet, deleteFromRegSet, mkRegSet - , plusRegSet, minusRegSet, timesRegSet, sizeRegSet, nullRegSet - , regSetToList - - , Area(..) - , module CmmMachOp - , module CmmType - ) -where - -import GhcPrelude - -import BlockId -import CLabel -import CmmMachOp -import CmmType -import DynFlags -import Outputable (panic) -import Unique - -import Data.Set (Set) -import qualified Data.Set as Set - -import BasicTypes (Alignment, mkAlignment, alignmentOf) - ------------------------------------------------------------------------------ --- CmmExpr --- An expression. Expressions have no side effects. ------------------------------------------------------------------------------ - -data CmmExpr - = CmmLit CmmLit -- Literal - | CmmLoad !CmmExpr !CmmType -- Read memory location - | CmmReg !CmmReg -- Contents of register - | CmmMachOp MachOp [CmmExpr] -- Machine operation (+, -, *, etc.) 
- | CmmStackSlot Area {-# UNPACK #-} !Int - -- addressing expression of a stack slot - -- See Note [CmmStackSlot aliasing] - | CmmRegOff !CmmReg Int - -- CmmRegOff reg i - -- ** is shorthand only, meaning ** - -- CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt (fromIntegral i) rep)] - -- where rep = typeWidth (cmmRegType reg) - -instance Eq CmmExpr where -- Equality ignores the types - CmmLit l1 == CmmLit l2 = l1==l2 - CmmLoad e1 _ == CmmLoad e2 _ = e1==e2 - CmmReg r1 == CmmReg r2 = r1==r2 - CmmRegOff r1 i1 == CmmRegOff r2 i2 = r1==r2 && i1==i2 - CmmMachOp op1 es1 == CmmMachOp op2 es2 = op1==op2 && es1==es2 - CmmStackSlot a1 i1 == CmmStackSlot a2 i2 = a1==a2 && i1==i2 - _e1 == _e2 = False - -data CmmReg - = CmmLocal {-# UNPACK #-} !LocalReg - | CmmGlobal GlobalReg - deriving( Eq, Ord ) - --- | A stack area is either the stack slot where a variable is spilled --- or the stack space where function arguments and results are passed. -data Area - = Old -- See Note [Old Area] - | Young {-# UNPACK #-} !BlockId -- Invariant: must be a continuation BlockId - -- See Note [Continuation BlockId] in CmmNode. - deriving (Eq, Ord) - -{- Note [Old Area] -~~~~~~~~~~~~~~~~~~ -There is a single call area 'Old', allocated at the extreme old -end of the stack frame (ie just younger than the return address) -which holds: - * incoming (overflow) parameters, - * outgoing (overflow) parameter to tail calls, - * outgoing (overflow) result values - * the update frame (if any) - -Its size is the max of all these requirements. On entry, the stack -pointer will point to the youngest incoming parameter, which is not -necessarily at the young end of the Old area. - -End of note -} - - -{- Note [CmmStackSlot aliasing] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -When do two CmmStackSlots alias? - - - T[old+N] aliases with U[young(L)+M] for all T, U, L, N and M - - T[old+N] aliases with U[old+M] only if the areas actually overlap - -Or more informally, different Areas may overlap with each other. - -An alternative semantics, that we previously had, was that different -Areas do not overlap. The problem that lead to redefining the -semantics of stack areas is described below. - -e.g. if we had - - x = Sp[old + 8] - y = Sp[old + 16] - - Sp[young(L) + 8] = L - Sp[young(L) + 16] = y - Sp[young(L) + 24] = x - call f() returns to L - -if areas semantically do not overlap, then we might optimise this to - - Sp[young(L) + 8] = L - Sp[young(L) + 16] = Sp[old + 8] - Sp[young(L) + 24] = Sp[old + 16] - call f() returns to L - -and now young(L) cannot be allocated at the same place as old, and we -are doomed to use more stack. - - - old+8 conflicts with young(L)+8 - - old+16 conflicts with young(L)+16 and young(L)+8 - -so young(L)+8 == old+24 and we get - - Sp[-8] = L - Sp[-16] = Sp[8] - Sp[-24] = Sp[0] - Sp -= 24 - call f() returns to L - -However, if areas are defined to be "possibly overlapping" in the -semantics, then we cannot commute any loads/stores of old with -young(L), and we will be able to re-use both old+8 and old+16 for -young(L). - - x = Sp[8] - y = Sp[0] - - Sp[8] = L - Sp[0] = y - Sp[-8] = x - Sp = Sp - 8 - call f() returns to L - -Now, the assignments of y go away, - - x = Sp[8] - Sp[8] = L - Sp[-8] = x - Sp = Sp - 8 - call f() returns to L --} - -data CmmLit - = CmmInt !Integer Width - -- Interpretation: the 2's complement representation of the value - -- is truncated to the specified size. 
This is easier than trying - -- to keep the value within range, because we don't know whether - -- it will be used as a signed or unsigned value (the CmmType doesn't - -- distinguish between signed & unsigned). - | CmmFloat Rational Width - | CmmVec [CmmLit] -- Vector literal - | CmmLabel CLabel -- Address of label - | CmmLabelOff CLabel Int -- Address of label + byte offset - - -- Due to limitations in the C backend, the following - -- MUST ONLY be used inside the info table indicated by label2 - -- (label2 must be the info label), and label1 must be an - -- SRT, a slow entrypoint or a large bitmap (see the Mangler) - -- Don't use it at all unless tablesNextToCode. - -- It is also used inside the NCG during when generating - -- position-independent code. - | CmmLabelDiffOff CLabel CLabel Int Width -- label1 - label2 + offset - -- In an expression, the width just has the effect of MO_SS_Conv - -- from wordWidth to the desired width. - -- - -- In a static literal, the supported Widths depend on the - -- architecture: wordWidth is supported on all - -- architectures. Additionally W32 is supported on x86_64 when - -- using the small memory model. - - | CmmBlock {-# UNPACK #-} !BlockId -- Code label - -- Invariant: must be a continuation BlockId - -- See Note [Continuation BlockId] in CmmNode. - - | CmmHighStackMark -- A late-bound constant that stands for the max - -- #bytes of stack space used during a procedure. - -- During the stack-layout pass, CmmHighStackMark - -- is replaced by a CmmInt for the actual number - -- of bytes used - deriving Eq - -cmmExprType :: DynFlags -> CmmExpr -> CmmType -cmmExprType dflags (CmmLit lit) = cmmLitType dflags lit -cmmExprType _ (CmmLoad _ rep) = rep -cmmExprType dflags (CmmReg reg) = cmmRegType dflags reg -cmmExprType dflags (CmmMachOp op args) = machOpResultType dflags op (map (cmmExprType dflags) args) -cmmExprType dflags (CmmRegOff reg _) = cmmRegType dflags reg -cmmExprType dflags (CmmStackSlot _ _) = bWord dflags -- an address --- Careful though: what is stored at the stack slot may be bigger than --- an address - -cmmLitType :: DynFlags -> CmmLit -> CmmType -cmmLitType _ (CmmInt _ width) = cmmBits width -cmmLitType _ (CmmFloat _ width) = cmmFloat width -cmmLitType _ (CmmVec []) = panic "cmmLitType: CmmVec []" -cmmLitType cflags (CmmVec (l:ls)) = let ty = cmmLitType cflags l - in if all (`cmmEqType` ty) (map (cmmLitType cflags) ls) - then cmmVec (1+length ls) ty - else panic "cmmLitType: CmmVec" -cmmLitType dflags (CmmLabel lbl) = cmmLabelType dflags lbl -cmmLitType dflags (CmmLabelOff lbl _) = cmmLabelType dflags lbl -cmmLitType _ (CmmLabelDiffOff _ _ _ width) = cmmBits width -cmmLitType dflags (CmmBlock _) = bWord dflags -cmmLitType dflags (CmmHighStackMark) = bWord dflags - -cmmLabelType :: DynFlags -> CLabel -> CmmType -cmmLabelType dflags lbl - | isGcPtrLabel lbl = gcWord dflags - | otherwise = bWord dflags - -cmmExprWidth :: DynFlags -> CmmExpr -> Width -cmmExprWidth dflags e = typeWidth (cmmExprType dflags e) - --- | Returns an alignment in bytes of a CmmExpr when it's a statically --- known integer constant, otherwise returns an alignment of 1 byte. --- The caller is responsible for using with a sensible CmmExpr --- argument. 
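-- Editorial aside: the alignment of a statically known offset is the
-- largest power of two that divides it.  A self-contained sketch of that
-- calculation (toyAlignmentOf is an illustrative name; the real code uses
-- alignmentOf from BasicTypes, which is not reproduced here):
toyAlignmentOf :: Int -> Int
toyAlignmentOf 0 = 1        -- simplification: 0 is not special-cased here
toyAlignmentOf n = go 1
  where
    go a | n `rem` (a * 2) == 0 = go (a * 2)
         | otherwise            = a

-- ghci> map toyAlignmentOf [24, 12, 7]
-- [8,4,1]
-- (for n > 0 this is the lowest set bit, i.e. n .&. negate n)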
-cmmExprAlignment :: CmmExpr -> Alignment -cmmExprAlignment (CmmLit (CmmInt intOff _)) = alignmentOf (fromInteger intOff) -cmmExprAlignment _ = mkAlignment 1 --------- ---- Negation for conditional branches - -maybeInvertCmmExpr :: CmmExpr -> Maybe CmmExpr -maybeInvertCmmExpr (CmmMachOp op args) = do op' <- maybeInvertComparison op - return (CmmMachOp op' args) -maybeInvertCmmExpr _ = Nothing - ------------------------------------------------------------------------------ --- Local registers ------------------------------------------------------------------------------ - -data LocalReg - = LocalReg {-# UNPACK #-} !Unique CmmType - -- ^ Parameters: - -- 1. Identifier - -- 2. Type - -instance Eq LocalReg where - (LocalReg u1 _) == (LocalReg u2 _) = u1 == u2 - --- This is non-deterministic but we do not currently support deterministic --- code-generation. See Note [Unique Determinism and code generation] --- See Note [No Ord for Unique] -instance Ord LocalReg where - compare (LocalReg u1 _) (LocalReg u2 _) = nonDetCmpUnique u1 u2 - -instance Uniquable LocalReg where - getUnique (LocalReg uniq _) = uniq - -cmmRegType :: DynFlags -> CmmReg -> CmmType -cmmRegType _ (CmmLocal reg) = localRegType reg -cmmRegType dflags (CmmGlobal reg) = globalRegType dflags reg - -cmmRegWidth :: DynFlags -> CmmReg -> Width -cmmRegWidth dflags = typeWidth . cmmRegType dflags - -localRegType :: LocalReg -> CmmType -localRegType (LocalReg _ rep) = rep - ------------------------------------------------------------------------------ --- Register-use information for expressions and other types ------------------------------------------------------------------------------ - --- | Sets of registers - --- These are used for dataflow facts, and a common operation is taking --- the union of two RegSets and then asking whether the union is the --- same as one of the inputs. UniqSet isn't good here, because --- sizeUniqSet is O(n) whereas Set.size is O(1), so we use ordinary --- Sets. 
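--
-- A sketch of that common operation, written with the wrappers defined just
-- below (the name 'joinRegSets' is illustrative, not part of this module):
--
--   joinRegSets :: Ord r => RegSet r -> RegSet r -> (Bool, RegSet r)
--   joinRegSets old new = (sizeRegSet joined == sizeRegSet old, joined)
--     where joined = old `plusRegSet` new
--
-- Since old is a subset of joined, the sizes are equal exactly when the
-- union added nothing, and sizeRegSet (i.e. Set.size) makes that check O(1).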
- -type RegSet r = Set r -type LocalRegSet = RegSet LocalReg -type GlobalRegSet = RegSet GlobalReg - -emptyRegSet :: RegSet r -nullRegSet :: RegSet r -> Bool -elemRegSet :: Ord r => r -> RegSet r -> Bool -extendRegSet :: Ord r => RegSet r -> r -> RegSet r -deleteFromRegSet :: Ord r => RegSet r -> r -> RegSet r -mkRegSet :: Ord r => [r] -> RegSet r -minusRegSet, plusRegSet, timesRegSet :: Ord r => RegSet r -> RegSet r -> RegSet r -sizeRegSet :: RegSet r -> Int -regSetToList :: RegSet r -> [r] - -emptyRegSet = Set.empty -nullRegSet = Set.null -elemRegSet = Set.member -extendRegSet = flip Set.insert -deleteFromRegSet = flip Set.delete -mkRegSet = Set.fromList -minusRegSet = Set.difference -plusRegSet = Set.union -timesRegSet = Set.intersection -sizeRegSet = Set.size -regSetToList = Set.toList - -class Ord r => UserOfRegs r a where - foldRegsUsed :: DynFlags -> (b -> r -> b) -> b -> a -> b - -foldLocalRegsUsed :: UserOfRegs LocalReg a - => DynFlags -> (b -> LocalReg -> b) -> b -> a -> b -foldLocalRegsUsed = foldRegsUsed - -class Ord r => DefinerOfRegs r a where - foldRegsDefd :: DynFlags -> (b -> r -> b) -> b -> a -> b - -foldLocalRegsDefd :: DefinerOfRegs LocalReg a - => DynFlags -> (b -> LocalReg -> b) -> b -> a -> b -foldLocalRegsDefd = foldRegsDefd - -instance UserOfRegs LocalReg CmmReg where - foldRegsUsed _ f z (CmmLocal reg) = f z reg - foldRegsUsed _ _ z (CmmGlobal _) = z - -instance DefinerOfRegs LocalReg CmmReg where - foldRegsDefd _ f z (CmmLocal reg) = f z reg - foldRegsDefd _ _ z (CmmGlobal _) = z - -instance UserOfRegs GlobalReg CmmReg where - foldRegsUsed _ _ z (CmmLocal _) = z - foldRegsUsed _ f z (CmmGlobal reg) = f z reg - -instance DefinerOfRegs GlobalReg CmmReg where - foldRegsDefd _ _ z (CmmLocal _) = z - foldRegsDefd _ f z (CmmGlobal reg) = f z reg - -instance Ord r => UserOfRegs r r where - foldRegsUsed _ f z r = f z r - -instance Ord r => DefinerOfRegs r r where - foldRegsDefd _ f z r = f z r - -instance (Ord r, UserOfRegs r CmmReg) => UserOfRegs r CmmExpr where - -- The (Ord r) in the context is necessary here - -- See Note [Recursive superclasses] in TcInstDcls - foldRegsUsed dflags f !z e = expr z e - where expr z (CmmLit _) = z - expr z (CmmLoad addr _) = foldRegsUsed dflags f z addr - expr z (CmmReg r) = foldRegsUsed dflags f z r - expr z (CmmMachOp _ exprs) = foldRegsUsed dflags f z exprs - expr z (CmmRegOff r _) = foldRegsUsed dflags f z r - expr z (CmmStackSlot _ _) = z - -instance UserOfRegs r a => UserOfRegs r [a] where - foldRegsUsed dflags f set as = foldl' (foldRegsUsed dflags f) set as - {-# INLINABLE foldRegsUsed #-} - -instance DefinerOfRegs r a => DefinerOfRegs r [a] where - foldRegsDefd dflags f set as = foldl' (foldRegsDefd dflags f) set as - {-# INLINABLE foldRegsDefd #-} - ------------------------------------------------------------------------------ --- Global STG registers ------------------------------------------------------------------------------ - -data VGcPtr = VGcPtr | VNonGcPtr deriving( Eq, Show ) - ------------------------------------------------------------------------------ --- Global STG registers ------------------------------------------------------------------------------ -{- -Note [Overlapping global registers] - -The backend might not faithfully implement the abstraction of the STG -machine with independent registers for different values of type -GlobalReg. Specifically, certain pairs of registers (r1, r2) may -overlap in the sense that a store to r1 invalidates the value in r2, -and vice versa. 
- -Currently this occurs only on the x86_64 architecture where FloatReg n -and DoubleReg n are assigned the same microarchitectural register, in -order to allow functions to receive more Float# or Double# arguments -in registers (as opposed to on the stack). - -There are no specific rules about which registers might overlap with -which other registers, but presumably it's safe to assume that nothing -will overlap with special registers like Sp or BaseReg. - -Use CmmUtils.regsOverlap to determine whether two GlobalRegs overlap -on a particular platform. The instance Eq GlobalReg is syntactic -equality of STG registers and does not take overlap into -account. However it is still used in UserOfRegs/DefinerOfRegs and -there are likely still bugs there, beware! --} - -data GlobalReg - -- Argument and return registers - = VanillaReg -- pointers, unboxed ints and chars - {-# UNPACK #-} !Int -- its number - VGcPtr - - | FloatReg -- single-precision floating-point registers - {-# UNPACK #-} !Int -- its number - - | DoubleReg -- double-precision floating-point registers - {-# UNPACK #-} !Int -- its number - - | LongReg -- long int registers (64-bit, really) - {-# UNPACK #-} !Int -- its number - - | XmmReg -- 128-bit SIMD vector register - {-# UNPACK #-} !Int -- its number - - | YmmReg -- 256-bit SIMD vector register - {-# UNPACK #-} !Int -- its number - - | ZmmReg -- 512-bit SIMD vector register - {-# UNPACK #-} !Int -- its number - - -- STG registers - | Sp -- Stack ptr; points to last occupied stack location. - | SpLim -- Stack limit - | Hp -- Heap ptr; points to last occupied heap location. - | HpLim -- Heap limit register - | CCCS -- Current cost-centre stack - | CurrentTSO -- pointer to current thread's TSO - | CurrentNursery -- pointer to allocation area - | HpAlloc -- allocation count for heap check failure - - -- We keep the address of some commonly-called - -- functions in the register table, to keep code - -- size down: - | EagerBlackholeInfo -- stg_EAGER_BLACKHOLE_info - | GCEnter1 -- stg_gc_enter_1 - | GCFun -- stg_gc_fun - - -- Base offset for the register table, used for accessing registers - -- which do not have real registers assigned to them. This register - -- will only appear after we have expanded GlobalReg into memory accesses - -- (where necessary) in the native code generator. - | BaseReg - - -- The register used by the platform for the C stack pointer. This is - -- a break in the STG abstraction used exclusively to setup stack unwinding - -- information. - | MachSp - - -- The is a dummy register used to indicate to the stack unwinder where - -- a routine would return to. - | UnwindReturnReg - - -- Base Register for PIC (position-independent code) calculations - -- Only used inside the native code generator. It's exact meaning differs - -- from platform to platform (see module PositionIndependentCode). - | PicBaseReg - - deriving( Show ) - -instance Eq GlobalReg where - VanillaReg i _ == VanillaReg j _ = i==j -- Ignore type when seeking clashes - FloatReg i == FloatReg j = i==j - DoubleReg i == DoubleReg j = i==j - LongReg i == LongReg j = i==j - -- NOTE: XMM, YMM, ZMM registers actually are the same registers - -- at least with respect to store at YMM i and then read from XMM i - -- and similarly for ZMM etc. 
- XmmReg i == XmmReg j = i==j - YmmReg i == YmmReg j = i==j - ZmmReg i == ZmmReg j = i==j - Sp == Sp = True - SpLim == SpLim = True - Hp == Hp = True - HpLim == HpLim = True - CCCS == CCCS = True - CurrentTSO == CurrentTSO = True - CurrentNursery == CurrentNursery = True - HpAlloc == HpAlloc = True - EagerBlackholeInfo == EagerBlackholeInfo = True - GCEnter1 == GCEnter1 = True - GCFun == GCFun = True - BaseReg == BaseReg = True - MachSp == MachSp = True - UnwindReturnReg == UnwindReturnReg = True - PicBaseReg == PicBaseReg = True - _r1 == _r2 = False - -instance Ord GlobalReg where - compare (VanillaReg i _) (VanillaReg j _) = compare i j - -- Ignore type when seeking clashes - compare (FloatReg i) (FloatReg j) = compare i j - compare (DoubleReg i) (DoubleReg j) = compare i j - compare (LongReg i) (LongReg j) = compare i j - compare (XmmReg i) (XmmReg j) = compare i j - compare (YmmReg i) (YmmReg j) = compare i j - compare (ZmmReg i) (ZmmReg j) = compare i j - compare Sp Sp = EQ - compare SpLim SpLim = EQ - compare Hp Hp = EQ - compare HpLim HpLim = EQ - compare CCCS CCCS = EQ - compare CurrentTSO CurrentTSO = EQ - compare CurrentNursery CurrentNursery = EQ - compare HpAlloc HpAlloc = EQ - compare EagerBlackholeInfo EagerBlackholeInfo = EQ - compare GCEnter1 GCEnter1 = EQ - compare GCFun GCFun = EQ - compare BaseReg BaseReg = EQ - compare MachSp MachSp = EQ - compare UnwindReturnReg UnwindReturnReg = EQ - compare PicBaseReg PicBaseReg = EQ - compare (VanillaReg _ _) _ = LT - compare _ (VanillaReg _ _) = GT - compare (FloatReg _) _ = LT - compare _ (FloatReg _) = GT - compare (DoubleReg _) _ = LT - compare _ (DoubleReg _) = GT - compare (LongReg _) _ = LT - compare _ (LongReg _) = GT - compare (XmmReg _) _ = LT - compare _ (XmmReg _) = GT - compare (YmmReg _) _ = LT - compare _ (YmmReg _) = GT - compare (ZmmReg _) _ = LT - compare _ (ZmmReg _) = GT - compare Sp _ = LT - compare _ Sp = GT - compare SpLim _ = LT - compare _ SpLim = GT - compare Hp _ = LT - compare _ Hp = GT - compare HpLim _ = LT - compare _ HpLim = GT - compare CCCS _ = LT - compare _ CCCS = GT - compare CurrentTSO _ = LT - compare _ CurrentTSO = GT - compare CurrentNursery _ = LT - compare _ CurrentNursery = GT - compare HpAlloc _ = LT - compare _ HpAlloc = GT - compare GCEnter1 _ = LT - compare _ GCEnter1 = GT - compare GCFun _ = LT - compare _ GCFun = GT - compare BaseReg _ = LT - compare _ BaseReg = GT - compare MachSp _ = LT - compare _ MachSp = GT - compare UnwindReturnReg _ = LT - compare _ UnwindReturnReg = GT - compare EagerBlackholeInfo _ = LT - compare _ EagerBlackholeInfo = GT - --- convenient aliases -baseReg, spReg, hpReg, spLimReg, hpLimReg, nodeReg, - currentTSOReg, currentNurseryReg, hpAllocReg, cccsReg :: CmmReg -baseReg = CmmGlobal BaseReg -spReg = CmmGlobal Sp -hpReg = CmmGlobal Hp -hpLimReg = CmmGlobal HpLim -spLimReg = CmmGlobal SpLim -nodeReg = CmmGlobal node -currentTSOReg = CmmGlobal CurrentTSO -currentNurseryReg = CmmGlobal CurrentNursery -hpAllocReg = CmmGlobal HpAlloc -cccsReg = CmmGlobal CCCS - -node :: GlobalReg -node = VanillaReg 1 VGcPtr - -globalRegType :: DynFlags -> GlobalReg -> CmmType -globalRegType dflags (VanillaReg _ VGcPtr) = gcWord dflags -globalRegType dflags (VanillaReg _ VNonGcPtr) = bWord dflags -globalRegType _ (FloatReg _) = cmmFloat W32 -globalRegType _ (DoubleReg _) = cmmFloat W64 -globalRegType _ (LongReg _) = cmmBits W64 --- TODO: improve the internal model of SIMD/vectorized registers --- the right design SHOULd improve handling of float and double code too. 
--- see remarks in "NOTE [SIMD Design for the future]"" in GHC.StgToCmm.Prim -globalRegType _ (XmmReg _) = cmmVec 4 (cmmBits W32) -globalRegType _ (YmmReg _) = cmmVec 8 (cmmBits W32) -globalRegType _ (ZmmReg _) = cmmVec 16 (cmmBits W32) - -globalRegType dflags Hp = gcWord dflags - -- The initialiser for all - -- dynamically allocated closures -globalRegType dflags _ = bWord dflags - -isArgReg :: GlobalReg -> Bool -isArgReg (VanillaReg {}) = True -isArgReg (FloatReg {}) = True -isArgReg (DoubleReg {}) = True -isArgReg (LongReg {}) = True -isArgReg (XmmReg {}) = True -isArgReg (YmmReg {}) = True -isArgReg (ZmmReg {}) = True -isArgReg _ = False diff --git a/compiler/cmm/CmmImplementSwitchPlans.hs b/compiler/cmm/CmmImplementSwitchPlans.hs deleted file mode 100644 index 83c29cf6b5..0000000000 --- a/compiler/cmm/CmmImplementSwitchPlans.hs +++ /dev/null @@ -1,116 +0,0 @@ -{-# LANGUAGE GADTs #-} -module CmmImplementSwitchPlans - ( cmmImplementSwitchPlans - ) -where - -import GhcPrelude - -import Hoopl.Block -import BlockId -import Cmm -import CmmUtils -import CmmSwitch -import UniqSupply -import DynFlags - --- --- This module replaces Switch statements as generated by the Stg -> Cmm --- transformation, which might be huge and sparse and hence unsuitable for --- assembly code, by proper constructs (if-then-else trees, dense jump tables). --- --- The actual, abstract strategy is determined by createSwitchPlan in --- CmmSwitch and returned as a SwitchPlan; here is just the implementation in --- terms of Cmm code. See Note [Cmm Switches, the general plan] in CmmSwitch. --- --- This division into different modules is both to clearly separate concerns, --- but also because createSwitchPlan needs access to the constructors of --- SwitchTargets, a data type exported abstractly by CmmSwitch. --- - --- | Traverses the 'CmmGraph', making sure that 'CmmSwitch' are suitable for --- code generation. -cmmImplementSwitchPlans :: DynFlags -> CmmGraph -> UniqSM CmmGraph -cmmImplementSwitchPlans dflags g - -- Switch generation done by backend (LLVM/C) - | targetSupportsSwitch (hscTarget dflags) = return g - | otherwise = do - blocks' <- concat `fmap` mapM (visitSwitches dflags) (toBlockList g) - return $ ofBlockList (g_entry g) blocks' - -visitSwitches :: DynFlags -> CmmBlock -> UniqSM [CmmBlock] -visitSwitches dflags block - | (entry@(CmmEntry _ scope), middle, CmmSwitch vanillaExpr ids) <- blockSplit block - = do - let plan = createSwitchPlan ids - -- See Note [Floating switch expressions] - (assignSimple, simpleExpr) <- floatSwitchExpr dflags vanillaExpr - - (newTail, newBlocks) <- implementSwitchPlan dflags scope simpleExpr plan - - let block' = entry `blockJoinHead` middle `blockAppend` assignSimple `blockAppend` newTail - - return $ block' : newBlocks - - | otherwise - = return [block] - --- Note [Floating switch expressions] --- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - --- When we translate a sparse switch into a search tree we would like --- to compute the value we compare against only once. - --- For this purpose we assign the switch expression to a local register --- and then use this register when constructing the actual binary tree. - --- This is important as the expression could contain expensive code like --- memory loads or divisions which we REALLY don't want to duplicate. - --- This happened in parts of the handwritten RTS Cmm code. 
See also #16933 - --- See Note [Floating switch expressions] -floatSwitchExpr :: DynFlags -> CmmExpr -> UniqSM (Block CmmNode O O, CmmExpr) -floatSwitchExpr _ reg@(CmmReg {}) = return (emptyBlock, reg) -floatSwitchExpr dflags expr = do - (assign, expr') <- cmmMkAssign dflags expr <$> getUniqueM - return (BMiddle assign, expr') - - --- Implementing a switch plan (returning a tail block) -implementSwitchPlan :: DynFlags -> CmmTickScope -> CmmExpr -> SwitchPlan -> UniqSM (Block CmmNode O C, [CmmBlock]) -implementSwitchPlan dflags scope expr = go - where - go (Unconditionally l) - = return (emptyBlock `blockJoinTail` CmmBranch l, []) - go (JumpTable ids) - = return (emptyBlock `blockJoinTail` CmmSwitch expr ids, []) - go (IfLT signed i ids1 ids2) - = do - (bid1, newBlocks1) <- go' ids1 - (bid2, newBlocks2) <- go' ids2 - - let lt | signed = cmmSLtWord - | otherwise = cmmULtWord - scrut = lt dflags expr $ CmmLit $ mkWordCLit dflags i - lastNode = CmmCondBranch scrut bid1 bid2 Nothing - lastBlock = emptyBlock `blockJoinTail` lastNode - return (lastBlock, newBlocks1++newBlocks2) - go (IfEqual i l ids2) - = do - (bid2, newBlocks2) <- go' ids2 - - let scrut = cmmNeWord dflags expr $ CmmLit $ mkWordCLit dflags i - lastNode = CmmCondBranch scrut bid2 l Nothing - lastBlock = emptyBlock `blockJoinTail` lastNode - return (lastBlock, newBlocks2) - - -- Same but returning a label to branch to - go' (Unconditionally l) - = return (l, []) - go' p - = do - bid <- mkBlockId `fmap` getUniqueM - (last, newBlocks) <- go p - let block = CmmEntry bid scope `blockJoinHead` last - return (bid, block: newBlocks) diff --git a/compiler/cmm/CmmInfo.hs b/compiler/cmm/CmmInfo.hs deleted file mode 100644 index 3ef3d5001e..0000000000 --- a/compiler/cmm/CmmInfo.hs +++ /dev/null @@ -1,593 +0,0 @@ -{-# LANGUAGE CPP #-} -module CmmInfo ( - mkEmptyContInfoTable, - cmmToRawCmm, - mkInfoTable, - srtEscape, - - -- info table accessors - closureInfoPtr, - entryCode, - getConstrTag, - cmmGetClosureType, - infoTable, - infoTableConstrTag, - infoTableSrtBitmap, - infoTableClosureType, - infoTablePtrs, - infoTableNonPtrs, - funInfoTable, - funInfoArity, - - -- info table sizes and offsets - stdInfoTableSizeW, - fixedInfoTableSizeW, - profInfoTableSizeW, - maxStdInfoTableSizeW, - maxRetInfoTableSizeW, - stdInfoTableSizeB, - conInfoTableSizeB, - stdSrtBitmapOffset, - stdClosureTypeOffset, - stdPtrsOffset, stdNonPtrsOffset, -) where - -#include "HsVersions.h" - -import GhcPrelude - -import Cmm -import CmmUtils -import CLabel -import SMRep -import Bitmap -import Stream (Stream) -import qualified Stream -import Hoopl.Collections - -import GHC.Platform -import Maybes -import DynFlags -import ErrUtils (withTimingSilent) -import Panic -import UniqSupply -import MonadUtils -import Util -import Outputable - -import Data.ByteString (ByteString) -import Data.Bits - --- When we split at proc points, we need an empty info table. -mkEmptyContInfoTable :: CLabel -> CmmInfoTable -mkEmptyContInfoTable info_lbl - = CmmInfoTable { cit_lbl = info_lbl - , cit_rep = mkStackRep [] - , cit_prof = NoProfilingInfo - , cit_srt = Nothing - , cit_clo = Nothing } - -cmmToRawCmm :: DynFlags -> Stream IO CmmGroup a - -> IO (Stream IO RawCmmGroup a) -cmmToRawCmm dflags cmms - = do { uniqs <- mkSplitUniqSupply 'i' - ; let do_one :: UniqSupply -> [CmmDecl] -> IO (UniqSupply, [RawCmmDecl]) - do_one uniqs cmm = - -- NB. strictness fixes a space leak. DO NOT REMOVE. 
- withTimingSilent dflags (text "Cmm -> Raw Cmm") - forceRes $ - case initUs uniqs $ concatMapM (mkInfoTable dflags) cmm of - (b,uniqs') -> return (uniqs',b) - ; return (snd <$> Stream.mapAccumL_ do_one uniqs cmms) - } - - where forceRes (uniqs, rawcmms) = - uniqs `seq` foldr (\decl r -> decl `seq` r) () rawcmms - --- Make a concrete info table, represented as a list of CmmStatic --- (it can't be simply a list of Word, because the SRT field is --- represented by a label+offset expression). --- --- With tablesNextToCode, the layout is --- --- --- --- --- Without tablesNextToCode, the layout of an info table is --- --- --- --- --- See includes/rts/storage/InfoTables.h --- --- For return-points these are as follows --- --- Tables next to code: --- --- --- --- ret-addr --> --- --- Not tables-next-to-code: --- --- ret-addr --> --- --- --- --- * The SRT slot is only there if there is SRT info to record - -mkInfoTable :: DynFlags -> CmmDecl -> UniqSM [RawCmmDecl] -mkInfoTable _ (CmmData sec dat) - = return [CmmData sec dat] - -mkInfoTable dflags proc@(CmmProc infos entry_lbl live blocks) - -- - -- in the non-tables-next-to-code case, procs can have at most a - -- single info table associated with the entry label of the proc. - -- - | not (tablesNextToCode dflags) - = case topInfoTable proc of -- must be at most one - -- no info table - Nothing -> - return [CmmProc mapEmpty entry_lbl live blocks] - - Just info@CmmInfoTable { cit_lbl = info_lbl } -> do - (top_decls, (std_info, extra_bits)) <- - mkInfoTableContents dflags info Nothing - let - rel_std_info = map (makeRelativeRefTo dflags info_lbl) std_info - rel_extra_bits = map (makeRelativeRefTo dflags info_lbl) extra_bits - -- - -- Separately emit info table (with the function entry - -- point as first entry) and the entry code - -- - return (top_decls ++ - [CmmProc mapEmpty entry_lbl live blocks, - mkRODataLits info_lbl - (CmmLabel entry_lbl : rel_std_info ++ rel_extra_bits)]) - - -- - -- With tables-next-to-code, we can have many info tables, - -- associated with some of the BlockIds of the proc. For each info - -- table we need to turn it into CmmStatics, and collect any new - -- CmmDecls that arise from doing so. - -- - | otherwise - = do - (top_declss, raw_infos) <- - unzip `fmap` mapM do_one_info (mapToList (info_tbls infos)) - return (concat top_declss ++ - [CmmProc (mapFromList raw_infos) entry_lbl live blocks]) - - where - do_one_info (lbl,itbl) = do - (top_decls, (std_info, extra_bits)) <- - mkInfoTableContents dflags itbl Nothing - let - info_lbl = cit_lbl itbl - rel_std_info = map (makeRelativeRefTo dflags info_lbl) std_info - rel_extra_bits = map (makeRelativeRefTo dflags info_lbl) extra_bits - -- - return (top_decls, (lbl, Statics info_lbl $ map CmmStaticLit $ - reverse rel_extra_bits ++ rel_std_info)) - ------------------------------------------------------ -type InfoTableContents = ( [CmmLit] -- The standard part - , [CmmLit] ) -- The "extra bits" --- These Lits have *not* had mkRelativeTo applied to them - -mkInfoTableContents :: DynFlags - -> CmmInfoTable - -> Maybe Int -- Override default RTS type tag? 
- -> UniqSM ([RawCmmDecl], -- Auxiliary top decls - InfoTableContents) -- Info tbl + extra bits - -mkInfoTableContents dflags - info@(CmmInfoTable { cit_lbl = info_lbl - , cit_rep = smrep - , cit_prof = prof - , cit_srt = srt }) - mb_rts_tag - | RTSRep rts_tag rep <- smrep - = mkInfoTableContents dflags info{cit_rep = rep} (Just rts_tag) - -- Completely override the rts_tag that mkInfoTableContents would - -- otherwise compute, with the rts_tag stored in the RTSRep - -- (which in turn came from a handwritten .cmm file) - - | StackRep frame <- smrep - = do { (prof_lits, prof_data) <- mkProfLits dflags prof - ; let (srt_label, srt_bitmap) = mkSRTLit dflags info_lbl srt - ; (liveness_lit, liveness_data) <- mkLivenessBits dflags frame - ; let - std_info = mkStdInfoTable dflags prof_lits rts_tag srt_bitmap liveness_lit - rts_tag | Just tag <- mb_rts_tag = tag - | null liveness_data = rET_SMALL -- Fits in extra_bits - | otherwise = rET_BIG -- Does not; extra_bits is - -- a label - ; return (prof_data ++ liveness_data, (std_info, srt_label)) } - - | HeapRep _ ptrs nonptrs closure_type <- smrep - = do { let layout = packIntsCLit dflags ptrs nonptrs - ; (prof_lits, prof_data) <- mkProfLits dflags prof - ; let (srt_label, srt_bitmap) = mkSRTLit dflags info_lbl srt - ; (mb_srt_field, mb_layout, extra_bits, ct_data) - <- mk_pieces closure_type srt_label - ; let std_info = mkStdInfoTable dflags prof_lits - (mb_rts_tag `orElse` rtsClosureType smrep) - (mb_srt_field `orElse` srt_bitmap) - (mb_layout `orElse` layout) - ; return (prof_data ++ ct_data, (std_info, extra_bits)) } - where - mk_pieces :: ClosureTypeInfo -> [CmmLit] - -> UniqSM ( Maybe CmmLit -- Override the SRT field with this - , Maybe CmmLit -- Override the layout field with this - , [CmmLit] -- "Extra bits" for info table - , [RawCmmDecl]) -- Auxiliary data decls - mk_pieces (Constr con_tag con_descr) _no_srt -- A data constructor - = do { (descr_lit, decl) <- newStringLit con_descr - ; return ( Just (CmmInt (fromIntegral con_tag) - (halfWordWidth dflags)) - , Nothing, [descr_lit], [decl]) } - - mk_pieces Thunk srt_label - = return (Nothing, Nothing, srt_label, []) - - mk_pieces (ThunkSelector offset) _no_srt - = return (Just (CmmInt 0 (halfWordWidth dflags)), - Just (mkWordCLit dflags (fromIntegral offset)), [], []) - -- Layout known (one free var); we use the layout field for offset - - mk_pieces (Fun arity (ArgSpec fun_type)) srt_label - = do { let extra_bits = packIntsCLit dflags fun_type arity : srt_label - ; return (Nothing, Nothing, extra_bits, []) } - - mk_pieces (Fun arity (ArgGen arg_bits)) srt_label - = do { (liveness_lit, liveness_data) <- mkLivenessBits dflags arg_bits - ; let fun_type | null liveness_data = aRG_GEN - | otherwise = aRG_GEN_BIG - extra_bits = [ packIntsCLit dflags fun_type arity ] - ++ (if inlineSRT dflags then [] else [ srt_lit ]) - ++ [ liveness_lit, slow_entry ] - ; return (Nothing, Nothing, extra_bits, liveness_data) } - where - slow_entry = CmmLabel (toSlowEntryLbl info_lbl) - srt_lit = case srt_label of - [] -> mkIntCLit dflags 0 - (lit:_rest) -> ASSERT( null _rest ) lit - - mk_pieces other _ = pprPanic "mk_pieces" (ppr other) - -mkInfoTableContents _ _ _ = panic "mkInfoTableContents" -- NonInfoTable dealt with earlier - -packIntsCLit :: DynFlags -> Int -> Int -> CmmLit -packIntsCLit dflags a b = packHalfWordsCLit dflags - (toStgHalfWord dflags (fromIntegral a)) - (toStgHalfWord dflags (fromIntegral b)) - - -mkSRTLit :: DynFlags - -> CLabel - -> Maybe CLabel - -> ([CmmLit], -- srt_label, if any - CmmLit) -- 
srt_bitmap -mkSRTLit dflags info_lbl (Just lbl) - | inlineSRT dflags - = ([], CmmLabelDiffOff lbl info_lbl 0 (halfWordWidth dflags)) -mkSRTLit dflags _ Nothing = ([], CmmInt 0 (halfWordWidth dflags)) -mkSRTLit dflags _ (Just lbl) = ([CmmLabel lbl], CmmInt 1 (halfWordWidth dflags)) - - --- | Is the SRT offset field inline in the info table on this platform? --- --- See the section "Referring to an SRT from the info table" in --- Note [SRTs] in CmmBuildInfoTables.hs -inlineSRT :: DynFlags -> Bool -inlineSRT dflags = platformArch (targetPlatform dflags) == ArchX86_64 - && tablesNextToCode dflags - -------------------------------------------------------------------------- --- --- Lay out the info table and handle relative offsets --- -------------------------------------------------------------------------- - --- This function takes --- * the standard info table portion (StgInfoTable) --- * the "extra bits" (StgFunInfoExtraRev etc.) --- * the entry label --- * the code --- and lays them out in memory, producing a list of RawCmmDecl - -------------------------------------------------------------------------- --- --- Position independent code --- -------------------------------------------------------------------------- --- In order to support position independent code, we mustn't put absolute --- references into read-only space. Info tables in the tablesNextToCode --- case must be in .text, which is read-only, so we doctor the CmmLits --- to use relative offsets instead. - --- Note that this is done even when the -fPIC flag is not specified, --- as we want to keep binary compatibility between PIC and non-PIC. - -makeRelativeRefTo :: DynFlags -> CLabel -> CmmLit -> CmmLit - -makeRelativeRefTo dflags info_lbl (CmmLabel lbl) - | tablesNextToCode dflags - = CmmLabelDiffOff lbl info_lbl 0 (wordWidth dflags) -makeRelativeRefTo dflags info_lbl (CmmLabelOff lbl off) - | tablesNextToCode dflags - = CmmLabelDiffOff lbl info_lbl off (wordWidth dflags) -makeRelativeRefTo _ _ lit = lit - - -------------------------------------------------------------------------- --- --- Build a liveness mask for the stack layout --- -------------------------------------------------------------------------- - --- There are four kinds of things on the stack: --- --- - pointer variables (bound in the environment) --- - non-pointer variables (bound in the environment) --- - free slots (recorded in the stack free list) --- - non-pointer data slots (recorded in the stack free list) --- --- The first two are represented with a 'Just' of a 'LocalReg'. --- The last two with one or more 'Nothing' constructors. --- Each 'Nothing' represents one used word. --- --- The head of the stack layout is the top of the stack and --- the least-significant bit. - -mkLivenessBits :: DynFlags -> Liveness -> UniqSM (CmmLit, [RawCmmDecl]) - -- ^ Returns: - -- 1. The bitmap (literal value or label) - -- 2. Large bitmap CmmData if needed - -mkLivenessBits dflags liveness - | n_bits > mAX_SMALL_BITMAP_SIZE dflags -- does not fit in one word - = do { uniq <- getUniqueM - ; let bitmap_lbl = mkBitmapLabel uniq - ; return (CmmLabel bitmap_lbl, - [mkRODataLits bitmap_lbl lits]) } - - | otherwise -- Fits in one word - = return (mkStgWordCLit dflags bitmap_word, []) - where - n_bits = length liveness - - bitmap :: Bitmap - bitmap = mkBitmap dflags liveness - - small_bitmap = case bitmap of - [] -> toStgWord dflags 0 - [b] -> b - _ -> panic "mkLiveness" - bitmap_word = toStgWord dflags (fromIntegral n_bits) - .|. 
(small_bitmap `shiftL` bITMAP_BITS_SHIFT dflags) - - lits = mkWordCLit dflags (fromIntegral n_bits) - : map (mkStgWordCLit dflags) bitmap - -- The first word is the size. The structure must match - -- StgLargeBitmap in includes/rts/storage/InfoTable.h - -------------------------------------------------------------------------- --- --- Generating a standard info table --- -------------------------------------------------------------------------- - --- The standard bits of an info table. This part of the info table --- corresponds to the StgInfoTable type defined in --- includes/rts/storage/InfoTables.h. --- --- Its shape varies with ticky/profiling/tables next to code etc --- so we can't use constant offsets from Constants - -mkStdInfoTable - :: DynFlags - -> (CmmLit,CmmLit) -- Closure type descr and closure descr (profiling) - -> Int -- Closure RTS tag - -> CmmLit -- SRT length - -> CmmLit -- layout field - -> [CmmLit] - -mkStdInfoTable dflags (type_descr, closure_descr) cl_type srt layout_lit - = -- Parallel revertible-black hole field - prof_info - -- Ticky info (none at present) - -- Debug info (none at present) - ++ [layout_lit, tag, srt] - - where - prof_info - | gopt Opt_SccProfilingOn dflags = [type_descr, closure_descr] - | otherwise = [] - - tag = CmmInt (fromIntegral cl_type) (halfWordWidth dflags) - -------------------------------------------------------------------------- --- --- Making string literals --- -------------------------------------------------------------------------- - -mkProfLits :: DynFlags -> ProfilingInfo -> UniqSM ((CmmLit,CmmLit), [RawCmmDecl]) -mkProfLits dflags NoProfilingInfo = return ((zeroCLit dflags, zeroCLit dflags), []) -mkProfLits _ (ProfilingInfo td cd) - = do { (td_lit, td_decl) <- newStringLit td - ; (cd_lit, cd_decl) <- newStringLit cd - ; return ((td_lit,cd_lit), [td_decl,cd_decl]) } - -newStringLit :: ByteString -> UniqSM (CmmLit, GenCmmDecl CmmStatics info stmt) -newStringLit bytes - = do { uniq <- getUniqueM - ; return (mkByteStringCLit (mkStringLitLabel uniq) bytes) } - - --- Misc utils - --- | Value of the srt field of an info table when using an StgLargeSRT -srtEscape :: DynFlags -> StgHalfWord -srtEscape dflags = toStgHalfWord dflags (-1) - -------------------------------------------------------------------------- --- --- Accessing fields of an info table --- -------------------------------------------------------------------------- - --- | Wrap a 'CmmExpr' in an alignment check when @-falignment-sanitisation@ is --- enabled. -wordAligned :: DynFlags -> CmmExpr -> CmmExpr -wordAligned dflags e - | gopt Opt_AlignmentSanitisation dflags - = CmmMachOp (MO_AlignmentCheck (wORD_SIZE dflags) (wordWidth dflags)) [e] - | otherwise - = e - -closureInfoPtr :: DynFlags -> CmmExpr -> CmmExpr --- Takes a closure pointer and returns the info table pointer -closureInfoPtr dflags e = - CmmLoad (wordAligned dflags e) (bWord dflags) - -entryCode :: DynFlags -> CmmExpr -> CmmExpr --- Takes an info pointer (the first word of a closure) --- and returns its entry code -entryCode dflags e - | tablesNextToCode dflags = e - | otherwise = CmmLoad e (bWord dflags) - -getConstrTag :: DynFlags -> CmmExpr -> CmmExpr --- Takes a closure pointer, and return the *zero-indexed* --- constructor tag obtained from the info table --- This lives in the SRT field of the info table --- (constructors don't need SRTs). 
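-- For example, a hypothetical caller that wants the tag of the constructor
-- currently held in the node register could write:
--
--   tagOfNode :: DynFlags -> CmmExpr
--   tagOfNode dflags = getConstrTag dflags (CmmReg nodeReg)
--
-- which amounts to a half-word load at stdSrtBitmapOffset from the start of
-- the standard info table, widened back to a full word.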
-getConstrTag dflags closure_ptr - = CmmMachOp (MO_UU_Conv (halfWordWidth dflags) (wordWidth dflags)) [infoTableConstrTag dflags info_table] - where - info_table = infoTable dflags (closureInfoPtr dflags closure_ptr) - -cmmGetClosureType :: DynFlags -> CmmExpr -> CmmExpr --- Takes a closure pointer, and return the closure type --- obtained from the info table -cmmGetClosureType dflags closure_ptr - = CmmMachOp (MO_UU_Conv (halfWordWidth dflags) (wordWidth dflags)) [infoTableClosureType dflags info_table] - where - info_table = infoTable dflags (closureInfoPtr dflags closure_ptr) - -infoTable :: DynFlags -> CmmExpr -> CmmExpr --- Takes an info pointer (the first word of a closure) --- and returns a pointer to the first word of the standard-form --- info table, excluding the entry-code word (if present) -infoTable dflags info_ptr - | tablesNextToCode dflags = cmmOffsetB dflags info_ptr (- stdInfoTableSizeB dflags) - | otherwise = cmmOffsetW dflags info_ptr 1 -- Past the entry code pointer - -infoTableConstrTag :: DynFlags -> CmmExpr -> CmmExpr --- Takes an info table pointer (from infoTable) and returns the constr tag --- field of the info table (same as the srt_bitmap field) -infoTableConstrTag = infoTableSrtBitmap - -infoTableSrtBitmap :: DynFlags -> CmmExpr -> CmmExpr --- Takes an info table pointer (from infoTable) and returns the srt_bitmap --- field of the info table -infoTableSrtBitmap dflags info_tbl - = CmmLoad (cmmOffsetB dflags info_tbl (stdSrtBitmapOffset dflags)) (bHalfWord dflags) - -infoTableClosureType :: DynFlags -> CmmExpr -> CmmExpr --- Takes an info table pointer (from infoTable) and returns the closure type --- field of the info table. -infoTableClosureType dflags info_tbl - = CmmLoad (cmmOffsetB dflags info_tbl (stdClosureTypeOffset dflags)) (bHalfWord dflags) - -infoTablePtrs :: DynFlags -> CmmExpr -> CmmExpr -infoTablePtrs dflags info_tbl - = CmmLoad (cmmOffsetB dflags info_tbl (stdPtrsOffset dflags)) (bHalfWord dflags) - -infoTableNonPtrs :: DynFlags -> CmmExpr -> CmmExpr -infoTableNonPtrs dflags info_tbl - = CmmLoad (cmmOffsetB dflags info_tbl (stdNonPtrsOffset dflags)) (bHalfWord dflags) - -funInfoTable :: DynFlags -> CmmExpr -> CmmExpr --- Takes the info pointer of a function, --- and returns a pointer to the first word of the StgFunInfoExtra struct --- in the info table. 
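-- A typical consumer, as a sketch (the helper name is illustrative): fetch
-- the arity of an unknown function closure, going closure pointer ->
-- info pointer -> StgFunInfoExtra:
--
--   unknownFunArity :: DynFlags -> CmmExpr -> CmmExpr
--   unknownFunArity dflags fun_closure =
--     funInfoArity dflags (closureInfoPtr dflags fun_closure)
--
-- funInfoArity (below) then uses funInfoTable to locate the struct and reads
-- its arity field at the platform-dependent offset.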
-funInfoTable dflags info_ptr - | tablesNextToCode dflags - = cmmOffsetB dflags info_ptr (- stdInfoTableSizeB dflags - sIZEOF_StgFunInfoExtraRev dflags) - | otherwise - = cmmOffsetW dflags info_ptr (1 + stdInfoTableSizeW dflags) - -- Past the entry code pointer - --- Takes the info pointer of a function, returns the function's arity -funInfoArity :: DynFlags -> CmmExpr -> CmmExpr -funInfoArity dflags iptr - = cmmToWord dflags (cmmLoadIndex dflags rep fun_info (offset `div` rep_bytes)) - where - fun_info = funInfoTable dflags iptr - rep = cmmBits (widthFromBytes rep_bytes) - - (rep_bytes, offset) - | tablesNextToCode dflags = ( pc_REP_StgFunInfoExtraRev_arity pc - , oFFSET_StgFunInfoExtraRev_arity dflags ) - | otherwise = ( pc_REP_StgFunInfoExtraFwd_arity pc - , oFFSET_StgFunInfoExtraFwd_arity dflags ) - - pc = platformConstants dflags - ------------------------------------------------------------------------------ --- --- Info table sizes & offsets --- ------------------------------------------------------------------------------ - -stdInfoTableSizeW :: DynFlags -> WordOff --- The size of a standard info table varies with profiling/ticky etc, --- so we can't get it from Constants --- It must vary in sync with mkStdInfoTable -stdInfoTableSizeW dflags - = fixedInfoTableSizeW - + if gopt Opt_SccProfilingOn dflags - then profInfoTableSizeW - else 0 - -fixedInfoTableSizeW :: WordOff -fixedInfoTableSizeW = 2 -- layout, type - -profInfoTableSizeW :: WordOff -profInfoTableSizeW = 2 - -maxStdInfoTableSizeW :: WordOff -maxStdInfoTableSizeW = - 1 {- entry, when !tablesNextToCode -} - + fixedInfoTableSizeW - + profInfoTableSizeW - -maxRetInfoTableSizeW :: WordOff -maxRetInfoTableSizeW = - maxStdInfoTableSizeW - + 1 {- srt label -} - -stdInfoTableSizeB :: DynFlags -> ByteOff -stdInfoTableSizeB dflags = stdInfoTableSizeW dflags * wORD_SIZE dflags - -stdSrtBitmapOffset :: DynFlags -> ByteOff --- Byte offset of the SRT bitmap half-word which is --- in the *higher-addressed* part of the type_lit -stdSrtBitmapOffset dflags = stdInfoTableSizeB dflags - halfWordSize dflags - -stdClosureTypeOffset :: DynFlags -> ByteOff --- Byte offset of the closure type half-word -stdClosureTypeOffset dflags = stdInfoTableSizeB dflags - wORD_SIZE dflags - -stdPtrsOffset, stdNonPtrsOffset :: DynFlags -> ByteOff -stdPtrsOffset dflags = stdInfoTableSizeB dflags - 2 * wORD_SIZE dflags -stdNonPtrsOffset dflags = stdInfoTableSizeB dflags - 2 * wORD_SIZE dflags + halfWordSize dflags - -conInfoTableSizeB :: DynFlags -> Int -conInfoTableSizeB dflags = stdInfoTableSizeB dflags + wORD_SIZE dflags diff --git a/compiler/cmm/CmmLayoutStack.hs b/compiler/cmm/CmmLayoutStack.hs deleted file mode 100644 index e26f2878c0..0000000000 --- a/compiler/cmm/CmmLayoutStack.hs +++ /dev/null @@ -1,1236 +0,0 @@ -{-# LANGUAGE BangPatterns, RecordWildCards, GADTs #-} -module CmmLayoutStack ( - cmmLayoutStack, setInfoTableStackMap - ) where - -import GhcPrelude hiding ((<*>)) - -import GHC.StgToCmm.Utils ( callerSaveVolatileRegs, newTemp ) -- XXX layering violation -import GHC.StgToCmm.Foreign ( saveThreadState, loadThreadState ) -- XXX layering violation - -import BasicTypes -import Cmm -import CmmInfo -import BlockId -import CLabel -import CmmUtils -import MkGraph -import ForeignCall -import CmmLive -import CmmProcPoint -import SMRep -import Hoopl.Block -import Hoopl.Collections -import Hoopl.Dataflow -import Hoopl.Graph -import Hoopl.Label -import UniqSupply -import Maybes -import UniqFM -import Util - -import DynFlags -import FastString -import 
Outputable hiding ( isEmpty ) -import qualified Data.Set as Set -import Control.Monad.Fix -import Data.Array as Array -import Data.Bits -import Data.List (nub) - -{- Note [Stack Layout] - -The job of this pass is to - - - replace references to abstract stack Areas with fixed offsets from Sp. - - - replace the CmmHighStackMark constant used in the stack check with - the maximum stack usage of the proc. - - - save any variables that are live across a call, and reload them as - necessary. - -Before stack allocation, local variables remain live across native -calls (CmmCall{ cmm_cont = Just _ }), and after stack allocation local -variables are clobbered by native calls. - -We want to do stack allocation so that as far as possible - - stack use is minimized, and - - unnecessary stack saves and loads are avoided. - -The algorithm we use is a variant of linear-scan register allocation, -where the stack is our register file. - -We proceed in two passes, see Note [Two pass approach] for why they are not easy -to merge into one. - -Pass 1: - - - First, we do a liveness analysis, which annotates every block with - the variables live on entry to the block. - - - We traverse blocks in reverse postorder DFS; that is, we visit at - least one predecessor of a block before the block itself. The - stack layout flowing from the predecessor of the block will - determine the stack layout on entry to the block. - - - We maintain a data structure - - Map Label StackMap - - which describes the contents of the stack and the stack pointer on - entry to each block that is a successor of a block that we have - visited. - - - For each block we visit: - - - Look up the StackMap for this block. - - - If this block is a proc point (or a call continuation, if we aren't - splitting proc points), we need to reload all the live variables from the - stack - but this is done in Pass 2, which calculates more precise liveness - information (see description of Pass 2). - - - Walk forwards through the instructions: - - At an assignment x = Sp[loc] - - Record the fact that Sp[loc] contains x, so that we won't - need to save x if it ever needs to be spilled. - - At an assignment x = E - - If x was previously on the stack, it isn't any more - - At the last node, if it is a call or a jump to a proc point - - Lay out the stack frame for the call (see setupStackFrame) - - emit instructions to save all the live variables - - Remember the StackMaps for all the successors - - emit an instruction to adjust Sp - - If the last node is a branch, then the current StackMap is the - StackMap for the successors. - - - Manifest Sp: replace references to stack areas in this block - with real Sp offsets. We cannot do this until we have laid out - the stack area for the successors above. - - In this phase we also eliminate redundant stores to the stack; - see elimStackStores. - - - There is one important gotcha: sometimes we'll encounter a control - transfer to a block that we've already processed (a join point), - and in that case we might need to rearrange the stack to match - what the block is expecting. (exactly the same as in linear-scan - register allocation, except here we have the luxury of an infinite - supply of temporary variables). - - - Finally, we update the magic CmmHighStackMark constant with the - stack usage of the function, and eliminate the whole stack check - if there was no stack use. (in fact this is done as part of the - main traversal, by feeding the high-water-mark output back in as - an input. 
I hate cyclic programming, but it's just too convenient - sometimes.) - - There are plenty of tricky details: update frames, proc points, return - addresses, foreign calls, and some ad-hoc optimisations that are - convenient to do here and effective in common cases. Comments in the - code below explain these. - -Pass 2: - -- Calculate live registers, but taking into account that nothing is live at the - entry to a proc point. - -- At each proc point and call continuation insert reloads of live registers from - the stack (they were saved by Pass 1). - - -Note [Two pass approach] - -The main reason for Pass 2 is being able to insert only the reloads that are -needed and the fact that the two passes need different liveness information. -Let's consider an example: - - ..... - \ / - D <- proc point - / \ - E F - \ / - G <- proc point - | - X - -Pass 1 needs liveness assuming that local variables are preserved across calls. -This is important because it needs to save any local registers to the stack -(e.g., if register a is used in block X, it must be saved before any native -call). -However, for Pass 2, where we want to reload registers from stack (in a proc -point), this is overly conservative and would lead us to generate reloads in D -for things used in X, even though we're going to generate reloads in G anyway -(since it's also a proc point). -So Pass 2 calculates liveness knowing that nothing is live at the entry to a -proc point. This means that in D we only need to reload things used in E or F. -This can be quite important, for an extreme example see testcase for #3294. - -Merging the two passes is not trivial - Pass 2 is a backward rewrite and Pass 1 -is a forward one. Furthermore, Pass 1 is creating code that uses local registers -(saving them before a call), which the liveness analysis for Pass 2 must see to -be correct. - --} - - --- All stack locations are expressed as positive byte offsets from the --- "base", which is defined to be the address above the return address --- on the stack on entry to this CmmProc. --- --- Lower addresses have higher StackLocs. --- -type StackLoc = ByteOff - -{- - A StackMap describes the stack at any given point. At a continuation - it has a particular layout, like this: - - | | <- base - |-------------| - | ret0 | <- base + 8 - |-------------| - . upd frame . <- base + sm_ret_off - |-------------| - | | - . vars . - . (live/dead) . - | | <- base + sm_sp - sm_args - |-------------| - | ret1 | - . ret vals . <- base + sm_sp (<--- Sp points here) - |-------------| - -Why do we include the final return address (ret0) in our stack map? I -have absolutely no idea, but it seems to be done that way consistently -in the rest of the code generator, so I played along here. --SDM - -Note that we will be constructing an info table for the continuation -(ret1), which needs to describe the stack down to, but not including, -the update frame (or ret0, if there is no update frame). --} - -data StackMap = StackMap - { sm_sp :: StackLoc - -- ^ the offset of Sp relative to the base on entry - -- to this block. - , sm_args :: ByteOff - -- ^ the number of bytes of arguments in the area for this block - -- Defn: the offset of young(L) relative to the base is given by - -- (sm_sp - sm_args) of the StackMap for block L. - , sm_ret_off :: ByteOff - -- ^ Number of words of stack that we do not describe with an info - -- table, because it contains an update frame. 
- , sm_regs :: UniqFM (LocalReg,StackLoc) - -- ^ regs on the stack - } - -instance Outputable StackMap where - ppr StackMap{..} = - text "Sp = " <> int sm_sp $$ - text "sm_args = " <> int sm_args $$ - text "sm_ret_off = " <> int sm_ret_off $$ - text "sm_regs = " <> pprUFM sm_regs ppr - - -cmmLayoutStack :: DynFlags -> ProcPointSet -> ByteOff -> CmmGraph - -> UniqSM (CmmGraph, LabelMap StackMap) -cmmLayoutStack dflags procpoints entry_args - graph@(CmmGraph { g_entry = entry }) - = do - -- We need liveness info. Dead assignments are removed later - -- by the sinking pass. - let liveness = cmmLocalLiveness dflags graph - blocks = revPostorder graph - - (final_stackmaps, _final_high_sp, new_blocks) <- - mfix $ \ ~(rec_stackmaps, rec_high_sp, _new_blocks) -> - layout dflags procpoints liveness entry entry_args - rec_stackmaps rec_high_sp blocks - - blocks_with_reloads <- - insertReloadsAsNeeded dflags procpoints final_stackmaps entry new_blocks - new_blocks' <- mapM (lowerSafeForeignCall dflags) blocks_with_reloads - return (ofBlockList entry new_blocks', final_stackmaps) - --- ----------------------------------------------------------------------------- --- Pass 1 --- ----------------------------------------------------------------------------- - -layout :: DynFlags - -> LabelSet -- proc points - -> LabelMap CmmLocalLive -- liveness - -> BlockId -- entry - -> ByteOff -- stack args on entry - - -> LabelMap StackMap -- [final] stack maps - -> ByteOff -- [final] Sp high water mark - - -> [CmmBlock] -- [in] blocks - - -> UniqSM - ( LabelMap StackMap -- [out] stack maps - , ByteOff -- [out] Sp high water mark - , [CmmBlock] -- [out] new blocks - ) - -layout dflags procpoints liveness entry entry_args final_stackmaps final_sp_high blocks - = go blocks init_stackmap entry_args [] - where - (updfr, cont_info) = collectContInfo blocks - - init_stackmap = mapSingleton entry StackMap{ sm_sp = entry_args - , sm_args = entry_args - , sm_ret_off = updfr - , sm_regs = emptyUFM - } - - go [] acc_stackmaps acc_hwm acc_blocks - = return (acc_stackmaps, acc_hwm, acc_blocks) - - go (b0 : bs) acc_stackmaps acc_hwm acc_blocks - = do - let (entry0@(CmmEntry entry_lbl tscope), middle0, last0) = blockSplit b0 - - let stack0@StackMap { sm_sp = sp0 } - = mapFindWithDefault - (pprPanic "no stack map for" (ppr entry_lbl)) - entry_lbl acc_stackmaps - - -- (a) Update the stack map to include the effects of - -- assignments in this block - let stack1 = foldBlockNodesF (procMiddle acc_stackmaps) middle0 stack0 - - -- (b) Look at the last node and if we are making a call or - -- jumping to a proc point, we must save the live - -- variables, adjust Sp, and construct the StackMaps for - -- each of the successor blocks. See handleLastNode for - -- details. - (middle1, sp_off, last1, fixup_blocks, out) - <- handleLastNode dflags procpoints liveness cont_info - acc_stackmaps stack1 tscope middle0 last0 - - -- (c) Manifest Sp: run over the nodes in the block and replace - -- CmmStackSlot with CmmLoad from Sp with a concrete offset. 
- -- - -- our block: - -- middle0 -- the original middle nodes - -- middle1 -- live variable saves from handleLastNode - -- Sp = Sp + sp_off -- Sp adjustment goes here - -- last1 -- the last node - -- - let middle_pre = blockToList $ foldl' blockSnoc middle0 middle1 - - let final_blocks = - manifestSp dflags final_stackmaps stack0 sp0 final_sp_high - entry0 middle_pre sp_off last1 fixup_blocks - - let acc_stackmaps' = mapUnion acc_stackmaps out - - -- If this block jumps to the GC, then we do not take its - -- stack usage into account for the high-water mark. - -- Otherwise, if the only stack usage is in the stack-check - -- failure block itself, we will do a redundant stack - -- check. The stack has a buffer designed to accommodate - -- the largest amount of stack needed for calling the GC. - -- - this_sp_hwm | isGcJump last0 = 0 - | otherwise = sp0 - sp_off - - hwm' = maximum (acc_hwm : this_sp_hwm : map sm_sp (mapElems out)) - - go bs acc_stackmaps' hwm' (final_blocks ++ acc_blocks) - - --- ----------------------------------------------------------------------------- - --- Not foolproof, but GCFun is the culprit we most want to catch -isGcJump :: CmmNode O C -> Bool -isGcJump (CmmCall { cml_target = CmmReg (CmmGlobal l) }) - = l == GCFun || l == GCEnter1 -isGcJump _something_else = False - --- ----------------------------------------------------------------------------- - --- This doesn't seem right somehow. We need to find out whether this --- proc will push some update frame material at some point, so that we --- can avoid using that area of the stack for spilling. The --- updfr_space field of the CmmProc *should* tell us, but it doesn't --- (I think maybe it gets filled in later when we do proc-point --- splitting). --- --- So we'll just take the max of all the cml_ret_offs. This could be --- unnecessarily pessimistic, but probably not in the code we --- generate. - -collectContInfo :: [CmmBlock] -> (ByteOff, LabelMap ByteOff) -collectContInfo blocks - = (maximum ret_offs, mapFromList (catMaybes mb_argss)) - where - (mb_argss, ret_offs) = mapAndUnzip get_cont blocks - - get_cont :: Block CmmNode x C -> (Maybe (Label, ByteOff), ByteOff) - get_cont b = - case lastNode b of - CmmCall { cml_cont = Just l, .. } - -> (Just (l, cml_ret_args), cml_ret_off) - CmmForeignCall { .. } - -> (Just (succ, ret_args), ret_off) - _other -> (Nothing, 0) - - --- ----------------------------------------------------------------------------- --- Updating the StackMap from middle nodes - --- Look for loads from stack slots, and update the StackMap. This is --- purely for optimisation reasons, so that we can avoid saving a --- variable back to a different stack slot if it is already on the --- stack. --- --- This happens a lot: for example when function arguments are passed --- on the stack and need to be immediately saved across a call, we --- want to just leave them where they are on the stack. 
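--
-- For example, in the same Cmm-ish notation as the Notes above:
--
--      x = Sp[old + 24]           -- StackMap records: x lives in old+24
--      ...
--      call f(...) returns to L   -- x is live across the call
--
-- Because the StackMap remembers that old+24 already holds x, the save pass
-- can omit the store "Sp[old + 24] = x" before the call and leave x where it
-- already is.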
--- -procMiddle :: LabelMap StackMap -> CmmNode e x -> StackMap -> StackMap -procMiddle stackmaps node sm - = case node of - CmmAssign (CmmLocal r) (CmmLoad (CmmStackSlot area off) _) - -> sm { sm_regs = addToUFM (sm_regs sm) r (r,loc) } - where loc = getStackLoc area off stackmaps - CmmAssign (CmmLocal r) _other - -> sm { sm_regs = delFromUFM (sm_regs sm) r } - _other - -> sm - -getStackLoc :: Area -> ByteOff -> LabelMap StackMap -> StackLoc -getStackLoc Old n _ = n -getStackLoc (Young l) n stackmaps = - case mapLookup l stackmaps of - Nothing -> pprPanic "getStackLoc" (ppr l) - Just sm -> sm_sp sm - sm_args sm + n - - --- ----------------------------------------------------------------------------- --- Handling stack allocation for a last node - --- We take a single last node and turn it into: --- --- C1 (some statements) --- Sp = Sp + N --- C2 (some more statements) --- call f() -- the actual last node --- --- plus possibly some more blocks (we may have to add some fixup code --- between the last node and the continuation). --- --- C1: is the code for saving the variables across this last node onto --- the stack, if the continuation is a call or jumps to a proc point. --- --- C2: if the last node is a safe foreign call, we have to inject some --- extra code that goes *after* the Sp adjustment. - -handleLastNode - :: DynFlags -> ProcPointSet -> LabelMap CmmLocalLive -> LabelMap ByteOff - -> LabelMap StackMap -> StackMap -> CmmTickScope - -> Block CmmNode O O - -> CmmNode O C - -> UniqSM - ( [CmmNode O O] -- nodes to go *before* the Sp adjustment - , ByteOff -- amount to adjust Sp - , CmmNode O C -- new last node - , [CmmBlock] -- new blocks - , LabelMap StackMap -- stackmaps for the continuations - ) - -handleLastNode dflags procpoints liveness cont_info stackmaps - stack0@StackMap { sm_sp = sp0 } tscp middle last - = case last of - -- At each return / tail call, - -- adjust Sp to point to the last argument pushed, which - -- is cml_args, after popping any other junk from the stack. - CmmCall{ cml_cont = Nothing, .. } -> do - let sp_off = sp0 - cml_args - return ([], sp_off, last, [], mapEmpty) - - -- At each CmmCall with a continuation: - CmmCall{ cml_cont = Just cont_lbl, .. } -> - return $ lastCall cont_lbl cml_args cml_ret_args cml_ret_off - - CmmForeignCall{ succ = cont_lbl, .. 
} -> do - return $ lastCall cont_lbl (wORD_SIZE dflags) ret_args ret_off - -- one word of args: the return address - - CmmBranch {} -> handleBranches - CmmCondBranch {} -> handleBranches - CmmSwitch {} -> handleBranches - - where - -- Calls and ForeignCalls are handled the same way: - lastCall :: BlockId -> ByteOff -> ByteOff -> ByteOff - -> ( [CmmNode O O] - , ByteOff - , CmmNode O C - , [CmmBlock] - , LabelMap StackMap - ) - lastCall lbl cml_args cml_ret_args cml_ret_off - = ( assignments - , spOffsetForCall sp0 cont_stack cml_args - , last - , [] -- no new blocks - , mapSingleton lbl cont_stack ) - where - (assignments, cont_stack) = prepareStack lbl cml_ret_args cml_ret_off - - - prepareStack lbl cml_ret_args cml_ret_off - | Just cont_stack <- mapLookup lbl stackmaps - -- If we have already seen this continuation before, then - -- we just have to make the stack look the same: - = (fixupStack stack0 cont_stack, cont_stack) - -- Otherwise, we have to allocate the stack frame - | otherwise - = (save_assignments, new_cont_stack) - where - (new_cont_stack, save_assignments) - = setupStackFrame dflags lbl liveness cml_ret_off cml_ret_args stack0 - - - -- For other last nodes (branches), if any of the targets is a - -- proc point, we have to set up the stack to match what the proc - -- point is expecting. - -- - handleBranches :: UniqSM ( [CmmNode O O] - , ByteOff - , CmmNode O C - , [CmmBlock] - , LabelMap StackMap ) - - handleBranches - -- Note [diamond proc point] - | Just l <- futureContinuation middle - , (nub $ filter (`setMember` procpoints) $ successors last) == [l] - = do - let cont_args = mapFindWithDefault 0 l cont_info - (assigs, cont_stack) = prepareStack l cont_args (sm_ret_off stack0) - out = mapFromList [ (l', cont_stack) - | l' <- successors last ] - return ( assigs - , spOffsetForCall sp0 cont_stack (wORD_SIZE dflags) - , last - , [] - , out) - - | otherwise = do - pps <- mapM handleBranch (successors last) - let lbl_map :: LabelMap Label - lbl_map = mapFromList [ (l,tmp) | (l,tmp,_,_) <- pps ] - fix_lbl l = mapFindWithDefault l l lbl_map - return ( [] - , 0 - , mapSuccessors fix_lbl last - , concat [ blk | (_,_,_,blk) <- pps ] - , mapFromList [ (l, sm) | (l,_,sm,_) <- pps ] ) - - -- For each successor of this block - handleBranch :: BlockId -> UniqSM (BlockId, BlockId, StackMap, [CmmBlock]) - handleBranch l - -- (a) if the successor already has a stackmap, we need to - -- shuffle the current stack to make it look the same. - -- We have to insert a new block to make this happen. - | Just stack2 <- mapLookup l stackmaps - = do - let assigs = fixupStack stack0 stack2 - (tmp_lbl, block) <- makeFixupBlock dflags sp0 l stack2 tscp assigs - return (l, tmp_lbl, stack2, block) - - -- (b) if the successor is a proc point, save everything - -- on the stack. - | l `setMember` procpoints - = do - let cont_args = mapFindWithDefault 0 l cont_info - (stack2, assigs) = - setupStackFrame dflags l liveness (sm_ret_off stack0) - cont_args stack0 - (tmp_lbl, block) <- makeFixupBlock dflags sp0 l stack2 tscp assigs - return (l, tmp_lbl, stack2, block) - - -- (c) otherwise, the current StackMap is the StackMap for - -- the continuation. But we must remember to remove any - -- variables from the StackMap that are *not* live at - -- the destination, because this StackMap might be used - -- by fixupStack if this is a join point. 
- | otherwise = return (l, l, stack1, []) - where live = mapFindWithDefault (panic "handleBranch") l liveness - stack1 = stack0 { sm_regs = filterUFM is_live (sm_regs stack0) } - is_live (r,_) = r `elemRegSet` live - - -makeFixupBlock :: DynFlags -> ByteOff -> Label -> StackMap - -> CmmTickScope -> [CmmNode O O] - -> UniqSM (Label, [CmmBlock]) -makeFixupBlock dflags sp0 l stack tscope assigs - | null assigs && sp0 == sm_sp stack = return (l, []) - | otherwise = do - tmp_lbl <- newBlockId - let sp_off = sp0 - sm_sp stack - block = blockJoin (CmmEntry tmp_lbl tscope) - ( maybeAddSpAdj dflags sp0 sp_off - $ blockFromList assigs ) - (CmmBranch l) - return (tmp_lbl, [block]) - - --- Sp is currently pointing to current_sp, --- we want it to point to --- (sm_sp cont_stack - sm_args cont_stack + args) --- so the difference is --- sp0 - (sm_sp cont_stack - sm_args cont_stack + args) -spOffsetForCall :: ByteOff -> StackMap -> ByteOff -> ByteOff -spOffsetForCall current_sp cont_stack args - = current_sp - (sm_sp cont_stack - sm_args cont_stack + args) - - --- | create a sequence of assignments to establish the new StackMap, --- given the old StackMap. -fixupStack :: StackMap -> StackMap -> [CmmNode O O] -fixupStack old_stack new_stack = concatMap move new_locs - where - old_map = sm_regs old_stack - new_locs = stackSlotRegs new_stack - - move (r,n) - | Just (_,m) <- lookupUFM old_map r, n == m = [] - | otherwise = [CmmStore (CmmStackSlot Old n) - (CmmReg (CmmLocal r))] - - - -setupStackFrame - :: DynFlags - -> BlockId -- label of continuation - -> LabelMap CmmLocalLive -- liveness - -> ByteOff -- updfr - -> ByteOff -- bytes of return values on stack - -> StackMap -- current StackMap - -> (StackMap, [CmmNode O O]) - -setupStackFrame dflags lbl liveness updfr_off ret_args stack0 - = (cont_stack, assignments) - where - -- get the set of LocalRegs live in the continuation - live = mapFindWithDefault Set.empty lbl liveness - - -- the stack from the base to updfr_off is off-limits. - -- our new stack frame contains: - -- * saved live variables - -- * the return address [young(C) + 8] - -- * the args for the call, - -- which are replaced by the return values at the return - -- point. - - -- everything up to updfr_off is off-limits - -- stack1 contains updfr_off, plus everything we need to save - (stack1, assignments) = allocate dflags updfr_off live stack0 - - -- And the Sp at the continuation is: - -- sm_sp stack1 + ret_args - cont_stack = stack1{ sm_sp = sm_sp stack1 + ret_args - , sm_args = ret_args - , sm_ret_off = updfr_off - } - - --- ----------------------------------------------------------------------------- --- Note [diamond proc point] --- --- This special case looks for the pattern we get from a typical --- tagged case expression: --- --- Sp[young(L1)] = L1 --- if (R1 & 7) != 0 goto L1 else goto L2 --- L2: --- call [R1] returns to L1 --- L1: live: {y} --- x = R1 --- --- If we let the generic case handle this, we get --- --- Sp[-16] = L1 --- if (R1 & 7) != 0 goto L1a else goto L2 --- L2: --- Sp[-8] = y --- Sp = Sp - 16 --- call [R1] returns to L1 --- L1a: --- Sp[-8] = y --- Sp = Sp - 16 --- goto L1 --- L1: --- x = R1 --- --- The code for saving the live vars is duplicated in each branch, and --- furthermore there is an extra jump in the fast path (assuming L1 is --- a proc point, which it probably is if there is a heap check). --- --- So to fix this we want to set up the stack frame before the --- conditional jump. How do we know when to do this, and when it is --- safe? 
The basic idea is, when we see the assignment --- --- Sp[young(L)] = L --- --- we know that --- * we are definitely heading for L --- * there can be no more reads from another stack area, because young(L) --- overlaps with it. --- --- We don't necessarily know that everything live at L is live now --- (some might be assigned between here and the jump to L). So we --- simplify and only do the optimisation when we see --- --- (1) a block containing an assignment of a return address L --- (2) ending in a branch where one (and only) continuation goes to L, --- and no other continuations go to proc points. --- --- then we allocate the stack frame for L at the end of the block, --- before the branch. --- --- We could generalise (2), but that would make it a bit more --- complicated to handle, and this currently catches the common case. - -futureContinuation :: Block CmmNode O O -> Maybe BlockId -futureContinuation middle = foldBlockNodesB f middle Nothing - where f :: CmmNode a b -> Maybe BlockId -> Maybe BlockId - f (CmmStore (CmmStackSlot (Young l) _) (CmmLit (CmmBlock _))) _ - = Just l - f _ r = r - --- ----------------------------------------------------------------------------- --- Saving live registers - --- | Given a set of live registers and a StackMap, save all the registers --- on the stack and return the new StackMap and the assignments to do --- the saving. --- -allocate :: DynFlags -> ByteOff -> LocalRegSet -> StackMap - -> (StackMap, [CmmNode O O]) -allocate dflags ret_off live stackmap@StackMap{ sm_sp = sp0 - , sm_regs = regs0 } - = - -- we only have to save regs that are not already in a slot - let to_save = filter (not . (`elemUFM` regs0)) (Set.elems live) - regs1 = filterUFM (\(r,_) -> elemRegSet r live) regs0 - in - - -- make a map of the stack - let stack = reverse $ Array.elems $ - accumArray (\_ x -> x) Empty (1, toWords dflags (max sp0 ret_off)) $ - ret_words ++ live_words - where ret_words = - [ (x, Occupied) - | x <- [ 1 .. toWords dflags ret_off] ] - live_words = - [ (toWords dflags x, Occupied) - | (r,off) <- nonDetEltsUFM regs1, - -- See Note [Unique Determinism and code generation] - let w = localRegBytes dflags r, - x <- [ off, off - wORD_SIZE dflags .. off - w + 1] ] - in - - -- Pass over the stack: find slots to save all the new live variables, - -- choosing the oldest slots first (hence a foldr). - let - save slot ([], stack, n, assigs, regs) -- no more regs to save - = ([], slot:stack, plusW dflags n 1, assigs, regs) - save slot (to_save, stack, n, assigs, regs) - = case slot of - Occupied -> (to_save, Occupied:stack, plusW dflags n 1, assigs, regs) - Empty - | Just (stack', r, to_save') <- - select_save to_save (slot:stack) - -> let assig = CmmStore (CmmStackSlot Old n') - (CmmReg (CmmLocal r)) - n' = plusW dflags n 1 - in - (to_save', stack', n', assig : assigs, (r,(r,n')):regs) - - | otherwise - -> (to_save, slot:stack, plusW dflags n 1, assigs, regs) - - -- we should do better here: right now we'll fit the smallest first, - -- but it would make more sense to fit the biggest first. 
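- -- For example (illustrative): if the remaining stack here starts
- -- [Empty, Occupied, ...] then only a one-word register can be placed
- -- at this position; a larger register goes into no_fit and is tried
- -- again at the next Empty slot.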
- select_save :: [LocalReg] -> [StackSlot] - -> Maybe ([StackSlot], LocalReg, [LocalReg]) - select_save regs stack = go regs [] - where go [] _no_fit = Nothing - go (r:rs) no_fit - | Just rest <- dropEmpty words stack - = Just (replicate words Occupied ++ rest, r, rs++no_fit) - | otherwise - = go rs (r:no_fit) - where words = localRegWords dflags r - - -- fill in empty slots as much as possible - (still_to_save, save_stack, n, save_assigs, save_regs) - = foldr save (to_save, [], 0, [], []) stack - - -- push any remaining live vars on the stack - (push_sp, push_assigs, push_regs) - = foldr push (n, [], []) still_to_save - where - push r (n, assigs, regs) - = (n', assig : assigs, (r,(r,n')) : regs) - where - n' = n + localRegBytes dflags r - assig = CmmStore (CmmStackSlot Old n') - (CmmReg (CmmLocal r)) - - trim_sp - | not (null push_regs) = push_sp - | otherwise - = plusW dflags n (- length (takeWhile isEmpty save_stack)) - - final_regs = regs1 `addListToUFM` push_regs - `addListToUFM` save_regs - - in - -- XXX should be an assert - if ( n /= max sp0 ret_off ) then pprPanic "allocate" (ppr n <+> ppr sp0 <+> ppr ret_off) else - - if (trim_sp .&. (wORD_SIZE dflags - 1)) /= 0 then pprPanic "allocate2" (ppr trim_sp <+> ppr final_regs <+> ppr push_sp) else - - ( stackmap { sm_regs = final_regs , sm_sp = trim_sp } - , push_assigs ++ save_assigs ) - - --- ----------------------------------------------------------------------------- --- Manifesting Sp - --- | Manifest Sp: turn all the CmmStackSlots into CmmLoads from Sp. The --- block looks like this: --- --- middle_pre -- the middle nodes --- Sp = Sp + sp_off -- Sp adjustment goes here --- last -- the last node --- --- And we have some extra blocks too (that don't contain Sp adjustments) --- --- The adjustment for middle_pre will be different from that for --- middle_post, because the Sp adjustment intervenes. --- -manifestSp - :: DynFlags - -> LabelMap StackMap -- StackMaps for other blocks - -> StackMap -- StackMap for this block - -> ByteOff -- Sp on entry to the block - -> ByteOff -- SpHigh - -> CmmNode C O -- first node - -> [CmmNode O O] -- middle - -> ByteOff -- sp_off - -> CmmNode O C -- last node - -> [CmmBlock] -- new blocks - -> [CmmBlock] -- final blocks with Sp manifest - -manifestSp dflags stackmaps stack0 sp0 sp_high - first middle_pre sp_off last fixup_blocks - = final_block : fixup_blocks' - where - area_off = getAreaOff stackmaps - - adj_pre_sp, adj_post_sp :: CmmNode e x -> CmmNode e x - adj_pre_sp = mapExpDeep (areaToSp dflags sp0 sp_high area_off) - adj_post_sp = mapExpDeep (areaToSp dflags (sp0 - sp_off) sp_high area_off) - - final_middle = maybeAddSpAdj dflags sp0 sp_off - . blockFromList - . map adj_pre_sp - . 
elimStackStores stack0 stackmaps area_off - $ middle_pre - final_last = optStackCheck (adj_post_sp last) - - final_block = blockJoin first final_middle final_last - - fixup_blocks' = map (mapBlock3' (id, adj_post_sp, id)) fixup_blocks - -getAreaOff :: LabelMap StackMap -> (Area -> StackLoc) -getAreaOff _ Old = 0 -getAreaOff stackmaps (Young l) = - case mapLookup l stackmaps of - Just sm -> sm_sp sm - sm_args sm - Nothing -> pprPanic "getAreaOff" (ppr l) - - -maybeAddSpAdj - :: DynFlags -> ByteOff -> ByteOff -> Block CmmNode O O -> Block CmmNode O O -maybeAddSpAdj dflags sp0 sp_off block = - add_initial_unwind $ add_adj_unwind $ adj block - where - adj block - | sp_off /= 0 - = block `blockSnoc` CmmAssign spReg (cmmOffset dflags spExpr sp_off) - | otherwise = block - -- Add unwind pseudo-instruction at the beginning of each block to - -- document Sp level for debugging - add_initial_unwind block - | debugLevel dflags > 0 - = CmmUnwind [(Sp, Just sp_unwind)] `blockCons` block - | otherwise - = block - where sp_unwind = CmmRegOff spReg (sp0 - wORD_SIZE dflags) - - -- Add unwind pseudo-instruction right after the Sp adjustment - -- if there is one. - add_adj_unwind block - | debugLevel dflags > 0 - , sp_off /= 0 - = block `blockSnoc` CmmUnwind [(Sp, Just sp_unwind)] - | otherwise - = block - where sp_unwind = CmmRegOff spReg (sp0 - wORD_SIZE dflags - sp_off) - -{- Note [SP old/young offsets] - -Sp(L) is the Sp offset on entry to block L relative to the base of the -OLD area. - -SpArgs(L) is the size of the young area for L, i.e. the number of -arguments. - - - in block L, each reference to [old + N] turns into - [Sp + Sp(L) - N] - - - in block L, each reference to [young(L') + N] turns into - [Sp + Sp(L) - Sp(L') + SpArgs(L') - N] - - - be careful with the last node of each block: Sp has already been adjusted - to be Sp + Sp(L) - Sp(L') --} - -areaToSp :: DynFlags -> ByteOff -> ByteOff -> (Area -> StackLoc) -> CmmExpr -> CmmExpr - -areaToSp dflags sp_old _sp_hwm area_off (CmmStackSlot area n) - = cmmOffset dflags spExpr (sp_old - area_off area - n) - -- Replace (CmmStackSlot area n) with an offset from Sp - -areaToSp dflags _ sp_hwm _ (CmmLit CmmHighStackMark) - = mkIntExpr dflags sp_hwm - -- Replace CmmHighStackMark with the number of bytes of stack used, - -- the sp_hwm. See Note [Stack usage] in GHC.StgToCmm.Heap - -areaToSp dflags _ _ _ (CmmMachOp (MO_U_Lt _) args) - | falseStackCheck args - = zeroExpr dflags -areaToSp dflags _ _ _ (CmmMachOp (MO_U_Ge _) args) - | falseStackCheck args - = mkIntExpr dflags 1 - -- Replace a stack-overflow test that cannot fail with a no-op - -- See Note [Always false stack check] - -areaToSp _ _ _ _ other = other - --- | Determine whether a stack check cannot fail. -falseStackCheck :: [CmmExpr] -> Bool -falseStackCheck [ CmmMachOp (MO_Sub _) - [ CmmRegOff (CmmGlobal Sp) x_off - , CmmLit (CmmInt y_lit _)] - , CmmReg (CmmGlobal SpLim)] - = fromIntegral x_off >= y_lit -falseStackCheck _ = False - --- Note [Always false stack check] --- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --- We can optimise stack checks of the form --- --- if ((Sp + x) - y < SpLim) then .. else .. --- --- where are non-negative integer byte offsets. Since we know that --- SpLim <= Sp (remember the stack grows downwards), this test must --- yield False if (x >= y), so we can rewrite the comparison to False. --- A subsequent sinking pass will later drop the dead code. 
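---
--- A worked instance (with purely illustrative numbers): take x = 64 and
--- y = 40, so the check is
---
--- if ((Sp + 64) - 40 < SpLim) then .. else ..
---
--- i.e. "Sp + 24 < SpLim", which can never hold because SpLim <= Sp.  Here
--- falseStackCheck sees x_off = 64 >= 40 = y_lit, so the comparison is
--- rewritten to a constant.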
--- Optimising this away depends on knowing that SpLim <= Sp, so it is --- really the job of the stack layout algorithm, hence we do it now. --- --- The control flow optimiser may negate a conditional to increase --- the likelihood of a fallthrough if the branch is not taken. But --- not every conditional is inverted as the control flow optimiser --- places some requirements on the predecessors of both branch targets. --- So we better look for the inverted comparison too. - -optStackCheck :: CmmNode O C -> CmmNode O C -optStackCheck n = -- Note [Always false stack check] - case n of - CmmCondBranch (CmmLit (CmmInt 0 _)) _true false _ -> CmmBranch false - CmmCondBranch (CmmLit (CmmInt _ _)) true _false _ -> CmmBranch true - other -> other - - --- ----------------------------------------------------------------------------- - --- | Eliminate stores of the form --- --- Sp[area+n] = r --- --- when we know that r is already in the same slot as Sp[area+n]. We --- could do this in a later optimisation pass, but that would involve --- a separate analysis and we already have the information to hand --- here. It helps clean up some extra stack stores in common cases. --- --- Note that we may have to modify the StackMap as we walk through the --- code using procMiddle, since an assignment to a variable in the --- StackMap will invalidate its mapping there. --- -elimStackStores :: StackMap - -> LabelMap StackMap - -> (Area -> ByteOff) - -> [CmmNode O O] - -> [CmmNode O O] -elimStackStores stackmap stackmaps area_off nodes - = go stackmap nodes - where - go _stackmap [] = [] - go stackmap (n:ns) - = case n of - CmmStore (CmmStackSlot area m) (CmmReg (CmmLocal r)) - | Just (_,off) <- lookupUFM (sm_regs stackmap) r - , area_off area + m == off - -> go stackmap ns - _otherwise - -> n : go (procMiddle stackmaps n stackmap) ns - - --- ----------------------------------------------------------------------------- --- Update info tables to include stack liveness - - -setInfoTableStackMap :: DynFlags -> LabelMap StackMap -> CmmDecl -> CmmDecl -setInfoTableStackMap dflags stackmaps (CmmProc top_info@TopInfo{..} l v g) - = CmmProc top_info{ info_tbls = mapMapWithKey fix_info info_tbls } l v g - where - fix_info lbl info_tbl@CmmInfoTable{ cit_rep = StackRep _ } = - info_tbl { cit_rep = StackRep (get_liveness lbl) } - fix_info _ other = other - - get_liveness :: BlockId -> Liveness - get_liveness lbl - = case mapLookup lbl stackmaps of - Nothing -> pprPanic "setInfoTableStackMap" (ppr lbl <+> ppr info_tbls) - Just sm -> stackMapToLiveness dflags sm - -setInfoTableStackMap _ _ d = d - - -stackMapToLiveness :: DynFlags -> StackMap -> Liveness -stackMapToLiveness dflags StackMap{..} = - reverse $ Array.elems $ - accumArray (\_ x -> x) True (toWords dflags sm_ret_off + 1, - toWords dflags (sm_sp - sm_args)) live_words - where - live_words = [ (toWords dflags off, False) - | (r,off) <- nonDetEltsUFM sm_regs - , isGcPtrType (localRegType r) ] - -- See Note [Unique Determinism and code generation] - --- ----------------------------------------------------------------------------- --- Pass 2 --- ----------------------------------------------------------------------------- - -insertReloadsAsNeeded - :: DynFlags - -> ProcPointSet - -> LabelMap StackMap - -> BlockId - -> [CmmBlock] - -> UniqSM [CmmBlock] -insertReloadsAsNeeded dflags procpoints final_stackmaps entry blocks = do - toBlockList . 
fst <$> - rewriteCmmBwd liveLattice rewriteCC (ofBlockList entry blocks) mapEmpty - where - rewriteCC :: RewriteFun CmmLocalLive - rewriteCC (BlockCC e_node middle0 x_node) fact_base0 = do - let entry_label = entryLabel e_node - stackmap = case mapLookup entry_label final_stackmaps of - Just sm -> sm - Nothing -> panic "insertReloadsAsNeeded: rewriteCC: stackmap" - - -- Merge the liveness from successor blocks and analyse the last - -- node. - joined = gen_kill dflags x_node $! - joinOutFacts liveLattice x_node fact_base0 - -- What is live at the start of middle0. - live_at_middle0 = foldNodesBwdOO (gen_kill dflags) middle0 joined - - -- If this is a procpoint we need to add the reloads, but only if - -- they're actually live. Furthermore, nothing is live at the entry - -- to a proc point. - (middle1, live_with_reloads) - | entry_label `setMember` procpoints - = let reloads = insertReloads dflags stackmap live_at_middle0 - in (foldr blockCons middle0 reloads, emptyRegSet) - | otherwise - = (middle0, live_at_middle0) - - -- Final liveness for this block. - !fact_base2 = mapSingleton entry_label live_with_reloads - - return (BlockCC e_node middle1 x_node, fact_base2) - -insertReloads :: DynFlags -> StackMap -> CmmLocalLive -> [CmmNode O O] -insertReloads dflags stackmap live = - [ CmmAssign (CmmLocal reg) - -- This cmmOffset basically corresponds to manifesting - -- @CmmStackSlot Old sp_off@, see Note [SP old/young offsets] - (CmmLoad (cmmOffset dflags spExpr (sp_off - reg_off)) - (localRegType reg)) - | (reg, reg_off) <- stackSlotRegs stackmap - , reg `elemRegSet` live - ] - where - sp_off = sm_sp stackmap - --- ----------------------------------------------------------------------------- --- Lowering safe foreign calls - -{- -Note [Lower safe foreign calls] - -We start with - - Sp[young(L1)] = L1 - ,----------------------- - | r1 = foo(x,y,z) returns to L1 - '----------------------- - L1: - R1 = r1 -- copyIn, inserted by mkSafeCall - ... - -the stack layout algorithm will arrange to save and reload everything -live across the call. Our job now is to expand the call so we get - - Sp[young(L1)] = L1 - ,----------------------- - | SAVE_THREAD_STATE() - | token = suspendThread(BaseReg, interruptible) - | r = foo(x,y,z) - | BaseReg = resumeThread(token) - | LOAD_THREAD_STATE() - | R1 = r -- copyOut - | jump Sp[0] - '----------------------- - L1: - r = R1 -- copyIn, inserted by mkSafeCall - ... - -Note the copyOut, which saves the results in the places that L1 is -expecting them (see Note [safe foreign call convention]). Note also -that safe foreign call is replace by an unsafe one in the Cmm graph. --} - -lowerSafeForeignCall :: DynFlags -> CmmBlock -> UniqSM CmmBlock -lowerSafeForeignCall dflags block - | (entry@(CmmEntry _ tscp), middle, CmmForeignCall { .. }) <- blockSplit block - = do - -- Both 'id' and 'new_base' are KindNonPtr because they're - -- RTS-only objects and are not subject to garbage collection - id <- newTemp (bWord dflags) - new_base <- newTemp (cmmRegType dflags baseReg) - let (caller_save, caller_load) = callerSaveVolatileRegs dflags - save_state_code <- saveThreadState dflags - load_state_code <- loadThreadState dflags - let suspend = save_state_code <*> - caller_save <*> - mkMiddle (callSuspendThread dflags id intrbl) - midCall = mkUnsafeCall tgt res args - resume = mkMiddle (callResumeThread new_base id) <*> - -- Assign the result to BaseReg: we - -- might now have a different Capability! 
- mkAssign baseReg (CmmReg (CmmLocal new_base)) <*> - caller_load <*> - load_state_code - - (_, regs, copyout) = - copyOutOflow dflags NativeReturn Jump (Young succ) - (map (CmmReg . CmmLocal) res) - ret_off [] - - -- NB. after resumeThread returns, the top-of-stack probably contains - -- the stack frame for succ, but it might not: if the current thread - -- received an exception during the call, then the stack might be - -- different. Hence we continue by jumping to the top stack frame, - -- not by jumping to succ. - jump = CmmCall { cml_target = entryCode dflags $ - CmmLoad spExpr (bWord dflags) - , cml_cont = Just succ - , cml_args_regs = regs - , cml_args = widthInBytes (wordWidth dflags) - , cml_ret_args = ret_args - , cml_ret_off = ret_off } - - graph' <- lgraphOfAGraph ( suspend <*> - midCall <*> - resume <*> - copyout <*> - mkLast jump, tscp) - - case toBlockList graph' of - [one] -> let (_, middle', last) = blockSplit one - in return (blockJoin entry (middle `blockAppend` middle') last) - _ -> panic "lowerSafeForeignCall0" - - -- Block doesn't end in a safe foreign call: - | otherwise = return block - - -foreignLbl :: FastString -> CmmExpr -foreignLbl name = CmmLit (CmmLabel (mkForeignLabel name Nothing ForeignLabelInExternalPackage IsFunction)) - -callSuspendThread :: DynFlags -> LocalReg -> Bool -> CmmNode O O -callSuspendThread dflags id intrbl = - CmmUnsafeForeignCall - (ForeignTarget (foreignLbl (fsLit "suspendThread")) - (ForeignConvention CCallConv [AddrHint, NoHint] [AddrHint] CmmMayReturn)) - [id] [baseExpr, mkIntExpr dflags (fromEnum intrbl)] - -callResumeThread :: LocalReg -> LocalReg -> CmmNode O O -callResumeThread new_base id = - CmmUnsafeForeignCall - (ForeignTarget (foreignLbl (fsLit "resumeThread")) - (ForeignConvention CCallConv [AddrHint] [AddrHint] CmmMayReturn)) - [new_base] [CmmReg (CmmLocal id)] - --- ----------------------------------------------------------------------------- - -plusW :: DynFlags -> ByteOff -> WordOff -> ByteOff -plusW dflags b w = b + w * wORD_SIZE dflags - -data StackSlot = Occupied | Empty - -- Occupied: a return address or part of an update frame - -instance Outputable StackSlot where - ppr Occupied = text "XXX" - ppr Empty = text "---" - -dropEmpty :: WordOff -> [StackSlot] -> Maybe [StackSlot] -dropEmpty 0 ss = Just ss -dropEmpty n (Empty : ss) = dropEmpty (n-1) ss -dropEmpty _ _ = Nothing - -isEmpty :: StackSlot -> Bool -isEmpty Empty = True -isEmpty _ = False - -localRegBytes :: DynFlags -> LocalReg -> ByteOff -localRegBytes dflags r - = roundUpToWords dflags (widthInBytes (typeWidth (localRegType r))) - -localRegWords :: DynFlags -> LocalReg -> WordOff -localRegWords dflags = toWords dflags . localRegBytes dflags - -toWords :: DynFlags -> ByteOff -> WordOff -toWords dflags x = x `quot` wORD_SIZE dflags - - -stackSlotRegs :: StackMap -> [(LocalReg, StackLoc)] -stackSlotRegs sm = nonDetEltsUFM (sm_regs sm) - -- See Note [Unique Determinism and code generation] diff --git a/compiler/cmm/CmmLex.x b/compiler/cmm/CmmLex.x deleted file mode 100644 index 468ea00a93..0000000000 --- a/compiler/cmm/CmmLex.x +++ /dev/null @@ -1,368 +0,0 @@ ------------------------------------------------------------------------------ --- --- (c) The University of Glasgow, 2004-2006 --- --- Lexer for concrete Cmm. We try to stay close to the C-- spec, but there --- are a few minor differences: --- --- * extra keywords for our macros, and float32/float64 types --- * global registers (Sp,Hp, etc.) 
--- ------------------------------------------------------------------------------ - -{ -module CmmLex ( - CmmToken(..), cmmlex, - ) where - -import GhcPrelude - -import CmmExpr - -import Lexer -import CmmMonad -import SrcLoc -import UniqFM -import StringBuffer -import FastString -import Ctype -import Util ---import TRACE - -import Data.Word -import Data.Char -} - -$whitechar = [\ \t\n\r\f\v\xa0] -- \xa0 is Unicode no-break space -$white_no_nl = $whitechar # \n - -$ascdigit = 0-9 -$unidigit = \x01 -- Trick Alex into handling Unicode. See alexGetChar. -$digit = [$ascdigit $unidigit] -$octit = 0-7 -$hexit = [$digit A-F a-f] - -$unilarge = \x03 -- Trick Alex into handling Unicode. See alexGetChar. -$asclarge = [A-Z \xc0-\xd6 \xd8-\xde] -$large = [$asclarge $unilarge] - -$unismall = \x04 -- Trick Alex into handling Unicode. See alexGetChar. -$ascsmall = [a-z \xdf-\xf6 \xf8-\xff] -$small = [$ascsmall $unismall \_] - -$namebegin = [$large $small \. \$ \@] -$namechar = [$namebegin $digit] - -@decimal = $digit+ -@octal = $octit+ -@hexadecimal = $hexit+ -@exponent = [eE] [\-\+]? @decimal - -@floating_point = @decimal \. @decimal @exponent? | @decimal @exponent - -@escape = \\ ([abfnrt\\\'\"\?] | x $hexit{1,2} | $octit{1,3}) -@strchar = ($printable # [\"\\]) | @escape - -cmm :- - -$white_no_nl+ ; -^\# pragma .* \n ; -- Apple GCC 3.3 CPP generates pragmas in its output - -^\# (line)? { begin line_prag } - --- single-line line pragmas, of the form --- # "" \n - $digit+ { setLine line_prag1 } - \" [^\"]* \" { setFile line_prag2 } - .* { pop } - -<0> { - \n ; - - [\:\;\{\}\[\]\(\)\=\`\~\/\*\%\-\+\&\^\|\>\<\,\!] { special_char } - - ".." { kw CmmT_DotDot } - "::" { kw CmmT_DoubleColon } - ">>" { kw CmmT_Shr } - "<<" { kw CmmT_Shl } - ">=" { kw CmmT_Ge } - "<=" { kw CmmT_Le } - "==" { kw CmmT_Eq } - "!=" { kw CmmT_Ne } - "&&" { kw CmmT_BoolAnd } - "||" { kw CmmT_BoolOr } - - "True" { kw CmmT_True } - "False" { kw CmmT_False } - "likely" { kw CmmT_likely} - - P@decimal { global_regN (\n -> VanillaReg n VGcPtr) } - R@decimal { global_regN (\n -> VanillaReg n VNonGcPtr) } - F@decimal { global_regN FloatReg } - D@decimal { global_regN DoubleReg } - L@decimal { global_regN LongReg } - Sp { global_reg Sp } - SpLim { global_reg SpLim } - Hp { global_reg Hp } - HpLim { global_reg HpLim } - CCCS { global_reg CCCS } - CurrentTSO { global_reg CurrentTSO } - CurrentNursery { global_reg CurrentNursery } - HpAlloc { global_reg HpAlloc } - BaseReg { global_reg BaseReg } - MachSp { global_reg MachSp } - UnwindReturnReg { global_reg UnwindReturnReg } - - $namebegin $namechar* { name } - - 0 @octal { tok_octal } - @decimal { tok_decimal } - 0[xX] @hexadecimal { tok_hexadecimal } - @floating_point { strtoken tok_float } - - \" @strchar* \" { strtoken tok_string } -} - -{ -data CmmToken - = CmmT_SpecChar Char - | CmmT_DotDot - | CmmT_DoubleColon - | CmmT_Shr - | CmmT_Shl - | CmmT_Ge - | CmmT_Le - | CmmT_Eq - | CmmT_Ne - | CmmT_BoolAnd - | CmmT_BoolOr - | CmmT_CLOSURE - | CmmT_INFO_TABLE - | CmmT_INFO_TABLE_RET - | CmmT_INFO_TABLE_FUN - | CmmT_INFO_TABLE_CONSTR - | CmmT_INFO_TABLE_SELECTOR - | CmmT_else - | CmmT_export - | CmmT_section - | CmmT_goto - | CmmT_if - | CmmT_call - | CmmT_jump - | CmmT_foreign - | CmmT_never - | CmmT_prim - | CmmT_reserve - | CmmT_return - | CmmT_returns - | CmmT_import - | CmmT_switch - | CmmT_case - | CmmT_default - | CmmT_push - | CmmT_unwind - | CmmT_bits8 - | CmmT_bits16 - | CmmT_bits32 - | CmmT_bits64 - | CmmT_bits128 - | CmmT_bits256 - | CmmT_bits512 - | CmmT_float32 - | CmmT_float64 - | 
CmmT_gcptr - | CmmT_GlobalReg GlobalReg - | CmmT_Name FastString - | CmmT_String String - | CmmT_Int Integer - | CmmT_Float Rational - | CmmT_EOF - | CmmT_False - | CmmT_True - | CmmT_likely - deriving (Show) - --- ----------------------------------------------------------------------------- --- Lexer actions - -type Action = RealSrcSpan -> StringBuffer -> Int -> PD (RealLocated CmmToken) - -begin :: Int -> Action -begin code _span _str _len = do liftP (pushLexState code); lexToken - -pop :: Action -pop _span _buf _len = liftP popLexState >> lexToken - -special_char :: Action -special_char span buf _len = return (L span (CmmT_SpecChar (currentChar buf))) - -kw :: CmmToken -> Action -kw tok span _buf _len = return (L span tok) - -global_regN :: (Int -> GlobalReg) -> Action -global_regN con span buf len - = return (L span (CmmT_GlobalReg (con (fromIntegral n)))) - where buf' = stepOn buf - n = parseUnsignedInteger buf' (len-1) 10 octDecDigit - -global_reg :: GlobalReg -> Action -global_reg r span _buf _len = return (L span (CmmT_GlobalReg r)) - -strtoken :: (String -> CmmToken) -> Action -strtoken f span buf len = - return (L span $! (f $! lexemeToString buf len)) - -name :: Action -name span buf len = - case lookupUFM reservedWordsFM fs of - Just tok -> return (L span tok) - Nothing -> return (L span (CmmT_Name fs)) - where - fs = lexemeToFastString buf len - -reservedWordsFM = listToUFM $ - map (\(x, y) -> (mkFastString x, y)) [ - ( "CLOSURE", CmmT_CLOSURE ), - ( "INFO_TABLE", CmmT_INFO_TABLE ), - ( "INFO_TABLE_RET", CmmT_INFO_TABLE_RET ), - ( "INFO_TABLE_FUN", CmmT_INFO_TABLE_FUN ), - ( "INFO_TABLE_CONSTR", CmmT_INFO_TABLE_CONSTR ), - ( "INFO_TABLE_SELECTOR",CmmT_INFO_TABLE_SELECTOR ), - ( "else", CmmT_else ), - ( "export", CmmT_export ), - ( "section", CmmT_section ), - ( "goto", CmmT_goto ), - ( "if", CmmT_if ), - ( "call", CmmT_call ), - ( "jump", CmmT_jump ), - ( "foreign", CmmT_foreign ), - ( "never", CmmT_never ), - ( "prim", CmmT_prim ), - ( "reserve", CmmT_reserve ), - ( "return", CmmT_return ), - ( "returns", CmmT_returns ), - ( "import", CmmT_import ), - ( "switch", CmmT_switch ), - ( "case", CmmT_case ), - ( "default", CmmT_default ), - ( "push", CmmT_push ), - ( "unwind", CmmT_unwind ), - ( "bits8", CmmT_bits8 ), - ( "bits16", CmmT_bits16 ), - ( "bits32", CmmT_bits32 ), - ( "bits64", CmmT_bits64 ), - ( "bits128", CmmT_bits128 ), - ( "bits256", CmmT_bits256 ), - ( "bits512", CmmT_bits512 ), - ( "float32", CmmT_float32 ), - ( "float64", CmmT_float64 ), --- New forms - ( "b8", CmmT_bits8 ), - ( "b16", CmmT_bits16 ), - ( "b32", CmmT_bits32 ), - ( "b64", CmmT_bits64 ), - ( "b128", CmmT_bits128 ), - ( "b256", CmmT_bits256 ), - ( "b512", CmmT_bits512 ), - ( "f32", CmmT_float32 ), - ( "f64", CmmT_float64 ), - ( "gcptr", CmmT_gcptr ), - ( "likely", CmmT_likely), - ( "True", CmmT_True ), - ( "False", CmmT_False ) - ] - -tok_decimal span buf len - = return (L span (CmmT_Int $! parseUnsignedInteger buf len 10 octDecDigit)) - -tok_octal span buf len - = return (L span (CmmT_Int $! parseUnsignedInteger (offsetBytes 1 buf) (len-1) 8 octDecDigit)) - -tok_hexadecimal span buf len - = return (L span (CmmT_Int $! parseUnsignedInteger (offsetBytes 2 buf) (len-2) 16 hexDigit)) - -tok_float str = CmmT_Float $! 
readRational str - -tok_string str = CmmT_String (read str) - -- urk, not quite right, but it'll do for now - --- ----------------------------------------------------------------------------- --- Line pragmas - -setLine :: Int -> Action -setLine code span buf len = do - let line = parseUnsignedInteger buf len 10 octDecDigit - liftP $ do - setSrcLoc (mkRealSrcLoc (srcSpanFile span) (fromIntegral line - 1) 1) - -- subtract one: the line number refers to the *following* line - -- trace ("setLine " ++ show line) $ do - popLexState >> pushLexState code - lexToken - -setFile :: Int -> Action -setFile code span buf len = do - let file = lexemeToFastString (stepOn buf) (len-2) - liftP $ do - setSrcLoc (mkRealSrcLoc file (srcSpanEndLine span) (srcSpanEndCol span)) - popLexState >> pushLexState code - lexToken - --- ----------------------------------------------------------------------------- --- This is the top-level function: called from the parser each time a --- new token is to be read from the input. - -cmmlex :: (Located CmmToken -> PD a) -> PD a -cmmlex cont = do - (L span tok) <- lexToken - --trace ("token: " ++ show tok) $ do - cont (L (RealSrcSpan span) tok) - -lexToken :: PD (RealLocated CmmToken) -lexToken = do - inp@(loc1,buf) <- getInput - sc <- liftP getLexState - case alexScan inp sc of - AlexEOF -> do let span = mkRealSrcSpan loc1 loc1 - liftP (setLastToken span 0) - return (L span CmmT_EOF) - AlexError (loc2,_) -> liftP $ failLocMsgP loc1 loc2 "lexical error" - AlexSkip inp2 _ -> do - setInput inp2 - lexToken - AlexToken inp2@(end,_buf2) len t -> do - setInput inp2 - let span = mkRealSrcSpan loc1 end - span `seq` liftP (setLastToken span len) - t span buf len - --- ----------------------------------------------------------------------------- --- Monad stuff - --- Stuff that Alex needs to know about our input type: -type AlexInput = (RealSrcLoc,StringBuffer) - -alexInputPrevChar :: AlexInput -> Char -alexInputPrevChar (_,s) = prevChar s '\n' - --- backwards compatibility for Alex 2.x -alexGetChar :: AlexInput -> Maybe (Char,AlexInput) -alexGetChar inp = case alexGetByte inp of - Nothing -> Nothing - Just (b,i) -> c `seq` Just (c,i) - where c = chr $ fromIntegral b - -alexGetByte :: AlexInput -> Maybe (Word8,AlexInput) -alexGetByte (loc,s) - | atEnd s = Nothing - | otherwise = b `seq` loc' `seq` s' `seq` Just (b, (loc', s')) - where c = currentChar s - b = fromIntegral $ ord $ c - loc' = advanceSrcLoc loc c - s' = stepOn s - -getInput :: PD AlexInput -getInput = PD $ \_ s@PState{ loc=l, buffer=b } -> POk s (l,b) - -setInput :: AlexInput -> PD () -setInput (l,b) = PD $ \_ s -> POk s{ loc=l, buffer=b } () -} diff --git a/compiler/cmm/CmmLint.hs b/compiler/cmm/CmmLint.hs deleted file mode 100644 index 3ad65bd536..0000000000 --- a/compiler/cmm/CmmLint.hs +++ /dev/null @@ -1,261 +0,0 @@ ------------------------------------------------------------------------------ --- --- (c) The University of Glasgow 2011 --- --- CmmLint: checking the correctness of Cmm statements and expressions --- ------------------------------------------------------------------------------ -{-# LANGUAGE DeriveFunctor #-} -{-# LANGUAGE GADTs #-} -module CmmLint ( - cmmLint, cmmLintGraph - ) where - -import GhcPrelude - -import Hoopl.Block -import Hoopl.Collections -import Hoopl.Graph -import Hoopl.Label -import Cmm -import CmmUtils -import CmmLive -import CmmSwitch (switchTargetsToList) -import PprCmm () -- For Outputable instances -import Outputable -import DynFlags - -import Control.Monad (ap) - --- Things to 
check: --- - invariant on CmmBlock in CmmExpr (see comment there) --- - check for branches to blocks that don't exist --- - check types - --- ----------------------------------------------------------------------------- --- Exported entry points: - -cmmLint :: (Outputable d, Outputable h) - => DynFlags -> GenCmmGroup d h CmmGraph -> Maybe SDoc -cmmLint dflags tops = runCmmLint dflags (mapM_ (lintCmmDecl dflags)) tops - -cmmLintGraph :: DynFlags -> CmmGraph -> Maybe SDoc -cmmLintGraph dflags g = runCmmLint dflags (lintCmmGraph dflags) g - -runCmmLint :: Outputable a => DynFlags -> (a -> CmmLint b) -> a -> Maybe SDoc -runCmmLint dflags l p = - case unCL (l p) dflags of - Left err -> Just (vcat [text "Cmm lint error:", - nest 2 err, - text "Program was:", - nest 2 (ppr p)]) - Right _ -> Nothing - -lintCmmDecl :: DynFlags -> GenCmmDecl h i CmmGraph -> CmmLint () -lintCmmDecl dflags (CmmProc _ lbl _ g) - = addLintInfo (text "in proc " <> ppr lbl) $ lintCmmGraph dflags g -lintCmmDecl _ (CmmData {}) - = return () - - -lintCmmGraph :: DynFlags -> CmmGraph -> CmmLint () -lintCmmGraph dflags g = - cmmLocalLiveness dflags g `seq` mapM_ (lintCmmBlock labels) blocks - -- cmmLiveness throws an error if there are registers - -- live on entry to the graph (i.e. undefined - -- variables) - where - blocks = toBlockList g - labels = setFromList (map entryLabel blocks) - - -lintCmmBlock :: LabelSet -> CmmBlock -> CmmLint () -lintCmmBlock labels block - = addLintInfo (text "in basic block " <> ppr (entryLabel block)) $ do - let (_, middle, last) = blockSplit block - mapM_ lintCmmMiddle (blockToList middle) - lintCmmLast labels last - --- ----------------------------------------------------------------------------- --- lintCmmExpr - --- Checks whether a CmmExpr is "type-correct", and check for obvious-looking --- byte/word mismatches. - -lintCmmExpr :: CmmExpr -> CmmLint CmmType -lintCmmExpr (CmmLoad expr rep) = do - _ <- lintCmmExpr expr - -- Disabled, if we have the inlining phase before the lint phase, - -- we can have funny offsets due to pointer tagging. -- EZY - -- when (widthInBytes (typeWidth rep) >= wORD_SIZE) $ - -- cmmCheckWordAddress expr - return rep -lintCmmExpr expr@(CmmMachOp op args) = do - dflags <- getDynFlags - tys <- mapM lintCmmExpr args - if map (typeWidth . cmmExprType dflags) args == machOpArgReps dflags op - then cmmCheckMachOp op args tys - else cmmLintMachOpErr expr (map (cmmExprType dflags) args) (machOpArgReps dflags op) -lintCmmExpr (CmmRegOff reg offset) - = do dflags <- getDynFlags - let rep = typeWidth (cmmRegType dflags reg) - lintCmmExpr (CmmMachOp (MO_Add rep) - [CmmReg reg, CmmLit (CmmInt (fromIntegral offset) rep)]) -lintCmmExpr expr = - do dflags <- getDynFlags - return (cmmExprType dflags expr) - --- Check for some common byte/word mismatches (eg. Sp + 1) -cmmCheckMachOp :: MachOp -> [CmmExpr] -> [CmmType] -> CmmLint CmmType -cmmCheckMachOp op [lit@(CmmLit (CmmInt { })), reg@(CmmReg _)] tys - = cmmCheckMachOp op [reg, lit] tys -cmmCheckMachOp op _ tys - = do dflags <- getDynFlags - return (machOpResultType dflags op tys) - -{- -isOffsetOp :: MachOp -> Bool -isOffsetOp (MO_Add _) = True -isOffsetOp (MO_Sub _) = True -isOffsetOp _ = False - --- This expression should be an address from which a word can be loaded: --- check for funny-looking sub-word offsets. 
-_cmmCheckWordAddress :: CmmExpr -> CmmLint () -_cmmCheckWordAddress e@(CmmMachOp op [arg, CmmLit (CmmInt i _)]) - | isOffsetOp op && notNodeReg arg && i `rem` fromIntegral (wORD_SIZE dflags) /= 0 - = cmmLintDubiousWordOffset e -_cmmCheckWordAddress e@(CmmMachOp op [CmmLit (CmmInt i _), arg]) - | isOffsetOp op && notNodeReg arg && i `rem` fromIntegral (wORD_SIZE dflags) /= 0 - = cmmLintDubiousWordOffset e -_cmmCheckWordAddress _ - = return () - --- No warnings for unaligned arithmetic with the node register, --- which is used to extract fields from tagged constructor closures. -notNodeReg :: CmmExpr -> Bool -notNodeReg (CmmReg reg) | reg == nodeReg = False -notNodeReg _ = True --} - -lintCmmMiddle :: CmmNode O O -> CmmLint () -lintCmmMiddle node = case node of - CmmComment _ -> return () - CmmTick _ -> return () - CmmUnwind{} -> return () - - CmmAssign reg expr -> do - dflags <- getDynFlags - erep <- lintCmmExpr expr - let reg_ty = cmmRegType dflags reg - if (erep `cmmEqType_ignoring_ptrhood` reg_ty) - then return () - else cmmLintAssignErr (CmmAssign reg expr) erep reg_ty - - CmmStore l r -> do - _ <- lintCmmExpr l - _ <- lintCmmExpr r - return () - - CmmUnsafeForeignCall target _formals actuals -> do - lintTarget target - mapM_ lintCmmExpr actuals - - -lintCmmLast :: LabelSet -> CmmNode O C -> CmmLint () -lintCmmLast labels node = case node of - CmmBranch id -> checkTarget id - - CmmCondBranch e t f _ -> do - dflags <- getDynFlags - mapM_ checkTarget [t,f] - _ <- lintCmmExpr e - checkCond dflags e - - CmmSwitch e ids -> do - dflags <- getDynFlags - mapM_ checkTarget $ switchTargetsToList ids - erep <- lintCmmExpr e - if (erep `cmmEqType_ignoring_ptrhood` bWord dflags) - then return () - else cmmLintErr (text "switch scrutinee is not a word: " <> - ppr e <> text " :: " <> ppr erep) - - CmmCall { cml_target = target, cml_cont = cont } -> do - _ <- lintCmmExpr target - maybe (return ()) checkTarget cont - - CmmForeignCall tgt _ args succ _ _ _ -> do - lintTarget tgt - mapM_ lintCmmExpr args - checkTarget succ - where - checkTarget id - | setMember id labels = return () - | otherwise = cmmLintErr (text "Branch to nonexistent id" <+> ppr id) - - -lintTarget :: ForeignTarget -> CmmLint () -lintTarget (ForeignTarget e _) = lintCmmExpr e >> return () -lintTarget (PrimTarget {}) = return () - - -checkCond :: DynFlags -> CmmExpr -> CmmLint () -checkCond _ (CmmMachOp mop _) | isComparisonMachOp mop = return () -checkCond dflags (CmmLit (CmmInt x t)) | x == 0 || x == 1, t == wordWidth dflags = return () -- constant values -checkCond _ expr - = cmmLintErr (hang (text "expression is not a conditional:") 2 - (ppr expr)) - --- ----------------------------------------------------------------------------- --- CmmLint monad - --- just a basic error monad: - -newtype CmmLint a = CmmLint { unCL :: DynFlags -> Either SDoc a } - deriving (Functor) - -instance Applicative CmmLint where - pure a = CmmLint (\_ -> Right a) - (<*>) = ap - -instance Monad CmmLint where - CmmLint m >>= k = CmmLint $ \dflags -> - case m dflags of - Left e -> Left e - Right a -> unCL (k a) dflags - -instance HasDynFlags CmmLint where - getDynFlags = CmmLint (\dflags -> Right dflags) - -cmmLintErr :: SDoc -> CmmLint a -cmmLintErr msg = CmmLint (\_ -> Left msg) - -addLintInfo :: SDoc -> CmmLint a -> CmmLint a -addLintInfo info thing = CmmLint $ \dflags -> - case unCL thing dflags of - Left err -> Left (hang info 2 err) - Right a -> Right a - -cmmLintMachOpErr :: CmmExpr -> [CmmType] -> [Width] -> CmmLint a -cmmLintMachOpErr expr argsRep 
opExpectsRep - = cmmLintErr (text "in MachOp application: " $$ - nest 2 (ppr expr) $$ - (text "op is expecting: " <+> ppr opExpectsRep) $$ - (text "arguments provide: " <+> ppr argsRep)) - -cmmLintAssignErr :: CmmNode e x -> CmmType -> CmmType -> CmmLint a -cmmLintAssignErr stmt e_ty r_ty - = cmmLintErr (text "in assignment: " $$ - nest 2 (vcat [ppr stmt, - text "Reg ty:" <+> ppr r_ty, - text "Rhs ty:" <+> ppr e_ty])) - - -{- -cmmLintDubiousWordOffset :: CmmExpr -> CmmLint a -cmmLintDubiousWordOffset expr - = cmmLintErr (text "offset is not a multiple of words: " $$ - nest 2 (ppr expr)) --} - diff --git a/compiler/cmm/CmmLive.hs b/compiler/cmm/CmmLive.hs deleted file mode 100644 index ca474ef61c..0000000000 --- a/compiler/cmm/CmmLive.hs +++ /dev/null @@ -1,93 +0,0 @@ -{-# LANGUAGE BangPatterns #-} -{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE GADTs #-} -{-# LANGUAGE ScopedTypeVariables #-} - -module CmmLive - ( CmmLocalLive - , cmmLocalLiveness - , cmmGlobalLiveness - , liveLattice - , gen_kill - ) -where - -import GhcPrelude - -import DynFlags -import BlockId -import Cmm -import PprCmmExpr () -- For Outputable instances -import Hoopl.Block -import Hoopl.Collections -import Hoopl.Dataflow -import Hoopl.Label - -import Maybes -import Outputable - ------------------------------------------------------------------------------ --- Calculating what variables are live on entry to a basic block ------------------------------------------------------------------------------ - --- | The variables live on entry to a block -type CmmLive r = RegSet r -type CmmLocalLive = CmmLive LocalReg - --- | The dataflow lattice -liveLattice :: Ord r => DataflowLattice (CmmLive r) -{-# SPECIALIZE liveLattice :: DataflowLattice (CmmLive LocalReg) #-} -{-# SPECIALIZE liveLattice :: DataflowLattice (CmmLive GlobalReg) #-} -liveLattice = DataflowLattice emptyRegSet add - where - add (OldFact old) (NewFact new) = - let !join = plusRegSet old new - in changedIf (sizeRegSet join > sizeRegSet old) join - --- | A mapping from block labels to the variables live on entry -type BlockEntryLiveness r = LabelMap (CmmLive r) - ------------------------------------------------------------------------------ --- | Calculated liveness info for a CmmGraph ------------------------------------------------------------------------------ - -cmmLocalLiveness :: DynFlags -> CmmGraph -> BlockEntryLiveness LocalReg -cmmLocalLiveness dflags graph = - check $ analyzeCmmBwd liveLattice (xferLive dflags) graph mapEmpty - where - entry = g_entry graph - check facts = - noLiveOnEntry entry (expectJust "check" $ mapLookup entry facts) facts - -cmmGlobalLiveness :: DynFlags -> CmmGraph -> BlockEntryLiveness GlobalReg -cmmGlobalLiveness dflags graph = - analyzeCmmBwd liveLattice (xferLive dflags) graph mapEmpty - --- | On entry to the procedure, there had better not be any LocalReg's live-in. -noLiveOnEntry :: BlockId -> CmmLive LocalReg -> a -> a -noLiveOnEntry bid in_fact x = - if nullRegSet in_fact then x - else pprPanic "LocalReg's live-in to graph" (ppr bid <+> ppr in_fact) - -gen_kill - :: (DefinerOfRegs r n, UserOfRegs r n) - => DynFlags -> n -> CmmLive r -> CmmLive r -gen_kill dflags node set = - let !afterKill = foldRegsDefd dflags deleteFromRegSet set node - in foldRegsUsed dflags extendRegSet afterKill node -{-# INLINE gen_kill #-} - -xferLive - :: forall r. 
- ( UserOfRegs r (CmmNode O O) - , DefinerOfRegs r (CmmNode O O) - , UserOfRegs r (CmmNode O C) - , DefinerOfRegs r (CmmNode O C) - ) - => DynFlags -> TransferFun (CmmLive r) -xferLive dflags (BlockCC eNode middle xNode) fBase = - let joined = gen_kill dflags xNode $! joinOutFacts liveLattice xNode fBase - !result = foldNodesBwdOO (gen_kill dflags) middle joined - in mapSingleton (entryLabel eNode) result -{-# SPECIALIZE xferLive :: DynFlags -> TransferFun (CmmLive LocalReg) #-} -{-# SPECIALIZE xferLive :: DynFlags -> TransferFun (CmmLive GlobalReg) #-} diff --git a/compiler/cmm/CmmMachOp.hs b/compiler/cmm/CmmMachOp.hs deleted file mode 100644 index 418ebec13f..0000000000 --- a/compiler/cmm/CmmMachOp.hs +++ /dev/null @@ -1,664 +0,0 @@ -module CmmMachOp - ( MachOp(..) - , pprMachOp, isCommutableMachOp, isAssociativeMachOp - , isComparisonMachOp, maybeIntComparison, machOpResultType - , machOpArgReps, maybeInvertComparison, isFloatComparison - - -- MachOp builders - , mo_wordAdd, mo_wordSub, mo_wordEq, mo_wordNe,mo_wordMul, mo_wordSQuot - , mo_wordSRem, mo_wordSNeg, mo_wordUQuot, mo_wordURem - , mo_wordSGe, mo_wordSLe, mo_wordSGt, mo_wordSLt, mo_wordUGe - , mo_wordULe, mo_wordUGt, mo_wordULt - , mo_wordAnd, mo_wordOr, mo_wordXor, mo_wordNot - , mo_wordShl, mo_wordSShr, mo_wordUShr - , mo_u_8To32, mo_s_8To32, mo_u_16To32, mo_s_16To32 - , mo_u_8ToWord, mo_s_8ToWord, mo_u_16ToWord, mo_s_16ToWord - , mo_u_32ToWord, mo_s_32ToWord - , mo_32To8, mo_32To16, mo_WordTo8, mo_WordTo16, mo_WordTo32, mo_WordTo64 - - -- CallishMachOp - , CallishMachOp(..), callishMachOpHints - , pprCallishMachOp - , machOpMemcpyishAlign - - -- Atomic read-modify-write - , AtomicMachOp(..) - ) -where - -import GhcPrelude - -import CmmType -import Outputable -import DynFlags - ------------------------------------------------------------------------------ --- MachOp ------------------------------------------------------------------------------ - -{- | -Machine-level primops; ones which we can reasonably delegate to the -native code generators to handle. - -Most operations are parameterised by the 'Width' that they operate on. -Some operations have separate signed and unsigned versions, and float -and integer versions. --} - -data MachOp - -- Integer operations (insensitive to signed/unsigned) - = MO_Add Width - | MO_Sub Width - | MO_Eq Width - | MO_Ne Width - | MO_Mul Width -- low word of multiply - - -- Signed multiply/divide - | MO_S_MulMayOflo Width -- nonzero if signed multiply overflows - | MO_S_Quot Width -- signed / (same semantics as IntQuotOp) - | MO_S_Rem Width -- signed % (same semantics as IntRemOp) - | MO_S_Neg Width -- unary - - - -- Unsigned multiply/divide - | MO_U_MulMayOflo Width -- nonzero if unsigned multiply overflows - | MO_U_Quot Width -- unsigned / (same semantics as WordQuotOp) - | MO_U_Rem Width -- unsigned % (same semantics as WordRemOp) - - -- Signed comparisons - | MO_S_Ge Width - | MO_S_Le Width - | MO_S_Gt Width - | MO_S_Lt Width - - -- Unsigned comparisons - | MO_U_Ge Width - | MO_U_Le Width - | MO_U_Gt Width - | MO_U_Lt Width - - -- Floating point arithmetic - | MO_F_Add Width - | MO_F_Sub Width - | MO_F_Neg Width -- unary - - | MO_F_Mul Width - | MO_F_Quot Width - - -- Floating point comparison - | MO_F_Eq Width - | MO_F_Ne Width - | MO_F_Ge Width - | MO_F_Le Width - | MO_F_Gt Width - | MO_F_Lt Width - - -- Bitwise operations. Not all of these may be supported - -- at all sizes, and only integral Widths are valid. 
- | MO_And Width - | MO_Or Width - | MO_Xor Width - | MO_Not Width - | MO_Shl Width - | MO_U_Shr Width -- unsigned shift right - | MO_S_Shr Width -- signed shift right - - -- Conversions. Some of these will be NOPs. - -- Floating-point conversions use the signed variant. - | MO_SF_Conv Width Width -- Signed int -> Float - | MO_FS_Conv Width Width -- Float -> Signed int - | MO_SS_Conv Width Width -- Signed int -> Signed int - | MO_UU_Conv Width Width -- unsigned int -> unsigned int - | MO_XX_Conv Width Width -- int -> int; puts no requirements on the - -- contents of upper bits when extending; - -- narrowing is simply truncation; the only - -- expectation is that we can recover the - -- original value by applying the opposite - -- MO_XX_Conv, e.g., - -- MO_XX_CONV W64 W8 (MO_XX_CONV W8 W64 x) - -- is equivalent to just x. - | MO_FF_Conv Width Width -- Float -> Float - - -- Vector element insertion and extraction operations - | MO_V_Insert Length Width -- Insert scalar into vector - | MO_V_Extract Length Width -- Extract scalar from vector - - -- Integer vector operations - | MO_V_Add Length Width - | MO_V_Sub Length Width - | MO_V_Mul Length Width - - -- Signed vector multiply/divide - | MO_VS_Quot Length Width - | MO_VS_Rem Length Width - | MO_VS_Neg Length Width - - -- Unsigned vector multiply/divide - | MO_VU_Quot Length Width - | MO_VU_Rem Length Width - - -- Floating point vector element insertion and extraction operations - | MO_VF_Insert Length Width -- Insert scalar into vector - | MO_VF_Extract Length Width -- Extract scalar from vector - - -- Floating point vector operations - | MO_VF_Add Length Width - | MO_VF_Sub Length Width - | MO_VF_Neg Length Width -- unary negation - | MO_VF_Mul Length Width - | MO_VF_Quot Length Width - - -- Alignment check (for -falignment-sanitisation) - | MO_AlignmentCheck Int Width - deriving (Eq, Show) - -pprMachOp :: MachOp -> SDoc -pprMachOp mo = text (show mo) - - - --- ----------------------------------------------------------------------------- --- Some common MachReps - --- A 'wordRep' is a machine word on the target architecture --- Specifically, it is the size of an Int#, Word#, Addr# --- and the unit of allocation on the stack and the heap --- Any pointer is also guaranteed to be a wordRep. 
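---
--- A small illustrative sketch (not part of the original module): on a
--- 64-bit target wordWidth dflags is W64, so mo_wordAdd dflags is simply
--- MO_Add W64, and
---
--- CmmMachOp (mo_wordAdd dflags) [spExpr, mkIntExpr dflags 8]
---
--- builds the word-sized addition "Sp + 8", using the spExpr and mkIntExpr
--- helpers that appear elsewhere in this patch.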
- -mo_wordAdd, mo_wordSub, mo_wordEq, mo_wordNe,mo_wordMul, mo_wordSQuot - , mo_wordSRem, mo_wordSNeg, mo_wordUQuot, mo_wordURem - , mo_wordSGe, mo_wordSLe, mo_wordSGt, mo_wordSLt, mo_wordUGe - , mo_wordULe, mo_wordUGt, mo_wordULt - , mo_wordAnd, mo_wordOr, mo_wordXor, mo_wordNot, mo_wordShl, mo_wordSShr, mo_wordUShr - , mo_u_8ToWord, mo_s_8ToWord, mo_u_16ToWord, mo_s_16ToWord, mo_u_32ToWord, mo_s_32ToWord - , mo_WordTo8, mo_WordTo16, mo_WordTo32, mo_WordTo64 - :: DynFlags -> MachOp - -mo_u_8To32, mo_s_8To32, mo_u_16To32, mo_s_16To32 - , mo_32To8, mo_32To16 - :: MachOp - -mo_wordAdd dflags = MO_Add (wordWidth dflags) -mo_wordSub dflags = MO_Sub (wordWidth dflags) -mo_wordEq dflags = MO_Eq (wordWidth dflags) -mo_wordNe dflags = MO_Ne (wordWidth dflags) -mo_wordMul dflags = MO_Mul (wordWidth dflags) -mo_wordSQuot dflags = MO_S_Quot (wordWidth dflags) -mo_wordSRem dflags = MO_S_Rem (wordWidth dflags) -mo_wordSNeg dflags = MO_S_Neg (wordWidth dflags) -mo_wordUQuot dflags = MO_U_Quot (wordWidth dflags) -mo_wordURem dflags = MO_U_Rem (wordWidth dflags) - -mo_wordSGe dflags = MO_S_Ge (wordWidth dflags) -mo_wordSLe dflags = MO_S_Le (wordWidth dflags) -mo_wordSGt dflags = MO_S_Gt (wordWidth dflags) -mo_wordSLt dflags = MO_S_Lt (wordWidth dflags) - -mo_wordUGe dflags = MO_U_Ge (wordWidth dflags) -mo_wordULe dflags = MO_U_Le (wordWidth dflags) -mo_wordUGt dflags = MO_U_Gt (wordWidth dflags) -mo_wordULt dflags = MO_U_Lt (wordWidth dflags) - -mo_wordAnd dflags = MO_And (wordWidth dflags) -mo_wordOr dflags = MO_Or (wordWidth dflags) -mo_wordXor dflags = MO_Xor (wordWidth dflags) -mo_wordNot dflags = MO_Not (wordWidth dflags) -mo_wordShl dflags = MO_Shl (wordWidth dflags) -mo_wordSShr dflags = MO_S_Shr (wordWidth dflags) -mo_wordUShr dflags = MO_U_Shr (wordWidth dflags) - -mo_u_8To32 = MO_UU_Conv W8 W32 -mo_s_8To32 = MO_SS_Conv W8 W32 -mo_u_16To32 = MO_UU_Conv W16 W32 -mo_s_16To32 = MO_SS_Conv W16 W32 - -mo_u_8ToWord dflags = MO_UU_Conv W8 (wordWidth dflags) -mo_s_8ToWord dflags = MO_SS_Conv W8 (wordWidth dflags) -mo_u_16ToWord dflags = MO_UU_Conv W16 (wordWidth dflags) -mo_s_16ToWord dflags = MO_SS_Conv W16 (wordWidth dflags) -mo_s_32ToWord dflags = MO_SS_Conv W32 (wordWidth dflags) -mo_u_32ToWord dflags = MO_UU_Conv W32 (wordWidth dflags) - -mo_WordTo8 dflags = MO_UU_Conv (wordWidth dflags) W8 -mo_WordTo16 dflags = MO_UU_Conv (wordWidth dflags) W16 -mo_WordTo32 dflags = MO_UU_Conv (wordWidth dflags) W32 -mo_WordTo64 dflags = MO_UU_Conv (wordWidth dflags) W64 - -mo_32To8 = MO_UU_Conv W32 W8 -mo_32To16 = MO_UU_Conv W32 W16 - - --- ---------------------------------------------------------------------------- --- isCommutableMachOp - -{- | -Returns 'True' if the MachOp has commutable arguments. This is used -in the platform-independent Cmm optimisations. - -If in doubt, return 'False'. This generates worse code on the -native routes, but is otherwise harmless. --} -isCommutableMachOp :: MachOp -> Bool -isCommutableMachOp mop = - case mop of - MO_Add _ -> True - MO_Eq _ -> True - MO_Ne _ -> True - MO_Mul _ -> True - MO_S_MulMayOflo _ -> True - MO_U_MulMayOflo _ -> True - MO_And _ -> True - MO_Or _ -> True - MO_Xor _ -> True - MO_F_Add _ -> True - MO_F_Mul _ -> True - _other -> False - --- ---------------------------------------------------------------------------- --- isAssociativeMachOp - -{- | -Returns 'True' if the MachOp is associative (i.e. @(x+y)+z == x+(y+z)@) -This is used in the platform-independent Cmm optimisations. - -If in doubt, return 'False'. 
This generates worse code on the -native routes, but is otherwise harmless. --} -isAssociativeMachOp :: MachOp -> Bool -isAssociativeMachOp mop = - case mop of - MO_Add {} -> True -- NB: does not include - MO_Mul {} -> True -- floatint point! - MO_And {} -> True - MO_Or {} -> True - MO_Xor {} -> True - _other -> False - - --- ---------------------------------------------------------------------------- --- isComparisonMachOp - -{- | -Returns 'True' if the MachOp is a comparison. - -If in doubt, return False. This generates worse code on the -native routes, but is otherwise harmless. --} -isComparisonMachOp :: MachOp -> Bool -isComparisonMachOp mop = - case mop of - MO_Eq _ -> True - MO_Ne _ -> True - MO_S_Ge _ -> True - MO_S_Le _ -> True - MO_S_Gt _ -> True - MO_S_Lt _ -> True - MO_U_Ge _ -> True - MO_U_Le _ -> True - MO_U_Gt _ -> True - MO_U_Lt _ -> True - MO_F_Eq {} -> True - MO_F_Ne {} -> True - MO_F_Ge {} -> True - MO_F_Le {} -> True - MO_F_Gt {} -> True - MO_F_Lt {} -> True - _other -> False - -{- | -Returns @Just w@ if the operation is an integer comparison with width -@w@, or @Nothing@ otherwise. --} -maybeIntComparison :: MachOp -> Maybe Width -maybeIntComparison mop = - case mop of - MO_Eq w -> Just w - MO_Ne w -> Just w - MO_S_Ge w -> Just w - MO_S_Le w -> Just w - MO_S_Gt w -> Just w - MO_S_Lt w -> Just w - MO_U_Ge w -> Just w - MO_U_Le w -> Just w - MO_U_Gt w -> Just w - MO_U_Lt w -> Just w - _ -> Nothing - -isFloatComparison :: MachOp -> Bool -isFloatComparison mop = - case mop of - MO_F_Eq {} -> True - MO_F_Ne {} -> True - MO_F_Ge {} -> True - MO_F_Le {} -> True - MO_F_Gt {} -> True - MO_F_Lt {} -> True - _other -> False - --- ----------------------------------------------------------------------------- --- Inverting conditions - --- Sometimes it's useful to be able to invert the sense of a --- condition. Not all conditional tests are invertible: in --- particular, floating point conditionals cannot be inverted, because --- there exist floating-point values which return False for both senses --- of a condition (eg. !(NaN > NaN) && !(NaN /<= NaN)). - -maybeInvertComparison :: MachOp -> Maybe MachOp -maybeInvertComparison op - = case op of -- None of these Just cases include floating point - MO_Eq r -> Just (MO_Ne r) - MO_Ne r -> Just (MO_Eq r) - MO_U_Lt r -> Just (MO_U_Ge r) - MO_U_Gt r -> Just (MO_U_Le r) - MO_U_Le r -> Just (MO_U_Gt r) - MO_U_Ge r -> Just (MO_U_Lt r) - MO_S_Lt r -> Just (MO_S_Ge r) - MO_S_Gt r -> Just (MO_S_Le r) - MO_S_Le r -> Just (MO_S_Gt r) - MO_S_Ge r -> Just (MO_S_Lt r) - _other -> Nothing - --- ---------------------------------------------------------------------------- --- machOpResultType - -{- | -Returns the MachRep of the result of a MachOp. 
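-
-For example (illustrative): @machOpResultType dflags (MO_Add W64) [ty1,ty2]@
-is @ty1@, preserving the GC-pointer-hood of the first argument, whereas
-@MO_Mul W64@ always gives @cmmBits W64@.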
--} -machOpResultType :: DynFlags -> MachOp -> [CmmType] -> CmmType -machOpResultType dflags mop tys = - case mop of - MO_Add {} -> ty1 -- Preserve GC-ptr-hood - MO_Sub {} -> ty1 -- of first arg - MO_Mul r -> cmmBits r - MO_S_MulMayOflo r -> cmmBits r - MO_S_Quot r -> cmmBits r - MO_S_Rem r -> cmmBits r - MO_S_Neg r -> cmmBits r - MO_U_MulMayOflo r -> cmmBits r - MO_U_Quot r -> cmmBits r - MO_U_Rem r -> cmmBits r - - MO_Eq {} -> comparisonResultRep dflags - MO_Ne {} -> comparisonResultRep dflags - MO_S_Ge {} -> comparisonResultRep dflags - MO_S_Le {} -> comparisonResultRep dflags - MO_S_Gt {} -> comparisonResultRep dflags - MO_S_Lt {} -> comparisonResultRep dflags - - MO_U_Ge {} -> comparisonResultRep dflags - MO_U_Le {} -> comparisonResultRep dflags - MO_U_Gt {} -> comparisonResultRep dflags - MO_U_Lt {} -> comparisonResultRep dflags - - MO_F_Add r -> cmmFloat r - MO_F_Sub r -> cmmFloat r - MO_F_Mul r -> cmmFloat r - MO_F_Quot r -> cmmFloat r - MO_F_Neg r -> cmmFloat r - MO_F_Eq {} -> comparisonResultRep dflags - MO_F_Ne {} -> comparisonResultRep dflags - MO_F_Ge {} -> comparisonResultRep dflags - MO_F_Le {} -> comparisonResultRep dflags - MO_F_Gt {} -> comparisonResultRep dflags - MO_F_Lt {} -> comparisonResultRep dflags - - MO_And {} -> ty1 -- Used for pointer masking - MO_Or {} -> ty1 - MO_Xor {} -> ty1 - MO_Not r -> cmmBits r - MO_Shl r -> cmmBits r - MO_U_Shr r -> cmmBits r - MO_S_Shr r -> cmmBits r - - MO_SS_Conv _ to -> cmmBits to - MO_UU_Conv _ to -> cmmBits to - MO_XX_Conv _ to -> cmmBits to - MO_FS_Conv _ to -> cmmBits to - MO_SF_Conv _ to -> cmmFloat to - MO_FF_Conv _ to -> cmmFloat to - - MO_V_Insert l w -> cmmVec l (cmmBits w) - MO_V_Extract _ w -> cmmBits w - - MO_V_Add l w -> cmmVec l (cmmBits w) - MO_V_Sub l w -> cmmVec l (cmmBits w) - MO_V_Mul l w -> cmmVec l (cmmBits w) - - MO_VS_Quot l w -> cmmVec l (cmmBits w) - MO_VS_Rem l w -> cmmVec l (cmmBits w) - MO_VS_Neg l w -> cmmVec l (cmmBits w) - - MO_VU_Quot l w -> cmmVec l (cmmBits w) - MO_VU_Rem l w -> cmmVec l (cmmBits w) - - MO_VF_Insert l w -> cmmVec l (cmmFloat w) - MO_VF_Extract _ w -> cmmFloat w - - MO_VF_Add l w -> cmmVec l (cmmFloat w) - MO_VF_Sub l w -> cmmVec l (cmmFloat w) - MO_VF_Mul l w -> cmmVec l (cmmFloat w) - MO_VF_Quot l w -> cmmVec l (cmmFloat w) - MO_VF_Neg l w -> cmmVec l (cmmFloat w) - - MO_AlignmentCheck _ _ -> ty1 - where - (ty1:_) = tys - -comparisonResultRep :: DynFlags -> CmmType -comparisonResultRep = bWord -- is it? - - --- ----------------------------------------------------------------------------- --- machOpArgReps - --- | This function is used for debugging only: we can check whether an --- application of a MachOp is "type-correct" by checking that the MachReps of --- its arguments are the same as the MachOp expects. This is used when --- linting a CmmExpr. 
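---
--- For instance (illustrative): @machOpArgReps dflags (MO_Shl W64)@ is
--- @[W64, wordWidth dflags]@: the shift amount is always word-sized,
--- whatever the width being shifted.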
- -machOpArgReps :: DynFlags -> MachOp -> [Width] -machOpArgReps dflags op = - case op of - MO_Add r -> [r,r] - MO_Sub r -> [r,r] - MO_Eq r -> [r,r] - MO_Ne r -> [r,r] - MO_Mul r -> [r,r] - MO_S_MulMayOflo r -> [r,r] - MO_S_Quot r -> [r,r] - MO_S_Rem r -> [r,r] - MO_S_Neg r -> [r] - MO_U_MulMayOflo r -> [r,r] - MO_U_Quot r -> [r,r] - MO_U_Rem r -> [r,r] - - MO_S_Ge r -> [r,r] - MO_S_Le r -> [r,r] - MO_S_Gt r -> [r,r] - MO_S_Lt r -> [r,r] - - MO_U_Ge r -> [r,r] - MO_U_Le r -> [r,r] - MO_U_Gt r -> [r,r] - MO_U_Lt r -> [r,r] - - MO_F_Add r -> [r,r] - MO_F_Sub r -> [r,r] - MO_F_Mul r -> [r,r] - MO_F_Quot r -> [r,r] - MO_F_Neg r -> [r] - MO_F_Eq r -> [r,r] - MO_F_Ne r -> [r,r] - MO_F_Ge r -> [r,r] - MO_F_Le r -> [r,r] - MO_F_Gt r -> [r,r] - MO_F_Lt r -> [r,r] - - MO_And r -> [r,r] - MO_Or r -> [r,r] - MO_Xor r -> [r,r] - MO_Not r -> [r] - MO_Shl r -> [r, wordWidth dflags] - MO_U_Shr r -> [r, wordWidth dflags] - MO_S_Shr r -> [r, wordWidth dflags] - - MO_SS_Conv from _ -> [from] - MO_UU_Conv from _ -> [from] - MO_XX_Conv from _ -> [from] - MO_SF_Conv from _ -> [from] - MO_FS_Conv from _ -> [from] - MO_FF_Conv from _ -> [from] - - MO_V_Insert l r -> [typeWidth (vec l (cmmBits r)),r,wordWidth dflags] - MO_V_Extract l r -> [typeWidth (vec l (cmmBits r)),wordWidth dflags] - - MO_V_Add _ r -> [r,r] - MO_V_Sub _ r -> [r,r] - MO_V_Mul _ r -> [r,r] - - MO_VS_Quot _ r -> [r,r] - MO_VS_Rem _ r -> [r,r] - MO_VS_Neg _ r -> [r] - - MO_VU_Quot _ r -> [r,r] - MO_VU_Rem _ r -> [r,r] - - MO_VF_Insert l r -> [typeWidth (vec l (cmmFloat r)),r,wordWidth dflags] - MO_VF_Extract l r -> [typeWidth (vec l (cmmFloat r)),wordWidth dflags] - - MO_VF_Add _ r -> [r,r] - MO_VF_Sub _ r -> [r,r] - MO_VF_Mul _ r -> [r,r] - MO_VF_Quot _ r -> [r,r] - MO_VF_Neg _ r -> [r] - - MO_AlignmentCheck _ r -> [r] - ------------------------------------------------------------------------------ --- CallishMachOp ------------------------------------------------------------------------------ - --- CallishMachOps tend to be implemented by foreign calls in some backends, --- so we separate them out. In Cmm, these can only occur in a --- statement position, in contrast to an ordinary MachOp which can occur --- anywhere in an expression. -data CallishMachOp - = MO_F64_Pwr - | MO_F64_Sin - | MO_F64_Cos - | MO_F64_Tan - | MO_F64_Sinh - | MO_F64_Cosh - | MO_F64_Tanh - | MO_F64_Asin - | MO_F64_Acos - | MO_F64_Atan - | MO_F64_Asinh - | MO_F64_Acosh - | MO_F64_Atanh - | MO_F64_Log - | MO_F64_Log1P - | MO_F64_Exp - | MO_F64_ExpM1 - | MO_F64_Fabs - | MO_F64_Sqrt - | MO_F32_Pwr - | MO_F32_Sin - | MO_F32_Cos - | MO_F32_Tan - | MO_F32_Sinh - | MO_F32_Cosh - | MO_F32_Tanh - | MO_F32_Asin - | MO_F32_Acos - | MO_F32_Atan - | MO_F32_Asinh - | MO_F32_Acosh - | MO_F32_Atanh - | MO_F32_Log - | MO_F32_Log1P - | MO_F32_Exp - | MO_F32_ExpM1 - | MO_F32_Fabs - | MO_F32_Sqrt - - | MO_UF_Conv Width - - | MO_S_Mul2 Width - | MO_S_QuotRem Width - | MO_U_QuotRem Width - | MO_U_QuotRem2 Width - | MO_Add2 Width - | MO_AddWordC Width - | MO_SubWordC Width - | MO_AddIntC Width - | MO_SubIntC Width - | MO_U_Mul2 Width - - | MO_ReadBarrier - | MO_WriteBarrier - | MO_Touch -- Keep variables live (when using interior pointers) - - -- Prefetch - | MO_Prefetch_Data Int -- Prefetch hint. May change program performance but not - -- program behavior. - -- the Int can be 0-3. Needs to be known at compile time - -- to interact with code generation correctly. 
- -- TODO: add support for prefetch WRITES, - -- currently only exposes prefetch reads, which - -- would the majority of use cases in ghc anyways - - - -- These three MachOps are parameterised by the known alignment - -- of the destination and source (for memcpy/memmove) pointers. - -- This information may be used for optimisation in backends. - | MO_Memcpy Int - | MO_Memset Int - | MO_Memmove Int - | MO_Memcmp Int - - | MO_PopCnt Width - | MO_Pdep Width - | MO_Pext Width - | MO_Clz Width - | MO_Ctz Width - - | MO_BSwap Width - | MO_BRev Width - - -- Atomic read-modify-write. - | MO_AtomicRMW Width AtomicMachOp - | MO_AtomicRead Width - | MO_AtomicWrite Width - | MO_Cmpxchg Width - deriving (Eq, Show) - --- | The operation to perform atomically. -data AtomicMachOp = - AMO_Add - | AMO_Sub - | AMO_And - | AMO_Nand - | AMO_Or - | AMO_Xor - deriving (Eq, Show) - -pprCallishMachOp :: CallishMachOp -> SDoc -pprCallishMachOp mo = text (show mo) - -callishMachOpHints :: CallishMachOp -> ([ForeignHint], [ForeignHint]) -callishMachOpHints op = case op of - MO_Memcpy _ -> ([], [AddrHint,AddrHint,NoHint]) - MO_Memset _ -> ([], [AddrHint,NoHint,NoHint]) - MO_Memmove _ -> ([], [AddrHint,AddrHint,NoHint]) - MO_Memcmp _ -> ([], [AddrHint, AddrHint, NoHint]) - _ -> ([],[]) - -- empty lists indicate NoHint - --- | The alignment of a 'memcpy'-ish operation. -machOpMemcpyishAlign :: CallishMachOp -> Maybe Int -machOpMemcpyishAlign op = case op of - MO_Memcpy align -> Just align - MO_Memset align -> Just align - MO_Memmove align -> Just align - MO_Memcmp align -> Just align - _ -> Nothing diff --git a/compiler/cmm/CmmMonad.hs b/compiler/cmm/CmmMonad.hs deleted file mode 100644 index a04c4ad49b..0000000000 --- a/compiler/cmm/CmmMonad.hs +++ /dev/null @@ -1,59 +0,0 @@ -{-# LANGUAGE CPP #-} - ------------------------------------------------------------------------------ --- A Parser monad with access to the 'DynFlags'. --- --- The 'P' monad only has access to the subset of of 'DynFlags' --- required for parsing Haskell. - --- The parser for C-- requires access to a lot more of the 'DynFlags', --- so 'PD' provides access to 'DynFlags' via a 'HasDynFlags' instance. ------------------------------------------------------------------------------ -module CmmMonad ( - PD(..) - , liftP - ) where - -import GhcPrelude - -import Control.Monad -import qualified Control.Monad.Fail as MonadFail - -import DynFlags -import Lexer - -newtype PD a = PD { unPD :: DynFlags -> PState -> ParseResult a } - -instance Functor PD where - fmap = liftM - -instance Applicative PD where - pure = returnPD - (<*>) = ap - -instance Monad PD where - (>>=) = thenPD -#if !MIN_VERSION_base(4,13,0) - fail = MonadFail.fail -#endif - -instance MonadFail.MonadFail PD where - fail = failPD - -liftP :: P a -> PD a -liftP (P f) = PD $ \_ s -> f s - -returnPD :: a -> PD a -returnPD = liftP . return - -thenPD :: PD a -> (a -> PD b) -> PD b -(PD m) `thenPD` k = PD $ \d s -> - case m d s of - POk s1 a -> unPD (k a) d s1 - PFailed s1 -> PFailed s1 - -failPD :: String -> PD a -failPD = liftP . 
fail - -instance HasDynFlags PD where - getDynFlags = PD $ \d s -> POk s d diff --git a/compiler/cmm/CmmNode.hs b/compiler/cmm/CmmNode.hs deleted file mode 100644 index f9bad961e6..0000000000 --- a/compiler/cmm/CmmNode.hs +++ /dev/null @@ -1,724 +0,0 @@ -{-# LANGUAGE BangPatterns #-} -{-# LANGUAGE CPP #-} -{-# LANGUAGE ExplicitForAll #-} -{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE FlexibleInstances #-} -{-# LANGUAGE GADTs #-} -{-# LANGUAGE MultiParamTypeClasses #-} -{-# LANGUAGE StandaloneDeriving #-} -{-# LANGUAGE UndecidableInstances #-} -{-# LANGUAGE ScopedTypeVariables #-} - - --- CmmNode type for representation using Hoopl graphs. - -module CmmNode ( - CmmNode(..), CmmFormal, CmmActual, CmmTickish, - UpdFrameOffset, Convention(..), - ForeignConvention(..), ForeignTarget(..), foreignTargetHints, - CmmReturnInfo(..), - mapExp, mapExpDeep, wrapRecExp, foldExp, foldExpDeep, wrapRecExpf, - mapExpM, mapExpDeepM, wrapRecExpM, mapSuccessors, mapCollectSuccessors, - - -- * Tick scopes - CmmTickScope(..), isTickSubScope, combineTickScopes, - ) where - -import GhcPrelude hiding (succ) - -import GHC.Platform.Regs -import CmmExpr -import CmmSwitch -import DynFlags -import FastString -import ForeignCall -import Outputable -import SMRep -import CoreSyn (Tickish) -import qualified Unique as U - -import Hoopl.Block -import Hoopl.Graph -import Hoopl.Collections -import Hoopl.Label -import Data.Maybe -import Data.List (tails,sortBy) -import Unique (nonDetCmpUnique) -import Util - - ------------------------- --- CmmNode - -#define ULabel {-# UNPACK #-} !Label - -data CmmNode e x where - CmmEntry :: ULabel -> CmmTickScope -> CmmNode C O - - CmmComment :: FastString -> CmmNode O O - - -- Tick annotation, covering Cmm code in our tick scope. We only - -- expect non-code @Tickish@ at this point (e.g. @SourceNote@). - -- See Note [CmmTick scoping details] - CmmTick :: !CmmTickish -> CmmNode O O - - -- Unwind pseudo-instruction, encoding stack unwinding - -- instructions for a debugger. This describes how to reconstruct - -- the "old" value of a register if we want to navigate the stack - -- up one frame. Having unwind information for @Sp@ will allow the - -- debugger to "walk" the stack. - -- - -- See Note [What is this unwinding business?] in Debug - CmmUnwind :: [(GlobalReg, Maybe CmmExpr)] -> CmmNode O O - - CmmAssign :: !CmmReg -> !CmmExpr -> CmmNode O O - -- Assign to register - - CmmStore :: !CmmExpr -> !CmmExpr -> CmmNode O O - -- Assign to memory location. Size is - -- given by cmmExprType of the rhs. - - CmmUnsafeForeignCall :: -- An unsafe foreign call; - -- see Note [Foreign calls] - -- Like a "fat machine instruction"; can occur - -- in the middle of a block - ForeignTarget -> -- call target - [CmmFormal] -> -- zero or more results - [CmmActual] -> -- zero or more arguments - CmmNode O O - -- Semantics: clobbers any GlobalRegs for which callerSaves r == True - -- See Note [Unsafe foreign calls clobber caller-save registers] - -- - -- Invariant: the arguments and the ForeignTarget must not - -- mention any registers for which GHC.Platform.callerSaves - -- is True. See Note [Register Parameter Passing]. - - CmmBranch :: ULabel -> CmmNode O C - -- Goto another block in the same procedure - - CmmCondBranch :: { -- conditional branch - cml_pred :: CmmExpr, - cml_true, cml_false :: ULabel, - cml_likely :: Maybe Bool -- likely result of the conditional, - -- if known - } -> CmmNode O C - - CmmSwitch - :: CmmExpr -- Scrutinee, of some integral type - -> SwitchTargets -- Cases. 
See [Note SwitchTargets] - -> CmmNode O C - - CmmCall :: { -- A native call or tail call - cml_target :: CmmExpr, -- never a CmmPrim to a CallishMachOp! - - cml_cont :: Maybe Label, - -- Label of continuation (Nothing for return or tail call) - -- - -- Note [Continuation BlockIds]: these BlockIds are called - -- Continuation BlockIds, and are the only BlockIds that can - -- occur in CmmExprs, namely as (CmmLit (CmmBlock b)) or - -- (CmmStackSlot (Young b) _). - - cml_args_regs :: [GlobalReg], - -- The argument GlobalRegs (Rx, Fx, Dx, Lx) that are passed - -- to the call. This is essential information for the - -- native code generator's register allocator; without - -- knowing which GlobalRegs are live it has to assume that - -- they are all live. This list should only include - -- GlobalRegs that are mapped to real machine registers on - -- the target platform. - - cml_args :: ByteOff, - -- Byte offset, from the *old* end of the Area associated with - -- the Label (if cml_cont = Nothing, then Old area), of - -- youngest outgoing arg. Set the stack pointer to this before - -- transferring control. - -- (NB: an update frame might also have been stored in the Old - -- area, but it'll be in an older part than the args.) - - cml_ret_args :: ByteOff, - -- For calls *only*, the byte offset for youngest returned value - -- This is really needed at the *return* point rather than here - -- at the call, but in practice it's convenient to record it here. - - cml_ret_off :: ByteOff - -- For calls *only*, the byte offset of the base of the frame that - -- must be described by the info table for the return point. - -- The older words are an update frames, which have their own - -- info-table and layout information - - -- From a liveness point of view, the stack words older than - -- cml_ret_off are treated as live, even if the sequel of - -- the call goes into a loop. - } -> CmmNode O C - - CmmForeignCall :: { -- A safe foreign call; see Note [Foreign calls] - -- Always the last node of a block - tgt :: ForeignTarget, -- call target and convention - res :: [CmmFormal], -- zero or more results - args :: [CmmActual], -- zero or more arguments; see Note [Register parameter passing] - succ :: ULabel, -- Label of continuation - ret_args :: ByteOff, -- same as cml_ret_args - ret_off :: ByteOff, -- same as cml_ret_off - intrbl:: Bool -- whether or not the call is interruptible - } -> CmmNode O C - -{- Note [Foreign calls] -~~~~~~~~~~~~~~~~~~~~~~~ -A CmmUnsafeForeignCall is used for *unsafe* foreign calls; -a CmmForeignCall call is used for *safe* foreign calls. - -Unsafe ones are mostly easy: think of them as a "fat machine -instruction". In particular, they do *not* kill all live registers, -just the registers they return to (there was a bit of code in GHC that -conservatively assumed otherwise.) However, see [Register parameter passing]. - -Safe ones are trickier. A safe foreign call - r = f(x) -ultimately expands to - push "return address" -- Never used to return to; - -- just points an info table - save registers into TSO - call suspendThread - r = f(x) -- Make the call - call resumeThread - restore registers - pop "return address" -We cannot "lower" a safe foreign call to this sequence of Cmms, because -after we've saved Sp all the Cmm optimiser's assumptions are broken. - -Note that a safe foreign call needs an info table. - -So Safe Foreign Calls must remain as last nodes until the stack is -made manifest in CmmLayoutStack, where they are lowered into the above -sequence. 
--} - -{- Note [Unsafe foreign calls clobber caller-save registers] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -A foreign call is defined to clobber any GlobalRegs that are mapped to -caller-saves machine registers (according to the prevailing C ABI). -GHC.StgToCmm.Utils.callerSaves tells you which GlobalRegs are caller-saves. - -This is a design choice that makes it easier to generate code later. -We could instead choose to say that foreign calls do *not* clobber -caller-saves regs, but then we would have to figure out which regs -were live across the call later and insert some saves/restores. - -Furthermore when we generate code we never have any GlobalRegs live -across a call, because they are always copied-in to LocalRegs and -copied-out again before making a call/jump. So all we have to do is -avoid any code motion that would make a caller-saves GlobalReg live -across a foreign call during subsequent optimisations. --} - -{- Note [Register parameter passing] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -On certain architectures, some registers are utilized for parameter -passing in the C calling convention. For example, in x86-64 Linux -convention, rdi, rsi, rdx and rcx (as well as r8 and r9) may be used for -argument passing. These are registers R3-R6, which our generated -code may also be using; as a result, it's necessary to save these -values before doing a foreign call. This is done during initial -code generation in callerSaveVolatileRegs in GHC.StgToCmm.Utils. However, -one result of doing this is that the contents of these registers -may mysteriously change if referenced inside the arguments. This -is dangerous, so you'll need to disable inlining much in the same -way is done in cmm/CmmOpt.hs currently. We should fix this! --} - ---------------------------------------------- --- Eq instance of CmmNode - -deriving instance Eq (CmmNode e x) - ----------------------------------------------- --- Hoopl instances of CmmNode - -instance NonLocal CmmNode where - entryLabel (CmmEntry l _) = l - - successors (CmmBranch l) = [l] - successors (CmmCondBranch {cml_true=t, cml_false=f}) = [f, t] -- meets layout constraint - successors (CmmSwitch _ ids) = switchTargetsToList ids - successors (CmmCall {cml_cont=l}) = maybeToList l - successors (CmmForeignCall {succ=l}) = [l] - - --------------------------------------------------- --- Various helper types - -type CmmActual = CmmExpr -type CmmFormal = LocalReg - -type UpdFrameOffset = ByteOff - --- | A convention maps a list of values (function arguments or return --- values) to registers or stack locations. -data Convention - = NativeDirectCall - -- ^ top-level Haskell functions use @NativeDirectCall@, which - -- maps arguments to registers starting with R2, according to - -- how many registers are available on the platform. This - -- convention ignores R1, because for a top-level function call - -- the function closure is implicit, and doesn't need to be passed. - | NativeNodeCall - -- ^ non-top-level Haskell functions, which pass the address of - -- the function closure in R1 (regardless of whether R1 is a - -- real register or not), and the rest of the arguments in - -- registers or on the stack. - | NativeReturn - -- ^ a native return. The convention for returns depends on - -- how many values are returned: for just one value returned, - -- the appropriate register is used (R1, F1, etc.). regardless - -- of whether it is a real register or not. For multiple - -- values returned, they are mapped to registers or the stack. 
- | Slow - -- ^ Slow entry points: all args pushed on the stack - | GC - -- ^ Entry to the garbage collector: uses the node reg! - -- (TODO: I don't think we need this --SDM) - deriving( Eq ) - -data ForeignConvention - = ForeignConvention - CCallConv -- Which foreign-call convention - [ForeignHint] -- Extra info about the args - [ForeignHint] -- Extra info about the result - CmmReturnInfo - deriving Eq - -data CmmReturnInfo - = CmmMayReturn - | CmmNeverReturns - deriving ( Eq ) - -data ForeignTarget -- The target of a foreign call - = ForeignTarget -- A foreign procedure - CmmExpr -- Its address - ForeignConvention -- Its calling convention - | PrimTarget -- A possibly-side-effecting machine operation - CallishMachOp -- Which one - deriving Eq - -foreignTargetHints :: ForeignTarget -> ([ForeignHint], [ForeignHint]) -foreignTargetHints target - = ( res_hints ++ repeat NoHint - , arg_hints ++ repeat NoHint ) - where - (res_hints, arg_hints) = - case target of - PrimTarget op -> callishMachOpHints op - ForeignTarget _ (ForeignConvention _ arg_hints res_hints _) -> - (res_hints, arg_hints) - --------------------------------------------------- --- Instances of register and slot users / definers - -instance UserOfRegs LocalReg (CmmNode e x) where - foldRegsUsed dflags f !z n = case n of - CmmAssign _ expr -> fold f z expr - CmmStore addr rval -> fold f (fold f z addr) rval - CmmUnsafeForeignCall t _ args -> fold f (fold f z t) args - CmmCondBranch expr _ _ _ -> fold f z expr - CmmSwitch expr _ -> fold f z expr - CmmCall {cml_target=tgt} -> fold f z tgt - CmmForeignCall {tgt=tgt, args=args} -> fold f (fold f z tgt) args - _ -> z - where fold :: forall a b. UserOfRegs LocalReg a - => (b -> LocalReg -> b) -> b -> a -> b - fold f z n = foldRegsUsed dflags f z n - -instance UserOfRegs GlobalReg (CmmNode e x) where - foldRegsUsed dflags f !z n = case n of - CmmAssign _ expr -> fold f z expr - CmmStore addr rval -> fold f (fold f z addr) rval - CmmUnsafeForeignCall t _ args -> fold f (fold f z t) args - CmmCondBranch expr _ _ _ -> fold f z expr - CmmSwitch expr _ -> fold f z expr - CmmCall {cml_target=tgt, cml_args_regs=args} -> fold f (fold f z args) tgt - CmmForeignCall {tgt=tgt, args=args} -> fold f (fold f z tgt) args - _ -> z - where fold :: forall a b. UserOfRegs GlobalReg a - => (b -> GlobalReg -> b) -> b -> a -> b - fold f z n = foldRegsUsed dflags f z n - -instance (Ord r, UserOfRegs r CmmReg) => UserOfRegs r ForeignTarget where - -- The (Ord r) in the context is necessary here - -- See Note [Recursive superclasses] in TcInstDcls - foldRegsUsed _ _ !z (PrimTarget _) = z - foldRegsUsed dflags f !z (ForeignTarget e _) = foldRegsUsed dflags f z e - -instance DefinerOfRegs LocalReg (CmmNode e x) where - foldRegsDefd dflags f !z n = case n of - CmmAssign lhs _ -> fold f z lhs - CmmUnsafeForeignCall _ fs _ -> fold f z fs - CmmForeignCall {res=res} -> fold f z res - _ -> z - where fold :: forall a b. DefinerOfRegs LocalReg a - => (b -> LocalReg -> b) -> b -> a -> b - fold f z n = foldRegsDefd dflags f z n - -instance DefinerOfRegs GlobalReg (CmmNode e x) where - foldRegsDefd dflags f !z n = case n of - CmmAssign lhs _ -> fold f z lhs - CmmUnsafeForeignCall tgt _ _ -> fold f z (foreignTargetRegs tgt) - CmmCall {} -> fold f z activeRegs - CmmForeignCall {} -> fold f z activeRegs - -- See Note [Safe foreign calls clobber STG registers] - _ -> z - where fold :: forall a b. 
DefinerOfRegs GlobalReg a - => (b -> GlobalReg -> b) -> b -> a -> b - fold f z n = foldRegsDefd dflags f z n - - platform = targetPlatform dflags - activeRegs = activeStgRegs platform - activeCallerSavesRegs = filter (callerSaves platform) activeRegs - - foreignTargetRegs (ForeignTarget _ (ForeignConvention _ _ _ CmmNeverReturns)) = [] - foreignTargetRegs _ = activeCallerSavesRegs - --- Note [Safe foreign calls clobber STG registers] --- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --- --- During stack layout phase every safe foreign call is expanded into a block --- that contains unsafe foreign call (instead of safe foreign call) and ends --- with a normal call (See Note [Foreign calls]). This means that we must --- treat safe foreign call as if it was a normal call (because eventually it --- will be). This is important if we try to run sinking pass before stack --- layout phase. Consider this example of what might go wrong (this is cmm --- code from stablename001 test). Here is code after common block elimination --- (before stack layout): --- --- c1q6: --- _s1pf::P64 = R1; --- _c1q8::I64 = performMajorGC; --- I64[(young + 8)] = c1q9; --- foreign call "ccall" arg hints: [] result hints: [] (_c1q8::I64)(...) --- returns to c1q9 args: ([]) ress: ([])ret_args: 8ret_off: 8; --- c1q9: --- I64[(young + 8)] = c1qb; --- R1 = _s1pc::P64; --- call stg_makeStableName#(R1) returns to c1qb, args: 8, res: 8, upd: 8; --- --- If we run sinking pass now (still before stack layout) we will get this: --- --- c1q6: --- I64[(young + 8)] = c1q9; --- foreign call "ccall" arg hints: [] result hints: [] performMajorGC(...) --- returns to c1q9 args: ([]) ress: ([])ret_args: 8ret_off: 8; --- c1q9: --- I64[(young + 8)] = c1qb; --- _s1pf::P64 = R1; <------ _s1pf sunk past safe foreign call --- R1 = _s1pc::P64; --- call stg_makeStableName#(R1) returns to c1qb, args: 8, res: 8, upd: 8; --- --- Notice that _s1pf was sunk past a foreign call. When we run stack layout --- safe call to performMajorGC will be turned into: --- --- c1q6: --- _s1pc::P64 = P64[Sp + 8]; --- I64[Sp - 8] = c1q9; --- Sp = Sp - 8; --- I64[I64[CurrentTSO + 24] + 16] = Sp; --- P64[CurrentNursery + 8] = Hp + 8; --- (_u1qI::I64) = call "ccall" arg hints: [PtrHint,] --- result hints: [PtrHint] suspendThread(BaseReg, 0); --- call "ccall" arg hints: [] result hints: [] performMajorGC(); --- (_u1qJ::I64) = call "ccall" arg hints: [PtrHint] --- result hints: [PtrHint] resumeThread(_u1qI::I64); --- BaseReg = _u1qJ::I64; --- _u1qK::P64 = CurrentTSO; --- _u1qL::P64 = I64[_u1qK::P64 + 24]; --- Sp = I64[_u1qL::P64 + 16]; --- SpLim = _u1qL::P64 + 192; --- HpAlloc = 0; --- Hp = I64[CurrentNursery + 8] - 8; --- HpLim = I64[CurrentNursery] + (%MO_SS_Conv_W32_W64(I32[CurrentNursery + 48]) * 4096 - 1); --- call (I64[Sp])() returns to c1q9, args: 8, res: 8, upd: 8; --- c1q9: --- I64[(young + 8)] = c1qb; --- _s1pf::P64 = R1; <------ INCORRECT! --- R1 = _s1pc::P64; --- call stg_makeStableName#(R1) returns to c1qb, args: 8, res: 8, upd: 8; --- --- Notice that c1q6 now ends with a call. Sinking _s1pf::P64 = R1 past that --- call is clearly incorrect. This is what would happen if we assumed that --- safe foreign call has the same semantics as unsafe foreign call. To prevent --- this we need to treat safe foreign call as if was normal call. 
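The guard that prevents this lives in the DefinerOfRegs instance above: a CmmForeignCall is treated as defining every active global register. As a minimal, self-contained sketch of the underlying rule (the toy Reg and Node types and maySinkPast below are invented for illustration and are not GHC's actual sinking pass): an assignment may only float past a node that defines none of the registers the assignment reads, so a safe call that conservatively "defines" everything blocks the bad _s1pf::P64 = R1 sink shown in the note.

import Data.Set (Set)
import qualified Data.Set as Set

data Reg = R1 | R2 | Sp deriving (Eq, Ord, Show)

data Node
  = Assign Reg Reg   -- dst := src
  | UnsafeCall [Reg] -- clobbers only the caller-save regs it is given
  | SafeCall         -- becomes a real call after stack layout, so we
                     -- conservatively assume it clobbers every active reg
  deriving Show

-- Registers a node is assumed to define (clobber).
defs :: Node -> Set Reg
defs (Assign d _)    = Set.singleton d
defs (UnsafeCall cs) = Set.fromList cs
defs SafeCall        = Set.fromList [R1, R2, Sp]

-- Registers an assignment reads.
uses :: Node -> Set Reg
uses (Assign _ s) = Set.singleton s
uses _            = Set.empty

-- An assignment may sink past a node only if the node clobbers nothing
-- the assignment reads (interference on the written register is omitted
-- for brevity).
maySinkPast :: Node -> Node -> Bool
maySinkPast assign node =
  Set.null (uses assign `Set.intersection` defs node)

main :: IO ()
main = do
  print (maySinkPast (Assign R2 R1) SafeCall)        -- False: must not sink
  print (maySinkPast (Assign R2 R1) (UnsafeCall [])) -- True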
- ------------------------------------ --- mapping Expr in CmmNode - -mapForeignTarget :: (CmmExpr -> CmmExpr) -> ForeignTarget -> ForeignTarget -mapForeignTarget exp (ForeignTarget e c) = ForeignTarget (exp e) c -mapForeignTarget _ m@(PrimTarget _) = m - -wrapRecExp :: (CmmExpr -> CmmExpr) -> CmmExpr -> CmmExpr --- Take a transformer on expressions and apply it recursively. --- (wrapRecExp f e) first recursively applies itself to sub-expressions of e --- then uses f to rewrite the resulting expression -wrapRecExp f (CmmMachOp op es) = f (CmmMachOp op $ map (wrapRecExp f) es) -wrapRecExp f (CmmLoad addr ty) = f (CmmLoad (wrapRecExp f addr) ty) -wrapRecExp f e = f e - -mapExp :: (CmmExpr -> CmmExpr) -> CmmNode e x -> CmmNode e x -mapExp _ f@(CmmEntry{}) = f -mapExp _ m@(CmmComment _) = m -mapExp _ m@(CmmTick _) = m -mapExp f (CmmUnwind regs) = CmmUnwind (map (fmap (fmap f)) regs) -mapExp f (CmmAssign r e) = CmmAssign r (f e) -mapExp f (CmmStore addr e) = CmmStore (f addr) (f e) -mapExp f (CmmUnsafeForeignCall tgt fs as) = CmmUnsafeForeignCall (mapForeignTarget f tgt) fs (map f as) -mapExp _ l@(CmmBranch _) = l -mapExp f (CmmCondBranch e ti fi l) = CmmCondBranch (f e) ti fi l -mapExp f (CmmSwitch e ids) = CmmSwitch (f e) ids -mapExp f n@CmmCall {cml_target=tgt} = n{cml_target = f tgt} -mapExp f (CmmForeignCall tgt fs as succ ret_args updfr intrbl) = CmmForeignCall (mapForeignTarget f tgt) fs (map f as) succ ret_args updfr intrbl - -mapExpDeep :: (CmmExpr -> CmmExpr) -> CmmNode e x -> CmmNode e x -mapExpDeep f = mapExp $ wrapRecExp f - ------------------------------------------------------------------------- --- mapping Expr in CmmNode, but not performing allocation if no changes - -mapForeignTargetM :: (CmmExpr -> Maybe CmmExpr) -> ForeignTarget -> Maybe ForeignTarget -mapForeignTargetM f (ForeignTarget e c) = (\x -> ForeignTarget x c) `fmap` f e -mapForeignTargetM _ (PrimTarget _) = Nothing - -wrapRecExpM :: (CmmExpr -> Maybe CmmExpr) -> (CmmExpr -> Maybe CmmExpr) --- (wrapRecExpM f e) first recursively applies itself to sub-expressions of e --- then gives f a chance to rewrite the resulting expression -wrapRecExpM f n@(CmmMachOp op es) = maybe (f n) (f . CmmMachOp op) (mapListM (wrapRecExpM f) es) -wrapRecExpM f n@(CmmLoad addr ty) = maybe (f n) (f . 
flip CmmLoad ty) (wrapRecExpM f addr) -wrapRecExpM f e = f e - -mapExpM :: (CmmExpr -> Maybe CmmExpr) -> CmmNode e x -> Maybe (CmmNode e x) -mapExpM _ (CmmEntry{}) = Nothing -mapExpM _ (CmmComment _) = Nothing -mapExpM _ (CmmTick _) = Nothing -mapExpM f (CmmUnwind regs) = CmmUnwind `fmap` mapM (\(r,e) -> mapM f e >>= \e' -> pure (r,e')) regs -mapExpM f (CmmAssign r e) = CmmAssign r `fmap` f e -mapExpM f (CmmStore addr e) = (\[addr', e'] -> CmmStore addr' e') `fmap` mapListM f [addr, e] -mapExpM _ (CmmBranch _) = Nothing -mapExpM f (CmmCondBranch e ti fi l) = (\x -> CmmCondBranch x ti fi l) `fmap` f e -mapExpM f (CmmSwitch e tbl) = (\x -> CmmSwitch x tbl) `fmap` f e -mapExpM f (CmmCall tgt mb_id r o i s) = (\x -> CmmCall x mb_id r o i s) `fmap` f tgt -mapExpM f (CmmUnsafeForeignCall tgt fs as) - = case mapForeignTargetM f tgt of - Just tgt' -> Just (CmmUnsafeForeignCall tgt' fs (mapListJ f as)) - Nothing -> (\xs -> CmmUnsafeForeignCall tgt fs xs) `fmap` mapListM f as -mapExpM f (CmmForeignCall tgt fs as succ ret_args updfr intrbl) - = case mapForeignTargetM f tgt of - Just tgt' -> Just (CmmForeignCall tgt' fs (mapListJ f as) succ ret_args updfr intrbl) - Nothing -> (\xs -> CmmForeignCall tgt fs xs succ ret_args updfr intrbl) `fmap` mapListM f as - --- share as much as possible -mapListM :: (a -> Maybe a) -> [a] -> Maybe [a] -mapListM f xs = let (b, r) = mapListT f xs - in if b then Just r else Nothing - -mapListJ :: (a -> Maybe a) -> [a] -> [a] -mapListJ f xs = snd (mapListT f xs) - -mapListT :: (a -> Maybe a) -> [a] -> (Bool, [a]) -mapListT f xs = foldr g (False, []) (zip3 (tails xs) xs (map f xs)) - where g (_, y, Nothing) (True, ys) = (True, y:ys) - g (_, _, Just y) (True, ys) = (True, y:ys) - g (ys', _, Nothing) (False, _) = (False, ys') - g (_, _, Just y) (False, ys) = (True, y:ys) - -mapExpDeepM :: (CmmExpr -> Maybe CmmExpr) -> CmmNode e x -> Maybe (CmmNode e x) -mapExpDeepM f = mapExpM $ wrapRecExpM f - ------------------------------------ --- folding Expr in CmmNode - -foldExpForeignTarget :: (CmmExpr -> z -> z) -> ForeignTarget -> z -> z -foldExpForeignTarget exp (ForeignTarget e _) z = exp e z -foldExpForeignTarget _ (PrimTarget _) z = z - --- Take a folder on expressions and apply it recursively. --- Specifically (wrapRecExpf f e z) deals with CmmMachOp and CmmLoad --- itself, delegating all the other CmmExpr forms to 'f'. 
-wrapRecExpf :: (CmmExpr -> z -> z) -> CmmExpr -> z -> z -wrapRecExpf f e@(CmmMachOp _ es) z = foldr (wrapRecExpf f) (f e z) es -wrapRecExpf f e@(CmmLoad addr _) z = wrapRecExpf f addr (f e z) -wrapRecExpf f e z = f e z - -foldExp :: (CmmExpr -> z -> z) -> CmmNode e x -> z -> z -foldExp _ (CmmEntry {}) z = z -foldExp _ (CmmComment {}) z = z -foldExp _ (CmmTick {}) z = z -foldExp f (CmmUnwind xs) z = foldr (maybe id f) z (map snd xs) -foldExp f (CmmAssign _ e) z = f e z -foldExp f (CmmStore addr e) z = f addr $ f e z -foldExp f (CmmUnsafeForeignCall t _ as) z = foldr f (foldExpForeignTarget f t z) as -foldExp _ (CmmBranch _) z = z -foldExp f (CmmCondBranch e _ _ _) z = f e z -foldExp f (CmmSwitch e _) z = f e z -foldExp f (CmmCall {cml_target=tgt}) z = f tgt z -foldExp f (CmmForeignCall {tgt=tgt, args=args}) z = foldr f (foldExpForeignTarget f tgt z) args - -foldExpDeep :: (CmmExpr -> z -> z) -> CmmNode e x -> z -> z -foldExpDeep f = foldExp (wrapRecExpf f) - --- ----------------------------------------------------------------------------- - -mapSuccessors :: (Label -> Label) -> CmmNode O C -> CmmNode O C -mapSuccessors f (CmmBranch bid) = CmmBranch (f bid) -mapSuccessors f (CmmCondBranch p y n l) = CmmCondBranch p (f y) (f n) l -mapSuccessors f (CmmSwitch e ids) = CmmSwitch e (mapSwitchTargets f ids) -mapSuccessors _ n = n - -mapCollectSuccessors :: forall a. (Label -> (Label,a)) -> CmmNode O C - -> (CmmNode O C, [a]) -mapCollectSuccessors f (CmmBranch bid) - = let (bid', acc) = f bid in (CmmBranch bid', [acc]) -mapCollectSuccessors f (CmmCondBranch p y n l) - = let (bidt, acct) = f y - (bidf, accf) = f n - in (CmmCondBranch p bidt bidf l, [accf, acct]) -mapCollectSuccessors f (CmmSwitch e ids) - = let lbls = switchTargetsToList ids :: [Label] - lblMap = mapFromList $ zip lbls (map f lbls) :: LabelMap (Label, a) - in ( CmmSwitch e - (mapSwitchTargets - (\l -> fst $ mapFindWithDefault (error "impossible") l lblMap) ids) - , map snd (mapElems lblMap) - ) -mapCollectSuccessors _ n = (n, []) - --- ----------------------------------------------------------------------------- - --- | Tickish in Cmm context (annotations only) -type CmmTickish = Tickish () - --- | Tick scope identifier, allowing us to reason about what --- annotations in a Cmm block should scope over. We especially take --- care to allow optimisations to reorganise blocks without losing --- tick association in the process. -data CmmTickScope - = GlobalScope - -- ^ The global scope is the "root" of the scope graph. Every - -- scope is a sub-scope of the global scope. It doesn't make sense - -- to add ticks to this scope. On the other hand, this means that - -- setting this scope on a block means no ticks apply to it. - - | SubScope !U.Unique CmmTickScope - -- ^ Constructs a new sub-scope to an existing scope. This allows - -- us to translate Core-style scoping rules (see @tickishScoped@) - -- into the Cmm world. Suppose the following code: - -- - -- tick<1> case ... of - -- A -> tick<2> ... - -- B -> tick<3> ... - -- - -- We want the top-level tick annotation to apply to blocks - -- generated for the A and B alternatives. We can achieve that by - -- generating tick<1> into a block with scope a, while the code - -- for alternatives A and B gets generated into sub-scopes a/b and - -- a/c respectively. - - | CombinedScope CmmTickScope CmmTickScope - -- ^ A combined scope scopes over everything that the two given - -- scopes cover. It is therefore a sub-scope of either scope. This - -- is required for optimisations. 
Consider common block elimination: - -- - -- A -> tick<2> case ... of - -- C -> [common] - -- B -> tick<3> case ... of - -- D -> [common] - -- - -- We will generate code for the C and D alternatives, and figure - -- out afterwards that it's actually common code. Scoping rules - -- dictate that the resulting common block needs to be covered by - -- both tick<2> and tick<3>, therefore we need to construct a - -- scope that is a child to *both* scope. Now we can do that - if - -- we assign the scopes a/c and b/d to the common-ed up blocks, - -- the new block could have a combined tick scope a/c+b/d, which - -- both tick<2> and tick<3> apply to. - --- Note [CmmTick scoping details]: --- --- The scope of a @CmmTick@ is given by the @CmmEntry@ node of the --- same block. Note that as a result of this, optimisations making --- tick scopes more specific can *reduce* the amount of code a tick --- scopes over. Fixing this would require a separate @CmmTickScope@ --- field for @CmmTick@. Right now we do not do this simply because I --- couldn't find an example where it actually mattered -- multiple --- blocks within the same scope generally jump to each other, which --- prevents common block elimination from happening in the first --- place. But this is no strong reason, so if Cmm optimisations become --- more involved in future this might have to be revisited. - --- | Output all scope paths. -scopeToPaths :: CmmTickScope -> [[U.Unique]] -scopeToPaths GlobalScope = [[]] -scopeToPaths (SubScope u s) = map (u:) (scopeToPaths s) -scopeToPaths (CombinedScope s1 s2) = scopeToPaths s1 ++ scopeToPaths s2 - --- | Returns the head uniques of the scopes. This is based on the --- assumption that the @Unique@ of @SubScope@ identifies the --- underlying super-scope. Used for efficient equality and comparison, --- see below. -scopeUniques :: CmmTickScope -> [U.Unique] -scopeUniques GlobalScope = [] -scopeUniques (SubScope u _) = [u] -scopeUniques (CombinedScope s1 s2) = scopeUniques s1 ++ scopeUniques s2 - --- Equality and order is based on the head uniques defined above. We --- take care to short-cut the (extremely) common cases. -instance Eq CmmTickScope where - GlobalScope == GlobalScope = True - GlobalScope == _ = False - _ == GlobalScope = False - (SubScope u _) == (SubScope u' _) = u == u' - (SubScope _ _) == _ = False - _ == (SubScope _ _) = False - scope == scope' = - sortBy nonDetCmpUnique (scopeUniques scope) == - sortBy nonDetCmpUnique (scopeUniques scope') - -- This is still deterministic because - -- the order is the same for equal lists - --- This is non-deterministic but we do not currently support deterministic --- code-generation. See Note [Unique Determinism and code generation] --- See Note [No Ord for Unique] -instance Ord CmmTickScope where - compare GlobalScope GlobalScope = EQ - compare GlobalScope _ = LT - compare _ GlobalScope = GT - compare (SubScope u _) (SubScope u' _) = nonDetCmpUnique u u' - compare scope scope' = cmpList nonDetCmpUnique - (sortBy nonDetCmpUnique $ scopeUniques scope) - (sortBy nonDetCmpUnique $ scopeUniques scope') - -instance Outputable CmmTickScope where - ppr GlobalScope = text "global" - ppr (SubScope us GlobalScope) - = ppr us - ppr (SubScope us s) = ppr s <> char '/' <> ppr us - ppr combined = parens $ hcat $ punctuate (char '+') $ - map (hcat . punctuate (char '/') . map ppr . reverse) $ - scopeToPaths combined - --- | Checks whether two tick scopes are sub-scopes of each other. True --- if the two scopes are equal. 
-isTickSubScope :: CmmTickScope -> CmmTickScope -> Bool -isTickSubScope = cmp - where cmp _ GlobalScope = True - cmp GlobalScope _ = False - cmp (CombinedScope s1 s2) s' = cmp s1 s' && cmp s2 s' - cmp s (CombinedScope s1' s2') = cmp s s1' || cmp s s2' - cmp (SubScope u s) s'@(SubScope u' _) = u == u' || cmp s s' - --- | Combine two tick scopes. The new scope should be sub-scope of --- both parameters. We simplify automatically if one tick scope is a --- sub-scope of the other already. -combineTickScopes :: CmmTickScope -> CmmTickScope -> CmmTickScope -combineTickScopes s1 s2 - | s1 `isTickSubScope` s2 = s1 - | s2 `isTickSubScope` s1 = s2 - | otherwise = CombinedScope s1 s2 diff --git a/compiler/cmm/CmmOpt.hs b/compiler/cmm/CmmOpt.hs deleted file mode 100644 index 5b542a390e..0000000000 --- a/compiler/cmm/CmmOpt.hs +++ /dev/null @@ -1,423 +0,0 @@ ------------------------------------------------------------------------------ --- --- Cmm optimisation --- --- (c) The University of Glasgow 2006 --- ------------------------------------------------------------------------------ - -module CmmOpt ( - constantFoldNode, - constantFoldExpr, - cmmMachOpFold, - cmmMachOpFoldM - ) where - -import GhcPrelude - -import CmmUtils -import Cmm -import DynFlags -import Util - -import Outputable -import GHC.Platform - -import Data.Bits -import Data.Maybe - - -constantFoldNode :: DynFlags -> CmmNode e x -> CmmNode e x -constantFoldNode dflags = mapExp (constantFoldExpr dflags) - -constantFoldExpr :: DynFlags -> CmmExpr -> CmmExpr -constantFoldExpr dflags = wrapRecExp f - where f (CmmMachOp op args) = cmmMachOpFold dflags op args - f (CmmRegOff r 0) = CmmReg r - f e = e - --- ----------------------------------------------------------------------------- --- MachOp constant folder - --- Now, try to constant-fold the MachOps. The arguments have already --- been optimized and folded. - -cmmMachOpFold - :: DynFlags - -> MachOp -- The operation from an CmmMachOp - -> [CmmExpr] -- The optimized arguments - -> CmmExpr - -cmmMachOpFold dflags op args = fromMaybe (CmmMachOp op args) (cmmMachOpFoldM dflags op args) - --- Returns Nothing if no changes, useful for Hoopl, also reduces --- allocation! -cmmMachOpFoldM - :: DynFlags - -> MachOp - -> [CmmExpr] - -> Maybe CmmExpr - -cmmMachOpFoldM _ op [CmmLit (CmmInt x rep)] - = Just $ case op of - MO_S_Neg _ -> CmmLit (CmmInt (-x) rep) - MO_Not _ -> CmmLit (CmmInt (complement x) rep) - - -- these are interesting: we must first narrow to the - -- "from" type, in order to truncate to the correct size. - -- The final narrow/widen to the destination type - -- is implicit in the CmmLit. 
- MO_SF_Conv _from to -> CmmLit (CmmFloat (fromInteger x) to) - MO_SS_Conv from to -> CmmLit (CmmInt (narrowS from x) to) - MO_UU_Conv from to -> CmmLit (CmmInt (narrowU from x) to) - - _ -> panic $ "cmmMachOpFoldM: unknown unary op: " ++ show op - - --- Eliminate conversion NOPs -cmmMachOpFoldM _ (MO_SS_Conv rep1 rep2) [x] | rep1 == rep2 = Just x -cmmMachOpFoldM _ (MO_UU_Conv rep1 rep2) [x] | rep1 == rep2 = Just x - --- Eliminate nested conversions where possible -cmmMachOpFoldM dflags conv_outer [CmmMachOp conv_inner [x]] - | Just (rep1,rep2,signed1) <- isIntConversion conv_inner, - Just (_, rep3,signed2) <- isIntConversion conv_outer - = case () of - -- widen then narrow to the same size is a nop - _ | rep1 < rep2 && rep1 == rep3 -> Just x - -- Widen then narrow to different size: collapse to single conversion - -- but remember to use the signedness from the widening, just in case - -- the final conversion is a widen. - | rep1 < rep2 && rep2 > rep3 -> - Just $ cmmMachOpFold dflags (intconv signed1 rep1 rep3) [x] - -- Nested widenings: collapse if the signedness is the same - | rep1 < rep2 && rep2 < rep3 && signed1 == signed2 -> - Just $ cmmMachOpFold dflags (intconv signed1 rep1 rep3) [x] - -- Nested narrowings: collapse - | rep1 > rep2 && rep2 > rep3 -> - Just $ cmmMachOpFold dflags (MO_UU_Conv rep1 rep3) [x] - | otherwise -> - Nothing - where - isIntConversion (MO_UU_Conv rep1 rep2) - = Just (rep1,rep2,False) - isIntConversion (MO_SS_Conv rep1 rep2) - = Just (rep1,rep2,True) - isIntConversion _ = Nothing - - intconv True = MO_SS_Conv - intconv False = MO_UU_Conv - --- ToDo: a narrow of a load can be collapsed into a narrow load, right? --- but what if the architecture only supports word-sized loads, should --- we do the transformation anyway? - -cmmMachOpFoldM dflags mop [CmmLit (CmmInt x xrep), CmmLit (CmmInt y _)] - = case mop of - -- for comparisons: don't forget to narrow the arguments before - -- comparing, since they might be out of range. - MO_Eq _ -> Just $ CmmLit (CmmInt (if x_u == y_u then 1 else 0) (wordWidth dflags)) - MO_Ne _ -> Just $ CmmLit (CmmInt (if x_u /= y_u then 1 else 0) (wordWidth dflags)) - - MO_U_Gt _ -> Just $ CmmLit (CmmInt (if x_u > y_u then 1 else 0) (wordWidth dflags)) - MO_U_Ge _ -> Just $ CmmLit (CmmInt (if x_u >= y_u then 1 else 0) (wordWidth dflags)) - MO_U_Lt _ -> Just $ CmmLit (CmmInt (if x_u < y_u then 1 else 0) (wordWidth dflags)) - MO_U_Le _ -> Just $ CmmLit (CmmInt (if x_u <= y_u then 1 else 0) (wordWidth dflags)) - - MO_S_Gt _ -> Just $ CmmLit (CmmInt (if x_s > y_s then 1 else 0) (wordWidth dflags)) - MO_S_Ge _ -> Just $ CmmLit (CmmInt (if x_s >= y_s then 1 else 0) (wordWidth dflags)) - MO_S_Lt _ -> Just $ CmmLit (CmmInt (if x_s < y_s then 1 else 0) (wordWidth dflags)) - MO_S_Le _ -> Just $ CmmLit (CmmInt (if x_s <= y_s then 1 else 0) (wordWidth dflags)) - - MO_Add r -> Just $ CmmLit (CmmInt (x + y) r) - MO_Sub r -> Just $ CmmLit (CmmInt (x - y) r) - MO_Mul r -> Just $ CmmLit (CmmInt (x * y) r) - MO_U_Quot r | y /= 0 -> Just $ CmmLit (CmmInt (x_u `quot` y_u) r) - MO_U_Rem r | y /= 0 -> Just $ CmmLit (CmmInt (x_u `rem` y_u) r) - MO_S_Quot r | y /= 0 -> Just $ CmmLit (CmmInt (x `quot` y) r) - MO_S_Rem r | y /= 0 -> Just $ CmmLit (CmmInt (x `rem` y) r) - - MO_And r -> Just $ CmmLit (CmmInt (x .&. y) r) - MO_Or r -> Just $ CmmLit (CmmInt (x .|. 
y) r) - MO_Xor r -> Just $ CmmLit (CmmInt (x `xor` y) r) - - MO_Shl r -> Just $ CmmLit (CmmInt (x `shiftL` fromIntegral y) r) - MO_U_Shr r -> Just $ CmmLit (CmmInt (x_u `shiftR` fromIntegral y) r) - MO_S_Shr r -> Just $ CmmLit (CmmInt (x `shiftR` fromIntegral y) r) - - _ -> Nothing - - where - x_u = narrowU xrep x - y_u = narrowU xrep y - x_s = narrowS xrep x - y_s = narrowS xrep y - - --- When possible, shift the constants to the right-hand side, so that we --- can match for strength reductions. Note that the code generator will --- also assume that constants have been shifted to the right when --- possible. - -cmmMachOpFoldM dflags op [x@(CmmLit _), y] - | not (isLit y) && isCommutableMachOp op - = Just (cmmMachOpFold dflags op [y, x]) - --- Turn (a+b)+c into a+(b+c) where possible. Because literals are --- moved to the right, it is more likely that we will find --- opportunities for constant folding when the expression is --- right-associated. --- --- ToDo: this appears to introduce a quadratic behaviour due to the --- nested cmmMachOpFold. Can we fix this? --- --- Why do we check isLit arg1? If arg1 is a lit, it means that arg2 --- is also a lit (otherwise arg1 would be on the right). If we --- put arg1 on the left of the rearranged expression, we'll get into a --- loop: (x1+x2)+x3 => x1+(x2+x3) => (x2+x3)+x1 => x2+(x3+x1) ... --- --- Also don't do it if arg1 is PicBaseReg, so that we don't separate the --- PicBaseReg from the corresponding label (or label difference). --- -cmmMachOpFoldM dflags mop1 [CmmMachOp mop2 [arg1,arg2], arg3] - | mop2 `associates_with` mop1 - && not (isLit arg1) && not (isPicReg arg1) - = Just (cmmMachOpFold dflags mop2 [arg1, cmmMachOpFold dflags mop1 [arg2,arg3]]) - where - MO_Add{} `associates_with` MO_Sub{} = True - mop1 `associates_with` mop2 = - mop1 == mop2 && isAssociativeMachOp mop1 - --- special case: (a - b) + c ==> a + (c - b) -cmmMachOpFoldM dflags mop1@(MO_Add{}) [CmmMachOp mop2@(MO_Sub{}) [arg1,arg2], arg3] - | not (isLit arg1) && not (isPicReg arg1) - = Just (cmmMachOpFold dflags mop1 [arg1, cmmMachOpFold dflags mop2 [arg3,arg2]]) - --- special case: (PicBaseReg + lit) + N ==> PicBaseReg + (lit+N) --- --- this is better because lit+N is a single link-time constant (e.g. a --- CmmLabelOff), so the right-hand expression needs only one --- instruction, whereas the left needs two. This happens when pointer --- tagging gives us label+offset, and PIC turns the label into --- PicBaseReg + label. 
--- -cmmMachOpFoldM _ MO_Add{} [ CmmMachOp op@MO_Add{} [pic, CmmLit lit] - , CmmLit (CmmInt n rep) ] - | isPicReg pic - = Just $ CmmMachOp op [pic, CmmLit $ cmmOffsetLit lit off ] - where off = fromIntegral (narrowS rep n) - --- Make a RegOff if we can -cmmMachOpFoldM _ (MO_Add _) [CmmReg reg, CmmLit (CmmInt n rep)] - = Just $ cmmRegOff reg (fromIntegral (narrowS rep n)) -cmmMachOpFoldM _ (MO_Add _) [CmmRegOff reg off, CmmLit (CmmInt n rep)] - = Just $ cmmRegOff reg (off + fromIntegral (narrowS rep n)) -cmmMachOpFoldM _ (MO_Sub _) [CmmReg reg, CmmLit (CmmInt n rep)] - = Just $ cmmRegOff reg (- fromIntegral (narrowS rep n)) -cmmMachOpFoldM _ (MO_Sub _) [CmmRegOff reg off, CmmLit (CmmInt n rep)] - = Just $ cmmRegOff reg (off - fromIntegral (narrowS rep n)) - --- Fold label(+/-)offset into a CmmLit where possible - -cmmMachOpFoldM _ (MO_Add _) [CmmLit lit, CmmLit (CmmInt i rep)] - = Just $ CmmLit (cmmOffsetLit lit (fromIntegral (narrowU rep i))) -cmmMachOpFoldM _ (MO_Add _) [CmmLit (CmmInt i rep), CmmLit lit] - = Just $ CmmLit (cmmOffsetLit lit (fromIntegral (narrowU rep i))) -cmmMachOpFoldM _ (MO_Sub _) [CmmLit lit, CmmLit (CmmInt i rep)] - = Just $ CmmLit (cmmOffsetLit lit (fromIntegral (negate (narrowU rep i)))) - - --- Comparison of literal with widened operand: perform the comparison --- at the smaller width, as long as the literal is within range. - --- We can't do the reverse trick, when the operand is narrowed: --- narrowing throws away bits from the operand, there's no way to do --- the same comparison at the larger size. - -cmmMachOpFoldM dflags cmp [CmmMachOp conv [x], CmmLit (CmmInt i _)] - | -- powerPC NCG has a TODO for I8/I16 comparisons, so don't try - platformArch (targetPlatform dflags) `elem` [ArchX86, ArchX86_64], - -- if the operand is widened: - Just (rep, signed, narrow_fn) <- maybe_conversion conv, - -- and this is a comparison operation: - Just narrow_cmp <- maybe_comparison cmp rep signed, - -- and the literal fits in the smaller size: - i == narrow_fn rep i - -- then we can do the comparison at the smaller size - = Just (cmmMachOpFold dflags narrow_cmp [x, CmmLit (CmmInt i rep)]) - where - maybe_conversion (MO_UU_Conv from to) - | to > from - = Just (from, False, narrowU) - maybe_conversion (MO_SS_Conv from to) - | to > from - = Just (from, True, narrowS) - - -- don't attempt to apply this optimisation when the source - -- is a float; see #1916 - maybe_conversion _ = Nothing - - -- careful (#2080): if the original comparison was signed, but - -- we were doing an unsigned widen, then we must do an - -- unsigned comparison at the smaller size. - maybe_comparison (MO_U_Gt _) rep _ = Just (MO_U_Gt rep) - maybe_comparison (MO_U_Ge _) rep _ = Just (MO_U_Ge rep) - maybe_comparison (MO_U_Lt _) rep _ = Just (MO_U_Lt rep) - maybe_comparison (MO_U_Le _) rep _ = Just (MO_U_Le rep) - maybe_comparison (MO_Eq _) rep _ = Just (MO_Eq rep) - maybe_comparison (MO_S_Gt _) rep True = Just (MO_S_Gt rep) - maybe_comparison (MO_S_Ge _) rep True = Just (MO_S_Ge rep) - maybe_comparison (MO_S_Lt _) rep True = Just (MO_S_Lt rep) - maybe_comparison (MO_S_Le _) rep True = Just (MO_S_Le rep) - maybe_comparison (MO_S_Gt _) rep False = Just (MO_U_Gt rep) - maybe_comparison (MO_S_Ge _) rep False = Just (MO_U_Ge rep) - maybe_comparison (MO_S_Lt _) rep False = Just (MO_U_Lt rep) - maybe_comparison (MO_S_Le _) rep False = Just (MO_U_Le rep) - maybe_comparison _ _ _ = Nothing - --- We can often do something with constants of 0 and 1 ... 
--- See Note [Comparison operators] - -cmmMachOpFoldM dflags mop [x, y@(CmmLit (CmmInt 0 _))] - = case mop of - -- Arithmetic - MO_Add _ -> Just x -- x + 0 = x - MO_Sub _ -> Just x -- x - 0 = x - MO_Mul _ -> Just y -- x * 0 = 0 - - -- Logical operations - MO_And _ -> Just y -- x & 0 = 0 - MO_Or _ -> Just x -- x | 0 = x - MO_Xor _ -> Just x -- x `xor` 0 = x - - -- Shifts - MO_Shl _ -> Just x -- x << 0 = x - MO_S_Shr _ -> Just x -- ditto shift-right - MO_U_Shr _ -> Just x - - -- Comparisons; these ones are trickier - -- See Note [Comparison operators] - MO_Ne _ | isComparisonExpr x -> Just x -- (x > y) != 0 = x > y - MO_Eq _ | Just x' <- maybeInvertCmmExpr x -> Just x' -- (x > y) == 0 = x <= y - MO_U_Gt _ | isComparisonExpr x -> Just x -- (x > y) > 0 = x > y - MO_S_Gt _ | isComparisonExpr x -> Just x -- ditto - MO_U_Lt _ | isComparisonExpr x -> Just zero -- (x > y) < 0 = 0 - MO_S_Lt _ | isComparisonExpr x -> Just zero - MO_U_Ge _ | isComparisonExpr x -> Just one -- (x > y) >= 0 = 1 - MO_S_Ge _ | isComparisonExpr x -> Just one - - MO_U_Le _ | Just x' <- maybeInvertCmmExpr x -> Just x' -- (x > y) <= 0 = x <= y - MO_S_Le _ | Just x' <- maybeInvertCmmExpr x -> Just x' - _ -> Nothing - where - zero = CmmLit (CmmInt 0 (wordWidth dflags)) - one = CmmLit (CmmInt 1 (wordWidth dflags)) - -cmmMachOpFoldM dflags mop [x, (CmmLit (CmmInt 1 rep))] - = case mop of - -- Arithmetic: x*1 = x, etc - MO_Mul _ -> Just x - MO_S_Quot _ -> Just x - MO_U_Quot _ -> Just x - MO_S_Rem _ -> Just $ CmmLit (CmmInt 0 rep) - MO_U_Rem _ -> Just $ CmmLit (CmmInt 0 rep) - - -- Comparisons; trickier - -- See Note [Comparison operators] - MO_Ne _ | Just x' <- maybeInvertCmmExpr x -> Just x' -- (x>y) != 1 = x<=y - MO_Eq _ | isComparisonExpr x -> Just x -- (x>y) == 1 = x>y - MO_U_Lt _ | Just x' <- maybeInvertCmmExpr x -> Just x' -- (x>y) < 1 = x<=y - MO_S_Lt _ | Just x' <- maybeInvertCmmExpr x -> Just x' -- ditto - MO_U_Gt _ | isComparisonExpr x -> Just zero -- (x>y) > 1 = 0 - MO_S_Gt _ | isComparisonExpr x -> Just zero - MO_U_Le _ | isComparisonExpr x -> Just one -- (x>y) <= 1 = 1 - MO_S_Le _ | isComparisonExpr x -> Just one - MO_U_Ge _ | isComparisonExpr x -> Just x -- (x>y) >= 1 = x>y - MO_S_Ge _ | isComparisonExpr x -> Just x - _ -> Nothing - where - zero = CmmLit (CmmInt 0 (wordWidth dflags)) - one = CmmLit (CmmInt 1 (wordWidth dflags)) - --- Now look for multiplication/division by powers of 2 (integers). - -cmmMachOpFoldM dflags mop [x, (CmmLit (CmmInt n _))] - = case mop of - MO_Mul rep - | Just p <- exactLog2 n -> - Just (cmmMachOpFold dflags (MO_Shl rep) [x, CmmLit (CmmInt p rep)]) - MO_U_Quot rep - | Just p <- exactLog2 n -> - Just (cmmMachOpFold dflags (MO_U_Shr rep) [x, CmmLit (CmmInt p rep)]) - MO_U_Rem rep - | Just _ <- exactLog2 n -> - Just (cmmMachOpFold dflags (MO_And rep) [x, CmmLit (CmmInt (n - 1) rep)]) - MO_S_Quot rep - | Just p <- exactLog2 n, - CmmReg _ <- x -> -- We duplicate x in signedQuotRemHelper, hence require - -- it is a reg. FIXME: remove this restriction. - Just (cmmMachOpFold dflags (MO_S_Shr rep) - [signedQuotRemHelper rep p, CmmLit (CmmInt p rep)]) - MO_S_Rem rep - | Just p <- exactLog2 n, - CmmReg _ <- x -> -- We duplicate x in signedQuotRemHelper, hence require - -- it is a reg. FIXME: remove this restriction. - -- We replace (x `rem` 2^p) by (x - (x `quot` 2^p) * 2^p). - -- Moreover, we fuse MO_S_Shr (last operation of MO_S_Quot) - -- and MO_S_Shl (multiplication by 2^p) into a single MO_And operation. 
- Just (cmmMachOpFold dflags (MO_Sub rep) - [x, cmmMachOpFold dflags (MO_And rep) - [signedQuotRemHelper rep p, CmmLit (CmmInt (- n) rep)]]) - _ -> Nothing - where - -- In contrast with unsigned integers, for signed ones - -- shift right is not the same as quot, because it rounds - -- to minus infinity, whereas quot rounds toward zero. - -- To fix this up, we add one less than the divisor to the - -- dividend if it is a negative number. - -- - -- to avoid a test/jump, we use the following sequence: - -- x1 = x >> word_size-1 (all 1s if -ve, all 0s if +ve) - -- x2 = y & (divisor-1) - -- result = x + x2 - -- this could be done a bit more simply using conditional moves, - -- but we're processor independent here. - -- - -- we optimise the divide by 2 case slightly, generating - -- x1 = x >> word_size-1 (unsigned) - -- return = x + x1 - signedQuotRemHelper :: Width -> Integer -> CmmExpr - signedQuotRemHelper rep p = CmmMachOp (MO_Add rep) [x, x2] - where - bits = fromIntegral (widthInBits rep) - 1 - shr = if p == 1 then MO_U_Shr rep else MO_S_Shr rep - x1 = CmmMachOp shr [x, CmmLit (CmmInt bits rep)] - x2 = if p == 1 then x1 else - CmmMachOp (MO_And rep) [x1, CmmLit (CmmInt (n-1) rep)] - --- ToDo (#7116): optimise floating-point multiplication, e.g. x*2.0 -> x+x --- Unfortunately this needs a unique supply because x might not be a --- register. See #2253 (program 6) for an example. - - --- Anything else is just too hard. - -cmmMachOpFoldM _ _ _ = Nothing - -{- Note [Comparison operators] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -If we have - CmmCondBranch ((x>#y) == 1) t f -we really want to convert to - CmmCondBranch (x>#y) t f - -That's what the constant-folding operations on comparison operators do above. --} - - --- ----------------------------------------------------------------------------- --- Utils - -isPicReg :: CmmExpr -> Bool -isPicReg (CmmReg (CmmGlobal PicBaseReg)) = True -isPicReg _ = False diff --git a/compiler/cmm/CmmParse.y b/compiler/cmm/CmmParse.y deleted file mode 100644 index e568378197..0000000000 --- a/compiler/cmm/CmmParse.y +++ /dev/null @@ -1,1442 +0,0 @@ ------------------------------------------------------------------------------ --- --- (c) The University of Glasgow, 2004-2012 --- --- Parser for concrete Cmm. --- ------------------------------------------------------------------------------ - -{- ----------------------------------------------------------------------------- -Note [Syntax of .cmm files] - -NOTE: You are very much on your own in .cmm. There is very little -error checking at all: - - * Type errors are detected by the (optional) -dcmm-lint pass, if you - don't turn this on then a type error will likely result in a panic - from the native code generator. - - * Passing the wrong number of arguments or arguments of the wrong - type is not detected. - -There are two ways to write .cmm code: - - (1) High-level Cmm code delegates the stack handling to GHC, and - never explicitly mentions Sp or registers. - - (2) Low-level Cmm manages the stack itself, and must know about - calling conventions. - -Whether you want high-level or low-level Cmm is indicated by the -presence of an argument list on a procedure. 
For example: - -foo ( gcptr a, bits32 b ) -{ - // this is high-level cmm code - - if (b > 0) { - // we can make tail calls passing arguments: - jump stg_ap_0_fast(a); - } - - push (stg_upd_frame_info, a) { - // stack frames can be explicitly pushed - - (x,y) = call wibble(a,b,3,4); - // calls pass arguments and return results using the native - // Haskell calling convention. The code generator will automatically - // construct a stack frame and an info table for the continuation. - - return (x,y); - // we can return multiple values from the current proc - } -} - -bar -{ - // this is low-level cmm code, indicated by the fact that we did not - // put an argument list on bar. - - x = R1; // the calling convention is explicit: better be careful - // that this works on all platforms! - - jump %ENTRY_CODE(Sp(0)) -} - -Here is a list of rules for high-level and low-level code. If you -break the rules, you get a panic (for using a high-level construct in -a low-level proc), or wrong code (when using low-level code in a -high-level proc). This stuff isn't checked! (TODO!) - -High-level only: - - - tail-calls with arguments, e.g. - jump stg_fun (arg1, arg2); - - - function calls: - (ret1,ret2) = call stg_fun (arg1, arg2); - - This makes a call with the NativeNodeCall convention, and the - values are returned to the following code using the NativeReturn - convention. - - - returning: - return (ret1, ret2) - - These use the NativeReturn convention to return zero or more - results to the caller. - - - pushing stack frames: - push (info_ptr, field1, ..., fieldN) { ... statements ... } - - - reserving temporary stack space: - - reserve N = x { ... } - - this reserves an area of size N (words) on the top of the stack, - and binds its address to x (a local register). Typically this is - used for allocating temporary storage for passing to foreign - functions. - - Note that if you make any native calls or invoke the GC in the - scope of the reserve block, you are responsible for ensuring that - the stack you reserved is laid out correctly with an info table. - -Low-level only: - - - References to Sp, R1-R8, F1-F4 etc. - - NB. foreign calls may clobber the argument registers R1-R8, F1-F4 - etc., so ensure they are saved into variables around foreign - calls. - - - SAVE_THREAD_STATE() and LOAD_THREAD_STATE(), which modify Sp - directly. - -Both high-level and low-level code can use a raw tail-call: - - jump stg_fun [R1,R2] - -NB. you *must* specify the list of GlobalRegs that are passed via a -jump, otherwise the register allocator will assume that all the -GlobalRegs are dead at the jump. - - -Calling Conventions -------------------- - -High-level procedures use the NativeNode calling convention, or the -NativeReturn convention if the 'return' keyword is used (see Stack -Frames below). - -Low-level procedures implement their own calling convention, so it can -be anything at all. - -If a low-level procedure implements the NativeNode calling convention, -then it can be called by high-level code using an ordinary function -call. In general this is hard to arrange because the calling -convention depends on the number of physical registers available for -parameter passing, but there are two cases where the calling -convention is platform-independent: - - - Zero arguments. - - - One argument of pointer or non-pointer word type; this is always - passed in R1 according to the NativeNode convention. - - - Returning a single value; these conventions are fixed and platform - independent. 
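To make the platform-independence claim above concrete, here is a standalone toy model in plain Haskell (not GHC's CmmCallConv; ArgLoc and assignNodeCall are invented names): arguments are mapped to registers in order and then spill to the stack, so a call with zero arguments, or with a single word-sized argument in R1, is laid out identically no matter how many argument registers the platform provides (assuming it provides at least one), whereas longer argument lists depend on the register count.

data ArgLoc = InReg Int | OnStack Int deriving (Eq, Show)

-- Map word-sized arguments to R1..Rn, then to stack slots.
assignNodeCall :: Int -> [String] -> [(String, ArgLoc)]
assignNodeCall nRegs = zipWith place [1 ..]
  where
    place i a
      | i <= nRegs = (a, InReg i)
      | otherwise  = (a, OnStack (i - nRegs))

main :: IO ()
main = do
  print (assignNodeCall 4 ["node"])          -- [("node",InReg 1)]
  print (assignNodeCall 8 ["node"])          -- identical: platform-independent
  print (assignNodeCall 2 ["node","x","y"])  -- third arg spills: platform-dependent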
- - -Stack Frames ------------- - -A stack frame is written like this: - -INFO_TABLE_RET ( label, FRAME_TYPE, info_ptr, field1, ..., fieldN ) - return ( arg1, ..., argM ) -{ - ... code ... -} - -where field1 ... fieldN are the fields of the stack frame (with types) -arg1...argN are the values returned to the stack frame (with types). -The return values are assumed to be passed according to the -NativeReturn convention. - -On entry to the code, the stack frame looks like: - - |----------| - | fieldN | - | ... | - | field1 | - |----------| - | info_ptr | - |----------| - | argN | - | ... | <- Sp - -and some of the args may be in registers. - -We prepend the code by a copyIn of the args, and assign all the stack -frame fields to their formals. The initial "arg offset" for stack -layout purposes consists of the whole stack frame plus any args that -might be on the stack. - -A tail-call may pass a stack frame to the callee using the following -syntax: - -jump f (info_ptr, field1,..,fieldN) (arg1,..,argN) - -where info_ptr and field1..fieldN describe the stack frame, and -arg1..argN are the arguments passed to f using the NativeNodeCall -convention. Note if a field is longer than a word (e.g. a D_ on -a 32-bit machine) then the call will push as many words as -necessary to the stack to accommodate it (e.g. 2). - - ------------------------------------------------------------------------------ -} - -{ -{-# LANGUAGE TupleSections #-} - -module CmmParse ( parseCmmFile ) where - -import GhcPrelude - -import GHC.StgToCmm.ExtCode -import CmmCallConv -import GHC.StgToCmm.Prof -import GHC.StgToCmm.Heap -import GHC.StgToCmm.Monad hiding ( getCode, getCodeR, getCodeScoped, emitLabel, emit - , emitStore, emitAssign, emitOutOfLine, withUpdFrameOff - , getUpdFrameOff ) -import qualified GHC.StgToCmm.Monad as F -import GHC.StgToCmm.Utils -import GHC.StgToCmm.Foreign -import GHC.StgToCmm.Expr -import GHC.StgToCmm.Closure -import GHC.StgToCmm.Layout hiding (ArgRep(..)) -import GHC.StgToCmm.Ticky -import GHC.StgToCmm.Bind ( emitBlackHoleCode, emitUpdateFrame ) -import CoreSyn ( Tickish(SourceNote) ) - -import CmmOpt -import MkGraph -import Cmm -import CmmUtils -import CmmSwitch ( mkSwitchTargets ) -import CmmInfo -import BlockId -import CmmLex -import CLabel -import SMRep -import Lexer -import CmmMonad - -import CostCentre -import ForeignCall -import Module -import GHC.Platform -import Literal -import Unique -import UniqFM -import SrcLoc -import DynFlags -import ErrUtils -import StringBuffer -import FastString -import Panic -import Constants -import Outputable -import BasicTypes -import Bag ( emptyBag, unitBag ) -import Var - -import Control.Monad -import Data.Array -import Data.Char ( ord ) -import System.Exit -import Data.Maybe -import qualified Data.Map as M -import qualified Data.ByteString.Char8 as BS8 - -#include "HsVersions.h" -} - -%expect 0 - -%token - ':' { L _ (CmmT_SpecChar ':') } - ';' { L _ (CmmT_SpecChar ';') } - '{' { L _ (CmmT_SpecChar '{') } - '}' { L _ (CmmT_SpecChar '}') } - '[' { L _ (CmmT_SpecChar '[') } - ']' { L _ (CmmT_SpecChar ']') } - '(' { L _ (CmmT_SpecChar '(') } - ')' { L _ (CmmT_SpecChar ')') } - '=' { L _ (CmmT_SpecChar '=') } - '`' { L _ (CmmT_SpecChar '`') } - '~' { L _ (CmmT_SpecChar '~') } - '/' { L _ (CmmT_SpecChar '/') } - '*' { L _ (CmmT_SpecChar '*') } - '%' { L _ (CmmT_SpecChar '%') } - '-' { L _ (CmmT_SpecChar '-') } - '+' { L _ (CmmT_SpecChar '+') } - '&' { L _ (CmmT_SpecChar '&') } - '^' { L _ (CmmT_SpecChar '^') } - '|' { L _ (CmmT_SpecChar '|') } - '>' { L _ 
(CmmT_SpecChar '>') } - '<' { L _ (CmmT_SpecChar '<') } - ',' { L _ (CmmT_SpecChar ',') } - '!' { L _ (CmmT_SpecChar '!') } - - '..' { L _ (CmmT_DotDot) } - '::' { L _ (CmmT_DoubleColon) } - '>>' { L _ (CmmT_Shr) } - '<<' { L _ (CmmT_Shl) } - '>=' { L _ (CmmT_Ge) } - '<=' { L _ (CmmT_Le) } - '==' { L _ (CmmT_Eq) } - '!=' { L _ (CmmT_Ne) } - '&&' { L _ (CmmT_BoolAnd) } - '||' { L _ (CmmT_BoolOr) } - - 'True' { L _ (CmmT_True ) } - 'False' { L _ (CmmT_False) } - 'likely'{ L _ (CmmT_likely)} - - 'CLOSURE' { L _ (CmmT_CLOSURE) } - 'INFO_TABLE' { L _ (CmmT_INFO_TABLE) } - 'INFO_TABLE_RET'{ L _ (CmmT_INFO_TABLE_RET) } - 'INFO_TABLE_FUN'{ L _ (CmmT_INFO_TABLE_FUN) } - 'INFO_TABLE_CONSTR'{ L _ (CmmT_INFO_TABLE_CONSTR) } - 'INFO_TABLE_SELECTOR'{ L _ (CmmT_INFO_TABLE_SELECTOR) } - 'else' { L _ (CmmT_else) } - 'export' { L _ (CmmT_export) } - 'section' { L _ (CmmT_section) } - 'goto' { L _ (CmmT_goto) } - 'if' { L _ (CmmT_if) } - 'call' { L _ (CmmT_call) } - 'jump' { L _ (CmmT_jump) } - 'foreign' { L _ (CmmT_foreign) } - 'never' { L _ (CmmT_never) } - 'prim' { L _ (CmmT_prim) } - 'reserve' { L _ (CmmT_reserve) } - 'return' { L _ (CmmT_return) } - 'returns' { L _ (CmmT_returns) } - 'import' { L _ (CmmT_import) } - 'switch' { L _ (CmmT_switch) } - 'case' { L _ (CmmT_case) } - 'default' { L _ (CmmT_default) } - 'push' { L _ (CmmT_push) } - 'unwind' { L _ (CmmT_unwind) } - 'bits8' { L _ (CmmT_bits8) } - 'bits16' { L _ (CmmT_bits16) } - 'bits32' { L _ (CmmT_bits32) } - 'bits64' { L _ (CmmT_bits64) } - 'bits128' { L _ (CmmT_bits128) } - 'bits256' { L _ (CmmT_bits256) } - 'bits512' { L _ (CmmT_bits512) } - 'float32' { L _ (CmmT_float32) } - 'float64' { L _ (CmmT_float64) } - 'gcptr' { L _ (CmmT_gcptr) } - - GLOBALREG { L _ (CmmT_GlobalReg $$) } - NAME { L _ (CmmT_Name $$) } - STRING { L _ (CmmT_String $$) } - INT { L _ (CmmT_Int $$) } - FLOAT { L _ (CmmT_Float $$) } - -%monad { PD } { >>= } { return } -%lexer { cmmlex } { L _ CmmT_EOF } -%name cmmParse cmm -%tokentype { Located CmmToken } - --- C-- operator precedences, taken from the C-- spec -%right '||' -- non-std extension, called %disjoin in C-- -%right '&&' -- non-std extension, called %conjoin in C-- -%right '!' -%nonassoc '>=' '>' '<=' '<' '!=' '==' -%left '|' -%left '^' -%left '&' -%left '>>' '<<' -%left '-' '+' -%left '/' '*' '%' -%right '~' - -%% - -cmm :: { CmmParse () } - : {- empty -} { return () } - | cmmtop cmm { do $1; $2 } - -cmmtop :: { CmmParse () } - : cmmproc { $1 } - | cmmdata { $1 } - | decl { $1 } - | 'CLOSURE' '(' NAME ',' NAME lits ')' ';' - {% liftP . withThisPackage $ \pkg -> - do lits <- sequence $6; - staticClosure pkg $3 $5 (map getLit lits) } - --- The only static closures in the RTS are dummy closures like --- stg_END_TSO_QUEUE_closure and stg_dummy_ret. We don't need --- to provide the full generality of static closures here. --- In particular: --- * CCS can always be CCS_DONT_CARE --- * closure is always extern --- * payload is always empty --- * we can derive closure and info table labels from a single NAME - -cmmdata :: { CmmParse () } - : 'section' STRING '{' data_label statics '}' - { do lbl <- $4; - ss <- sequence $5; - code (emitDecl (CmmData (Section (section $2) lbl) (Statics lbl $ concat ss))) } - -data_label :: { CmmParse CLabel } - : NAME ':' - {% liftP . 
withThisPackage $ \pkg -> - return (mkCmmDataLabel pkg $1) } - -statics :: { [CmmParse [CmmStatic]] } - : {- empty -} { [] } - | static statics { $1 : $2 } - -static :: { CmmParse [CmmStatic] } - : type expr ';' { do e <- $2; - return [CmmStaticLit (getLit e)] } - | type ';' { return [CmmUninitialised - (widthInBytes (typeWidth $1))] } - | 'bits8' '[' ']' STRING ';' { return [mkString $4] } - | 'bits8' '[' INT ']' ';' { return [CmmUninitialised - (fromIntegral $3)] } - | typenot8 '[' INT ']' ';' { return [CmmUninitialised - (widthInBytes (typeWidth $1) * - fromIntegral $3)] } - | 'CLOSURE' '(' NAME lits ')' - { do { lits <- sequence $4 - ; dflags <- getDynFlags - ; return $ map CmmStaticLit $ - mkStaticClosure dflags (mkForeignLabel $3 Nothing ForeignLabelInExternalPackage IsData) - -- mkForeignLabel because these are only used - -- for CHARLIKE and INTLIKE closures in the RTS. - dontCareCCS (map getLit lits) [] [] [] } } - -- arrays of closures required for the CHARLIKE & INTLIKE arrays - -lits :: { [CmmParse CmmExpr] } - : {- empty -} { [] } - | ',' expr lits { $2 : $3 } - -cmmproc :: { CmmParse () } - : info maybe_conv maybe_formals maybe_body - { do ((entry_ret_label, info, stk_formals, formals), agraph) <- - getCodeScoped $ loopDecls $ do { - (entry_ret_label, info, stk_formals) <- $1; - dflags <- getDynFlags; - formals <- sequence (fromMaybe [] $3); - withName (showSDoc dflags (ppr entry_ret_label)) - $4; - return (entry_ret_label, info, stk_formals, formals) } - let do_layout = isJust $3 - code (emitProcWithStackFrame $2 info - entry_ret_label stk_formals formals agraph - do_layout ) } - -maybe_conv :: { Convention } - : {- empty -} { NativeNodeCall } - | 'return' { NativeReturn } - -maybe_body :: { CmmParse () } - : ';' { return () } - | '{' body '}' { withSourceNote $1 $3 $2 } - -info :: { CmmParse (CLabel, Maybe CmmInfoTable, [LocalReg]) } - : NAME - {% liftP . withThisPackage $ \pkg -> - do newFunctionName $1 pkg - return (mkCmmCodeLabel pkg $1, Nothing, []) } - - - | 'INFO_TABLE' '(' NAME ',' INT ',' INT ',' INT ',' STRING ',' STRING ')' - -- ptrs, nptrs, closure type, description, type - {% liftP . withThisPackage $ \pkg -> - do dflags <- getDynFlags - let prof = profilingInfo dflags $11 $13 - rep = mkRTSRep (fromIntegral $9) $ - mkHeapRep dflags False (fromIntegral $5) - (fromIntegral $7) Thunk - -- not really Thunk, but that makes the info table - -- we want. - return (mkCmmEntryLabel pkg $3, - Just $ CmmInfoTable { cit_lbl = mkCmmInfoLabel pkg $3 - , cit_rep = rep - , cit_prof = prof, cit_srt = Nothing, cit_clo = Nothing }, - []) } - - | 'INFO_TABLE_FUN' '(' NAME ',' INT ',' INT ',' INT ',' STRING ',' STRING ',' INT ')' - -- ptrs, nptrs, closure type, description, type, fun type - {% liftP . withThisPackage $ \pkg -> - do dflags <- getDynFlags - let prof = profilingInfo dflags $11 $13 - ty = Fun 0 (ArgSpec (fromIntegral $15)) - -- Arity zero, arg_type $15 - rep = mkRTSRep (fromIntegral $9) $ - mkHeapRep dflags False (fromIntegral $5) - (fromIntegral $7) ty - return (mkCmmEntryLabel pkg $3, - Just $ CmmInfoTable { cit_lbl = mkCmmInfoLabel pkg $3 - , cit_rep = rep - , cit_prof = prof, cit_srt = Nothing, cit_clo = Nothing }, - []) } - -- we leave most of the fields zero here. This is only used - -- to generate the BCO info table in the RTS at the moment. - - | 'INFO_TABLE_CONSTR' '(' NAME ',' INT ',' INT ',' INT ',' INT ',' STRING ',' STRING ')' - -- ptrs, nptrs, tag, closure type, description, type - {% liftP . 
withThisPackage $ \pkg -> - do dflags <- getDynFlags - let prof = profilingInfo dflags $13 $15 - ty = Constr (fromIntegral $9) -- Tag - (BS8.pack $13) - rep = mkRTSRep (fromIntegral $11) $ - mkHeapRep dflags False (fromIntegral $5) - (fromIntegral $7) ty - return (mkCmmEntryLabel pkg $3, - Just $ CmmInfoTable { cit_lbl = mkCmmInfoLabel pkg $3 - , cit_rep = rep - , cit_prof = prof, cit_srt = Nothing,cit_clo = Nothing }, - []) } - - -- If profiling is on, this string gets duplicated, - -- but that's the way the old code did it we can fix it some other time. - - | 'INFO_TABLE_SELECTOR' '(' NAME ',' INT ',' INT ',' STRING ',' STRING ')' - -- selector, closure type, description, type - {% liftP . withThisPackage $ \pkg -> - do dflags <- getDynFlags - let prof = profilingInfo dflags $9 $11 - ty = ThunkSelector (fromIntegral $5) - rep = mkRTSRep (fromIntegral $7) $ - mkHeapRep dflags False 0 0 ty - return (mkCmmEntryLabel pkg $3, - Just $ CmmInfoTable { cit_lbl = mkCmmInfoLabel pkg $3 - , cit_rep = rep - , cit_prof = prof, cit_srt = Nothing, cit_clo = Nothing }, - []) } - - | 'INFO_TABLE_RET' '(' NAME ',' INT ')' - -- closure type (no live regs) - {% liftP . withThisPackage $ \pkg -> - do let prof = NoProfilingInfo - rep = mkRTSRep (fromIntegral $5) $ mkStackRep [] - return (mkCmmRetLabel pkg $3, - Just $ CmmInfoTable { cit_lbl = mkCmmRetInfoLabel pkg $3 - , cit_rep = rep - , cit_prof = prof, cit_srt = Nothing, cit_clo = Nothing }, - []) } - - | 'INFO_TABLE_RET' '(' NAME ',' INT ',' formals0 ')' - -- closure type, live regs - {% liftP . withThisPackage $ \pkg -> - do dflags <- getDynFlags - live <- sequence $7 - let prof = NoProfilingInfo - -- drop one for the info pointer - bitmap = mkLiveness dflags (drop 1 live) - rep = mkRTSRep (fromIntegral $5) $ mkStackRep bitmap - return (mkCmmRetLabel pkg $3, - Just $ CmmInfoTable { cit_lbl = mkCmmRetInfoLabel pkg $3 - , cit_rep = rep - , cit_prof = prof, cit_srt = Nothing, cit_clo = Nothing }, - live) } - -body :: { CmmParse () } - : {- empty -} { return () } - | decl body { do $1; $2 } - | stmt body { do $1; $2 } - -decl :: { CmmParse () } - : type names ';' { mapM_ (newLocal $1) $2 } - | 'import' importNames ';' { mapM_ newImport $2 } - | 'export' names ';' { return () } -- ignore exports - - --- an imported function name, with optional packageId -importNames - :: { [(FastString, CLabel)] } - : importName { [$1] } - | importName ',' importNames { $1 : $3 } - -importName - :: { (FastString, CLabel) } - - -- A label imported without an explicit packageId. - -- These are taken to come from some foreign, unnamed package. - : NAME - { ($1, mkForeignLabel $1 Nothing ForeignLabelInExternalPackage IsFunction) } - - -- as previous 'NAME', but 'IsData' - | 'CLOSURE' NAME - { ($2, mkForeignLabel $2 Nothing ForeignLabelInExternalPackage IsData) } - - -- A label imported with an explicit packageId. - | STRING NAME - { ($2, mkCmmCodeLabel (fsToUnitId (mkFastString $1)) $2) } - - -names :: { [FastString] } - : NAME { [$1] } - | NAME ',' names { $1 : $3 } - -stmt :: { CmmParse () } - : ';' { return () } - - | NAME ':' - { do l <- newLabel $1; emitLabel l } - - - - | lreg '=' expr ';' - { do reg <- $1; e <- $3; withSourceNote $2 $4 (emitAssign reg e) } - | type '[' expr ']' '=' expr ';' - { withSourceNote $2 $7 (doStore $1 $3 $6) } - - -- Gah! We really want to say "foreign_results" but that causes - -- a shift/reduce conflict with assignment. We either - -- we expand out the no-result and single result cases or - -- we tweak the syntax to avoid the conflict. 
The later - -- option is taken here because the other way would require - -- multiple levels of expanding and get unwieldy. - | foreign_results 'foreign' STRING foreignLabel '(' cmm_hint_exprs0 ')' safety opt_never_returns ';' - {% foreignCall $3 $1 $4 $6 $8 $9 } - | foreign_results 'prim' '%' NAME '(' exprs0 ')' ';' - {% primCall $1 $4 $6 } - -- stmt-level macros, stealing syntax from ordinary C-- function calls. - -- Perhaps we ought to use the %%-form? - | NAME '(' exprs0 ')' ';' - {% stmtMacro $1 $3 } - | 'switch' maybe_range expr '{' arms default '}' - { do as <- sequence $5; doSwitch $2 $3 as $6 } - | 'goto' NAME ';' - { do l <- lookupLabel $2; emit (mkBranch l) } - | 'return' '(' exprs0 ')' ';' - { doReturn $3 } - | 'jump' expr vols ';' - { doRawJump $2 $3 } - | 'jump' expr '(' exprs0 ')' ';' - { doJumpWithStack $2 [] $4 } - | 'jump' expr '(' exprs0 ')' '(' exprs0 ')' ';' - { doJumpWithStack $2 $4 $7 } - | 'call' expr '(' exprs0 ')' ';' - { doCall $2 [] $4 } - | '(' formals ')' '=' 'call' expr '(' exprs0 ')' ';' - { doCall $6 $2 $8 } - | 'if' bool_expr cond_likely 'goto' NAME - { do l <- lookupLabel $5; cmmRawIf $2 l $3 } - | 'if' bool_expr cond_likely '{' body '}' else - { cmmIfThenElse $2 (withSourceNote $4 $6 $5) $7 $3 } - | 'push' '(' exprs0 ')' maybe_body - { pushStackFrame $3 $5 } - | 'reserve' expr '=' lreg maybe_body - { reserveStackFrame $2 $4 $5 } - | 'unwind' unwind_regs ';' - { $2 >>= code . emitUnwind } - -unwind_regs - :: { CmmParse [(GlobalReg, Maybe CmmExpr)] } - : GLOBALREG '=' expr_or_unknown ',' unwind_regs - { do e <- $3; rest <- $5; return (($1, e) : rest) } - | GLOBALREG '=' expr_or_unknown - { do e <- $3; return [($1, e)] } - --- | Used by unwind to indicate unknown unwinding values. -expr_or_unknown - :: { CmmParse (Maybe CmmExpr) } - : 'return' - { do return Nothing } - | expr - { do e <- $1; return (Just e) } - -foreignLabel :: { CmmParse CmmExpr } - : NAME { return (CmmLit (CmmLabel (mkForeignLabel $1 Nothing ForeignLabelInThisPackage IsFunction))) } - -opt_never_returns :: { CmmReturnInfo } - : { CmmMayReturn } - | 'never' 'returns' { CmmNeverReturns } - -bool_expr :: { CmmParse BoolExpr } - : bool_op { $1 } - | expr { do e <- $1; return (BoolTest e) } - -bool_op :: { CmmParse BoolExpr } - : bool_expr '&&' bool_expr { do e1 <- $1; e2 <- $3; - return (BoolAnd e1 e2) } - | bool_expr '||' bool_expr { do e1 <- $1; e2 <- $3; - return (BoolOr e1 e2) } - | '!' bool_expr { do e <- $2; return (BoolNot e) } - | '(' bool_op ')' { $2 } - -safety :: { Safety } - : {- empty -} { PlayRisky } - | STRING {% parseSafety $1 } - -vols :: { [GlobalReg] } - : '[' ']' { [] } - | '[' '*' ']' {% do df <- getDynFlags - ; return (realArgRegsCover df) } - -- All of them. See comment attached - -- to realArgRegsCover - | '[' globals ']' { $2 } - -globals :: { [GlobalReg] } - : GLOBALREG { [$1] } - | GLOBALREG ',' globals { $1 : $3 } - -maybe_range :: { Maybe (Integer,Integer) } - : '[' INT '..' 
INT ']' { Just ($2, $4) } - | {- empty -} { Nothing } - -arms :: { [CmmParse ([Integer],Either BlockId (CmmParse ()))] } - : {- empty -} { [] } - | arm arms { $1 : $2 } - -arm :: { CmmParse ([Integer],Either BlockId (CmmParse ())) } - : 'case' ints ':' arm_body { do b <- $4; return ($2, b) } - -arm_body :: { CmmParse (Either BlockId (CmmParse ())) } - : '{' body '}' { return (Right (withSourceNote $1 $3 $2)) } - | 'goto' NAME ';' { do l <- lookupLabel $2; return (Left l) } - -ints :: { [Integer] } - : INT { [ $1 ] } - | INT ',' ints { $1 : $3 } - -default :: { Maybe (CmmParse ()) } - : 'default' ':' '{' body '}' { Just (withSourceNote $3 $5 $4) } - -- taking a few liberties with the C-- syntax here; C-- doesn't have - -- 'default' branches - | {- empty -} { Nothing } - --- Note: OldCmm doesn't support a first class 'else' statement, though --- CmmNode does. -else :: { CmmParse () } - : {- empty -} { return () } - | 'else' '{' body '}' { withSourceNote $2 $4 $3 } - -cond_likely :: { Maybe Bool } - : '(' 'likely' ':' 'True' ')' { Just True } - | '(' 'likely' ':' 'False' ')' { Just False } - | {- empty -} { Nothing } - - --- we have to write this out longhand so that Happy's precedence rules --- can kick in. -expr :: { CmmParse CmmExpr } - : expr '/' expr { mkMachOp MO_U_Quot [$1,$3] } - | expr '*' expr { mkMachOp MO_Mul [$1,$3] } - | expr '%' expr { mkMachOp MO_U_Rem [$1,$3] } - | expr '-' expr { mkMachOp MO_Sub [$1,$3] } - | expr '+' expr { mkMachOp MO_Add [$1,$3] } - | expr '>>' expr { mkMachOp MO_U_Shr [$1,$3] } - | expr '<<' expr { mkMachOp MO_Shl [$1,$3] } - | expr '&' expr { mkMachOp MO_And [$1,$3] } - | expr '^' expr { mkMachOp MO_Xor [$1,$3] } - | expr '|' expr { mkMachOp MO_Or [$1,$3] } - | expr '>=' expr { mkMachOp MO_U_Ge [$1,$3] } - | expr '>' expr { mkMachOp MO_U_Gt [$1,$3] } - | expr '<=' expr { mkMachOp MO_U_Le [$1,$3] } - | expr '<' expr { mkMachOp MO_U_Lt [$1,$3] } - | expr '!=' expr { mkMachOp MO_Ne [$1,$3] } - | expr '==' expr { mkMachOp MO_Eq [$1,$3] } - | '~' expr { mkMachOp MO_Not [$2] } - | '-' expr { mkMachOp MO_S_Neg [$2] } - | expr0 '`' NAME '`' expr0 {% do { mo <- nameToMachOp $3 ; - return (mkMachOp mo [$1,$5]) } } - | expr0 { $1 } - -expr0 :: { CmmParse CmmExpr } - : INT maybe_ty { return (CmmLit (CmmInt $1 (typeWidth $2))) } - | FLOAT maybe_ty { return (CmmLit (CmmFloat $1 (typeWidth $2))) } - | STRING { do s <- code (newStringCLit $1); - return (CmmLit s) } - | reg { $1 } - | type '[' expr ']' { do e <- $3; return (CmmLoad e $1) } - | '%' NAME '(' exprs0 ')' {% exprOp $2 $4 } - | '(' expr ')' { $2 } - - --- leaving out the type of a literal gives you the native word size in C-- -maybe_ty :: { CmmType } - : {- empty -} {% do dflags <- getDynFlags; return $ bWord dflags } - | '::' type { $2 } - -cmm_hint_exprs0 :: { [CmmParse (CmmExpr, ForeignHint)] } - : {- empty -} { [] } - | cmm_hint_exprs { $1 } - -cmm_hint_exprs :: { [CmmParse (CmmExpr, ForeignHint)] } - : cmm_hint_expr { [$1] } - | cmm_hint_expr ',' cmm_hint_exprs { $1 : $3 } - -cmm_hint_expr :: { CmmParse (CmmExpr, ForeignHint) } - : expr { do e <- $1; - return (e, inferCmmHint e) } - | expr STRING {% do h <- parseCmmHint $2; - return $ do - e <- $1; return (e, h) } - -exprs0 :: { [CmmParse CmmExpr] } - : {- empty -} { [] } - | exprs { $1 } - -exprs :: { [CmmParse CmmExpr] } - : expr { [ $1 ] } - | expr ',' exprs { $1 : $3 } - -reg :: { CmmParse CmmExpr } - : NAME { lookupName $1 } - | GLOBALREG { return (CmmReg (CmmGlobal $1)) } - -foreign_results :: { [CmmParse (LocalReg, ForeignHint)] } - : {- empty -} 
{ [] } - | '(' foreign_formals ')' '=' { $2 } - -foreign_formals :: { [CmmParse (LocalReg, ForeignHint)] } - : foreign_formal { [$1] } - | foreign_formal ',' { [$1] } - | foreign_formal ',' foreign_formals { $1 : $3 } - -foreign_formal :: { CmmParse (LocalReg, ForeignHint) } - : local_lreg { do e <- $1; return (e, inferCmmHint (CmmReg (CmmLocal e))) } - | STRING local_lreg {% do h <- parseCmmHint $1; - return $ do - e <- $2; return (e,h) } - -local_lreg :: { CmmParse LocalReg } - : NAME { do e <- lookupName $1; - return $ - case e of - CmmReg (CmmLocal r) -> r - other -> pprPanic "CmmParse:" (ftext $1 <> text " not a local register") } - -lreg :: { CmmParse CmmReg } - : NAME { do e <- lookupName $1; - return $ - case e of - CmmReg r -> r - other -> pprPanic "CmmParse:" (ftext $1 <> text " not a register") } - | GLOBALREG { return (CmmGlobal $1) } - -maybe_formals :: { Maybe [CmmParse LocalReg] } - : {- empty -} { Nothing } - | '(' formals0 ')' { Just $2 } - -formals0 :: { [CmmParse LocalReg] } - : {- empty -} { [] } - | formals { $1 } - -formals :: { [CmmParse LocalReg] } - : formal ',' { [$1] } - | formal { [$1] } - | formal ',' formals { $1 : $3 } - -formal :: { CmmParse LocalReg } - : type NAME { newLocal $1 $2 } - -type :: { CmmType } - : 'bits8' { b8 } - | typenot8 { $1 } - -typenot8 :: { CmmType } - : 'bits16' { b16 } - | 'bits32' { b32 } - | 'bits64' { b64 } - | 'bits128' { b128 } - | 'bits256' { b256 } - | 'bits512' { b512 } - | 'float32' { f32 } - | 'float64' { f64 } - | 'gcptr' {% do dflags <- getDynFlags; return $ gcWord dflags } - -{ -section :: String -> SectionType -section "text" = Text -section "data" = Data -section "rodata" = ReadOnlyData -section "relrodata" = RelocatableReadOnlyData -section "bss" = UninitialisedData -section s = OtherSection s - -mkString :: String -> CmmStatic -mkString s = CmmString (BS8.pack s) - --- | --- Given an info table, decide what the entry convention for the proc --- is. That is, for an INFO_TABLE_RET we want the return convention, --- otherwise it is a NativeNodeCall. --- -infoConv :: Maybe CmmInfoTable -> Convention -infoConv Nothing = NativeNodeCall -infoConv (Just info) - | isStackRep (cit_rep info) = NativeReturn - | otherwise = NativeNodeCall - --- mkMachOp infers the type of the MachOp from the type of its first --- argument. We assume that this is correct: for MachOps that don't have --- symmetrical args (e.g. shift ops), the first arg determines the type of --- the op. 
-mkMachOp :: (Width -> MachOp) -> [CmmParse CmmExpr] -> CmmParse CmmExpr -mkMachOp fn args = do - dflags <- getDynFlags - arg_exprs <- sequence args - return (CmmMachOp (fn (typeWidth (cmmExprType dflags (head arg_exprs)))) arg_exprs) - -getLit :: CmmExpr -> CmmLit -getLit (CmmLit l) = l -getLit (CmmMachOp (MO_S_Neg _) [CmmLit (CmmInt i r)]) = CmmInt (negate i) r -getLit _ = panic "invalid literal" -- TODO messy failure - -nameToMachOp :: FastString -> PD (Width -> MachOp) -nameToMachOp name = - case lookupUFM machOps name of - Nothing -> fail ("unknown primitive " ++ unpackFS name) - Just m -> return m - -exprOp :: FastString -> [CmmParse CmmExpr] -> PD (CmmParse CmmExpr) -exprOp name args_code = do - dflags <- getDynFlags - case lookupUFM (exprMacros dflags) name of - Just f -> return $ do - args <- sequence args_code - return (f args) - Nothing -> do - mo <- nameToMachOp name - return $ mkMachOp mo args_code - -exprMacros :: DynFlags -> UniqFM ([CmmExpr] -> CmmExpr) -exprMacros dflags = listToUFM [ - ( fsLit "ENTRY_CODE", \ [x] -> entryCode dflags x ), - ( fsLit "INFO_PTR", \ [x] -> closureInfoPtr dflags x ), - ( fsLit "STD_INFO", \ [x] -> infoTable dflags x ), - ( fsLit "FUN_INFO", \ [x] -> funInfoTable dflags x ), - ( fsLit "GET_ENTRY", \ [x] -> entryCode dflags (closureInfoPtr dflags x) ), - ( fsLit "GET_STD_INFO", \ [x] -> infoTable dflags (closureInfoPtr dflags x) ), - ( fsLit "GET_FUN_INFO", \ [x] -> funInfoTable dflags (closureInfoPtr dflags x) ), - ( fsLit "INFO_TYPE", \ [x] -> infoTableClosureType dflags x ), - ( fsLit "INFO_PTRS", \ [x] -> infoTablePtrs dflags x ), - ( fsLit "INFO_NPTRS", \ [x] -> infoTableNonPtrs dflags x ) - ] - --- we understand a subset of C-- primitives: -machOps = listToUFM $ - map (\(x, y) -> (mkFastString x, y)) [ - ( "add", MO_Add ), - ( "sub", MO_Sub ), - ( "eq", MO_Eq ), - ( "ne", MO_Ne ), - ( "mul", MO_Mul ), - ( "neg", MO_S_Neg ), - ( "quot", MO_S_Quot ), - ( "rem", MO_S_Rem ), - ( "divu", MO_U_Quot ), - ( "modu", MO_U_Rem ), - - ( "ge", MO_S_Ge ), - ( "le", MO_S_Le ), - ( "gt", MO_S_Gt ), - ( "lt", MO_S_Lt ), - - ( "geu", MO_U_Ge ), - ( "leu", MO_U_Le ), - ( "gtu", MO_U_Gt ), - ( "ltu", MO_U_Lt ), - - ( "and", MO_And ), - ( "or", MO_Or ), - ( "xor", MO_Xor ), - ( "com", MO_Not ), - ( "shl", MO_Shl ), - ( "shrl", MO_U_Shr ), - ( "shra", MO_S_Shr ), - - ( "fadd", MO_F_Add ), - ( "fsub", MO_F_Sub ), - ( "fneg", MO_F_Neg ), - ( "fmul", MO_F_Mul ), - ( "fquot", MO_F_Quot ), - - ( "feq", MO_F_Eq ), - ( "fne", MO_F_Ne ), - ( "fge", MO_F_Ge ), - ( "fle", MO_F_Le ), - ( "fgt", MO_F_Gt ), - ( "flt", MO_F_Lt ), - - ( "lobits8", flip MO_UU_Conv W8 ), - ( "lobits16", flip MO_UU_Conv W16 ), - ( "lobits32", flip MO_UU_Conv W32 ), - ( "lobits64", flip MO_UU_Conv W64 ), - - ( "zx16", flip MO_UU_Conv W16 ), - ( "zx32", flip MO_UU_Conv W32 ), - ( "zx64", flip MO_UU_Conv W64 ), - - ( "sx16", flip MO_SS_Conv W16 ), - ( "sx32", flip MO_SS_Conv W32 ), - ( "sx64", flip MO_SS_Conv W64 ), - - ( "f2f32", flip MO_FF_Conv W32 ), -- TODO; rounding mode - ( "f2f64", flip MO_FF_Conv W64 ), -- TODO; rounding mode - ( "f2i8", flip MO_FS_Conv W8 ), - ( "f2i16", flip MO_FS_Conv W16 ), - ( "f2i32", flip MO_FS_Conv W32 ), - ( "f2i64", flip MO_FS_Conv W64 ), - ( "i2f32", flip MO_SF_Conv W32 ), - ( "i2f64", flip MO_SF_Conv W64 ) - ] - -callishMachOps :: UniqFM ([CmmExpr] -> (CallishMachOp, [CmmExpr])) -callishMachOps = listToUFM $ - map (\(x, y) -> (mkFastString x, y)) [ - ( "read_barrier", (MO_ReadBarrier,)), - ( "write_barrier", (MO_WriteBarrier,)), - ( "memcpy", memcpyLikeTweakArgs 
MO_Memcpy ), - ( "memset", memcpyLikeTweakArgs MO_Memset ), - ( "memmove", memcpyLikeTweakArgs MO_Memmove ), - ( "memcmp", memcpyLikeTweakArgs MO_Memcmp ), - - ("prefetch0", (MO_Prefetch_Data 0,)), - ("prefetch1", (MO_Prefetch_Data 1,)), - ("prefetch2", (MO_Prefetch_Data 2,)), - ("prefetch3", (MO_Prefetch_Data 3,)), - - ( "popcnt8", (MO_PopCnt W8,)), - ( "popcnt16", (MO_PopCnt W16,)), - ( "popcnt32", (MO_PopCnt W32,)), - ( "popcnt64", (MO_PopCnt W64,)), - - ( "pdep8", (MO_Pdep W8,)), - ( "pdep16", (MO_Pdep W16,)), - ( "pdep32", (MO_Pdep W32,)), - ( "pdep64", (MO_Pdep W64,)), - - ( "pext8", (MO_Pext W8,)), - ( "pext16", (MO_Pext W16,)), - ( "pext32", (MO_Pext W32,)), - ( "pext64", (MO_Pext W64,)), - - ( "cmpxchg8", (MO_Cmpxchg W8,)), - ( "cmpxchg16", (MO_Cmpxchg W16,)), - ( "cmpxchg32", (MO_Cmpxchg W32,)), - ( "cmpxchg64", (MO_Cmpxchg W64,)) - - -- ToDo: the rest, maybe - -- edit: which rest? - -- also: how do we tell CMM Lint how to type check callish macops? - ] - where - memcpyLikeTweakArgs :: (Int -> CallishMachOp) -> [CmmExpr] -> (CallishMachOp, [CmmExpr]) - memcpyLikeTweakArgs op [] = pgmError "memcpy-like function requires at least one argument" - memcpyLikeTweakArgs op args@(_:_) = - (op align, args') - where - args' = init args - align = case last args of - CmmLit (CmmInt alignInteger _) -> fromInteger alignInteger - e -> pprPgmError "Non-constant alignment in memcpy-like function:" (ppr e) - -- The alignment of memcpy-ish operations must be a - -- compile-time constant. We verify this here, passing it around - -- in the MO_* constructor. In order to do this, however, we - -- must intercept the arguments in primCall. - -parseSafety :: String -> PD Safety -parseSafety "safe" = return PlaySafe -parseSafety "unsafe" = return PlayRisky -parseSafety "interruptible" = return PlayInterruptible -parseSafety str = fail ("unrecognised safety: " ++ str) - -parseCmmHint :: String -> PD ForeignHint -parseCmmHint "ptr" = return AddrHint -parseCmmHint "signed" = return SignedHint -parseCmmHint str = fail ("unrecognised hint: " ++ str) - --- labels are always pointers, so we might as well infer the hint -inferCmmHint :: CmmExpr -> ForeignHint -inferCmmHint (CmmLit (CmmLabel _)) = AddrHint -inferCmmHint (CmmReg (CmmGlobal g)) | isPtrGlobalReg g = AddrHint -inferCmmHint _ = NoHint - -isPtrGlobalReg Sp = True -isPtrGlobalReg SpLim = True -isPtrGlobalReg Hp = True -isPtrGlobalReg HpLim = True -isPtrGlobalReg CCCS = True -isPtrGlobalReg CurrentTSO = True -isPtrGlobalReg CurrentNursery = True -isPtrGlobalReg (VanillaReg _ VGcPtr) = True -isPtrGlobalReg _ = False - -happyError :: PD a -happyError = PD $ \_ s -> unP srcParseFail s - --- ----------------------------------------------------------------------------- --- Statement-level macros - -stmtMacro :: FastString -> [CmmParse CmmExpr] -> PD (CmmParse ()) -stmtMacro fun args_code = do - case lookupUFM stmtMacros fun of - Nothing -> fail ("unknown macro: " ++ unpackFS fun) - Just fcode -> return $ do - args <- sequence args_code - code (fcode args) - -stmtMacros :: UniqFM ([CmmExpr] -> FCode ()) -stmtMacros = listToUFM [ - ( fsLit "CCS_ALLOC", \[words,ccs] -> profAlloc words ccs ), - ( fsLit "ENTER_CCS_THUNK", \[e] -> enterCostCentreThunk e ), - - ( fsLit "CLOSE_NURSERY", \[] -> emitCloseNursery ), - ( fsLit "OPEN_NURSERY", \[] -> emitOpenNursery ), - - -- completely generic heap and stack checks, for use in high-level cmm. 
- ( fsLit "HP_CHK_GEN", \[bytes] -> - heapStackCheckGen Nothing (Just bytes) ), - ( fsLit "STK_CHK_GEN", \[] -> - heapStackCheckGen (Just (CmmLit CmmHighStackMark)) Nothing ), - - -- A stack check for a fixed amount of stack. Sounds a bit strange, but - -- we use the stack for a bit of temporary storage in a couple of primops - ( fsLit "STK_CHK_GEN_N", \[bytes] -> - heapStackCheckGen (Just bytes) Nothing ), - - -- A stack check on entry to a thunk, where the argument is the thunk pointer. - ( fsLit "STK_CHK_NP" , \[node] -> entryHeapCheck' False node 0 [] (return ())), - - ( fsLit "LOAD_THREAD_STATE", \[] -> emitLoadThreadState ), - ( fsLit "SAVE_THREAD_STATE", \[] -> emitSaveThreadState ), - - ( fsLit "LDV_ENTER", \[e] -> ldvEnter e ), - ( fsLit "LDV_RECORD_CREATE", \[e] -> ldvRecordCreate e ), - - ( fsLit "PUSH_UPD_FRAME", \[sp,e] -> emitPushUpdateFrame sp e ), - ( fsLit "SET_HDR", \[ptr,info,ccs] -> - emitSetDynHdr ptr info ccs ), - ( fsLit "TICK_ALLOC_PRIM", \[hdr,goods,slop] -> - tickyAllocPrim hdr goods slop ), - ( fsLit "TICK_ALLOC_PAP", \[goods,slop] -> - tickyAllocPAP goods slop ), - ( fsLit "TICK_ALLOC_UP_THK", \[goods,slop] -> - tickyAllocThunk goods slop ), - ( fsLit "UPD_BH_UPDATABLE", \[reg] -> emitBlackHoleCode reg ) - ] - -emitPushUpdateFrame :: CmmExpr -> CmmExpr -> FCode () -emitPushUpdateFrame sp e = do - dflags <- getDynFlags - emitUpdateFrame dflags sp mkUpdInfoLabel e - -pushStackFrame :: [CmmParse CmmExpr] -> CmmParse () -> CmmParse () -pushStackFrame fields body = do - dflags <- getDynFlags - exprs <- sequence fields - updfr_off <- getUpdFrameOff - let (new_updfr_off, _, g) = copyOutOflow dflags NativeReturn Ret Old - [] updfr_off exprs - emit g - withUpdFrameOff new_updfr_off body - -reserveStackFrame - :: CmmParse CmmExpr - -> CmmParse CmmReg - -> CmmParse () - -> CmmParse () -reserveStackFrame psize preg body = do - dflags <- getDynFlags - old_updfr_off <- getUpdFrameOff - reg <- preg - esize <- psize - let size = case constantFoldExpr dflags esize of - CmmLit (CmmInt n _) -> n - _other -> pprPanic "CmmParse: not a compile-time integer: " - (ppr esize) - let frame = old_updfr_off + wORD_SIZE dflags * fromIntegral size - emitAssign reg (CmmStackSlot Old frame) - withUpdFrameOff frame body - -profilingInfo dflags desc_str ty_str - = if not (gopt Opt_SccProfilingOn dflags) - then NoProfilingInfo - else ProfilingInfo (BS8.pack desc_str) (BS8.pack ty_str) - -staticClosure :: UnitId -> FastString -> FastString -> [CmmLit] -> CmmParse () -staticClosure pkg cl_label info payload - = do dflags <- getDynFlags - let lits = mkStaticClosure dflags (mkCmmInfoLabel pkg info) dontCareCCS payload [] [] [] - code $ emitDataLits (mkCmmDataLabel pkg cl_label) lits - -foreignCall - :: String - -> [CmmParse (LocalReg, ForeignHint)] - -> CmmParse CmmExpr - -> [CmmParse (CmmExpr, ForeignHint)] - -> Safety - -> CmmReturnInfo - -> PD (CmmParse ()) -foreignCall conv_string results_code expr_code args_code safety ret - = do conv <- case conv_string of - "C" -> return CCallConv - "stdcall" -> return StdCallConv - _ -> fail ("unknown calling convention: " ++ conv_string) - return $ do - dflags <- getDynFlags - results <- sequence results_code - expr <- expr_code - args <- sequence args_code - let - expr' = adjCallTarget dflags conv expr args - (arg_exprs, arg_hints) = unzip args - (res_regs, res_hints) = unzip results - fc = ForeignConvention conv arg_hints res_hints ret - target = ForeignTarget expr' fc - _ <- code $ emitForeignCall safety res_regs target arg_exprs - return () - - -doReturn 
:: [CmmParse CmmExpr] -> CmmParse () -doReturn exprs_code = do - dflags <- getDynFlags - exprs <- sequence exprs_code - updfr_off <- getUpdFrameOff - emit (mkReturnSimple dflags exprs updfr_off) - -mkReturnSimple :: DynFlags -> [CmmActual] -> UpdFrameOffset -> CmmAGraph -mkReturnSimple dflags actuals updfr_off = - mkReturn dflags e actuals updfr_off - where e = entryCode dflags (CmmLoad (CmmStackSlot Old updfr_off) - (gcWord dflags)) - -doRawJump :: CmmParse CmmExpr -> [GlobalReg] -> CmmParse () -doRawJump expr_code vols = do - dflags <- getDynFlags - expr <- expr_code - updfr_off <- getUpdFrameOff - emit (mkRawJump dflags expr updfr_off vols) - -doJumpWithStack :: CmmParse CmmExpr -> [CmmParse CmmExpr] - -> [CmmParse CmmExpr] -> CmmParse () -doJumpWithStack expr_code stk_code args_code = do - dflags <- getDynFlags - expr <- expr_code - stk_args <- sequence stk_code - args <- sequence args_code - updfr_off <- getUpdFrameOff - emit (mkJumpExtra dflags NativeNodeCall expr args updfr_off stk_args) - -doCall :: CmmParse CmmExpr -> [CmmParse LocalReg] -> [CmmParse CmmExpr] - -> CmmParse () -doCall expr_code res_code args_code = do - dflags <- getDynFlags - expr <- expr_code - args <- sequence args_code - ress <- sequence res_code - updfr_off <- getUpdFrameOff - c <- code $ mkCall expr (NativeNodeCall,NativeReturn) ress args updfr_off [] - emit c - -adjCallTarget :: DynFlags -> CCallConv -> CmmExpr -> [(CmmExpr, ForeignHint) ] - -> CmmExpr --- On Windows, we have to add the '@N' suffix to the label when making --- a call with the stdcall calling convention. -adjCallTarget dflags StdCallConv (CmmLit (CmmLabel lbl)) args - | platformOS (targetPlatform dflags) == OSMinGW32 - = CmmLit (CmmLabel (addLabelSize lbl (sum (map size args)))) - where size (e, _) = max (wORD_SIZE dflags) (widthInBytes (typeWidth (cmmExprType dflags e))) - -- c.f. CgForeignCall.emitForeignCall -adjCallTarget _ _ expr _ - = expr - -primCall - :: [CmmParse (CmmFormal, ForeignHint)] - -> FastString - -> [CmmParse CmmExpr] - -> PD (CmmParse ()) -primCall results_code name args_code - = case lookupUFM callishMachOps name of - Nothing -> fail ("unknown primitive " ++ unpackFS name) - Just f -> return $ do - results <- sequence results_code - args <- sequence args_code - let (p, args') = f args - code (emitPrimCall (map fst results) p args') - -doStore :: CmmType -> CmmParse CmmExpr -> CmmParse CmmExpr -> CmmParse () -doStore rep addr_code val_code - = do dflags <- getDynFlags - addr <- addr_code - val <- val_code - -- if the specified store type does not match the type of the expr - -- on the rhs, then we insert a coercion that will cause the type - -- mismatch to be flagged by cmm-lint. If we don't do this, then - -- the store will happen at the wrong type, and the error will not - -- be noticed. - let val_width = typeWidth (cmmExprType dflags val) - rep_width = typeWidth rep - let coerce_val - | val_width /= rep_width = CmmMachOp (MO_UU_Conv val_width rep_width) [val] - | otherwise = val - emitStore addr coerce_val - --- ----------------------------------------------------------------------------- --- If-then-else and boolean expressions - -data BoolExpr - = BoolExpr `BoolAnd` BoolExpr - | BoolExpr `BoolOr` BoolExpr - | BoolNot BoolExpr - | BoolTest CmmExpr - --- ToDo: smart constructors which simplify the boolean expression. 
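As a rough illustration of what these constructors represent in concrete
.cmm source (a hypothetical snippet; x, y, z and the two labels are assumed
to be declared elsewhere in the procedure):

    if (x > 0 && !(y == z)) (likely: True) {
        goto fast_path;
    } else {
        goto slow_path;
    }

The condition parses to roughly
BoolAnd (BoolTest (x > 0)) (BoolNot (BoolTest (y == z))), which emitCond
below lowers to conditional branches, inverting the negated comparison
where it can. (Note that at this level '>' denotes the unsigned
comparison, MO_U_Gt.)
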
- -cmmIfThenElse cond then_part else_part likely = do - then_id <- newBlockId - join_id <- newBlockId - c <- cond - emitCond c then_id likely - else_part - emit (mkBranch join_id) - emitLabel then_id - then_part - -- fall through to join - emitLabel join_id - -cmmRawIf cond then_id likely = do - c <- cond - emitCond c then_id likely - --- 'emitCond cond true_id' emits code to test whether the cond is true, --- branching to true_id if so, and falling through otherwise. -emitCond (BoolTest e) then_id likely = do - else_id <- newBlockId - emit (mkCbranch e then_id else_id likely) - emitLabel else_id -emitCond (BoolNot (BoolTest (CmmMachOp op args))) then_id likely - | Just op' <- maybeInvertComparison op - = emitCond (BoolTest (CmmMachOp op' args)) then_id (not <$> likely) -emitCond (BoolNot e) then_id likely = do - else_id <- newBlockId - emitCond e else_id likely - emit (mkBranch then_id) - emitLabel else_id -emitCond (e1 `BoolOr` e2) then_id likely = do - emitCond e1 then_id likely - emitCond e2 then_id likely -emitCond (e1 `BoolAnd` e2) then_id likely = do - -- we'd like to invert one of the conditionals here to avoid an - -- extra branch instruction, but we can't use maybeInvertComparison - -- here because we can't look too closely at the expression since - -- we're in a loop. - and_id <- newBlockId - else_id <- newBlockId - emitCond e1 and_id likely - emit (mkBranch else_id) - emitLabel and_id - emitCond e2 then_id likely - emitLabel else_id - --- ----------------------------------------------------------------------------- --- Source code notes - --- | Generate a source note spanning from "a" to "b" (inclusive), then --- proceed with parsing. This allows debugging tools to reason about --- locations in Cmm code. -withSourceNote :: Located a -> Located b -> CmmParse c -> CmmParse c -withSourceNote a b parse = do - name <- getName - case combineSrcSpans (getLoc a) (getLoc b) of - RealSrcSpan span -> code (emitTick (SourceNote span name)) >> parse - _other -> parse - --- ----------------------------------------------------------------------------- --- Table jumps - --- We use a simplified form of C-- switch statements for now. A --- switch statement always compiles to a table jump. Each arm can --- specify a list of values (not ranges), and there can be a single --- default branch. The range of the table is given either by the --- optional range on the switch (eg. switch [0..7] {...}), or by --- the minimum/maximum values from the branches. 
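For example, a hypothetical switch over a small range (tag and the target
labels are assumed to be defined elsewhere in the procedure) looks like:

    switch [0 .. 4] (tag) {
        case 0, 1: { goto small; }
        case 2:    goto medium;
        case 3, 4: { goto large; }
        default:   { goto other; }
    }

Each arm lists individual values, and the bracketed [0 .. 4] is the
optional range described above.
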
- -doSwitch :: Maybe (Integer,Integer) - -> CmmParse CmmExpr - -> [([Integer],Either BlockId (CmmParse ()))] - -> Maybe (CmmParse ()) -> CmmParse () -doSwitch mb_range scrut arms deflt - = do - -- Compile code for the default branch - dflt_entry <- - case deflt of - Nothing -> return Nothing - Just e -> do b <- forkLabelledCode e; return (Just b) - - -- Compile each case branch - table_entries <- mapM emitArm arms - let table = M.fromList (concat table_entries) - - dflags <- getDynFlags - let range = fromMaybe (0, tARGET_MAX_WORD dflags) mb_range - - expr <- scrut - -- ToDo: check for out of range and jump to default if necessary - emit $ mkSwitch expr (mkSwitchTargets False range dflt_entry table) - where - emitArm :: ([Integer],Either BlockId (CmmParse ())) -> CmmParse [(Integer,BlockId)] - emitArm (ints,Left blockid) = return [ (i,blockid) | i <- ints ] - emitArm (ints,Right code) = do - blockid <- forkLabelledCode code - return [ (i,blockid) | i <- ints ] - -forkLabelledCode :: CmmParse () -> CmmParse BlockId -forkLabelledCode p = do - (_,ag) <- getCodeScoped p - l <- newBlockId - emitOutOfLine l ag - return l - --- ----------------------------------------------------------------------------- --- Putting it all together - --- The initial environment: we define some constants that the compiler --- knows about here. -initEnv :: DynFlags -> Env -initEnv dflags = listToUFM [ - ( fsLit "SIZEOF_StgHeader", - VarN (CmmLit (CmmInt (fromIntegral (fixedHdrSize dflags)) (wordWidth dflags)) )), - ( fsLit "SIZEOF_StgInfoTable", - VarN (CmmLit (CmmInt (fromIntegral (stdInfoTableSizeB dflags)) (wordWidth dflags)) )) - ] - -parseCmmFile :: DynFlags -> FilePath -> IO (Messages, Maybe CmmGroup) -parseCmmFile dflags filename = withTiming dflags (text "ParseCmm"<+>brackets (text filename)) (\_ -> ()) $ do - buf <- hGetStringBuffer filename - let - init_loc = mkRealSrcLoc (mkFastString filename) 1 1 - init_state = (mkPState dflags buf init_loc) { lex_state = [0] } - -- reset the lex_state: the Lexer monad leaves some stuff - -- in there we don't want. - case unPD cmmParse dflags init_state of - PFailed pst -> - return (getMessages pst dflags, Nothing) - POk pst code -> do - st <- initC - let fcode = getCmm $ unEC code "global" (initEnv dflags) [] >> return () - (cmm,_) = runC dflags no_module st fcode - let ms = getMessages pst dflags - if (errorsFound dflags ms) - then return (ms, Nothing) - else return (ms, Just cmm) - where - no_module = panic "parseCmmFile: no module" -} diff --git a/compiler/cmm/CmmPipeline.hs b/compiler/cmm/CmmPipeline.hs deleted file mode 100644 index e7689a6bfe..0000000000 --- a/compiler/cmm/CmmPipeline.hs +++ /dev/null @@ -1,367 +0,0 @@ -{-# LANGUAGE BangPatterns #-} - -module CmmPipeline ( - -- | Converts C-- with an implicit stack and native C-- calls into - -- optimized, CPS converted and native-call-less C--. The latter - -- C-- can be used to generate assembly. 
- cmmPipeline -) where - -import GhcPrelude - -import Cmm -import CmmLint -import CmmBuildInfoTables -import CmmCommonBlockElim -import CmmImplementSwitchPlans -import CmmProcPoint -import CmmContFlowOpt -import CmmLayoutStack -import CmmSink -import Hoopl.Collections - -import UniqSupply -import DynFlags -import ErrUtils -import HscTypes -import Control.Monad -import Outputable -import GHC.Platform - ------------------------------------------------------------------------------ --- | Top level driver for C-- pipeline ------------------------------------------------------------------------------ - -cmmPipeline - :: HscEnv -- Compilation env including - -- dynamic flags: -dcmm-lint -ddump-cmm-cps - -> ModuleSRTInfo -- Info about SRTs generated so far - -> CmmGroup -- Input C-- with Procedures - -> IO (ModuleSRTInfo, CmmGroup) -- Output CPS transformed C-- - -cmmPipeline hsc_env srtInfo prog = withTimingSilent dflags (text "Cmm pipeline") forceRes $ - do let dflags = hsc_dflags hsc_env - - tops <- {-# SCC "tops" #-} mapM (cpsTop hsc_env) prog - - (srtInfo, cmms) <- {-# SCC "doSRTs" #-} doSRTs dflags srtInfo tops - dumpWith dflags Opt_D_dump_cmm_cps "Post CPS Cmm" FormatCMM (ppr cmms) - - return (srtInfo, cmms) - - where forceRes (info, group) = - info `seq` foldr (\decl r -> decl `seq` r) () group - - dflags = hsc_dflags hsc_env - -cpsTop :: HscEnv -> CmmDecl -> IO (CAFEnv, [CmmDecl]) -cpsTop _ p@(CmmData {}) = return (mapEmpty, [p]) -cpsTop hsc_env proc = - do - ----------- Control-flow optimisations ---------------------------------- - - -- The first round of control-flow optimisation speeds up the - -- later passes by removing lots of empty blocks, so we do it - -- even when optimisation isn't turned on. - -- - CmmProc h l v g <- {-# SCC "cmmCfgOpts(1)" #-} - return $ cmmCfgOptsProc splitting_proc_points proc - dump Opt_D_dump_cmm_cfg "Post control-flow optimisations" g - - let !TopInfo {stack_info=StackInfo { arg_space = entry_off - , do_layout = do_layout }} = h - - ----------- Eliminate common blocks ------------------------------------- - g <- {-# SCC "elimCommonBlocks" #-} - condPass Opt_CmmElimCommonBlocks elimCommonBlocks g - Opt_D_dump_cmm_cbe "Post common block elimination" - - -- Any work storing block Labels must be performed _after_ - -- elimCommonBlocks - - ----------- Implement switches ------------------------------------------ - g <- {-# SCC "createSwitchPlans" #-} - runUniqSM $ cmmImplementSwitchPlans dflags g - dump Opt_D_dump_cmm_switch "Post switch plan" g - - ----------- Proc points ------------------------------------------------- - let call_pps = {-# SCC "callProcPoints" #-} callProcPoints g - proc_points <- - if splitting_proc_points - then do - pp <- {-# SCC "minimalProcPointSet" #-} runUniqSM $ - minimalProcPointSet (targetPlatform dflags) call_pps g - dumpWith dflags Opt_D_dump_cmm_proc "Proc points" - FormatCMM (ppr l $$ ppr pp $$ ppr g) - return pp - else - return call_pps - - ----------- Layout the stack and manifest Sp ---------------------------- - (g, stackmaps) <- - {-# SCC "layoutStack" #-} - if do_layout - then runUniqSM $ cmmLayoutStack dflags proc_points entry_off g - else return (g, mapEmpty) - dump Opt_D_dump_cmm_sp "Layout Stack" g - - ----------- Sink and inline assignments -------------------------------- - g <- {-# SCC "sink" #-} -- See Note [Sinking after stack layout] - condPass Opt_CmmSink (cmmSink dflags) g - Opt_D_dump_cmm_sink "Sink assignments" - - ------------- CAF analysis ---------------------------------------------- - let cafEnv = 
{-# SCC "cafAnal" #-} cafAnal call_pps l g - dumpWith dflags Opt_D_dump_cmm_caf "CAFEnv" FormatText (ppr cafEnv) - - g <- if splitting_proc_points - then do - ------------- Split into separate procedures ----------------------- - let pp_map = {-# SCC "procPointAnalysis" #-} - procPointAnalysis proc_points g - dumpWith dflags Opt_D_dump_cmm_procmap "procpoint map" - FormatCMM (ppr pp_map) - g <- {-# SCC "splitAtProcPoints" #-} runUniqSM $ - splitAtProcPoints dflags l call_pps proc_points pp_map - (CmmProc h l v g) - dumps Opt_D_dump_cmm_split "Post splitting" g - return g - else do - -- attach info tables to return points - return $ [attachContInfoTables call_pps (CmmProc h l v g)] - - ------------- Populate info tables with stack info ----------------- - g <- {-# SCC "setInfoTableStackMap" #-} - return $ map (setInfoTableStackMap dflags stackmaps) g - dumps Opt_D_dump_cmm_info "after setInfoTableStackMap" g - - ----------- Control-flow optimisations ----------------------------- - g <- {-# SCC "cmmCfgOpts(2)" #-} - return $ if optLevel dflags >= 1 - then map (cmmCfgOptsProc splitting_proc_points) g - else g - g <- return (map removeUnreachableBlocksProc g) - -- See Note [unreachable blocks] - dumps Opt_D_dump_cmm_cfg "Post control-flow optimisations" g - - return (cafEnv, g) - - where dflags = hsc_dflags hsc_env - platform = targetPlatform dflags - dump = dumpGraph dflags - - dumps flag name - = mapM_ (dumpWith dflags flag name FormatCMM . ppr) - - condPass flag pass g dumpflag dumpname = - if gopt flag dflags - then do - g <- return $ pass g - dump dumpflag dumpname g - return g - else return g - - -- we don't need to split proc points for the NCG, unless - -- tablesNextToCode is off. The latter is because we have no - -- label to put on info tables for basic blocks that are not - -- the entry point. - splitting_proc_points = hscTarget dflags /= HscAsm - || not (tablesNextToCode dflags) - || -- Note [inconsistent-pic-reg] - usingInconsistentPicReg - usingInconsistentPicReg - = case (platformArch platform, platformOS platform, positionIndependent dflags) - of (ArchX86, OSDarwin, pic) -> pic - _ -> False - --- Note [Sinking after stack layout] --- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --- --- In the past we considered running sinking pass also before stack --- layout, but after making some measurements we realized that: --- --- a) running sinking only before stack layout produces slower --- code than running sinking only before stack layout --- --- b) running sinking both before and after stack layout produces --- code that has the same performance as when running sinking --- only after stack layout. --- --- In other words sinking before stack layout doesn't buy as anything. --- --- An interesting question is "why is it better to run sinking after --- stack layout"? It seems that the major reason are stores and loads --- generated by stack layout. 
Consider this code before stack layout: --- --- c1E: --- _c1C::P64 = R3; --- _c1B::P64 = R2; --- _c1A::P64 = R1; --- I64[(young + 8)] = c1D; --- call stg_gc_noregs() returns to c1D, args: 8, res: 8, upd: 8; --- c1D: --- R3 = _c1C::P64; --- R2 = _c1B::P64; --- R1 = _c1A::P64; --- call (P64[(old + 8)])(R3, R2, R1) args: 8, res: 0, upd: 8; --- --- Stack layout pass will save all local variables live across a call --- (_c1C, _c1B and _c1A in this example) on the stack just before --- making a call and reload them from the stack after returning from a --- call: --- --- c1E: --- _c1C::P64 = R3; --- _c1B::P64 = R2; --- _c1A::P64 = R1; --- I64[Sp - 32] = c1D; --- P64[Sp - 24] = _c1A::P64; --- P64[Sp - 16] = _c1B::P64; --- P64[Sp - 8] = _c1C::P64; --- Sp = Sp - 32; --- call stg_gc_noregs() returns to c1D, args: 8, res: 8, upd: 8; --- c1D: --- _c1A::P64 = P64[Sp + 8]; --- _c1B::P64 = P64[Sp + 16]; --- _c1C::P64 = P64[Sp + 24]; --- R3 = _c1C::P64; --- R2 = _c1B::P64; --- R1 = _c1A::P64; --- Sp = Sp + 32; --- call (P64[Sp])(R3, R2, R1) args: 8, res: 0, upd: 8; --- --- If we don't run sinking pass after stack layout we are basically --- left with such code. However, running sinking on this code can lead --- to significant improvements: --- --- c1E: --- I64[Sp - 32] = c1D; --- P64[Sp - 24] = R1; --- P64[Sp - 16] = R2; --- P64[Sp - 8] = R3; --- Sp = Sp - 32; --- call stg_gc_noregs() returns to c1D, args: 8, res: 8, upd: 8; --- c1D: --- R3 = P64[Sp + 24]; --- R2 = P64[Sp + 16]; --- R1 = P64[Sp + 8]; --- Sp = Sp + 32; --- call (P64[Sp])(R3, R2, R1) args: 8, res: 0, upd: 8; --- --- Now we only have 9 assignments instead of 15. --- --- There is one case when running sinking before stack layout could --- be beneficial. Consider this: --- --- L1: --- x = y --- call f() returns L2 --- L2: ...x...y... --- --- Since both x and y are live across a call to f, they will be stored --- on the stack during stack layout and restored after the call: --- --- L1: --- x = y --- P64[Sp - 24] = L2 --- P64[Sp - 16] = x --- P64[Sp - 8] = y --- Sp = Sp - 24 --- call f() returns L2 --- L2: --- y = P64[Sp + 16] --- x = P64[Sp + 8] --- Sp = Sp + 24 --- ...x...y... --- --- However, if we run sinking before stack layout we would propagate x --- to its usage place (both x and y must be local register for this to --- be possible - global registers cannot be floated past a call): --- --- L1: --- x = y --- call f() returns L2 --- L2: ...y...y... --- --- Thus making x dead at the call to f(). If we ran stack layout now --- we would generate less stores and loads: --- --- L1: --- x = y --- P64[Sp - 16] = L2 --- P64[Sp - 8] = y --- Sp = Sp - 16 --- call f() returns L2 --- L2: --- y = P64[Sp + 8] --- Sp = Sp + 16 --- ...y...y... --- --- But since we don't see any benefits from running sinking before stack --- layout, this situation probably doesn't arise too often in practice. --- - -{- Note [inconsistent-pic-reg] - -On x86/Darwin, PIC is implemented by inserting a sequence like - - call 1f - 1: popl %reg - -at the proc entry point, and then referring to labels as offsets from -%reg. If we don't split proc points, then we could have many entry -points in a proc that would need this sequence, and each entry point -would then get a different value for %reg. If there are any join -points, then at the join point we don't have a consistent value for -%reg, so we don't know how to refer to labels. - -Hence, on x86/Darwin, we have to split proc points, and then each proc -point will get its own PIC initialisation sequence. 
- -This isn't an issue on x86/ELF, where the sequence is - - call 1f - 1: popl %reg - addl $_GLOBAL_OFFSET_TABLE_+(.-1b), %reg - -so %reg always has a consistent value: the address of -_GLOBAL_OFFSET_TABLE_, regardless of which entry point we arrived via. - --} - -{- Note [unreachable blocks] - -The control-flow optimiser sometimes leaves unreachable blocks behind -containing junk code. These aren't necessarily a problem, but -removing them is good because it might save time in the native code -generator later. - --} - -runUniqSM :: UniqSM a -> IO a -runUniqSM m = do - us <- mkSplitUniqSupply 'u' - return (initUs_ us m) - - -dumpGraph :: DynFlags -> DumpFlag -> String -> CmmGraph -> IO () -dumpGraph dflags flag name g = do - when (gopt Opt_DoCmmLinting dflags) $ do_lint g - dumpWith dflags flag name FormatCMM (ppr g) - where - do_lint g = case cmmLintGraph dflags g of - Just err -> do { fatalErrorMsg dflags err - ; ghcExit dflags 1 - } - Nothing -> return () - -dumpWith :: DynFlags -> DumpFlag -> String -> DumpFormat -> SDoc -> IO () -dumpWith dflags flag txt fmt sdoc = do - dumpIfSet_dyn dflags flag txt fmt sdoc - when (not (dopt flag dflags)) $ - -- If `-ddump-cmm-verbose -ddump-to-file` is specified, - -- dump each Cmm pipeline stage output to a separate file. #16930 - when (dopt Opt_D_dump_cmm_verbose dflags) - $ dumpAction dflags (mkDumpStyle dflags alwaysQualify) - (dumpOptionsFromFlag flag) txt fmt sdoc - dumpIfSet_dyn dflags Opt_D_dump_cmm_verbose_by_proc txt fmt sdoc diff --git a/compiler/cmm/CmmProcPoint.hs b/compiler/cmm/CmmProcPoint.hs deleted file mode 100644 index 746a175cfe..0000000000 --- a/compiler/cmm/CmmProcPoint.hs +++ /dev/null @@ -1,496 +0,0 @@ -{-# LANGUAGE GADTs, DisambiguateRecordFields, BangPatterns #-} - -module CmmProcPoint - ( ProcPointSet, Status(..) - , callProcPoints, minimalProcPointSet - , splitAtProcPoints, procPointAnalysis - , attachContInfoTables - ) -where - -import GhcPrelude hiding (last, unzip, succ, zip) - -import DynFlags -import BlockId -import CLabel -import Cmm -import PprCmm () -- For Outputable instances -import CmmUtils -import CmmInfo -import CmmLive -import CmmSwitch -import Data.List (sortBy) -import Maybes -import Control.Monad -import Outputable -import GHC.Platform -import UniqSupply -import Hoopl.Block -import Hoopl.Collections -import Hoopl.Dataflow -import Hoopl.Graph -import Hoopl.Label - --- Compute a minimal set of proc points for a control-flow graph. - --- Determine a protocol for each proc point (which live variables will --- be passed as arguments and which will be on the stack). - -{- -A proc point is a basic block that, after CPS transformation, will -start a new function. The entry block of the original function is a -proc point, as is the continuation of each function call. -A third kind of proc point arises if we want to avoid copying code. -Suppose we have code like the following: - - f() { - if (...) { ..1..; call foo(); ..2..} - else { ..3..; call bar(); ..4..} - x = y + z; - return x; - } - -The statement 'x = y + z' can be reached from two different proc -points: the continuations of foo() and bar(). We would prefer not to -put a copy in each continuation; instead we would like 'x = y + z' to -be the start of a new procedure to which the continuations can jump: - - f_cps () { - if (...) 
{ ..1..; push k_foo; jump foo_cps(); } - else { ..3..; push k_bar; jump bar_cps(); } - } - k_foo() { ..2..; jump k_join(y, z); } - k_bar() { ..4..; jump k_join(y, z); } - k_join(y, z) { x = y + z; return x; } - -You might think then that a criterion to make a node a proc point is -that it is directly reached by two distinct proc points. (Note -[Direct reachability].) But this criterion is a bit too simple; for -example, 'return x' is also reached by two proc points, yet there is -no point in pulling it out of k_join. A good criterion would be to -say that a node should be made a proc point if it is reached by a set -of proc points that is different than its immediate dominator. NR -believes this criterion can be shown to produce a minimum set of proc -points, and given a dominator tree, the proc points can be chosen in -time linear in the number of blocks. Lacking a dominator analysis, -however, we turn instead to an iterative solution, starting with no -proc points and adding them according to these rules: - - 1. The entry block is a proc point. - 2. The continuation of a call is a proc point. - 3. A node is a proc point if it is directly reached by more proc - points than one of its predecessors. - -Because we don't understand the problem very well, we apply rule 3 at -most once per iteration, then recompute the reachability information. -(See Note [No simple dataflow].) The choice of the new proc point is -arbitrary, and I don't know if the choice affects the final solution, -so I don't know if the number of proc points chosen is the -minimum---but the set will be minimal. - - - -Note [Proc-point analysis] -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Given a specified set of proc-points (a set of block-ids), "proc-point -analysis" figures out, for every block, which proc-point it belongs to. -All the blocks belonging to proc-point P will constitute a single -top-level C procedure. - -A non-proc-point block B "belongs to" a proc-point P iff B is -reachable from P without going through another proc-point. - -Invariant: a block B should belong to at most one proc-point; if it -belongs to two, that's a bug. - -Note [Non-existing proc-points] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -On some architectures it might happen that the list of proc-points -computed before stack layout pass will be invalidated by the stack -layout. This will happen if stack layout removes from the graph -blocks that were determined to be proc-points. Later on in the pipeline -we use list of proc-points to perform [Proc-point analysis], but -if a proc-point does not exist anymore then we will get compiler panic. -See #8205. 
--} - -type ProcPointSet = LabelSet - -data Status - = ReachedBy ProcPointSet -- set of proc points that directly reach the block - | ProcPoint -- this block is itself a proc point - -instance Outputable Status where - ppr (ReachedBy ps) - | setNull ps = text "" - | otherwise = text "reached by" <+> - (hsep $ punctuate comma $ map ppr $ setElems ps) - ppr ProcPoint = text "" - --------------------------------------------------- --- Proc point analysis - --- Once you know what the proc-points are, figure out --- what proc-points each block is reachable from --- See Note [Proc-point analysis] -procPointAnalysis :: ProcPointSet -> CmmGraph -> LabelMap Status -procPointAnalysis procPoints cmmGraph@(CmmGraph {g_graph = graph}) = - analyzeCmmFwd procPointLattice procPointTransfer cmmGraph initProcPoints - where - initProcPoints = - mkFactBase - procPointLattice - [ (id, ProcPoint) - | id <- setElems procPoints - -- See Note [Non-existing proc-points] - , id `setMember` labelsInGraph - ] - labelsInGraph = labelsDefined graph - -procPointTransfer :: TransferFun Status -procPointTransfer block facts = - let label = entryLabel block - !fact = case getFact procPointLattice label facts of - ProcPoint -> ReachedBy $! setSingleton label - f -> f - result = map (\id -> (id, fact)) (successors block) - in mkFactBase procPointLattice result - -procPointLattice :: DataflowLattice Status -procPointLattice = DataflowLattice unreached add_to - where - unreached = ReachedBy setEmpty - add_to (OldFact ProcPoint) _ = NotChanged ProcPoint - add_to _ (NewFact ProcPoint) = Changed ProcPoint -- because of previous case - add_to (OldFact (ReachedBy p)) (NewFact (ReachedBy p')) - | setSize union > setSize p = Changed (ReachedBy union) - | otherwise = NotChanged (ReachedBy p) - where - union = setUnion p' p - ----------------------------------------------------------------------- - --- It is worth distinguishing two sets of proc points: those that are --- induced by calls in the original graph and those that are --- introduced because they're reachable from multiple proc points. --- --- Extract the set of Continuation BlockIds, see Note [Continuation BlockIds]. -callProcPoints :: CmmGraph -> ProcPointSet -callProcPoints g = foldlGraphBlocks add (setSingleton (g_entry g)) g - where add :: LabelSet -> CmmBlock -> LabelSet - add set b = case lastNode b of - CmmCall {cml_cont = Just k} -> setInsert k set - CmmForeignCall {succ=k} -> setInsert k set - _ -> set - -minimalProcPointSet :: Platform -> ProcPointSet -> CmmGraph - -> UniqSM ProcPointSet --- Given the set of successors of calls (which must be proc-points) --- figure out the minimal set of necessary proc-points -minimalProcPointSet platform callProcPoints g - = extendPPSet platform g (revPostorder g) callProcPoints - -extendPPSet - :: Platform -> CmmGraph -> [CmmBlock] -> ProcPointSet -> UniqSM ProcPointSet -extendPPSet platform g blocks procPoints = - let env = procPointAnalysis procPoints g - add pps block = let id = entryLabel block - in case mapLookup id env of - Just ProcPoint -> setInsert id pps - _ -> pps - procPoints' = foldlGraphBlocks add setEmpty g - newPoints = mapMaybe ppSuccessor blocks - newPoint = listToMaybe newPoints - ppSuccessor b = - let nreached id = case mapLookup id env `orElse` - pprPanic "no ppt" (ppr id <+> ppr b) of - ProcPoint -> 1 - ReachedBy ps -> setSize ps - block_procpoints = nreached (entryLabel b) - -- | Looking for a successor of b that is reached by - -- more proc points than b and is not already a proc - -- point. 
If found, it can become a proc point. - newId succ_id = not (setMember succ_id procPoints') && - nreached succ_id > block_procpoints - in listToMaybe $ filter newId $ successors b - - in case newPoint of - Just id -> - if setMember id procPoints' - then panic "added old proc pt" - else extendPPSet platform g blocks (setInsert id procPoints') - Nothing -> return procPoints' - - --- At this point, we have found a set of procpoints, each of which should be --- the entry point of a procedure. --- Now, we create the procedure for each proc point, --- which requires that we: --- 1. build a map from proc points to the blocks reachable from the proc point --- 2. turn each branch to a proc point into a jump --- 3. turn calls and returns into jumps --- 4. build info tables for the procedures -- and update the info table for --- the SRTs in the entry procedure as well. --- Input invariant: A block should only be reachable from a single ProcPoint. --- ToDo: use the _ret naming convention that the old code generator --- used. -- EZY -splitAtProcPoints :: DynFlags -> CLabel -> ProcPointSet-> ProcPointSet -> LabelMap Status -> - CmmDecl -> UniqSM [CmmDecl] -splitAtProcPoints dflags entry_label callPPs procPoints procMap - (CmmProc (TopInfo {info_tbls = info_tbls}) - top_l _ g@(CmmGraph {g_entry=entry})) = - do -- Build a map from procpoints to the blocks they reach - let add_block - :: LabelMap (LabelMap CmmBlock) - -> CmmBlock - -> LabelMap (LabelMap CmmBlock) - add_block graphEnv b = - case mapLookup bid procMap of - Just ProcPoint -> add graphEnv bid bid b - Just (ReachedBy set) -> - case setElems set of - [] -> graphEnv - [id] -> add graphEnv id bid b - _ -> panic "Each block should be reachable from only one ProcPoint" - Nothing -> graphEnv - where bid = entryLabel b - add graphEnv procId bid b = mapInsert procId graph' graphEnv - where graph = mapLookup procId graphEnv `orElse` mapEmpty - graph' = mapInsert bid b graph - - let liveness = cmmGlobalLiveness dflags g - let ppLiveness pp = filter isArgReg $ - regSetToList $ - expectJust "ppLiveness" $ mapLookup pp liveness - - graphEnv <- return $ foldlGraphBlocks add_block mapEmpty g - - -- Build a map from proc point BlockId to pairs of: - -- * Labels for their new procedures - -- * Labels for the info tables of their new procedures (only if - -- the proc point is a callPP) - -- Due to common blockification, we may overestimate the set of procpoints. 
- let add_label map pp = mapInsert pp lbls map - where lbls | pp == entry = (entry_label, fmap cit_lbl (mapLookup entry info_tbls)) - | otherwise = (block_lbl, guard (setMember pp callPPs) >> - Just info_table_lbl) - where block_lbl = blockLbl pp - info_table_lbl = infoTblLbl pp - - procLabels :: LabelMap (CLabel, Maybe CLabel) - procLabels = foldl' add_label mapEmpty - (filter (flip mapMember (toBlockMap g)) (setElems procPoints)) - - -- In each new graph, add blocks jumping off to the new procedures, - -- and replace branches to procpoints with branches to the jump-off blocks - let add_jump_block - :: (LabelMap Label, [CmmBlock]) - -> (Label, CLabel) - -> UniqSM (LabelMap Label, [CmmBlock]) - add_jump_block (env, bs) (pp, l) = - do bid <- liftM mkBlockId getUniqueM - let b = blockJoin (CmmEntry bid GlobalScope) emptyBlock jump - live = ppLiveness pp - jump = CmmCall (CmmLit (CmmLabel l)) Nothing live 0 0 0 - return (mapInsert pp bid env, b : bs) - - add_jumps - :: LabelMap CmmGraph - -> (Label, LabelMap CmmBlock) - -> UniqSM (LabelMap CmmGraph) - add_jumps newGraphEnv (ppId, blockEnv) = - do let needed_jumps = -- find which procpoints we currently branch to - mapFoldr add_if_branch_to_pp [] blockEnv - add_if_branch_to_pp :: CmmBlock -> [(BlockId, CLabel)] -> [(BlockId, CLabel)] - add_if_branch_to_pp block rst = - case lastNode block of - CmmBranch id -> add_if_pp id rst - CmmCondBranch _ ti fi _ -> add_if_pp ti (add_if_pp fi rst) - CmmSwitch _ ids -> foldr add_if_pp rst $ switchTargetsToList ids - _ -> rst - - -- when jumping to a PP that has an info table, if - -- tablesNextToCode is off we must jump to the entry - -- label instead. - jump_label (Just info_lbl) _ - | tablesNextToCode dflags = info_lbl - | otherwise = toEntryLbl info_lbl - jump_label Nothing block_lbl = block_lbl - - add_if_pp id rst = case mapLookup id procLabels of - Just (lbl, mb_info_lbl) -> (id, jump_label mb_info_lbl lbl) : rst - Nothing -> rst - (jumpEnv, jumpBlocks) <- - foldM add_jump_block (mapEmpty, []) needed_jumps - -- update the entry block - let b = expectJust "block in env" $ mapLookup ppId blockEnv - blockEnv' = mapInsert ppId b blockEnv - -- replace branches to procpoints with branches to jumps - blockEnv'' = toBlockMap $ replaceBranches jumpEnv $ ofBlockMap ppId blockEnv' - -- add the jump blocks to the graph - blockEnv''' = foldl' (flip addBlock) blockEnv'' jumpBlocks - let g' = ofBlockMap ppId blockEnv''' - -- pprTrace "g' pre jumps" (ppr g') $ do - return (mapInsert ppId g' newGraphEnv) - - graphEnv <- foldM add_jumps mapEmpty $ mapToList graphEnv - - let to_proc (bid, g) - | bid == entry - = CmmProc (TopInfo {info_tbls = info_tbls, - stack_info = stack_info}) - top_l live g' - | otherwise - = case expectJust "pp label" $ mapLookup bid procLabels of - (lbl, Just info_lbl) - -> CmmProc (TopInfo { info_tbls = mapSingleton (g_entry g) (mkEmptyContInfoTable info_lbl) - , stack_info=stack_info}) - lbl live g' - (lbl, Nothing) - -> CmmProc (TopInfo {info_tbls = mapEmpty, stack_info=stack_info}) - lbl live g' - where - g' = replacePPIds g - live = ppLiveness (g_entry g') - stack_info = StackInfo { arg_space = 0 - , updfr_space = Nothing - , do_layout = True } - -- cannot use panic, this is printed by -ddump-cmm - - -- References to procpoint IDs can now be replaced with the - -- infotable's label - replacePPIds g = {-# SCC "replacePPIds" #-} - mapGraphNodes (id, mapExp repl, mapExp repl) g - where repl e@(CmmLit (CmmBlock bid)) = - case mapLookup bid procLabels of - Just (_, Just info_lbl) -> CmmLit 
(CmmLabel info_lbl) - _ -> e - repl e = e - - -- The C back end expects to see return continuations before the - -- call sites. Here, we sort them in reverse order -- it gets - -- reversed later. - let (_, block_order) = - foldl' add_block_num (0::Int, mapEmpty :: LabelMap Int) - (revPostorder g) - add_block_num (i, map) block = - (i + 1, mapInsert (entryLabel block) i map) - sort_fn (bid, _) (bid', _) = - compare (expectJust "block_order" $ mapLookup bid block_order) - (expectJust "block_order" $ mapLookup bid' block_order) - procs <- return $ map to_proc $ sortBy sort_fn $ mapToList graphEnv - return -- pprTrace "procLabels" (ppr procLabels) - -- pprTrace "splitting graphs" (ppr procs) - procs -splitAtProcPoints _ _ _ _ _ t@(CmmData _ _) = return [t] - --- Only called from CmmProcPoint.splitAtProcPoints. NB. does a --- recursive lookup, see comment below. -replaceBranches :: LabelMap BlockId -> CmmGraph -> CmmGraph -replaceBranches env cmmg - = {-# SCC "replaceBranches" #-} - ofBlockMap (g_entry cmmg) $ mapMap f $ toBlockMap cmmg - where - f block = replaceLastNode block $ last (lastNode block) - - last :: CmmNode O C -> CmmNode O C - last (CmmBranch id) = CmmBranch (lookup id) - last (CmmCondBranch e ti fi l) = CmmCondBranch e (lookup ti) (lookup fi) l - last (CmmSwitch e ids) = CmmSwitch e (mapSwitchTargets lookup ids) - last l@(CmmCall {}) = l { cml_cont = Nothing } - -- NB. remove the continuation of a CmmCall, since this - -- label will now be in a different CmmProc. Not only - -- is this tidier, it stops CmmLint from complaining. - last l@(CmmForeignCall {}) = l - lookup id = fmap lookup (mapLookup id env) `orElse` id - -- XXX: this is a recursive lookup, it follows chains - -- until the lookup returns Nothing, at which point we - -- return the last BlockId - --- -------------------------------------------------------------- --- Not splitting proc points: add info tables for continuations - -attachContInfoTables :: ProcPointSet -> CmmDecl -> CmmDecl -attachContInfoTables call_proc_points (CmmProc top_info top_l live g) - = CmmProc top_info{info_tbls = info_tbls'} top_l live g - where - info_tbls' = mapUnion (info_tbls top_info) $ - mapFromList [ (l, mkEmptyContInfoTable (infoTblLbl l)) - | l <- setElems call_proc_points - , l /= g_entry g ] -attachContInfoTables _ other_decl - = other_decl - ----------------------------------------------------------------- - -{- -Note [Direct reachability] - -Block B is directly reachable from proc point P iff control can flow -from P to B without passing through an intervening proc point. --} - ----------------------------------------------------------------- - -{- -Note [No simple dataflow] - -Sadly, it seems impossible to compute the proc points using a single -dataflow pass. One might attempt to use this simple lattice: - - data Location = Unknown - | InProc BlockId -- node is in procedure headed by the named proc point - | ProcPoint -- node is itself a proc point - -At a join, a node in two different blocks becomes a proc point. -The difficulty is that the change of information during iterative -computation may promote a node prematurely. Here's a program that -illustrates the difficulty: - - f () { - entry: - .... - L1: - if (...) { ... } - else { ... } - - L2: if (...) { g(); goto L1; } - return x + y; - } - -The only proc-point needed (besides the entry) is L1. But in an -iterative analysis, consider what happens to L2. 
On the first pass -through, it rises from Unknown to 'InProc entry', but when L1 is -promoted to a proc point (because it's the successor of g()), L1's -successors will be promoted to 'InProc L1'. The problem hits when the -new fact 'InProc L1' flows into L2 which is already bound to 'InProc entry'. -The join operation makes it a proc point when in fact it needn't be, -because its immediate dominator L1 is already a proc point and there -are no other proc points that directly reach L2. --} - - - -{- Note [Separate Adams optimization] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -It may be worthwhile to attempt the Adams optimization by rewriting -the graph before the assignment of proc-point protocols. Here are a -couple of rules: - - g() returns to k; g() returns to L; - k: CopyIn c ress; goto L: - ... ==> ... - L: // no CopyIn node here L: CopyIn c ress; - - -And when c == c' and ress == ress', this also: - - g() returns to k; g() returns to L; - k: CopyIn c ress; goto L: - ... ==> ... - L: CopyIn c' ress' L: CopyIn c' ress' ; - -In both cases the goal is to eliminate k. --} diff --git a/compiler/cmm/CmmSink.hs b/compiler/cmm/CmmSink.hs deleted file mode 100644 index 7d945b0396..0000000000 --- a/compiler/cmm/CmmSink.hs +++ /dev/null @@ -1,854 +0,0 @@ -{-# LANGUAGE GADTs #-} -module CmmSink ( - cmmSink - ) where - -import GhcPrelude - -import Cmm -import CmmOpt -import CmmLive -import CmmUtils -import Hoopl.Block -import Hoopl.Label -import Hoopl.Collections -import Hoopl.Graph -import GHC.Platform.Regs -import GHC.Platform (isARM, platformArch) - -import DynFlags -import Unique -import UniqFM - -import qualified Data.IntSet as IntSet -import Data.List (partition) -import qualified Data.Set as Set -import Data.Maybe - --- Compact sets for membership tests of local variables. - -type LRegSet = IntSet.IntSet - -emptyLRegSet :: LRegSet -emptyLRegSet = IntSet.empty - -nullLRegSet :: LRegSet -> Bool -nullLRegSet = IntSet.null - -insertLRegSet :: LocalReg -> LRegSet -> LRegSet -insertLRegSet l = IntSet.insert (getKey (getUnique l)) - -elemLRegSet :: LocalReg -> LRegSet -> Bool -elemLRegSet l = IntSet.member (getKey (getUnique l)) - --- ----------------------------------------------------------------------------- --- Sinking and inlining - --- This is an optimisation pass that --- (a) moves assignments closer to their uses, to reduce register pressure --- (b) pushes assignments into a single branch of a conditional if possible --- (c) inlines assignments to registers that are mentioned only once --- (d) discards dead assignments --- --- This tightens up lots of register-heavy code. It is particularly --- helpful in the Cmm generated by the Stg->Cmm code generator, in --- which every function starts with a copyIn sequence like: --- --- x1 = R1 --- x2 = Sp[8] --- x3 = Sp[16] --- if (Sp - 32 < SpLim) then L1 else L2 --- --- we really want to push the x1..x3 assignments into the L2 branch. --- --- Algorithm: --- --- * Start by doing liveness analysis. --- --- * Keep a list of assignments A; earlier ones may refer to later ones. --- Currently we only sink assignments to local registers, because we don't --- have liveness information about global registers. --- --- * Walk forwards through the graph, look at each node N: --- --- * If it is a dead assignment, i.e. assignment to a register that is --- not used after N, discard it. 
--- --- * Try to inline based on current list of assignments --- * If any assignments in A (1) occur only once in N, and (2) are --- not live after N, inline the assignment and remove it --- from A. --- --- * If an assignment in A is cheap (RHS is local register), then --- inline the assignment and keep it in A in case it is used afterwards. --- --- * Otherwise don't inline. --- --- * If N is assignment to a local register pick up the assignment --- and add it to A. --- --- * If N is not an assignment to a local register: --- * remove any assignments from A that conflict with N, and --- place them before N in the current block. We call this --- "dropping" the assignments. --- --- * An assignment conflicts with N if it: --- - assigns to a register mentioned in N --- - mentions a register assigned by N --- - reads from memory written by N --- * do this recursively, dropping dependent assignments --- --- * At an exit node: --- * drop any assignments that are live on more than one successor --- and are not trivial --- * if any successor has more than one predecessor (a join-point), --- drop everything live in that successor. Since we only propagate --- assignments that are not dead at the successor, we will therefore --- eliminate all assignments dead at this point. Thus analysis of a --- join-point will always begin with an empty list of assignments. --- --- --- As a result of above algorithm, sinking deletes some dead assignments --- (transitively, even). This isn't as good as removeDeadAssignments, --- but it's much cheaper. - --- ----------------------------------------------------------------------------- --- things that we aren't optimising very well yet. --- --- ----------- --- (1) From GHC's FastString.hashStr: --- --- s2ay: --- if ((_s2an::I64 == _s2ao::I64) >= 1) goto c2gn; else goto c2gp; --- c2gn: --- R1 = _s2au::I64; --- call (I64[Sp])(R1) args: 8, res: 0, upd: 8; --- c2gp: --- _s2cO::I64 = %MO_S_Rem_W64(%MO_UU_Conv_W8_W64(I8[_s2aq::I64 + (_s2an::I64 << 0)]) + _s2au::I64 * 128, --- 4091); --- _s2an::I64 = _s2an::I64 + 1; --- _s2au::I64 = _s2cO::I64; --- goto s2ay; --- --- a nice loop, but we didn't eliminate the silly assignment at the end. --- See Note [dependent assignments], which would probably fix this. --- This is #8336. --- --- ----------- --- (2) From stg_atomically_frame in PrimOps.cmm --- --- We have a diamond control flow: --- --- x = ... --- | --- / \ --- A B --- \ / --- | --- use of x --- --- Now x won't be sunk down to its use, because we won't push it into --- both branches of the conditional. We certainly do have to check --- that we can sink it past all the code in both A and B, but having --- discovered that, we could sink it to its use. --- - --- ----------------------------------------------------------------------------- - -type Assignment = (LocalReg, CmmExpr, AbsMem) - -- Assignment caches AbsMem, an abstraction of the memory read by - -- the RHS of the assignment. 
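-- To make rule (c) above ("inline assignments to registers that are
-- mentioned only once") concrete, here is a hedged, self-contained toy
-- model of that one step.  The types Expr and Stmt and the functions
-- below are made up for illustration; the real pass works on
-- CmmNode/CmmExpr and must additionally respect liveness and the
-- 'conflicts' test:
--
--   data Expr = Var String | Lit Int | Add Expr Expr
--   data Stmt = Assign String Expr | Store Expr
--
--   uses :: String -> Expr -> Int
--   uses x (Var y)   = if x == y then 1 else 0
--   uses _ (Lit _)   = 0
--   uses x (Add a b) = uses x a + uses x b
--
--   subst :: String -> Expr -> Expr -> Expr
--   subst x e (Var y) | x == y = e
--   subst x e (Add a b)        = Add (subst x e a) (subst x e b)
--   subst _ _ other            = other
--
--   -- Inline 'x = e' when x occurs exactly once in the rest of the block.
--   inlineOnce :: String -> Expr -> [Stmt] -> Maybe [Stmt]
--   inlineOnce x e rest
--     | sum (map stmtUses rest) == 1 = Just (map inline rest)
--     | otherwise                    = Nothing
--     where
--       stmtUses (Assign _ rhs) = uses x rhs
--       stmtUses (Store rhs)    = uses x rhs
--       inline (Assign y rhs)   = Assign y (subst x e rhs)
--       inline (Store rhs)      = Store (subst x e rhs)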
- -type Assignments = [Assignment] - -- A sequence of assignments; kept in *reverse* order - -- So the list [ x=e1, y=e2 ] means the sequence of assignments - -- y = e2 - -- x = e1 - -cmmSink :: DynFlags -> CmmGraph -> CmmGraph -cmmSink dflags graph = ofBlockList (g_entry graph) $ sink mapEmpty $ blocks - where - liveness = cmmLocalLiveness dflags graph - getLive l = mapFindWithDefault Set.empty l liveness - - blocks = revPostorder graph - - join_pts = findJoinPoints blocks - - sink :: LabelMap Assignments -> [CmmBlock] -> [CmmBlock] - sink _ [] = [] - sink sunk (b:bs) = - -- pprTrace "sink" (ppr lbl) $ - blockJoin first final_middle final_last : sink sunk' bs - where - lbl = entryLabel b - (first, middle, last) = blockSplit b - - succs = successors last - - -- Annotate the middle nodes with the registers live *after* - -- the node. This will help us decide whether we can inline - -- an assignment in the current node or not. - live = Set.unions (map getLive succs) - live_middle = gen_kill dflags last live - ann_middles = annotate dflags live_middle (blockToList middle) - - -- Now sink and inline in this block - (middle', assigs) = walk dflags ann_middles (mapFindWithDefault [] lbl sunk) - fold_last = constantFoldNode dflags last - (final_last, assigs') = tryToInline dflags live fold_last assigs - - -- We cannot sink into join points (successors with more than - -- one predecessor), so identify the join points and the set - -- of registers live in them. - (joins, nonjoins) = partition (`mapMember` join_pts) succs - live_in_joins = Set.unions (map getLive joins) - - -- We do not want to sink an assignment into multiple branches, - -- so identify the set of registers live in multiple successors. - -- This is made more complicated because when we sink an assignment - -- into one branch, this might change the set of registers that are - -- now live in multiple branches. - init_live_sets = map getLive nonjoins - live_in_multi live_sets r = - case filter (Set.member r) live_sets of - (_one:_two:_) -> True - _ -> False - - -- Now, drop any assignments that we will not sink any further. - (dropped_last, assigs'') = dropAssignments dflags drop_if init_live_sets assigs' - - drop_if a@(r,rhs,_) live_sets = (should_drop, live_sets') - where - should_drop = conflicts dflags a final_last - || not (isTrivial dflags rhs) && live_in_multi live_sets r - || r `Set.member` live_in_joins - - live_sets' | should_drop = live_sets - | otherwise = map upd live_sets - - upd set | r `Set.member` set = set `Set.union` live_rhs - | otherwise = set - - live_rhs = foldRegsUsed dflags extendRegSet emptyRegSet rhs - - final_middle = foldl' blockSnoc middle' dropped_last - - sunk' = mapUnion sunk $ - mapFromList [ (l, filterAssignments dflags (getLive l) assigs'') - | l <- succs ] - -{- TODO: enable this later, when we have some good tests in place to - measure the effect and tune it. - --- small: an expression we don't mind duplicating -isSmall :: CmmExpr -> Bool -isSmall (CmmReg (CmmLocal _)) = True -- -isSmall (CmmLit _) = True -isSmall (CmmMachOp (MO_Add _) [x,y]) = isTrivial x && isTrivial y -isSmall (CmmRegOff (CmmLocal _) _) = True -isSmall _ = False --} - --- --- We allow duplication of trivial expressions: registers (both local and --- global) and literals. --- -isTrivial :: DynFlags -> CmmExpr -> Bool -isTrivial _ (CmmReg (CmmLocal _)) = True -isTrivial dflags (CmmReg (CmmGlobal r)) = -- see Note [Inline GlobalRegs?] 
- if isARM (platformArch (targetPlatform dflags)) - then True -- CodeGen.Platform.ARM does not have globalRegMaybe - else isJust (globalRegMaybe (targetPlatform dflags) r) - -- GlobalRegs that are loads from BaseReg are not trivial -isTrivial _ (CmmLit _) = True -isTrivial _ _ = False - --- --- annotate each node with the set of registers live *after* the node --- -annotate :: DynFlags -> LocalRegSet -> [CmmNode O O] -> [(LocalRegSet, CmmNode O O)] -annotate dflags live nodes = snd $ foldr ann (live,[]) nodes - where ann n (live,nodes) = (gen_kill dflags n live, (live,n) : nodes) - --- --- Find the blocks that have multiple successors (join points) --- -findJoinPoints :: [CmmBlock] -> LabelMap Int -findJoinPoints blocks = mapFilter (>1) succ_counts - where - all_succs = concatMap successors blocks - - succ_counts :: LabelMap Int - succ_counts = foldr (\l -> mapInsertWith (+) l 1) mapEmpty all_succs - --- --- filter the list of assignments to remove any assignments that --- are not live in a continuation. --- -filterAssignments :: DynFlags -> LocalRegSet -> Assignments -> Assignments -filterAssignments dflags live assigs = reverse (go assigs []) - where go [] kept = kept - go (a@(r,_,_):as) kept | needed = go as (a:kept) - | otherwise = go as kept - where - needed = r `Set.member` live - || any (conflicts dflags a) (map toNode kept) - -- Note that we must keep assignments that are - -- referred to by other assignments we have - -- already kept. - --- ----------------------------------------------------------------------------- --- Walk through the nodes of a block, sinking and inlining assignments --- as we go. --- --- On input we pass in a: --- * list of nodes in the block --- * a list of assignments that appeared *before* this block and --- that are being sunk. --- --- On output we get: --- * a new block --- * a list of assignments that will be placed *after* that block. --- - -walk :: DynFlags - -> [(LocalRegSet, CmmNode O O)] -- nodes of the block, annotated with - -- the set of registers live *after* - -- this node. - - -> Assignments -- The current list of - -- assignments we are sinking. - -- Earlier assignments may refer - -- to later ones. - - -> ( Block CmmNode O O -- The new block - , Assignments -- Assignments to sink further - ) - -walk dflags nodes assigs = go nodes emptyBlock assigs - where - go [] block as = (block, as) - go ((live,node):ns) block as - | shouldDiscard node live = go ns block as - -- discard dead assignment - | Just a <- shouldSink dflags node2 = go ns block (a : as1) - | otherwise = go ns block' as' - where - node1 = constantFoldNode dflags node - - (node2, as1) = tryToInline dflags live node1 as - - (dropped, as') = dropAssignmentsSimple dflags - (\a -> conflicts dflags a node2) as1 - - block' = foldl' blockSnoc block dropped `blockSnoc` node2 - - --- --- Heuristic to decide whether to pick up and sink an assignment --- Currently we pick up all assignments to local registers. It might --- be profitable to sink assignments to global regs too, but the --- liveness analysis doesn't track those (yet) so we can't. --- -shouldSink :: DynFlags -> CmmNode e x -> Maybe Assignment -shouldSink dflags (CmmAssign (CmmLocal r) e) | no_local_regs = Just (r, e, exprMem dflags e) - where no_local_regs = True -- foldRegsUsed (\_ _ -> False) True e -shouldSink _ _other = Nothing - --- --- discard dead assignments. 
This doesn't do as good a job as --- removeDeadAssignments, because it would need multiple passes --- to get all the dead code, but it catches the common case of --- superfluous reloads from the stack that the stack allocator --- leaves behind. --- --- Also we catch "r = r" here. You might think it would fall --- out of inlining, but the inliner will see that r is live --- after the instruction and choose not to inline r in the rhs. --- -shouldDiscard :: CmmNode e x -> LocalRegSet -> Bool -shouldDiscard node live - = case node of - CmmAssign r (CmmReg r') | r == r' -> True - CmmAssign (CmmLocal r) _ -> not (r `Set.member` live) - _otherwise -> False - - -toNode :: Assignment -> CmmNode O O -toNode (r,rhs,_) = CmmAssign (CmmLocal r) rhs - -dropAssignmentsSimple :: DynFlags -> (Assignment -> Bool) -> Assignments - -> ([CmmNode O O], Assignments) -dropAssignmentsSimple dflags f = dropAssignments dflags (\a _ -> (f a, ())) () - -dropAssignments :: DynFlags -> (Assignment -> s -> (Bool, s)) -> s -> Assignments - -> ([CmmNode O O], Assignments) -dropAssignments dflags should_drop state assigs - = (dropped, reverse kept) - where - (dropped,kept) = go state assigs [] [] - - go _ [] dropped kept = (dropped, kept) - go state (assig : rest) dropped kept - | conflict = go state' rest (toNode assig : dropped) kept - | otherwise = go state' rest dropped (assig:kept) - where - (dropit, state') = should_drop assig state - conflict = dropit || any (conflicts dflags assig) dropped - - --- ----------------------------------------------------------------------------- --- Try to inline assignments into a node. --- This also does constant folding for primpops, since --- inlining opens up opportunities for doing so. - -tryToInline - :: DynFlags - -> LocalRegSet -- set of registers live after this - -- node. We cannot inline anything - -- that is live after the node, unless - -- it is small enough to duplicate. - -> CmmNode O x -- The node to inline into - -> Assignments -- Assignments to inline - -> ( - CmmNode O x -- New node - , Assignments -- Remaining assignments - ) - -tryToInline dflags live node assigs = go usages node emptyLRegSet assigs - where - usages :: UniqFM Int -- Maps each LocalReg to a count of how often it is used - usages = foldLocalRegsUsed dflags addUsage emptyUFM node - - go _usages node _skipped [] = (node, []) - - go usages node skipped (a@(l,rhs,_) : rest) - | cannot_inline = dont_inline - | occurs_none = discard -- Note [discard during inlining] - | occurs_once = inline_and_discard - | isTrivial dflags rhs = inline_and_keep - | otherwise = dont_inline - where - inline_and_discard = go usages' inl_node skipped rest - where usages' = foldLocalRegsUsed dflags addUsage usages rhs - - discard = go usages node skipped rest - - dont_inline = keep node -- don't inline the assignment, keep it - inline_and_keep = keep inl_node -- inline the assignment, keep it - - keep node' = (final_node, a : rest') - where (final_node, rest') = go usages' node' (insertLRegSet l skipped) rest - usages' = foldLocalRegsUsed dflags (\m r -> addToUFM m r 2) - usages rhs - -- we must not inline anything that is mentioned in the RHS - -- of a binding that we have already skipped, so we set the - -- usages of the regs on the RHS to 2. 
- - cannot_inline = skipped `regsUsedIn` rhs -- Note [dependent assignments] - || l `elemLRegSet` skipped - || not (okToInline dflags rhs node) - - l_usages = lookupUFM usages l - l_live = l `elemRegSet` live - - occurs_once = not l_live && l_usages == Just 1 - occurs_none = not l_live && l_usages == Nothing - - inl_node = improveConditional (mapExpDeep inl_exp node) - - inl_exp :: CmmExpr -> CmmExpr - -- inl_exp is where the inlining actually takes place! - inl_exp (CmmReg (CmmLocal l')) | l == l' = rhs - inl_exp (CmmRegOff (CmmLocal l') off) | l == l' - = cmmOffset dflags rhs off - -- re-constant fold after inlining - inl_exp (CmmMachOp op args) = cmmMachOpFold dflags op args - inl_exp other = other - - -{- Note [improveConditional] - -cmmMachOpFold tries to simplify conditionals to turn things like - (a == b) != 1 -into - (a != b) -but there's one case it can't handle: when the comparison is over -floating-point values, we can't invert it, because floating-point -comparisons aren't invertible (because of NaNs). - -But we *can* optimise this conditional by swapping the true and false -branches. Given - CmmCondBranch ((a >## b) != 1) t f -we can turn it into - CmmCondBranch (a >## b) f t - -So here we catch conditionals that weren't optimised by cmmMachOpFold, -and apply above transformation to eliminate the comparison against 1. - -It's tempting to just turn every != into == and then let cmmMachOpFold -do its thing, but that risks changing a nice fall-through conditional -into one that requires two jumps. (see swapcond_last in -CmmContFlowOpt), so instead we carefully look for just the cases where -we can eliminate a comparison. --} -improveConditional :: CmmNode O x -> CmmNode O x -improveConditional - (CmmCondBranch (CmmMachOp mop [x, CmmLit (CmmInt 1 _)]) t f l) - | neLike mop, isComparisonExpr x - = CmmCondBranch x f t (fmap not l) - where - neLike (MO_Ne _) = True - neLike (MO_U_Lt _) = True -- (x<y) < 1 behaves like (x<y) != 1 - neLike (MO_S_Lt _) = True -- (x<y) < 1 behaves like (x<y) != 1 - neLike _ = False -improveConditional other = other - -addUsage :: UniqFM Int -> LocalReg -> UniqFM Int -addUsage m r = addToUFM_C (+) m r 1 - -regsUsedIn :: LRegSet -> CmmExpr -> Bool -regsUsedIn ls _ | nullLRegSet ls = False -regsUsedIn ls e = wrapRecExpf f e False - where f (CmmReg (CmmLocal l)) _ | l `elemLRegSet` ls = True - f (CmmRegOff (CmmLocal l) _) _ | l `elemLRegSet` ls = True - f _ z = z - --- we don't inline into CmmUnsafeForeignCall if the expression refers --- to global registers. This is a HACK to avoid global registers --- clashing with C argument-passing registers, really the back-end --- ought to be able to handle it properly, but currently neither PprC --- nor the NCG can do it. See Note [Register parameter passing] --- See also GHC.StgToCmm.Foreign.load_args_into_temps. -okToInline :: DynFlags -> CmmExpr -> CmmNode e x -> Bool -okToInline dflags expr node@(CmmUnsafeForeignCall{}) = - not (globalRegistersConflict dflags expr node) -okToInline _ _ _ = True - --- ----------------------------------------------------------------------------- - --- | @conflicts (r,e) node@ is @False@ if and only if the assignment --- @r = e@ can be safely commuted past statement @node@. -conflicts :: DynFlags -> Assignment -> CmmNode O x -> Bool -conflicts dflags (r, rhs, addr) node - - -- (1) node defines registers used by rhs of assignment. This catches - -- assignments and all three kinds of calls. 
See Note [Sinking and calls] - | globalRegistersConflict dflags rhs node = True - | localRegistersConflict dflags rhs node = True - - -- (2) node uses register defined by assignment - | foldRegsUsed dflags (\b r' -> r == r' || b) False node = True - - -- (3) a store to an address conflicts with a read of the same memory - | CmmStore addr' e <- node - , memConflicts addr (loadAddr dflags addr' (cmmExprWidth dflags e)) = True - - -- (4) an assignment to Hp/Sp conflicts with a heap/stack read respectively - | HeapMem <- addr, CmmAssign (CmmGlobal Hp) _ <- node = True - | StackMem <- addr, CmmAssign (CmmGlobal Sp) _ <- node = True - | SpMem{} <- addr, CmmAssign (CmmGlobal Sp) _ <- node = True - - -- (5) foreign calls clobber heap: see Note [Foreign calls clobber heap] - | CmmUnsafeForeignCall{} <- node, memConflicts addr AnyMem = True - - -- (6) native calls clobber any memory - | CmmCall{} <- node, memConflicts addr AnyMem = True - - -- (7) otherwise, no conflict - | otherwise = False - --- Returns True if node defines any global registers that are used in the --- Cmm expression -globalRegistersConflict :: DynFlags -> CmmExpr -> CmmNode e x -> Bool -globalRegistersConflict dflags expr node = - foldRegsDefd dflags (\b r -> b || regUsedIn dflags (CmmGlobal r) expr) - False node - --- Returns True if node defines any local registers that are used in the --- Cmm expression -localRegistersConflict :: DynFlags -> CmmExpr -> CmmNode e x -> Bool -localRegistersConflict dflags expr node = - foldRegsDefd dflags (\b r -> b || regUsedIn dflags (CmmLocal r) expr) - False node - --- Note [Sinking and calls] --- ~~~~~~~~~~~~~~~~~~~~~~~~ --- --- We have three kinds of calls: normal (CmmCall), safe foreign (CmmForeignCall) --- and unsafe foreign (CmmUnsafeForeignCall). We perform sinking pass after --- stack layout (see Note [Sinking after stack layout]) which leads to two --- invariants related to calls: --- --- a) during stack layout phase all safe foreign calls are turned into --- unsafe foreign calls (see Note [Lower safe foreign calls]). This --- means that we will never encounter CmmForeignCall node when running --- sinking after stack layout --- --- b) stack layout saves all variables live across a call on the stack --- just before making a call (remember we are not sinking assignments to --- stack): --- --- L1: --- x = R1 --- P64[Sp - 16] = L2 --- P64[Sp - 8] = x --- Sp = Sp - 16 --- call f() returns L2 --- L2: --- --- We will attempt to sink { x = R1 } but we will detect conflict with --- { P64[Sp - 8] = x } and hence we will drop { x = R1 } without even --- checking whether it conflicts with { call f() }. In this way we will --- never need to check any assignment conflicts with CmmCall. Remember --- that we still need to check for potential memory conflicts. --- --- So the result is that we only need to worry about CmmUnsafeForeignCall nodes --- when checking conflicts (see Note [Unsafe foreign calls clobber caller-save registers]). --- This assumption holds only when we do sinking after stack layout. If we run --- it before stack layout we need to check for possible conflicts with all three --- kinds of calls. Our `conflicts` function does that by using a generic --- foldRegsDefd and foldRegsUsed functions defined in DefinerOfRegs and --- UserOfRegs typeclasses. --- - --- An abstraction of memory read or written. 
-data AbsMem - = NoMem -- no memory accessed - | AnyMem -- arbitrary memory - | HeapMem -- definitely heap memory - | StackMem -- definitely stack memory - | SpMem -- [Sp+n] - {-# UNPACK #-} !Int - {-# UNPACK #-} !Int - --- Having SpMem is important because it lets us float loads from Sp --- past stores to Sp as long as they don't overlap, and this helps to --- unravel some long sequences of --- x1 = [Sp + 8] --- x2 = [Sp + 16] --- ... --- [Sp + 8] = xi --- [Sp + 16] = xj --- --- Note that SpMem is invalidated if Sp is changed, but the definition --- of 'conflicts' above handles that. - --- ToDo: this won't currently fix the following commonly occurring code: --- x1 = [R1 + 8] --- x2 = [R1 + 16] --- .. --- [Hp - 8] = x1 --- [Hp - 16] = x2 --- .. - --- because [R1 + 8] and [Hp - 8] are both HeapMem. We know that --- assignments to [Hp + n] do not conflict with any other heap memory, --- but this is tricky to nail down. What if we had --- --- x = Hp + n --- [x] = ... --- --- the store to [x] should be "new heap", not "old heap". --- Furthermore, you could imagine that if we started inlining --- functions in Cmm then there might well be reads of heap memory --- that was written in the same basic block. To take advantage of --- non-aliasing of heap memory we will have to be more clever. - --- Note [Foreign calls clobber heap] --- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --- --- It is tempting to say that foreign calls clobber only --- non-heap/stack memory, but unfortunately we break this invariant in --- the RTS. For example, in stg_catch_retry_frame we call --- stmCommitNestedTransaction() which modifies the contents of the --- TRec it is passed (this actually caused incorrect code to be --- generated). --- --- Since the invariant is true for the majority of foreign calls, --- perhaps we ought to have a special annotation for calls that can --- modify heap/stack memory. For now we just use the conservative --- definition here. --- --- Some CallishMachOp imply a memory barrier e.g. AtomicRMW and --- therefore we should never float any memory operations across one of --- these calls. 
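-- To illustrate the SpMem overlap reasoning above with a worked example
-- (a sketch; 'overlaps' is a hypothetical helper, but it is the same
-- half-open byte-range test that memConflicts below applies to two
-- SpMem values):
--
--   overlaps :: (Int, Int) -> (Int, Int) -> Bool  -- (offset, width) in bytes
--   overlaps (o1, w1) (o2, w2)
--     | o1 < o2   = o1 + w1 > o2
--     | otherwise = o2 + w2 > o1
--
--   overlaps (8, 8)  (16, 8) == False  -- [Sp+8,Sp+16) vs [Sp+16,Sp+24)
--   overlaps (8, 16) (16, 8) == True   -- [Sp+8,Sp+24) covers [Sp+16,Sp+24)
--
-- So an 8-byte load from [Sp + 8] may float past a store to [Sp + 16],
-- but a 16-byte access starting at Sp + 8 may not.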
- - -bothMems :: AbsMem -> AbsMem -> AbsMem -bothMems NoMem x = x -bothMems x NoMem = x -bothMems HeapMem HeapMem = HeapMem -bothMems StackMem StackMem = StackMem -bothMems (SpMem o1 w1) (SpMem o2 w2) - | o1 == o2 = SpMem o1 (max w1 w2) - | otherwise = StackMem -bothMems SpMem{} StackMem = StackMem -bothMems StackMem SpMem{} = StackMem -bothMems _ _ = AnyMem - -memConflicts :: AbsMem -> AbsMem -> Bool -memConflicts NoMem _ = False -memConflicts _ NoMem = False -memConflicts HeapMem StackMem = False -memConflicts StackMem HeapMem = False -memConflicts SpMem{} HeapMem = False -memConflicts HeapMem SpMem{} = False -memConflicts (SpMem o1 w1) (SpMem o2 w2) - | o1 < o2 = o1 + w1 > o2 - | otherwise = o2 + w2 > o1 -memConflicts _ _ = True - -exprMem :: DynFlags -> CmmExpr -> AbsMem -exprMem dflags (CmmLoad addr w) = bothMems (loadAddr dflags addr (typeWidth w)) (exprMem dflags addr) -exprMem dflags (CmmMachOp _ es) = foldr bothMems NoMem (map (exprMem dflags) es) -exprMem _ _ = NoMem - -loadAddr :: DynFlags -> CmmExpr -> Width -> AbsMem -loadAddr dflags e w = - case e of - CmmReg r -> regAddr dflags r 0 w - CmmRegOff r i -> regAddr dflags r i w - _other | regUsedIn dflags spReg e -> StackMem - | otherwise -> AnyMem - -regAddr :: DynFlags -> CmmReg -> Int -> Width -> AbsMem -regAddr _ (CmmGlobal Sp) i w = SpMem i (widthInBytes w) -regAddr _ (CmmGlobal Hp) _ _ = HeapMem -regAddr _ (CmmGlobal CurrentTSO) _ _ = HeapMem -- important for PrimOps -regAddr dflags r _ _ | isGcPtrType (cmmRegType dflags r) = HeapMem -- yay! GCPtr pays for itself -regAddr _ _ _ _ = AnyMem - -{- -Note [Inline GlobalRegs?] - -Should we freely inline GlobalRegs? - -Actually it doesn't make a huge amount of difference either way, so we -*do* currently treat GlobalRegs as "trivial" and inline them -everywhere, but for what it's worth, here is what I discovered when I -(SimonM) looked into this: - -Common sense says we should not inline GlobalRegs, because when we -have - - x = R1 - -the register allocator will coalesce this assignment, generating no -code, and simply record the fact that x is bound to $rbx (or -whatever). Furthermore, if we were to sink this assignment, then the -range of code over which R1 is live increases, and the range of code -over which x is live decreases. All things being equal, it is better -for x to be live than R1, because R1 is a fixed register whereas x can -live in any register. So we should neither sink nor inline 'x = R1'. - -However, not inlining GlobalRegs can have surprising -consequences. e.g. (cgrun020) - - c3EN: - _s3DB::P64 = R1; - _c3ES::P64 = _s3DB::P64 & 7; - if (_c3ES::P64 >= 2) goto c3EU; else goto c3EV; - c3EU: - _s3DD::P64 = P64[_s3DB::P64 + 6]; - _s3DE::P64 = P64[_s3DB::P64 + 14]; - I64[Sp - 8] = c3F0; - R1 = _s3DE::P64; - P64[Sp] = _s3DD::P64; - -inlining the GlobalReg gives: - - c3EN: - if (R1 & 7 >= 2) goto c3EU; else goto c3EV; - c3EU: - I64[Sp - 8] = c3F0; - _s3DD::P64 = P64[R1 + 6]; - R1 = P64[R1 + 14]; - P64[Sp] = _s3DD::P64; - -but if we don't inline the GlobalReg, instead we get: - - _s3DB::P64 = R1; - if (_s3DB::P64 & 7 >= 2) goto c3EU; else goto c3EV; - c3EU: - I64[Sp - 8] = c3F0; - R1 = P64[_s3DB::P64 + 14]; - P64[Sp] = P64[_s3DB::P64 + 6]; - -This looks better - we managed to inline _s3DD - but in fact it -generates an extra reg-reg move: - -.Lc3EU: - movq $c3F0_info,-8(%rbp) - movq %rbx,%rax - movq 14(%rbx),%rbx - movq 6(%rax),%rax - movq %rax,(%rbp) - -because _s3DB is now live across the R1 assignment, we lost the -benefit of coalescing. - -Who is at fault here? 
Perhaps if we knew that _s3DB was an alias for -R1, then we would not sink a reference to _s3DB past the R1 -assignment. Or perhaps we *should* do that - we might gain by sinking -it, despite losing the coalescing opportunity. - -Sometimes not inlining global registers wins by virtue of the rule -about not inlining into arguments of a foreign call, e.g. (T7163) this -is what happens when we inlined F1: - - _s3L2::F32 = F1; - _c3O3::F32 = %MO_F_Mul_W32(F1, 10.0 :: W32); - (_s3L7::F32) = call "ccall" arg hints: [] result hints: [] rintFloat(_c3O3::F32); - -but if we don't inline F1: - - (_s3L7::F32) = call "ccall" arg hints: [] result hints: [] rintFloat(%MO_F_Mul_W32(_s3L2::F32, - 10.0 :: W32)); --} diff --git a/compiler/cmm/CmmSwitch.hs b/compiler/cmm/CmmSwitch.hs deleted file mode 100644 index 26bf5c4ce9..0000000000 --- a/compiler/cmm/CmmSwitch.hs +++ /dev/null @@ -1,500 +0,0 @@ -{-# LANGUAGE GADTs #-} -module CmmSwitch ( - SwitchTargets, - mkSwitchTargets, - switchTargetsCases, switchTargetsDefault, switchTargetsRange, switchTargetsSigned, - mapSwitchTargets, switchTargetsToTable, switchTargetsFallThrough, - switchTargetsToList, eqSwitchTargetWith, - - SwitchPlan(..), - targetSupportsSwitch, - createSwitchPlan, - ) where - -import GhcPrelude - -import Outputable -import DynFlags -import Hoopl.Label (Label) - -import Data.Maybe -import Data.List (groupBy) -import Data.Function (on) -import qualified Data.Map as M - --- Note [Cmm Switches, the general plan] --- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --- --- Compiling a high-level switch statement, as it comes out of a STG case --- expression, for example, allows for a surprising amount of design decisions. --- Therefore, we cleanly separated this from the Stg → Cmm transformation, as --- well as from the actual code generation. --- --- The overall plan is: --- * The Stg → Cmm transformation creates a single `SwitchTargets` in --- emitSwitch and emitCmmLitSwitch in GHC.StgToCmm/Utils.hs. --- At this stage, they are unsuitable for code generation. --- * A dedicated Cmm transformation (CmmImplementSwitchPlans) replaces these --- switch statements with code that is suitable for code generation, i.e. --- a nice balanced tree of decisions with dense jump tables in the leafs. --- The actual planning of this tree is performed in pure code in createSwitchPlan --- in this module. See Note [createSwitchPlan]. --- * The actual code generation will not do any further processing and --- implement each CmmSwitch with a jump tables. --- --- When compiling to LLVM or C, CmmImplementSwitchPlans leaves the switch --- statements alone, as we can turn a SwitchTargets value into a nice --- switch-statement in LLVM resp. C, and leave the rest to the compiler. --- --- See Note [CmmSwitch vs. CmmImplementSwitchPlans] why the two module are --- separated. - ------------------------------------------------------------------------------ --- Note [Magic Constants in CmmSwitch] --- --- There are a lot of heuristics here that depend on magic values where it is --- hard to determine the "best" value (for whatever that means). These are the --- magic values: - --- | Number of consecutive default values allowed in a jump table. If there are --- more of them, the jump tables are split. --- --- Currently 7, as it costs 7 words of additional code when a jump table is --- split (at least on x64, determined experimentally). -maxJumpTableHole :: Integer -maxJumpTableHole = 7 - --- | Minimum size of a jump table. 
If the number is smaller, the switch is --- implemented using conditionals. --- Currently 5, because an if-then-else tree of 4 values is nice and compact. -minJumpTableSize :: Int -minJumpTableSize = 5 - --- | Minimum non-zero offset for a jump table. See Note [Jump Table Offset]. -minJumpTableOffset :: Integer -minJumpTableOffset = 2 - - ------------------------------------------------------------------------------ --- Switch Targets - --- Note [SwitchTargets]: --- ~~~~~~~~~~~~~~~~~~~~~ --- --- The branches of a switch are stored in a SwitchTargets, which consists of an --- (optional) default jump target, and a map from values to jump targets. --- --- If the default jump target is absent, the behaviour of the switch outside the --- values of the map is undefined. --- --- We use an Integer for the keys the map so that it can be used in switches on --- unsigned as well as signed integers. --- --- The map may be empty (we prune out-of-range branches here, so it could be us --- emptying it). --- --- Before code generation, the table needs to be brought into a form where all --- entries are non-negative, so that it can be compiled into a jump table. --- See switchTargetsToTable. - - --- | A value of type SwitchTargets contains the alternatives for a 'CmmSwitch' --- value, and knows whether the value is signed, the possible range, an --- optional default value and a map from values to jump labels. -data SwitchTargets = - SwitchTargets - Bool -- Signed values - (Integer, Integer) -- Range - (Maybe Label) -- Default value - (M.Map Integer Label) -- The branches - deriving (Show, Eq) - --- | The smart constructor mkSwitchTargets normalises the map a bit: --- * No entries outside the range --- * No entries equal to the default --- * No default if all elements have explicit values -mkSwitchTargets :: Bool -> (Integer, Integer) -> Maybe Label -> M.Map Integer Label -> SwitchTargets -mkSwitchTargets signed range@(lo,hi) mbdef ids - = SwitchTargets signed range mbdef' ids' - where - ids' = dropDefault $ restrict ids - mbdef' | defaultNeeded = mbdef - | otherwise = Nothing - - -- Drop entries outside the range, if there is a range - restrict = restrictMap (lo,hi) - - -- Drop entries that equal the default, if there is a default - dropDefault | Just l <- mbdef = M.filter (/= l) - | otherwise = id - - -- Check if the default is still needed - defaultNeeded = fromIntegral (M.size ids') /= hi-lo+1 - - --- | Changes all labels mentioned in the SwitchTargets value -mapSwitchTargets :: (Label -> Label) -> SwitchTargets -> SwitchTargets -mapSwitchTargets f (SwitchTargets signed range mbdef branches) - = SwitchTargets signed range (fmap f mbdef) (fmap f branches) - --- | Returns the list of non-default branches of the SwitchTargets value -switchTargetsCases :: SwitchTargets -> [(Integer, Label)] -switchTargetsCases (SwitchTargets _ _ _ branches) = M.toList branches - --- | Return the default label of the SwitchTargets value -switchTargetsDefault :: SwitchTargets -> Maybe Label -switchTargetsDefault (SwitchTargets _ _ mbdef _) = mbdef - --- | Return the range of the SwitchTargets value -switchTargetsRange :: SwitchTargets -> (Integer, Integer) -switchTargetsRange (SwitchTargets _ range _ _) = range - --- | Return whether this is used for a signed value -switchTargetsSigned :: SwitchTargets -> Bool -switchTargetsSigned (SwitchTargets signed _ _ _) = signed - --- | switchTargetsToTable creates a dense jump table, usable for code generation. 
--- --- Also returns an offset to add to the value; the list is 0-based on the --- result of that addition. --- --- The conversion from Integer to Int is a bit of a wart, as the actual --- scrutinee might be an unsigned word, but it just works, due to wrap-around --- arithmetic (as verified by the CmmSwitchTest test case). -switchTargetsToTable :: SwitchTargets -> (Int, [Maybe Label]) -switchTargetsToTable (SwitchTargets _ (lo,hi) mbdef branches) - = (fromIntegral (-start), [ labelFor i | i <- [start..hi] ]) - where - labelFor i = case M.lookup i branches of Just l -> Just l - Nothing -> mbdef - start | lo >= 0 && lo < minJumpTableOffset = 0 -- See Note [Jump Table Offset] - | otherwise = lo - --- Note [Jump Table Offset] --- ~~~~~~~~~~~~~~~~~~~~~~~~ --- --- Usually, the code for a jump table starting at x will first subtract x from --- the value, to avoid a large amount of empty entries. But if x is very small, --- the extra entries are no worse than the subtraction in terms of code size, and --- not having to do the subtraction is quicker. --- --- I.e. instead of --- _u20N: --- leaq -1(%r14),%rax --- jmp *_n20R(,%rax,8) --- _n20R: --- .quad _c20p --- .quad _c20q --- do --- _u20N: --- jmp *_n20Q(,%r14,8) --- --- _n20Q: --- .quad 0 --- .quad _c20p --- .quad _c20q --- .quad _c20r - --- | The list of all labels occurring in the SwitchTargets value. -switchTargetsToList :: SwitchTargets -> [Label] -switchTargetsToList (SwitchTargets _ _ mbdef branches) - = maybeToList mbdef ++ M.elems branches - --- | Groups cases with equal targets, suitable for pretty-printing to a --- c-like switch statement with fall-through semantics. -switchTargetsFallThrough :: SwitchTargets -> ([([Integer], Label)], Maybe Label) -switchTargetsFallThrough (SwitchTargets _ _ mbdef branches) = (groups, mbdef) - where - groups = map (\xs -> (map fst xs, snd (head xs))) $ - groupBy ((==) `on` snd) $ - M.toList branches - --- | Custom equality helper, needed for "CmmCommonBlockElim" -eqSwitchTargetWith :: (Label -> Label -> Bool) -> SwitchTargets -> SwitchTargets -> Bool -eqSwitchTargetWith eq (SwitchTargets signed1 range1 mbdef1 ids1) (SwitchTargets signed2 range2 mbdef2 ids2) = - signed1 == signed2 && range1 == range2 && goMB mbdef1 mbdef2 && goList (M.toList ids1) (M.toList ids2) - where - goMB Nothing Nothing = True - goMB (Just l1) (Just l2) = l1 `eq` l2 - goMB _ _ = False - goList [] [] = True - goList ((i1,l1):ls1) ((i2,l2):ls2) = i1 == i2 && l1 `eq` l2 && goList ls1 ls2 - goList _ _ = False - ------------------------------------------------------------------------------ --- Code generation for Switches - - --- | A SwitchPlan abstractly describes how a Switch statement ought to be --- implemented. See Note [createSwitchPlan] -data SwitchPlan - = Unconditionally Label - | IfEqual Integer Label SwitchPlan - | IfLT Bool Integer SwitchPlan SwitchPlan - | JumpTable SwitchTargets - deriving Show --- --- Note [createSwitchPlan] --- ~~~~~~~~~~~~~~~~~~~~~~~ --- --- A SwitchPlan describes how a Switch statement is to be broken down into --- smaller pieces suitable for code generation. --- --- createSwitchPlan creates such a switch plan, in these steps: --- 1. It splits the switch statement at segments of non-default values that --- are too large. See splitAtHoles and Note [Magic Constants in CmmSwitch] --- 2. Too small jump tables should be avoided, so we break up smaller pieces --- in breakTooSmall. --- 3. 
We fill in the segments between those pieces with a jump to the default --- label (if there is one), returning a SeparatedList in mkFlatSwitchPlan --- 4. We find and replace two less-than branches by a single equal-to-test in --- findSingleValues --- 5. The thus collected pieces are assembled to a balanced binary tree. - -{- - Note [Two alts + default] - ~~~~~~~~~~~~~~~~~~~~~~~~~ - -Discussion and a bit more info at #14644 - -When dealing with a switch of the form: -switch(e) { - case 1: goto l1; - case 3000: goto l2; - default: goto ldef; -} - -If we treat it as a sparse jump table we would generate: - -if (e > 3000) //Check if value is outside of the jump table. - goto ldef; -else { - if (e < 3000) { //Compare to upper value - if(e != 1) //Compare to remaining value - goto ldef; - else - goto l2; - } - else - goto l1; -} - -Instead we special case this to : - -if (e==1) goto l1; -else if (e==3000) goto l2; -else goto l3; - -This means we have: -* Less comparisons for: 1,<3000 -* Unchanged for 3000 -* One more for >3000 - -This improves code in a few ways: -* One comparison less means smaller code which helps with cache. -* It exchanges a taken jump for two jumps no taken in the >range case. - Jumps not taken are cheaper (See Agner guides) making this about as fast. -* For all other cases the first range check is removed making it faster. - -The end result is that the change is not measurably slower for the case ->3000 and faster for the other cases. - -This makes running this kind of match in an inner loop cheaper by 10-20% -depending on the data. -In nofib this improves wheel-sieve1 by 4-9% depending on problem -size. - -We could also add a second conditional jump after the comparison to -keep the range check like this: - cmp 3000, rArgument - jg - je -While this is fairly cheap it made no big difference for the >3000 case -and slowed down all other cases making it not worthwhile. --} - - --- | Does the target support switch out of the box? Then leave this to the --- target! -targetSupportsSwitch :: HscTarget -> Bool -targetSupportsSwitch HscC = True -targetSupportsSwitch HscLlvm = True -targetSupportsSwitch _ = False - --- | This function creates a SwitchPlan from a SwitchTargets value, breaking it --- down into smaller pieces suitable for code generation. 
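-- As a hedged example of the simplest case (defLbl and l5 are made-up
-- labels, assumed distinct): a switch with one explicit alternative plus
-- a default hits the singleton special case below (#10677), so no jump
-- table is ever built:
--
--   createSwitchPlan (mkSwitchTargets False (0, 10) (Just defLbl)
--                       (M.singleton 5 l5))
--
-- evaluates to
--
--   IfEqual 5 l5 (Unconditionally defLbl)
--
-- Larger, denser maps instead go through splitAtHoles/breakTooSmall and
-- typically become a balanced tree of IfLT tests whose leaves are jump
-- tables or single unconditional jumps.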
-createSwitchPlan :: SwitchTargets -> SwitchPlan --- Lets do the common case of a singleton map quickly and efficiently (#10677) -createSwitchPlan (SwitchTargets _signed _range (Just defLabel) m) - | [(x, l)] <- M.toList m - = IfEqual x l (Unconditionally defLabel) --- And another common case, matching "booleans" -createSwitchPlan (SwitchTargets _signed (lo,hi) Nothing m) - | [(x1, l1), (_x2,l2)] <- M.toAscList m - --Checking If |range| = 2 is enough if we have two unique literals - , hi - lo == 1 - = IfEqual x1 l1 (Unconditionally l2) --- See Note [Two alts + default] -createSwitchPlan (SwitchTargets _signed _range (Just defLabel) m) - | [(x1, l1), (x2,l2)] <- M.toAscList m - = IfEqual x1 l1 (IfEqual x2 l2 (Unconditionally defLabel)) -createSwitchPlan (SwitchTargets signed range mbdef m) = - -- pprTrace "createSwitchPlan" (text (show ids) $$ text (show (range,m)) $$ text (show pieces) $$ text (show flatPlan) $$ text (show plan)) $ - plan - where - pieces = concatMap breakTooSmall $ splitAtHoles maxJumpTableHole m - flatPlan = findSingleValues $ mkFlatSwitchPlan signed mbdef range pieces - plan = buildTree signed $ flatPlan - - ---- ---- Step 1: Splitting at large holes ---- -splitAtHoles :: Integer -> M.Map Integer a -> [M.Map Integer a] -splitAtHoles _ m | M.null m = [] -splitAtHoles holeSize m = map (\range -> restrictMap range m) nonHoles - where - holes = filter (\(l,h) -> h - l > holeSize) $ zip (M.keys m) (tail (M.keys m)) - nonHoles = reassocTuples lo holes hi - - (lo,_) = M.findMin m - (hi,_) = M.findMax m - ---- ---- Step 2: Avoid small jump tables ---- --- We do not want jump tables below a certain size. This breaks them up --- (into singleton maps, for now). -breakTooSmall :: M.Map Integer a -> [M.Map Integer a] -breakTooSmall m - | M.size m > minJumpTableSize = [m] - | otherwise = [M.singleton k v | (k,v) <- M.toList m] - ---- ---- Step 3: Fill in the blanks ---- - --- | A FlatSwitchPlan is a list of SwitchPlans, with an integer inbetween every --- two entries, dividing the range. --- So if we have (abusing list syntax) [plan1,n,plan2], then we use plan1 if --- the expression is < n, and plan2 otherwise. - -type FlatSwitchPlan = SeparatedList Integer SwitchPlan - -mkFlatSwitchPlan :: Bool -> Maybe Label -> (Integer, Integer) -> [M.Map Integer Label] -> FlatSwitchPlan - --- If we have no default (i.e. 
undefined where there is no entry), we can --- branch at the minimum of each map -mkFlatSwitchPlan _ Nothing _ [] = pprPanic "mkFlatSwitchPlan with nothing left to do" empty -mkFlatSwitchPlan signed Nothing _ (m:ms) - = (mkLeafPlan signed Nothing m , [ (fst (M.findMin m'), mkLeafPlan signed Nothing m') | m' <- ms ]) - --- If we have a default, we have to interleave segments that jump --- to the default between the maps -mkFlatSwitchPlan signed (Just l) r ms = let ((_,p1):ps) = go r ms in (p1, ps) - where - go (lo,hi) [] - | lo > hi = [] - | otherwise = [(lo, Unconditionally l)] - go (lo,hi) (m:ms) - | lo < min - = (lo, Unconditionally l) : go (min,hi) (m:ms) - | lo == min - = (lo, mkLeafPlan signed (Just l) m) : go (max+1,hi) ms - | otherwise - = pprPanic "mkFlatSwitchPlan" (integer lo <+> integer min) - where - min = fst (M.findMin m) - max = fst (M.findMax m) - - -mkLeafPlan :: Bool -> Maybe Label -> M.Map Integer Label -> SwitchPlan -mkLeafPlan signed mbdef m - | [(_,l)] <- M.toList m -- singleton map - = Unconditionally l - | otherwise - = JumpTable $ mkSwitchTargets signed (min,max) mbdef m - where - min = fst (M.findMin m) - max = fst (M.findMax m) - ---- ---- Step 4: Reduce the number of branches using == ---- - --- A sequence of three unconditional jumps, with the outer two pointing to the --- same value and the bounds off by exactly one can be improved -findSingleValues :: FlatSwitchPlan -> FlatSwitchPlan -findSingleValues (Unconditionally l, (i, Unconditionally l2) : (i', Unconditionally l3) : xs) - | l == l3 && i + 1 == i' - = findSingleValues (IfEqual i l2 (Unconditionally l), xs) -findSingleValues (p, (i,p'):xs) - = (p,i) `consSL` findSingleValues (p', xs) -findSingleValues (p, []) - = (p, []) - ---- ---- Step 5: Actually build the tree ---- - --- Build a balanced tree from a separated list -buildTree :: Bool -> FlatSwitchPlan -> SwitchPlan -buildTree _ (p,[]) = p -buildTree signed sl = IfLT signed m (buildTree signed sl1) (buildTree signed sl2) - where - (sl1, m, sl2) = divideSL sl - - - --- --- Utility data type: Non-empty lists with extra markers in between each --- element: --- - -type SeparatedList b a = (a, [(b,a)]) - -consSL :: (a, b) -> SeparatedList b a -> SeparatedList b a -consSL (a, b) (a', xs) = (a, (b,a'):xs) - -divideSL :: SeparatedList b a -> (SeparatedList b a, b, SeparatedList b a) -divideSL (_,[]) = error "divideSL: Singleton SeparatedList" -divideSL (p,xs) = ((p, xs1), m, (p', xs2)) - where - (xs1, (m,p'):xs2) = splitAt (length xs `div` 2) xs - --- --- Other Utilities --- - -restrictMap :: (Integer,Integer) -> M.Map Integer b -> M.Map Integer b -restrictMap (lo,hi) m = mid - where (_, mid_hi) = M.split (lo-1) m - (mid, _) = M.split (hi+1) mid_hi - --- for example: reassocTuples a [(b,c),(d,e)] f == [(a,b),(c,d),(e,f)] -reassocTuples :: a -> [(a,a)] -> a -> [(a,a)] -reassocTuples initial [] last - = [(initial,last)] -reassocTuples initial ((a,b):tuples) last - = (initial,a) : reassocTuples b tuples last - --- Note [CmmSwitch vs. CmmImplementSwitchPlans] --- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --- I (Joachim) separated the two somewhat closely related modules --- --- - CmmSwitch, which provides the CmmSwitchTargets type and contains the strategy --- for implementing a Cmm switch (createSwitchPlan), and --- - CmmImplementSwitchPlans, which contains the actual Cmm graph modification, --- --- for these reasons: --- --- * CmmSwitch is very low in the dependency tree, i.e. 
does not depend on any --- GHC specific modules at all (with the exception of Output and Hoople --- (Literal)). CmmImplementSwitchPlans is the Cmm transformation and hence very --- high in the dependency tree. --- * CmmSwitch provides the CmmSwitchTargets data type, which is abstract, but --- used in CmmNodes. --- * Because CmmSwitch is low in the dependency tree, the separation allows --- for more parallelism when building GHC. --- * The interaction between the modules is very explicit and easy to --- understand, due to the small and simple interface. diff --git a/compiler/cmm/CmmType.hs b/compiler/cmm/CmmType.hs deleted file mode 100644 index f8ac71ac89..0000000000 --- a/compiler/cmm/CmmType.hs +++ /dev/null @@ -1,432 +0,0 @@ -module CmmType - ( CmmType -- Abstract - , b8, b16, b32, b64, b128, b256, b512, f32, f64, bWord, bHalfWord, gcWord - , cInt - , cmmBits, cmmFloat - , typeWidth, cmmEqType, cmmEqType_ignoring_ptrhood - , isFloatType, isGcPtrType, isBitsType - , isWord32, isWord64, isFloat64, isFloat32 - - , Width(..) - , widthInBits, widthInBytes, widthInLog, widthFromBytes - , wordWidth, halfWordWidth, cIntWidth - , halfWordMask - , narrowU, narrowS - , rEP_CostCentreStack_mem_alloc - , rEP_CostCentreStack_scc_count - , rEP_StgEntCounter_allocs - , rEP_StgEntCounter_allocd - - , ForeignHint(..) - - , Length - , vec, vec2, vec4, vec8, vec16 - , vec2f64, vec2b64, vec4f32, vec4b32, vec8b16, vec16b8 - , cmmVec - , vecLength, vecElemType - , isVecType - ) -where - - -import GhcPrelude - -import DynFlags -import FastString -import Outputable - -import Data.Word -import Data.Int - ------------------------------------------------------------------------------ --- CmmType ------------------------------------------------------------------------------ - - -- NOTE: CmmType is an abstract type, not exported from this - -- module so you can easily change its representation - -- - -- However Width is exported in a concrete way, - -- and is used extensively in pattern-matching - -data CmmType -- The important one! - = CmmType CmmCat Width - -data CmmCat -- "Category" (not exported) - = GcPtrCat -- GC pointer - | BitsCat -- Non-pointer - | FloatCat -- Float - | VecCat Length CmmCat -- Vector - deriving( Eq ) - -- See Note [Signed vs unsigned] at the end - -instance Outputable CmmType where - ppr (CmmType cat wid) = ppr cat <> ppr (widthInBits wid) - -instance Outputable CmmCat where - ppr FloatCat = text "F" - ppr GcPtrCat = text "P" - ppr BitsCat = text "I" - ppr (VecCat n cat) = ppr cat <> text "x" <> ppr n <> text "V" - --- Why is CmmType stratified? For native code generation, --- most of the time you just want to know what sort of register --- to put the thing in, and for this you need to know how --- many bits thing has, and whether it goes in a floating-point --- register. By contrast, the distinction between GcPtr and --- GcNonPtr is of interest to only a few parts of the code generator. 
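
As an illustrative aside on the stratification just described: a minimal standalone sketch, with made-up names (MiniCat, MiniType, regClass) rather than GHC's own, showing that a Cmm type is essentially a category paired with a bit width, and that the question the backend usually asks of it is which register class a value needs.

    -- Toy mirror of the category-plus-width idea; all names here are
    -- illustrative only and not GHC's.
    data MiniCat = GcPtr | Bits | Float
      deriving (Eq, Show)

    data MiniType = MiniType MiniCat Int    -- category plus width in bits
      deriving (Eq, Show)

    -- Rendered roughly like the Outputable instance above: "P64", "I32", "F64".
    render :: MiniType -> String
    render (MiniType cat w) = code cat ++ show w
      where
        code GcPtr = "P"
        code Bits  = "I"
        code Float = "F"

    -- The question most of a native backend asks: integer or FP register?
    data RegClass = IntReg | FloatReg
      deriving (Eq, Show)

    regClass :: MiniType -> RegClass
    regClass (MiniType Float _) = FloatReg
    regClass (MiniType _     _) = IntReg    -- GcPtr vs Bits is irrelevant here

    main :: IO ()
    main = do
      mapM_ (putStrLn . render) [MiniType GcPtr 64, MiniType Bits 32, MiniType Float 64]
      print (regClass (MiniType GcPtr 64))  -- IntReg: GC pointers go in integer registers

Only the Float/non-Float split matters for register selection, which is exactly why the GcPtr/Bits distinction can stay invisible to most of the code generator.
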
- --------- Equality on CmmType -------------- --- CmmType is *not* an instance of Eq; sometimes we care about the --- Gc/NonGc distinction, and sometimes we don't --- So we use an explicit function to force you to think about it -cmmEqType :: CmmType -> CmmType -> Bool -- Exact equality -cmmEqType (CmmType c1 w1) (CmmType c2 w2) = c1==c2 && w1==w2 - -cmmEqType_ignoring_ptrhood :: CmmType -> CmmType -> Bool - -- This equality is temporary; used in CmmLint - -- but the RTS files are not yet well-typed wrt pointers -cmmEqType_ignoring_ptrhood (CmmType c1 w1) (CmmType c2 w2) - = c1 `weak_eq` c2 && w1==w2 - where - weak_eq :: CmmCat -> CmmCat -> Bool - FloatCat `weak_eq` FloatCat = True - FloatCat `weak_eq` _other = False - _other `weak_eq` FloatCat = False - (VecCat l1 cat1) `weak_eq` (VecCat l2 cat2) = l1 == l2 - && cat1 `weak_eq` cat2 - (VecCat {}) `weak_eq` _other = False - _other `weak_eq` (VecCat {}) = False - _word1 `weak_eq` _word2 = True -- Ignores GcPtr - ---- Simple operations on CmmType ----- -typeWidth :: CmmType -> Width -typeWidth (CmmType _ w) = w - -cmmBits, cmmFloat :: Width -> CmmType -cmmBits = CmmType BitsCat -cmmFloat = CmmType FloatCat - --------- Common CmmTypes ------------ --- Floats and words of specific widths -b8, b16, b32, b64, b128, b256, b512, f32, f64 :: CmmType -b8 = cmmBits W8 -b16 = cmmBits W16 -b32 = cmmBits W32 -b64 = cmmBits W64 -b128 = cmmBits W128 -b256 = cmmBits W256 -b512 = cmmBits W512 -f32 = cmmFloat W32 -f64 = cmmFloat W64 - --- CmmTypes of native word widths -bWord :: DynFlags -> CmmType -bWord dflags = cmmBits (wordWidth dflags) - -bHalfWord :: DynFlags -> CmmType -bHalfWord dflags = cmmBits (halfWordWidth dflags) - -gcWord :: DynFlags -> CmmType -gcWord dflags = CmmType GcPtrCat (wordWidth dflags) - -cInt :: DynFlags -> CmmType -cInt dflags = cmmBits (cIntWidth dflags) - ------------- Predicates ---------------- -isFloatType, isGcPtrType, isBitsType :: CmmType -> Bool -isFloatType (CmmType FloatCat _) = True -isFloatType _other = False - -isGcPtrType (CmmType GcPtrCat _) = True -isGcPtrType _other = False - -isBitsType (CmmType BitsCat _) = True -isBitsType _ = False - -isWord32, isWord64, isFloat32, isFloat64 :: CmmType -> Bool --- isWord64 is true of 64-bit non-floats (both gc-ptrs and otherwise) --- isFloat32 and 64 are obvious - -isWord64 (CmmType BitsCat W64) = True -isWord64 (CmmType GcPtrCat W64) = True -isWord64 _other = False - -isWord32 (CmmType BitsCat W32) = True -isWord32 (CmmType GcPtrCat W32) = True -isWord32 _other = False - -isFloat32 (CmmType FloatCat W32) = True -isFloat32 _other = False - -isFloat64 (CmmType FloatCat W64) = True -isFloat64 _other = False - ------------------------------------------------------------------------------ --- Width ------------------------------------------------------------------------------ - -data Width = W8 | W16 | W32 | W64 - | W128 - | W256 - | W512 - deriving (Eq, Ord, Show) - -instance Outputable Width where - ppr rep = ptext (mrStr rep) - -mrStr :: Width -> PtrString -mrStr = sLit . 
show - - --------- Common Widths ------------ -wordWidth :: DynFlags -> Width -wordWidth dflags - | wORD_SIZE dflags == 4 = W32 - | wORD_SIZE dflags == 8 = W64 - | otherwise = panic "MachOp.wordRep: Unknown word size" - -halfWordWidth :: DynFlags -> Width -halfWordWidth dflags - | wORD_SIZE dflags == 4 = W16 - | wORD_SIZE dflags == 8 = W32 - | otherwise = panic "MachOp.halfWordRep: Unknown word size" - -halfWordMask :: DynFlags -> Integer -halfWordMask dflags - | wORD_SIZE dflags == 4 = 0xFFFF - | wORD_SIZE dflags == 8 = 0xFFFFFFFF - | otherwise = panic "MachOp.halfWordMask: Unknown word size" - --- cIntRep is the Width for a C-language 'int' -cIntWidth :: DynFlags -> Width -cIntWidth dflags = case cINT_SIZE dflags of - 4 -> W32 - 8 -> W64 - s -> panic ("cIntWidth: Unknown cINT_SIZE: " ++ show s) - -widthInBits :: Width -> Int -widthInBits W8 = 8 -widthInBits W16 = 16 -widthInBits W32 = 32 -widthInBits W64 = 64 -widthInBits W128 = 128 -widthInBits W256 = 256 -widthInBits W512 = 512 - - -widthInBytes :: Width -> Int -widthInBytes W8 = 1 -widthInBytes W16 = 2 -widthInBytes W32 = 4 -widthInBytes W64 = 8 -widthInBytes W128 = 16 -widthInBytes W256 = 32 -widthInBytes W512 = 64 - - -widthFromBytes :: Int -> Width -widthFromBytes 1 = W8 -widthFromBytes 2 = W16 -widthFromBytes 4 = W32 -widthFromBytes 8 = W64 -widthFromBytes 16 = W128 -widthFromBytes 32 = W256 -widthFromBytes 64 = W512 - -widthFromBytes n = pprPanic "no width for given number of bytes" (ppr n) - --- log_2 of the width in bytes, useful for generating shifts. -widthInLog :: Width -> Int -widthInLog W8 = 0 -widthInLog W16 = 1 -widthInLog W32 = 2 -widthInLog W64 = 3 -widthInLog W128 = 4 -widthInLog W256 = 5 -widthInLog W512 = 6 - - --- widening / narrowing - -narrowU :: Width -> Integer -> Integer -narrowU W8 x = fromIntegral (fromIntegral x :: Word8) -narrowU W16 x = fromIntegral (fromIntegral x :: Word16) -narrowU W32 x = fromIntegral (fromIntegral x :: Word32) -narrowU W64 x = fromIntegral (fromIntegral x :: Word64) -narrowU _ _ = panic "narrowTo" - -narrowS :: Width -> Integer -> Integer -narrowS W8 x = fromIntegral (fromIntegral x :: Int8) -narrowS W16 x = fromIntegral (fromIntegral x :: Int16) -narrowS W32 x = fromIntegral (fromIntegral x :: Int32) -narrowS W64 x = fromIntegral (fromIntegral x :: Int64) -narrowS _ _ = panic "narrowTo" - ------------------------------------------------------------------------------ --- SIMD ------------------------------------------------------------------------------ - -type Length = Int - -vec :: Length -> CmmType -> CmmType -vec l (CmmType cat w) = CmmType (VecCat l cat) vecw - where - vecw :: Width - vecw = widthFromBytes (l*widthInBytes w) - -vec2, vec4, vec8, vec16 :: CmmType -> CmmType -vec2 = vec 2 -vec4 = vec 4 -vec8 = vec 8 -vec16 = vec 16 - -vec2f64, vec2b64, vec4f32, vec4b32, vec8b16, vec16b8 :: CmmType -vec2f64 = vec 2 f64 -vec2b64 = vec 2 b64 -vec4f32 = vec 4 f32 -vec4b32 = vec 4 b32 -vec8b16 = vec 8 b16 -vec16b8 = vec 16 b8 - -cmmVec :: Int -> CmmType -> CmmType -cmmVec n (CmmType cat w) = - CmmType (VecCat n cat) (widthFromBytes (n*widthInBytes w)) - -vecLength :: CmmType -> Length -vecLength (CmmType (VecCat l _) _) = l -vecLength _ = panic "vecLength: not a vector" - -vecElemType :: CmmType -> CmmType -vecElemType (CmmType (VecCat l cat) w) = CmmType cat scalw - where - scalw :: Width - scalw = widthFromBytes (widthInBytes w `div` l) -vecElemType _ = panic "vecElemType: not a vector" - -isVecType :: CmmType -> Bool -isVecType (CmmType (VecCat {}) _) = True -isVecType _ = False - 
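
The narrowU/narrowS helpers above work by round-tripping through a fixed-width integer type; a small standalone illustration of the W8 case (using the same Data.Word/Data.Int machinery, with invented helper names narrowU8/narrowS8) makes the intended semantics concrete:

    -- Standalone illustration of the narrowU/narrowS round-trips above (W8 case only).
    import Data.Word (Word8)
    import Data.Int  (Int8)

    narrowU8 :: Integer -> Integer   -- zero-extended view of the low 8 bits
    narrowU8 x = fromIntegral (fromIntegral x :: Word8)

    narrowS8 :: Integer -> Integer   -- sign-extended view of the low 8 bits
    narrowS8 x = fromIntegral (fromIntegral x :: Int8)

    main :: IO ()
    main = do
      print (narrowU8 300)    -- 44   (300 mod 256)
      print (narrowS8 300)    -- 44   (bit 7 clear, so no sign change)
      print (narrowU8 200)    -- 200
      print (narrowS8 200)    -- -56  (bit 7 set, reinterpreted as negative)
      print (narrowU8 (-1))   -- 255
      print (narrowS8 (-1))   -- -1

The unsigned view reduces the value modulo 256, while the signed view additionally reinterprets bit 7 as a sign bit.
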
-------------------------------------------------------------------------- --- Hints - --- Hints are extra type information we attach to the arguments and --- results of a foreign call, where more type information is sometimes --- needed by the ABI to make the correct kind of call. - -data ForeignHint - = NoHint | AddrHint | SignedHint - deriving( Eq ) - -- Used to give extra per-argument or per-result - -- information needed by foreign calling conventions - -------------------------------------------------------------------------- - --- These don't really belong here, but I don't know where is best to --- put them. - -rEP_CostCentreStack_mem_alloc :: DynFlags -> CmmType -rEP_CostCentreStack_mem_alloc dflags - = cmmBits (widthFromBytes (pc_REP_CostCentreStack_mem_alloc pc)) - where pc = platformConstants dflags - -rEP_CostCentreStack_scc_count :: DynFlags -> CmmType -rEP_CostCentreStack_scc_count dflags - = cmmBits (widthFromBytes (pc_REP_CostCentreStack_scc_count pc)) - where pc = platformConstants dflags - -rEP_StgEntCounter_allocs :: DynFlags -> CmmType -rEP_StgEntCounter_allocs dflags - = cmmBits (widthFromBytes (pc_REP_StgEntCounter_allocs pc)) - where pc = platformConstants dflags - -rEP_StgEntCounter_allocd :: DynFlags -> CmmType -rEP_StgEntCounter_allocd dflags - = cmmBits (widthFromBytes (pc_REP_StgEntCounter_allocd pc)) - where pc = platformConstants dflags - -------------------------------------------------------------------------- -{- Note [Signed vs unsigned] - ~~~~~~~~~~~~~~~~~~~~~~~~~ -Should a CmmType include a signed vs. unsigned distinction? - -This is very much like a "hint" in C-- terminology: it isn't necessary -in order to generate correct code, but it might be useful in that the -compiler can generate better code if it has access to higher-level -hints about data. This is important at call boundaries, because the -definition of a function is not visible at all of its call sites, so -the compiler cannot infer the hints. - -Here in Cmm, we're taking a slightly different approach. We include -the int vs. float hint in the CmmType, because (a) the majority of -platforms have a strong distinction between float and int registers, -and (b) we don't want to do any heavyweight hint-inference in the -native code backend in order to get good code. We're treating the -hint more like a type: our Cmm is always completely consistent with -respect to hints. All coercions between float and int are explicit. - -What about the signed vs. unsigned hint? This information might be -useful if we want to keep sub-word-sized values in word-size -registers, which we must do if we only have word-sized registers. - -On such a system, there are two straightforward conventions for -representing sub-word-sized values: - -(a) Leave the upper bits undefined. Comparison operations must - sign- or zero-extend both operands before comparing them, - depending on whether the comparison is signed or unsigned. - -(b) Always keep the values sign- or zero-extended as appropriate. - Arithmetic operations must narrow the result to the appropriate - size. - -A clever compiler might not use either (a) or (b) exclusively, instead -it would attempt to minimize the coercions by analysis: the same kind -of analysis that propagates hints around. In Cmm we don't want to -have to do this, so we plump for having richer types and keeping the -type information consistent. 
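
A small, purely illustrative sketch of the two conventions (a) and (b) above, for an 8-bit value carried in a 64-bit word; the helper names (signExtend8, cmpA, cmpB, addB) are invented for the example and do not appear in GHC:

    -- Convention (a): upper bits are undefined, so a signed comparison must
    -- sign-extend the low 8 bits of both operands first.
    -- Convention (b): values are kept sign-extended at all times, so a
    -- word-sized comparison is already correct; arithmetic must re-narrow.
    import Data.Bits ((.&.))
    import Data.Int  (Int64, Int8)

    signExtend8 :: Int64 -> Int64
    signExtend8 w = fromIntegral (fromIntegral (w .&. 0xFF) :: Int8)

    cmpA :: Int64 -> Int64 -> Ordering
    cmpA x y = compare (signExtend8 x) (signExtend8 y)

    cmpB :: Int64 -> Int64 -> Ordering
    cmpB = compare

    addB :: Int64 -> Int64 -> Int64
    addB x y = signExtend8 (x + y)   -- narrow the result back to 8 bits

    main :: IO ()
    main = do
      -- 0xFF is -1 as a signed 8-bit value; with arbitrary upper bits
      -- (convention (a)) only the explicit extension gives the right answer.
      print (cmpA 0x00000000000000FF 1)  -- LT, because 0xFF sign-extends to -1
      print (cmpB (-1) 1)                -- LT, the extended form is already correct
      print (addB 127 1)                 -- -128, 8-bit overflow wraps after narrowing

Under (a) every signed comparison pays for the extension; under (b) the comparison is free but arithmetic has to re-narrow, which is the trade-off the Note is weighing.
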
- -If signed/unsigned hints are missing from CmmType, then the only -choice we have is (a), because we don't know whether the result of an -operation should be sign- or zero-extended. - -Many architectures have extending load operations, which work well -with (b). To make use of them with (a), you need to know whether the -value is going to be sign- or zero-extended by an enclosing comparison -(for example), which involves knowing above the context. This is -doable but more complex. - -Further complicating the issue is foreign calls: a foreign calling -convention can specify that signed 8-bit quantities are passed as -sign-extended 32 bit quantities, for example (this is the case on the -PowerPC). So we *do* need sign information on foreign call arguments. - -Pros for adding signed vs. unsigned to CmmType: - - - It would let us use convention (b) above, and get easier - code generation for extending loads. - - - Less information required on foreign calls. - - - MachOp type would be simpler - -Cons: - - - More complexity - - - What is the CmmType for a VanillaReg? Currently it is - always wordRep, but now we have to decide whether it is - signed or unsigned. The same VanillaReg can thus have - different CmmType in different parts of the program. - - - Extra coercions cluttering up expressions. - -Currently for GHC, the foreign call point is moot, because we do our -own promotion of sub-word-sized values to word-sized values. The Int8 -type is represented by an Int# which is kept sign-extended at all times -(this is slightly naughty, because we're making assumptions about the -C calling convention rather early on in the compiler). However, given -this, the cons outweigh the pros. - --} - diff --git a/compiler/cmm/CmmUtils.hs b/compiler/cmm/CmmUtils.hs deleted file mode 100644 index 8920d2d6b9..0000000000 --- a/compiler/cmm/CmmUtils.hs +++ /dev/null @@ -1,607 +0,0 @@ -{-# LANGUAGE GADTs, RankNTypes #-} -{-# LANGUAGE BangPatterns #-} - ------------------------------------------------------------------------------ --- --- Cmm utilities. 
--- --- (c) The University of Glasgow 2004-2006 --- ------------------------------------------------------------------------------ - -module CmmUtils( - -- CmmType - primRepCmmType, slotCmmType, slotForeignHint, - typeCmmType, typeForeignHint, primRepForeignHint, - - -- CmmLit - zeroCLit, mkIntCLit, - mkWordCLit, packHalfWordsCLit, - mkByteStringCLit, - mkDataLits, mkRODataLits, - mkStgWordCLit, - - -- CmmExpr - mkIntExpr, zeroExpr, - mkLblExpr, - cmmRegOff, cmmOffset, cmmLabelOff, cmmOffsetLit, cmmOffsetExpr, - cmmRegOffB, cmmOffsetB, cmmLabelOffB, cmmOffsetLitB, cmmOffsetExprB, - cmmRegOffW, cmmOffsetW, cmmLabelOffW, cmmOffsetLitW, cmmOffsetExprW, - cmmIndex, cmmIndexExpr, cmmLoadIndex, cmmLoadIndexW, - cmmNegate, - cmmULtWord, cmmUGeWord, cmmUGtWord, cmmUShrWord, - cmmSLtWord, - cmmNeWord, cmmEqWord, - cmmOrWord, cmmAndWord, - cmmSubWord, cmmAddWord, cmmMulWord, cmmQuotWord, - cmmToWord, - - cmmMkAssign, - - isTrivialCmmExpr, hasNoGlobalRegs, isLit, isComparisonExpr, - - baseExpr, spExpr, hpExpr, spLimExpr, hpLimExpr, - currentTSOExpr, currentNurseryExpr, cccsExpr, - - -- Statics - blankWord, - - -- Tagging - cmmTagMask, cmmPointerMask, cmmUntag, cmmIsTagged, - cmmConstrTag1, - - -- Overlap and usage - regsOverlap, regUsedIn, - - -- Liveness and bitmaps - mkLiveness, - - -- * Operations that probably don't belong here - modifyGraph, - - ofBlockMap, toBlockMap, - ofBlockList, toBlockList, bodyToBlockList, - toBlockListEntryFirst, toBlockListEntryFirstFalseFallthrough, - foldlGraphBlocks, mapGraphNodes, revPostorder, mapGraphNodes1, - - -- * Ticks - blockTicks - ) where - -import GhcPrelude - -import TyCon ( PrimRep(..), PrimElemRep(..) ) -import GHC.Types.RepType ( UnaryType, SlotTy (..), typePrimRep1 ) - -import SMRep -import Cmm -import BlockId -import CLabel -import Outputable -import DynFlags -import Unique -import GHC.Platform.Regs - -import Data.ByteString (ByteString) -import qualified Data.ByteString as BS -import Data.Bits -import Hoopl.Graph -import Hoopl.Label -import Hoopl.Block -import Hoopl.Collections - ---------------------------------------------------- --- --- CmmTypes --- ---------------------------------------------------- - -primRepCmmType :: DynFlags -> PrimRep -> CmmType -primRepCmmType _ VoidRep = panic "primRepCmmType:VoidRep" -primRepCmmType dflags LiftedRep = gcWord dflags -primRepCmmType dflags UnliftedRep = gcWord dflags -primRepCmmType dflags IntRep = bWord dflags -primRepCmmType dflags WordRep = bWord dflags -primRepCmmType _ Int8Rep = b8 -primRepCmmType _ Word8Rep = b8 -primRepCmmType _ Int16Rep = b16 -primRepCmmType _ Word16Rep = b16 -primRepCmmType _ Int32Rep = b32 -primRepCmmType _ Word32Rep = b32 -primRepCmmType _ Int64Rep = b64 -primRepCmmType _ Word64Rep = b64 -primRepCmmType dflags AddrRep = bWord dflags -primRepCmmType _ FloatRep = f32 -primRepCmmType _ DoubleRep = f64 -primRepCmmType _ (VecRep len rep) = vec len (primElemRepCmmType rep) - -slotCmmType :: DynFlags -> SlotTy -> CmmType -slotCmmType dflags PtrSlot = gcWord dflags -slotCmmType dflags WordSlot = bWord dflags -slotCmmType _ Word64Slot = b64 -slotCmmType _ FloatSlot = f32 -slotCmmType _ DoubleSlot = f64 - -primElemRepCmmType :: PrimElemRep -> CmmType -primElemRepCmmType Int8ElemRep = b8 -primElemRepCmmType Int16ElemRep = b16 -primElemRepCmmType Int32ElemRep = b32 -primElemRepCmmType Int64ElemRep = b64 -primElemRepCmmType Word8ElemRep = b8 -primElemRepCmmType Word16ElemRep = b16 -primElemRepCmmType Word32ElemRep = b32 -primElemRepCmmType Word64ElemRep = b64 -primElemRepCmmType 
FloatElemRep = f32 -primElemRepCmmType DoubleElemRep = f64 - -typeCmmType :: DynFlags -> UnaryType -> CmmType -typeCmmType dflags ty = primRepCmmType dflags (typePrimRep1 ty) - -primRepForeignHint :: PrimRep -> ForeignHint -primRepForeignHint VoidRep = panic "primRepForeignHint:VoidRep" -primRepForeignHint LiftedRep = AddrHint -primRepForeignHint UnliftedRep = AddrHint -primRepForeignHint IntRep = SignedHint -primRepForeignHint Int8Rep = SignedHint -primRepForeignHint Int16Rep = SignedHint -primRepForeignHint Int32Rep = SignedHint -primRepForeignHint Int64Rep = SignedHint -primRepForeignHint WordRep = NoHint -primRepForeignHint Word8Rep = NoHint -primRepForeignHint Word16Rep = NoHint -primRepForeignHint Word32Rep = NoHint -primRepForeignHint Word64Rep = NoHint -primRepForeignHint AddrRep = AddrHint -- NB! AddrHint, but NonPtrArg -primRepForeignHint FloatRep = NoHint -primRepForeignHint DoubleRep = NoHint -primRepForeignHint (VecRep {}) = NoHint - -slotForeignHint :: SlotTy -> ForeignHint -slotForeignHint PtrSlot = AddrHint -slotForeignHint WordSlot = NoHint -slotForeignHint Word64Slot = NoHint -slotForeignHint FloatSlot = NoHint -slotForeignHint DoubleSlot = NoHint - -typeForeignHint :: UnaryType -> ForeignHint -typeForeignHint = primRepForeignHint . typePrimRep1 - ---------------------------------------------------- --- --- CmmLit --- ---------------------------------------------------- - --- XXX: should really be Integer, since Int doesn't necessarily cover --- the full range of target Ints. -mkIntCLit :: DynFlags -> Int -> CmmLit -mkIntCLit dflags i = CmmInt (toInteger i) (wordWidth dflags) - -mkIntExpr :: DynFlags -> Int -> CmmExpr -mkIntExpr dflags i = CmmLit $! mkIntCLit dflags i - -zeroCLit :: DynFlags -> CmmLit -zeroCLit dflags = CmmInt 0 (wordWidth dflags) - -zeroExpr :: DynFlags -> CmmExpr -zeroExpr dflags = CmmLit (zeroCLit dflags) - -mkWordCLit :: DynFlags -> Integer -> CmmLit -mkWordCLit dflags wd = CmmInt wd (wordWidth dflags) - -mkByteStringCLit - :: CLabel -> ByteString -> (CmmLit, GenCmmDecl CmmStatics info stmt) --- We have to make a top-level decl for the string, --- and return a literal pointing to it -mkByteStringCLit lbl bytes - = (CmmLabel lbl, CmmData (Section sec lbl) $ Statics lbl [CmmString bytes]) - where - -- This can not happen for String literals (as there \NUL is replaced by - -- C0 80). However, it can happen with Addr# literals. 
- sec = if 0 `BS.elem` bytes then ReadOnlyData else CString - -mkDataLits :: Section -> CLabel -> [CmmLit] -> GenCmmDecl CmmStatics info stmt --- Build a data-segment data block -mkDataLits section lbl lits - = CmmData section (Statics lbl $ map CmmStaticLit lits) - -mkRODataLits :: CLabel -> [CmmLit] -> GenCmmDecl CmmStatics info stmt --- Build a read-only data block -mkRODataLits lbl lits - = mkDataLits section lbl lits - where - section | any needsRelocation lits = Section RelocatableReadOnlyData lbl - | otherwise = Section ReadOnlyData lbl - needsRelocation (CmmLabel _) = True - needsRelocation (CmmLabelOff _ _) = True - needsRelocation _ = False - -mkStgWordCLit :: DynFlags -> StgWord -> CmmLit -mkStgWordCLit dflags wd = CmmInt (fromStgWord wd) (wordWidth dflags) - -packHalfWordsCLit :: DynFlags -> StgHalfWord -> StgHalfWord -> CmmLit --- Make a single word literal in which the lower_half_word is --- at the lower address, and the upper_half_word is at the --- higher address --- ToDo: consider using half-word lits instead --- but be careful: that's vulnerable when reversed -packHalfWordsCLit dflags lower_half_word upper_half_word - = if wORDS_BIGENDIAN dflags - then mkWordCLit dflags ((l `shiftL` halfWordSizeInBits dflags) .|. u) - else mkWordCLit dflags (l .|. (u `shiftL` halfWordSizeInBits dflags)) - where l = fromStgHalfWord lower_half_word - u = fromStgHalfWord upper_half_word - ---------------------------------------------------- --- --- CmmExpr --- ---------------------------------------------------- - -mkLblExpr :: CLabel -> CmmExpr -mkLblExpr lbl = CmmLit (CmmLabel lbl) - -cmmOffsetExpr :: DynFlags -> CmmExpr -> CmmExpr -> CmmExpr --- assumes base and offset have the same CmmType -cmmOffsetExpr dflags e (CmmLit (CmmInt n _)) = cmmOffset dflags e (fromInteger n) -cmmOffsetExpr dflags e byte_off = CmmMachOp (MO_Add (cmmExprWidth dflags e)) [e, byte_off] - -cmmOffset :: DynFlags -> CmmExpr -> Int -> CmmExpr -cmmOffset _ e 0 = e -cmmOffset _ (CmmReg reg) byte_off = cmmRegOff reg byte_off -cmmOffset _ (CmmRegOff reg m) byte_off = cmmRegOff reg (m+byte_off) -cmmOffset _ (CmmLit lit) byte_off = CmmLit (cmmOffsetLit lit byte_off) -cmmOffset _ (CmmStackSlot area off) byte_off - = CmmStackSlot area (off - byte_off) - -- note stack area offsets increase towards lower addresses -cmmOffset _ (CmmMachOp (MO_Add rep) [expr, CmmLit (CmmInt byte_off1 _rep)]) byte_off2 - = CmmMachOp (MO_Add rep) - [expr, CmmLit (CmmInt (byte_off1 + toInteger byte_off2) rep)] -cmmOffset dflags expr byte_off - = CmmMachOp (MO_Add width) [expr, CmmLit (CmmInt (toInteger byte_off) width)] - where - width = cmmExprWidth dflags expr - --- Smart constructor for CmmRegOff. Same caveats as cmmOffset above. -cmmRegOff :: CmmReg -> Int -> CmmExpr -cmmRegOff reg 0 = CmmReg reg -cmmRegOff reg byte_off = CmmRegOff reg byte_off - -cmmOffsetLit :: CmmLit -> Int -> CmmLit -cmmOffsetLit (CmmLabel l) byte_off = cmmLabelOff l byte_off -cmmOffsetLit (CmmLabelOff l m) byte_off = cmmLabelOff l (m+byte_off) -cmmOffsetLit (CmmLabelDiffOff l1 l2 m w) byte_off - = CmmLabelDiffOff l1 l2 (m+byte_off) w -cmmOffsetLit (CmmInt m rep) byte_off = CmmInt (m + fromIntegral byte_off) rep -cmmOffsetLit _ byte_off = pprPanic "cmmOffsetLit" (ppr byte_off) - -cmmLabelOff :: CLabel -> Int -> CmmLit --- Smart constructor for CmmLabelOff -cmmLabelOff lbl 0 = CmmLabel lbl -cmmLabelOff lbl byte_off = CmmLabelOff lbl byte_off - --- | Useful for creating an index into an array, with a statically known offset. 
--- The type is the element type; used for making the multiplier -cmmIndex :: DynFlags - -> Width -- Width w - -> CmmExpr -- Address of vector of items of width w - -> Int -- Which element of the vector (0 based) - -> CmmExpr -- Address of i'th element -cmmIndex dflags width base idx = cmmOffset dflags base (idx * widthInBytes width) - --- | Useful for creating an index into an array, with an unknown offset. -cmmIndexExpr :: DynFlags - -> Width -- Width w - -> CmmExpr -- Address of vector of items of width w - -> CmmExpr -- Which element of the vector (0 based) - -> CmmExpr -- Address of i'th element -cmmIndexExpr dflags width base (CmmLit (CmmInt n _)) = cmmIndex dflags width base (fromInteger n) -cmmIndexExpr dflags width base idx = - cmmOffsetExpr dflags base byte_off - where - idx_w = cmmExprWidth dflags idx - byte_off = CmmMachOp (MO_Shl idx_w) [idx, mkIntExpr dflags (widthInLog width)] - -cmmLoadIndex :: DynFlags -> CmmType -> CmmExpr -> Int -> CmmExpr -cmmLoadIndex dflags ty expr ix = CmmLoad (cmmIndex dflags (typeWidth ty) expr ix) ty - --- The "B" variants take byte offsets -cmmRegOffB :: CmmReg -> ByteOff -> CmmExpr -cmmRegOffB = cmmRegOff - -cmmOffsetB :: DynFlags -> CmmExpr -> ByteOff -> CmmExpr -cmmOffsetB = cmmOffset - -cmmOffsetExprB :: DynFlags -> CmmExpr -> CmmExpr -> CmmExpr -cmmOffsetExprB = cmmOffsetExpr - -cmmLabelOffB :: CLabel -> ByteOff -> CmmLit -cmmLabelOffB = cmmLabelOff - -cmmOffsetLitB :: CmmLit -> ByteOff -> CmmLit -cmmOffsetLitB = cmmOffsetLit - ------------------------ --- The "W" variants take word offsets - -cmmOffsetExprW :: DynFlags -> CmmExpr -> CmmExpr -> CmmExpr --- The second arg is a *word* offset; need to change it to bytes -cmmOffsetExprW dflags e (CmmLit (CmmInt n _)) = cmmOffsetW dflags e (fromInteger n) -cmmOffsetExprW dflags e wd_off = cmmIndexExpr dflags (wordWidth dflags) e wd_off - -cmmOffsetW :: DynFlags -> CmmExpr -> WordOff -> CmmExpr -cmmOffsetW dflags e n = cmmOffsetB dflags e (wordsToBytes dflags n) - -cmmRegOffW :: DynFlags -> CmmReg -> WordOff -> CmmExpr -cmmRegOffW dflags reg wd_off = cmmRegOffB reg (wordsToBytes dflags wd_off) - -cmmOffsetLitW :: DynFlags -> CmmLit -> WordOff -> CmmLit -cmmOffsetLitW dflags lit wd_off = cmmOffsetLitB lit (wordsToBytes dflags wd_off) - -cmmLabelOffW :: DynFlags -> CLabel -> WordOff -> CmmLit -cmmLabelOffW dflags lbl wd_off = cmmLabelOffB lbl (wordsToBytes dflags wd_off) - -cmmLoadIndexW :: DynFlags -> CmmExpr -> Int -> CmmType -> CmmExpr -cmmLoadIndexW dflags base off ty = CmmLoad (cmmOffsetW dflags base off) ty - ------------------------ -cmmULtWord, cmmUGeWord, cmmUGtWord, cmmUShrWord, - cmmSLtWord, - cmmNeWord, cmmEqWord, - cmmOrWord, cmmAndWord, - cmmSubWord, cmmAddWord, cmmMulWord, cmmQuotWord - :: DynFlags -> CmmExpr -> CmmExpr -> CmmExpr -cmmOrWord dflags e1 e2 = CmmMachOp (mo_wordOr dflags) [e1, e2] -cmmAndWord dflags e1 e2 = CmmMachOp (mo_wordAnd dflags) [e1, e2] -cmmNeWord dflags e1 e2 = CmmMachOp (mo_wordNe dflags) [e1, e2] -cmmEqWord dflags e1 e2 = CmmMachOp (mo_wordEq dflags) [e1, e2] -cmmULtWord dflags e1 e2 = CmmMachOp (mo_wordULt dflags) [e1, e2] -cmmUGeWord dflags e1 e2 = CmmMachOp (mo_wordUGe dflags) [e1, e2] -cmmUGtWord dflags e1 e2 = CmmMachOp (mo_wordUGt dflags) [e1, e2] -cmmSLtWord dflags e1 e2 = CmmMachOp (mo_wordSLt dflags) [e1, e2] -cmmUShrWord dflags e1 e2 = CmmMachOp (mo_wordUShr dflags) [e1, e2] -cmmAddWord dflags e1 e2 = CmmMachOp (mo_wordAdd dflags) [e1, e2] -cmmSubWord dflags e1 e2 = CmmMachOp (mo_wordSub dflags) [e1, e2] -cmmMulWord dflags e1 e2 = CmmMachOp 
(mo_wordMul dflags) [e1, e2] -cmmQuotWord dflags e1 e2 = CmmMachOp (mo_wordUQuot dflags) [e1, e2] - -cmmNegate :: DynFlags -> CmmExpr -> CmmExpr -cmmNegate _ (CmmLit (CmmInt n rep)) = CmmLit (CmmInt (-n) rep) -cmmNegate dflags e = CmmMachOp (MO_S_Neg (cmmExprWidth dflags e)) [e] - -blankWord :: DynFlags -> CmmStatic -blankWord dflags = CmmUninitialised (wORD_SIZE dflags) - -cmmToWord :: DynFlags -> CmmExpr -> CmmExpr -cmmToWord dflags e - | w == word = e - | otherwise = CmmMachOp (MO_UU_Conv w word) [e] - where - w = cmmExprWidth dflags e - word = wordWidth dflags - -cmmMkAssign :: DynFlags -> CmmExpr -> Unique -> (CmmNode O O, CmmExpr) -cmmMkAssign dflags expr uq = - let !ty = cmmExprType dflags expr - reg = (CmmLocal (LocalReg uq ty)) - in (CmmAssign reg expr, CmmReg reg) - - ---------------------------------------------------- --- --- CmmExpr predicates --- ---------------------------------------------------- - -isTrivialCmmExpr :: CmmExpr -> Bool -isTrivialCmmExpr (CmmLoad _ _) = False -isTrivialCmmExpr (CmmMachOp _ _) = False -isTrivialCmmExpr (CmmLit _) = True -isTrivialCmmExpr (CmmReg _) = True -isTrivialCmmExpr (CmmRegOff _ _) = True -isTrivialCmmExpr (CmmStackSlot _ _) = panic "isTrivialCmmExpr CmmStackSlot" - -hasNoGlobalRegs :: CmmExpr -> Bool -hasNoGlobalRegs (CmmLoad e _) = hasNoGlobalRegs e -hasNoGlobalRegs (CmmMachOp _ es) = all hasNoGlobalRegs es -hasNoGlobalRegs (CmmLit _) = True -hasNoGlobalRegs (CmmReg (CmmLocal _)) = True -hasNoGlobalRegs (CmmRegOff (CmmLocal _) _) = True -hasNoGlobalRegs _ = False - -isLit :: CmmExpr -> Bool -isLit (CmmLit _) = True -isLit _ = False - -isComparisonExpr :: CmmExpr -> Bool -isComparisonExpr (CmmMachOp op _) = isComparisonMachOp op -isComparisonExpr _ = False - ---------------------------------------------------- --- --- Tagging --- ---------------------------------------------------- - --- Tag bits mask -cmmTagMask, cmmPointerMask :: DynFlags -> CmmExpr -cmmTagMask dflags = mkIntExpr dflags (tAG_MASK dflags) -cmmPointerMask dflags = mkIntExpr dflags (complement (tAG_MASK dflags)) - --- Used to untag a possibly tagged pointer --- A static label need not be untagged -cmmUntag, cmmIsTagged, cmmConstrTag1 :: DynFlags -> CmmExpr -> CmmExpr -cmmUntag _ e@(CmmLit (CmmLabel _)) = e --- Default case -cmmUntag dflags e = cmmAndWord dflags e (cmmPointerMask dflags) - --- Test if a closure pointer is untagged -cmmIsTagged dflags e = cmmNeWord dflags (cmmAndWord dflags e (cmmTagMask dflags)) (zeroExpr dflags) - --- Get constructor tag, but one based. -cmmConstrTag1 dflags e = cmmAndWord dflags e (cmmTagMask dflags) - - ------------------------------------------------------------------------------ --- Overlap and usage - --- | Returns True if the two STG registers overlap on the specified --- platform, in the sense that writing to one will clobber the --- other. This includes the case that the two registers are the same --- STG register. See Note [Overlapping global registers] for details. -regsOverlap :: DynFlags -> CmmReg -> CmmReg -> Bool -regsOverlap dflags (CmmGlobal g) (CmmGlobal g') - | Just real <- globalRegMaybe (targetPlatform dflags) g, - Just real' <- globalRegMaybe (targetPlatform dflags) g', - real == real' - = True -regsOverlap _ reg reg' = reg == reg' - --- | Returns True if the STG register is used by the expression, in --- the sense that a store to the register might affect the value of --- the expression. 
--- --- We must check for overlapping registers and not just equal --- registers here, otherwise CmmSink may incorrectly reorder --- assignments that conflict due to overlap. See #10521 and Note --- [Overlapping global registers]. -regUsedIn :: DynFlags -> CmmReg -> CmmExpr -> Bool -regUsedIn dflags = regUsedIn_ where - _ `regUsedIn_` CmmLit _ = False - reg `regUsedIn_` CmmLoad e _ = reg `regUsedIn_` e - reg `regUsedIn_` CmmReg reg' = regsOverlap dflags reg reg' - reg `regUsedIn_` CmmRegOff reg' _ = regsOverlap dflags reg reg' - reg `regUsedIn_` CmmMachOp _ es = any (reg `regUsedIn_`) es - _ `regUsedIn_` CmmStackSlot _ _ = False - --------------------------------------------- --- --- mkLiveness --- ---------------------------------------------- - -mkLiveness :: DynFlags -> [LocalReg] -> Liveness -mkLiveness _ [] = [] -mkLiveness dflags (reg:regs) - = bits ++ mkLiveness dflags regs - where - sizeW = (widthInBytes (typeWidth (localRegType reg)) + wORD_SIZE dflags - 1) - `quot` wORD_SIZE dflags - -- number of words, rounded up - bits = replicate sizeW is_non_ptr -- True <=> Non Ptr - - is_non_ptr = not $ isGcPtrType (localRegType reg) - - --- ============================================== - --- ============================================== - --- ============================================== - - ---------------------------------------------------- --- --- Manipulating CmmGraphs --- ---------------------------------------------------- - -modifyGraph :: (Graph n C C -> Graph n' C C) -> GenCmmGraph n -> GenCmmGraph n' -modifyGraph f g = CmmGraph {g_entry=g_entry g, g_graph=f (g_graph g)} - -toBlockMap :: CmmGraph -> LabelMap CmmBlock -toBlockMap (CmmGraph {g_graph=GMany NothingO body NothingO}) = body - -ofBlockMap :: BlockId -> LabelMap CmmBlock -> CmmGraph -ofBlockMap entry bodyMap = CmmGraph {g_entry=entry, g_graph=GMany NothingO bodyMap NothingO} - -toBlockList :: CmmGraph -> [CmmBlock] -toBlockList g = mapElems $ toBlockMap g - --- | like 'toBlockList', but the entry block always comes first -toBlockListEntryFirst :: CmmGraph -> [CmmBlock] -toBlockListEntryFirst g - | mapNull m = [] - | otherwise = entry_block : others - where - m = toBlockMap g - entry_id = g_entry g - Just entry_block = mapLookup entry_id m - others = filter ((/= entry_id) . entryLabel) (mapElems m) - --- | Like 'toBlockListEntryFirst', but we strive to ensure that we order blocks --- so that the false case of a conditional jumps to the next block in the output --- list of blocks. This matches the way OldCmm blocks were output since in --- OldCmm the false case was a fallthrough, whereas in Cmm conditional branches --- have both true and false successors. Block ordering can make a big difference --- in performance in the LLVM backend. Note that we rely crucially on the order --- of successors returned for CmmCondBranch by the NonLocal instance for CmmNode --- defined in cmm/CmmNode.hs. 
-GBM -toBlockListEntryFirstFalseFallthrough :: CmmGraph -> [CmmBlock] -toBlockListEntryFirstFalseFallthrough g - | mapNull m = [] - | otherwise = dfs setEmpty [entry_block] - where - m = toBlockMap g - entry_id = g_entry g - Just entry_block = mapLookup entry_id m - - dfs :: LabelSet -> [CmmBlock] -> [CmmBlock] - dfs _ [] = [] - dfs visited (block:bs) - | id `setMember` visited = dfs visited bs - | otherwise = block : dfs (setInsert id visited) bs' - where id = entryLabel block - bs' = foldr add_id bs (successors block) - add_id id bs = case mapLookup id m of - Just b -> b : bs - Nothing -> bs - -ofBlockList :: BlockId -> [CmmBlock] -> CmmGraph -ofBlockList entry blocks = CmmGraph { g_entry = entry - , g_graph = GMany NothingO body NothingO } - where body = foldr addBlock emptyBody blocks - -bodyToBlockList :: Body CmmNode -> [CmmBlock] -bodyToBlockList body = mapElems body - -mapGraphNodes :: ( CmmNode C O -> CmmNode C O - , CmmNode O O -> CmmNode O O - , CmmNode O C -> CmmNode O C) - -> CmmGraph -> CmmGraph -mapGraphNodes funs@(mf,_,_) g = - ofBlockMap (entryLabel $ mf $ CmmEntry (g_entry g) GlobalScope) $ - mapMap (mapBlock3' funs) $ toBlockMap g - -mapGraphNodes1 :: (forall e x. CmmNode e x -> CmmNode e x) -> CmmGraph -> CmmGraph -mapGraphNodes1 f = modifyGraph (mapGraph f) - - -foldlGraphBlocks :: (a -> CmmBlock -> a) -> a -> CmmGraph -> a -foldlGraphBlocks k z g = mapFoldl k z $ toBlockMap g - -revPostorder :: CmmGraph -> [CmmBlock] -revPostorder g = {-# SCC "revPostorder" #-} - revPostorderFrom (toBlockMap g) (g_entry g) - -------------------------------------------------- --- Tick utilities - --- | Extract all tick annotations from the given block -blockTicks :: Block CmmNode C C -> [CmmTickish] -blockTicks b = reverse $ foldBlockNodesF goStmt b [] - where goStmt :: CmmNode e x -> [CmmTickish] -> [CmmTickish] - goStmt (CmmTick t) ts = t:ts - goStmt _other ts = ts - - --- ----------------------------------------------------------------------------- --- Access to common global registers - -baseExpr, spExpr, hpExpr, currentTSOExpr, currentNurseryExpr, - spLimExpr, hpLimExpr, cccsExpr :: CmmExpr -baseExpr = CmmReg baseReg -spExpr = CmmReg spReg -spLimExpr = CmmReg spLimReg -hpExpr = CmmReg hpReg -hpLimExpr = CmmReg hpLimReg -currentTSOExpr = CmmReg currentTSOReg -currentNurseryExpr = CmmReg currentNurseryReg -cccsExpr = CmmReg cccsReg diff --git a/compiler/cmm/Debug.hs b/compiler/cmm/Debug.hs deleted file mode 100644 index 712dd4ba98..0000000000 --- a/compiler/cmm/Debug.hs +++ /dev/null @@ -1,546 +0,0 @@ -{-# LANGUAGE GADTs #-} -{-# LANGUAGE MultiWayIf #-} - ------------------------------------------------------------------------------ --- --- Debugging data --- --- Association of debug data on the Cmm level, with methods to encode it in --- event log format for later inclusion in profiling event logs. 
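
Looking back at toBlockListEntryFirstFalseFallthrough above: the ordering it computes is a plain depth-first walk that pushes a block's successors (false branch first for conditionals) onto the work list. A toy sketch over an Int-labelled control-flow graph, with invented names (dfsOrder, cfg), shows the effect:

    -- Illustrative sketch only, not GHC code: depth-first block layout where
    -- successors are listed false-branch first, so the false case tends to
    -- fall through to the next block in the output.
    import qualified Data.Map as M
    import qualified Data.Set as S

    type Label = Int

    dfsOrder :: M.Map Label [Label] -> Label -> [Label]
    dfsOrder succs entry = go S.empty [entry]
      where
        go _ [] = []
        go visited (l:ls)
          | l `S.member` visited = go visited ls
          | otherwise            = l : go (S.insert l visited)
                                          (M.findWithDefault [] l succs ++ ls)

    main :: IO ()
    main = do
      -- 1 branches to 2 (false) or 3 (true); 2 and 3 both jump to 4.
      let cfg = M.fromList [(1,[2,3]), (2,[4]), (3,[4]), (4,[])]
      print (dfsOrder cfg 1)   -- [1,2,4,3]: the false branch (2) follows 1 directly

In the printed order the false successor of the branch immediately follows it, which is the fallthrough property the LLVM backend benefits from.
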
--- ------------------------------------------------------------------------------ - -module Debug ( - - DebugBlock(..), - cmmDebugGen, - cmmDebugLabels, - cmmDebugLink, - debugToMap, - - -- * Unwinding information - UnwindTable, UnwindPoint(..), - UnwindExpr(..), toUnwindExpr - ) where - -import GhcPrelude - -import BlockId -import CLabel -import Cmm -import CmmUtils -import CoreSyn -import FastString ( nilFS, mkFastString ) -import Module -import Outputable -import PprCmmExpr ( pprExpr ) -import SrcLoc -import Util ( seqList ) - -import Hoopl.Block -import Hoopl.Collections -import Hoopl.Graph -import Hoopl.Label - -import Data.Maybe -import Data.List ( minimumBy, nubBy ) -import Data.Ord ( comparing ) -import qualified Data.Map as Map -import Data.Either ( partitionEithers ) - --- | Debug information about a block of code. Ticks scope over nested --- blocks. -data DebugBlock = - DebugBlock - { dblProcedure :: !Label -- ^ Entry label of containing proc - , dblLabel :: !Label -- ^ Hoopl label - , dblCLabel :: !CLabel -- ^ Output label - , dblHasInfoTbl :: !Bool -- ^ Has an info table? - , dblParent :: !(Maybe DebugBlock) - -- ^ The parent of this proc. See Note [Splitting DebugBlocks] - , dblTicks :: ![CmmTickish] -- ^ Ticks defined in this block - , dblSourceTick :: !(Maybe CmmTickish) -- ^ Best source tick covering block - , dblPosition :: !(Maybe Int) -- ^ Output position relative to - -- other blocks. @Nothing@ means - -- the block was optimized out - , dblUnwind :: [UnwindPoint] - , dblBlocks :: ![DebugBlock] -- ^ Nested blocks - } - -instance Outputable DebugBlock where - ppr blk = (if | dblProcedure blk == dblLabel blk - -> text "proc" - | dblHasInfoTbl blk - -> text "pp-blk" - | otherwise - -> text "blk") <+> - ppr (dblLabel blk) <+> parens (ppr (dblCLabel blk)) <+> - (maybe empty ppr (dblSourceTick blk)) <+> - (maybe (text "removed") ((text "pos " <>) . ppr) - (dblPosition blk)) <+> - (ppr (dblUnwind blk)) $+$ - (if null (dblBlocks blk) then empty else nest 4 (ppr (dblBlocks blk))) - --- | Intermediate data structure holding debug-relevant context information --- about a block. -type BlockContext = (CmmBlock, RawCmmDecl) - --- | Extract debug data from a group of procedures. We will prefer --- source notes that come from the given module (presumably the module --- that we are currently compiling). -cmmDebugGen :: ModLocation -> RawCmmGroup -> [DebugBlock] -cmmDebugGen modLoc decls = map (blocksForScope Nothing) topScopes - where - blockCtxs :: Map.Map CmmTickScope [BlockContext] - blockCtxs = blockContexts decls - - -- Analyse tick scope structure: Each one is either a top-level - -- tick scope, or the child of another. - (topScopes, childScopes) - = partitionEithers $ map (\a -> findP a a) $ Map.keys blockCtxs - findP tsc GlobalScope = Left tsc -- top scope - findP tsc scp | scp' `Map.member` blockCtxs = Right (scp', tsc) - | otherwise = findP tsc scp' - where -- Note that we only following the left parent of - -- combined scopes. This loses us ticks, which we will - -- recover by copying ticks below. - scp' | SubScope _ scp' <- scp = scp' - | CombinedScope scp' _ <- scp = scp' - | otherwise = panic "findP impossible" - - scopeMap = foldr (uncurry insertMulti) Map.empty childScopes - - -- This allows us to recover ticks that we lost by flattening - -- the graph. 
Basically, if the parent is A but the child is - -- CBA, we know that there is no BA, because it would have taken - -- priority - but there might be a B scope, with ticks that - -- would not be associated with our child anymore. Note however - -- that there might be other childs (DB), which we have to - -- filter out. - -- - -- We expect this to be called rarely, which is why we are not - -- trying too hard to be efficient here. In many cases we won't - -- have to construct blockCtxsU in the first place. - ticksToCopy :: CmmTickScope -> [CmmTickish] - ticksToCopy (CombinedScope scp s) = go s - where go s | scp `isTickSubScope` s = [] -- done - | SubScope _ s' <- s = ticks ++ go s' - | CombinedScope s1 s2 <- s = ticks ++ go s1 ++ go s2 - | otherwise = panic "ticksToCopy impossible" - where ticks = bCtxsTicks $ fromMaybe [] $ Map.lookup s blockCtxs - ticksToCopy _ = [] - bCtxsTicks = concatMap (blockTicks . fst) - - -- Finding the "best" source tick is somewhat arbitrary -- we - -- select the first source span, while preferring source ticks - -- from the same source file. Furthermore, dumps take priority - -- (if we generated one, we probably want debug information to - -- refer to it). - bestSrcTick = minimumBy (comparing rangeRating) - rangeRating (SourceNote span _) - | srcSpanFile span == thisFile = 1 - | otherwise = 2 :: Int - rangeRating note = pprPanic "rangeRating" (ppr note) - thisFile = maybe nilFS mkFastString $ ml_hs_file modLoc - - -- Returns block tree for this scope as well as all nested - -- scopes. Note that if there are multiple blocks in the (exact) - -- same scope we elect one as the "branch" node and add the rest - -- as children. - blocksForScope :: Maybe CmmTickish -> CmmTickScope -> DebugBlock - blocksForScope cstick scope = mkBlock True (head bctxs) - where bctxs = fromJust $ Map.lookup scope blockCtxs - nested = fromMaybe [] $ Map.lookup scope scopeMap - childs = map (mkBlock False) (tail bctxs) ++ - map (blocksForScope stick) nested - - mkBlock :: Bool -> BlockContext -> DebugBlock - mkBlock top (block, prc) - = DebugBlock { dblProcedure = g_entry graph - , dblLabel = label - , dblCLabel = case info of - Just (Statics infoLbl _) -> infoLbl - Nothing - | g_entry graph == label -> entryLbl - | otherwise -> blockLbl label - , dblHasInfoTbl = isJust info - , dblParent = Nothing - , dblTicks = ticks - , dblPosition = Nothing -- see cmmDebugLink - , dblSourceTick = stick - , dblBlocks = blocks - , dblUnwind = [] - } - where (CmmProc infos entryLbl _ graph) = prc - label = entryLabel block - info = mapLookup label infos - blocks | top = seqList childs childs - | otherwise = [] - - -- A source tick scopes over all nested blocks. However - -- their source ticks might take priority. - isSourceTick SourceNote {} = True - isSourceTick _ = False - -- Collect ticks from all blocks inside the tick scope. - -- We attempt to filter out duplicates while we're at it. - ticks = nubBy (flip tickishContains) $ - bCtxsTicks bctxs ++ ticksToCopy scope - stick = case filter isSourceTick ticks of - [] -> cstick - sticks -> Just $! bestSrcTick (sticks ++ maybeToList cstick) - --- | Build a map of blocks sorted by their tick scopes --- --- This involves a pre-order traversal, as we want blocks in rough --- control flow order (so ticks have a chance to be sorted in the --- right order). 
-blockContexts :: RawCmmGroup -> Map.Map CmmTickScope [BlockContext] -blockContexts decls = Map.map reverse $ foldr walkProc Map.empty decls - where walkProc :: RawCmmDecl - -> Map.Map CmmTickScope [BlockContext] - -> Map.Map CmmTickScope [BlockContext] - walkProc CmmData{} m = m - walkProc prc@(CmmProc _ _ _ graph) m - | mapNull blocks = m - | otherwise = snd $ walkBlock prc entry (emptyLbls, m) - where blocks = toBlockMap graph - entry = [mapFind (g_entry graph) blocks] - emptyLbls = setEmpty :: LabelSet - - walkBlock :: RawCmmDecl -> [Block CmmNode C C] - -> (LabelSet, Map.Map CmmTickScope [BlockContext]) - -> (LabelSet, Map.Map CmmTickScope [BlockContext]) - walkBlock _ [] c = c - walkBlock prc (block:blocks) (visited, m) - | lbl `setMember` visited - = walkBlock prc blocks (visited, m) - | otherwise - = walkBlock prc blocks $ - walkBlock prc succs - (lbl `setInsert` visited, - insertMulti scope (block, prc) m) - where CmmEntry lbl scope = firstNode block - (CmmProc _ _ _ graph) = prc - succs = map (flip mapFind (toBlockMap graph)) - (successors (lastNode block)) - mapFind = mapFindWithDefault (error "contextTree: block not found!") - -insertMulti :: Ord k => k -> a -> Map.Map k [a] -> Map.Map k [a] -insertMulti k v = Map.insertWith (const (v:)) k [v] - -cmmDebugLabels :: (i -> Bool) -> GenCmmGroup d g (ListGraph i) -> [Label] -cmmDebugLabels isMeta nats = seqList lbls lbls - where -- Find order in which procedures will be generated by the - -- back-end (that actually matters for DWARF generation). - -- - -- Note that we might encounter blocks that are missing or only - -- consist of meta instructions -- we will declare them missing, - -- which will skip debug data generation without messing up the - -- block hierarchy. - lbls = map blockId $ filter (not . allMeta) $ concatMap getBlocks nats - getBlocks (CmmProc _ _ _ (ListGraph bs)) = bs - getBlocks _other = [] - allMeta (BasicBlock _ instrs) = all isMeta instrs - --- | Sets position and unwind table fields in the debug block tree according to --- native generated code. -cmmDebugLink :: [Label] -> LabelMap [UnwindPoint] - -> [DebugBlock] -> [DebugBlock] -cmmDebugLink labels unwindPts blocks = map link blocks - where blockPos :: LabelMap Int - blockPos = mapFromList $ flip zip [0..] labels - link block = block { dblPosition = mapLookup (dblLabel block) blockPos - , dblBlocks = map link (dblBlocks block) - , dblUnwind = fromMaybe mempty - $ mapLookup (dblLabel block) unwindPts - } - --- | Converts debug blocks into a label map for easier lookups -debugToMap :: [DebugBlock] -> LabelMap DebugBlock -debugToMap = mapUnions . map go - where go b = mapInsert (dblLabel b) b $ mapUnions $ map go (dblBlocks b) - -{- -Note [What is this unwinding business?] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Unwinding tables are a variety of debugging information used by debugging tools -to reconstruct the execution history of a program at runtime. These tables -consist of sets of "instructions", one set for every instruction in the program, -which describe how to reconstruct the state of the machine at the point where -the current procedure was called. For instance, consider the following annotated -pseudo-code, - - a_fun: - add rsp, 8 -- unwind: rsp = rsp - 8 - mov rax, 1 -- unwind: rax = unknown - call another_block - sub rsp, 8 -- unwind: rsp = rsp - -We see that attached to each instruction there is an "unwind" annotation, which -provides a relationship between each updated register and its value at the -time of entry to a_fun. 
This is the sort of information that allows gdb to give -you a stack backtrace given the execution state of your program. This -unwinding information is captured in various ways by various debug information -formats; in the case of DWARF (the only format supported by GHC) it is known as -Call Frame Information (CFI) and can be found in the .debug.frames section of -your object files. - -Currently we only bother to produce unwinding information for registers which -are necessary to reconstruct flow-of-execution. On x86_64 this includes $rbp -(which is the STG stack pointer) and $rsp (the C stack pointer). - -Let's consider how GHC would annotate a C-- program with unwinding information -with a typical C-- procedure as would come from the STG-to-Cmm code generator, - - entry() - { c2fe: - v :: P64 = R2; - if ((Sp + 8) - 32 < SpLim) (likely: False) goto c2ff; else goto c2fg; - c2ff: - R2 = v :: P64; - R1 = test_closure; - call (stg_gc_fun)(R2, R1) args: 8, res: 0, upd: 8; - c2fg: - I64[Sp - 8] = c2dD; - R1 = v :: P64; - Sp = Sp - 8; // Sp updated here - if (R1 & 7 != 0) goto c2dD; else goto c2dE; - c2dE: - call (I64[R1])(R1) returns to c2dD, args: 8, res: 8, upd: 8; - c2dD: - w :: P64 = R1; - Hp = Hp + 48; - if (Hp > HpLim) (likely: False) goto c2fj; else goto c2fi; - ... - }, - -Let's consider how this procedure will be decorated with unwind information -(largely by CmmLayoutStack). Naturally, when we enter the procedure `entry` the -value of Sp is no different from what it was at its call site. Therefore we will -add an `unwind` statement saying this at the beginning of its unwind-annotated -code, - - entry() - { c2fe: - unwind Sp = Just Sp + 0; - v :: P64 = R2; - if ((Sp + 8) - 32 < SpLim) (likely: False) goto c2ff; else goto c2fg; - -After c2fe we may pass to either c2ff or c2fg; let's first consider the -former. In this case there is nothing in particular that we need to do other -than reiterate what we already know about Sp, - - c2ff: - unwind Sp = Just Sp + 0; - R2 = v :: P64; - R1 = test_closure; - call (stg_gc_fun)(R2, R1) args: 8, res: 0, upd: 8; - -In contrast, c2fg updates Sp midway through its body. To ensure that unwinding -can happen correctly after this point we must include an unwind statement there, -in addition to the usual beginning-of-block statement, - - c2fg: - unwind Sp = Just Sp + 0; - I64[Sp - 8] = c2dD; - R1 = v :: P64; - Sp = Sp - 8; - unwind Sp = Just Sp + 8; - if (R1 & 7 != 0) goto c2dD; else goto c2dE; - -The remaining blocks are simple, - - c2dE: - unwind Sp = Just Sp + 8; - call (I64[R1])(R1) returns to c2dD, args: 8, res: 8, upd: 8; - c2dD: - unwind Sp = Just Sp + 8; - w :: P64 = R1; - Hp = Hp + 48; - if (Hp > HpLim) (likely: False) goto c2fj; else goto c2fi; - ... - }, - - -The flow of unwinding information through the compiler is a bit convoluted: - - * C-- begins life in StgToCmm without any unwind information. This is because we - haven't actually done any register assignment or stack layout yet, so there - is no need for unwind information. - - * CmmLayoutStack figures out how to layout each procedure's stack, and produces - appropriate unwinding nodes for each adjustment of the STG Sp register. - - * The unwind nodes are carried through the sinking pass. Currently this is - guaranteed not to invalidate unwind information since it won't touch stores - to Sp, but this will need revisiting if CmmSink gets smarter in the future. 
- - * Eventually we make it to the native code generator backend which can then - preserve the unwind nodes in its machine-specific instructions. In so doing - the backend can also modify or add unwinding information; this is necessary, - for instance, in the case of x86-64, where adjustment of $rsp may be - necessary during calls to native foreign code due to the native calling - convention. - - * The NCG then retrieves the final unwinding table for each block from the - backend with extractUnwindPoints. - - * This unwind information is converted to DebugBlocks by Debug.cmmDebugGen - - * These DebugBlocks are then converted to, e.g., DWARF unwinding tables - (by the Dwarf module) and emitted in the final object. - -See also: - Note [Unwinding information in the NCG] in AsmCodeGen, - Note [Unwind pseudo-instruction in Cmm], - Note [Debugging DWARF unwinding info]. - - -Note [Debugging DWARF unwinding info] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -For debugging generated unwinding info I've found it most useful to dump the -disassembled binary with objdump -D and dump the debug info with -readelf --debug-dump=frames-interp. - -You should get something like this: - - 0000000000000010 : - 10: 48 83 c5 18 add $0x18,%rbp - 14: ff 65 00 jmpq *0x0(%rbp) - -and: - - Contents of the .debug_frame section: - - 00000000 0000000000000014 ffffffff CIE "" cf=1 df=-8 ra=16 - LOC CFA rbp rsp ra - 0000000000000000 rbp+0 v+0 s c+0 - - 00000018 0000000000000024 00000000 FDE cie=00000000 pc=000000000000000f..0000000000000017 - LOC CFA rbp rsp ra - 000000000000000f rbp+0 v+0 s c+0 - 000000000000000f rbp+24 v+0 s c+0 - -To read it http://www.dwarfstd.org/doc/dwarf-2.0.0.pdf has a nice example in -Appendix 5 (page 101 of the pdf) and more details in the relevant section. - -The key thing to keep in mind is that the value at LOC is the value from -*before* the instruction at LOC executes. In other words it answers the -question: if my $rip is at LOC, how do I get the relevant values given the -values obtained through unwinding so far. - -If the readelf --debug-dump=frames-interp output looks wrong, it may also be -useful to look at readelf --debug-dump=frames, which is closer to the -information that GHC generated. - -It's also useful to dump the relevant Cmm with -ddump-cmm -ddump-opt-cmm --ddump-cmm-proc -ddump-cmm-verbose. Note [Unwind pseudo-instruction in Cmm] -explains how to interpret it. - -Inside gdb there are a couple useful commands for inspecting frames. -For example: - - gdb> info frame - -It shows the values of registers obtained through unwinding. - -Another useful thing to try when debugging the DWARF unwinding is to enable -extra debugging output in GDB: - - gdb> set debug frame 1 - -This makes GDB produce a trace of its internal workings. Having gone this far, -it's just a tiny step to run GDB in GDB. Make sure you install debugging -symbols for gdb if you obtain it through a package manager. - -Keep in mind that the current release of GDB has an instruction pointer handling -heuristic that works well for C-like languages, but doesn't always work for -Haskell. See Note [Info Offset] in Dwarf.Types for more details. - -Note [Unwind pseudo-instruction in Cmm] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -One of the possible CmmNodes is a CmmUnwind pseudo-instruction. It doesn't -generate any assembly, but controls what DWARF unwinding information gets -generated. - -It's important to understand what ranges of code the unwind pseudo-instruction -refers to. 
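
Before the concrete address-range example that follows, here is a tiny illustrative model (a toy Stmt type and annotate function, not GHC's representation) of how the Sp offsets in the annotated c2fg walkthrough above arise: each adjustment of Sp changes the offset recorded by the next unwind statement.

    -- Illustrative sketch only: how the Sp unwind offset evolves through a block.
    -- After "Sp = Sp - k" the old Sp is recovered as (new Sp + k), so the
    -- recorded offset grows by k.
    data Stmt = SpAdjust Int      -- Sp = Sp + n (negative n allocates stack)
              | Other String      -- any statement that leaves Sp alone
      deriving Show

    -- Pair every statement with the unwind offset that is valid *after* it,
    -- i.e. "unwind Sp = Just Sp + offset" as emitted by stack layout.
    annotate :: Int -> [Stmt] -> [(Stmt, Int)]
    annotate _      []       = []
    annotate offset (s:rest) = (s, offset') : annotate offset' rest
      where
        offset' = case s of
                    SpAdjust n -> offset - n   -- Sp moved by n, old Sp is (-n) further away
                    Other _    -> offset

    main :: IO ()
    main = mapM_ print $
      annotate 0 [ Other "I64[Sp - 8] = c2dD"
                 , Other "R1 = v"
                 , SpAdjust (-8)               -- Sp = Sp - 8
                 , Other "if (R1 & 7 != 0) ..." ]
      -- The SpAdjust line is followed by offset 8, i.e. "unwind Sp = Just Sp + 8",
      -- matching the Note's annotated c2fg block.
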
-For a sequence of CmmNodes like: - - A // starts at addr X and ends at addr Y-1 - unwind Sp = Just Sp + 16; - B // starts at addr Y and ends at addr Z - -the unwind statement reflects the state after A has executed, but before B -has executed. If you consult the Note [Debugging DWARF unwinding info], the -LOC this information will end up in is Y. --} - --- | A label associated with an 'UnwindTable' -data UnwindPoint = UnwindPoint !CLabel !UnwindTable - -instance Outputable UnwindPoint where - ppr (UnwindPoint lbl uws) = - braces $ ppr lbl<>colon - <+> hsep (punctuate comma $ map pprUw $ Map.toList uws) - where - pprUw (g, expr) = ppr g <> char '=' <> ppr expr - --- | Maps registers to expressions that yield their "old" values --- further up the stack. Most interesting for the stack pointer @Sp@, --- but might be useful to document saved registers, too. Note that a --- register's value will be 'Nothing' when the register's previous --- value cannot be reconstructed. -type UnwindTable = Map.Map GlobalReg (Maybe UnwindExpr) - --- | Expressions, used for unwind information -data UnwindExpr = UwConst !Int -- ^ literal value - | UwReg !GlobalReg !Int -- ^ register plus offset - | UwDeref UnwindExpr -- ^ pointer dereferencing - | UwLabel CLabel - | UwPlus UnwindExpr UnwindExpr - | UwMinus UnwindExpr UnwindExpr - | UwTimes UnwindExpr UnwindExpr - deriving (Eq) - -instance Outputable UnwindExpr where - pprPrec _ (UwConst i) = ppr i - pprPrec _ (UwReg g 0) = ppr g - pprPrec p (UwReg g x) = pprPrec p (UwPlus (UwReg g 0) (UwConst x)) - pprPrec _ (UwDeref e) = char '*' <> pprPrec 3 e - pprPrec _ (UwLabel l) = pprPrec 3 l - pprPrec p (UwPlus e0 e1) | p <= 0 - = pprPrec 0 e0 <> char '+' <> pprPrec 0 e1 - pprPrec p (UwMinus e0 e1) | p <= 0 - = pprPrec 1 e0 <> char '-' <> pprPrec 1 e1 - pprPrec p (UwTimes e0 e1) | p <= 1 - = pprPrec 2 e0 <> char '*' <> pprPrec 2 e1 - pprPrec _ other = parens (pprPrec 0 other) - --- | Conversion of Cmm expressions to unwind expressions. We check for --- unsupported operator usages and simplify the expression as far as --- possible. -toUnwindExpr :: CmmExpr -> UnwindExpr -toUnwindExpr (CmmLit (CmmInt i _)) = UwConst (fromIntegral i) -toUnwindExpr (CmmLit (CmmLabel l)) = UwLabel l -toUnwindExpr (CmmRegOff (CmmGlobal g) i) = UwReg g i -toUnwindExpr (CmmReg (CmmGlobal g)) = UwReg g 0 -toUnwindExpr (CmmLoad e _) = UwDeref (toUnwindExpr e) -toUnwindExpr e@(CmmMachOp op [e1, e2]) = - case (op, toUnwindExpr e1, toUnwindExpr e2) of - (MO_Add{}, UwReg r x, UwConst y) -> UwReg r (x + y) - (MO_Sub{}, UwReg r x, UwConst y) -> UwReg r (x - y) - (MO_Add{}, UwConst x, UwReg r y) -> UwReg r (x + y) - (MO_Add{}, UwConst x, UwConst y) -> UwConst (x + y) - (MO_Sub{}, UwConst x, UwConst y) -> UwConst (x - y) - (MO_Mul{}, UwConst x, UwConst y) -> UwConst (x * y) - (MO_Add{}, u1, u2 ) -> UwPlus u1 u2 - (MO_Sub{}, u1, u2 ) -> UwMinus u1 u2 - (MO_Mul{}, u1, u2 ) -> UwTimes u1 u2 - _otherwise -> pprPanic "Unsupported operator in unwind expression!" - (pprExpr e) -toUnwindExpr e - = pprPanic "Unsupported unwind expression!" 
(ppr e) diff --git a/compiler/cmm/Hoopl/Block.hs b/compiler/cmm/Hoopl/Block.hs deleted file mode 100644 index 07aafe8ae9..0000000000 --- a/compiler/cmm/Hoopl/Block.hs +++ /dev/null @@ -1,329 +0,0 @@ -{-# LANGUAGE DataKinds #-} -{-# LANGUAGE DeriveFunctor #-} -{-# LANGUAGE GADTs #-} -{-# LANGUAGE PolyKinds #-} -{-# LANGUAGE RankNTypes #-} -{-# LANGUAGE ScopedTypeVariables #-} -{-# LANGUAGE StandaloneDeriving #-} -{-# LANGUAGE TypeFamilies #-} -module Hoopl.Block - ( Extensibility (..) - , O - , C - , MaybeO(..) - , IndexedCO - , Block(..) - , blockAppend - , blockCons - , blockFromList - , blockJoin - , blockJoinHead - , blockJoinTail - , blockSnoc - , blockSplit - , blockSplitHead - , blockSplitTail - , blockToList - , emptyBlock - , firstNode - , foldBlockNodesB - , foldBlockNodesB3 - , foldBlockNodesF - , isEmptyBlock - , lastNode - , mapBlock - , mapBlock' - , mapBlock3' - , replaceFirstNode - , replaceLastNode - ) where - -import GhcPrelude - --- ----------------------------------------------------------------------------- --- Shapes: Open and Closed - --- | Used at the type level to indicate "open" vs "closed" structure. -data Extensibility - -- | An "open" structure with a unique, unnamed control-flow edge flowing in - -- or out. "Fallthrough" and concatenation are permitted at an open point. - = Open - -- | A "closed" structure which supports control transfer only through the use - -- of named labels---no "fallthrough" is permitted. The number of control-flow - -- edges is unconstrained. - | Closed - -type O = 'Open -type C = 'Closed - --- | Either type indexed by closed/open using type families -type family IndexedCO (ex :: Extensibility) (a :: k) (b :: k) :: k -type instance IndexedCO C a _b = a -type instance IndexedCO O _a b = b - --- | Maybe type indexed by open/closed -data MaybeO ex t where - JustO :: t -> MaybeO O t - NothingO :: MaybeO C t - --- | Maybe type indexed by closed/open -data MaybeC ex t where - JustC :: t -> MaybeC C t - NothingC :: MaybeC O t - -deriving instance Functor (MaybeO ex) -deriving instance Functor (MaybeC ex) - --- ----------------------------------------------------------------------------- --- The Block type - --- | A sequence of nodes. May be any of four shapes (O/O, O/C, C/O, C/C). --- Open at the entry means single entry, mutatis mutandis for exit. --- A closed/closed block is a /basic/ block and can't be extended further. --- Clients should avoid manipulating blocks and should stick to either nodes --- or graphs. -data Block n e x where - BlockCO :: n C O -> Block n O O -> Block n C O - BlockCC :: n C O -> Block n O O -> n O C -> Block n C C - BlockOC :: Block n O O -> n O C -> Block n O C - - BNil :: Block n O O - BMiddle :: n O O -> Block n O O - BCat :: Block n O O -> Block n O O -> Block n O O - BSnoc :: Block n O O -> n O O -> Block n O O - BCons :: n O O -> Block n O O -> Block n O O - - --- ----------------------------------------------------------------------------- --- Simple operations on Blocks - --- Predicates - -isEmptyBlock :: Block n e x -> Bool -isEmptyBlock BNil = True -isEmptyBlock (BCat l r) = isEmptyBlock l && isEmptyBlock r -isEmptyBlock _ = False - - --- Building - -emptyBlock :: Block n O O -emptyBlock = BNil - -blockCons :: n O O -> Block n O x -> Block n O x -blockCons n b = case b of - BlockOC b l -> (BlockOC $! 
(n `blockCons` b)) l - BNil{} -> BMiddle n - BMiddle{} -> n `BCons` b - BCat{} -> n `BCons` b - BSnoc{} -> n `BCons` b - BCons{} -> n `BCons` b - -blockSnoc :: Block n e O -> n O O -> Block n e O -blockSnoc b n = case b of - BlockCO f b -> BlockCO f $! (b `blockSnoc` n) - BNil{} -> BMiddle n - BMiddle{} -> b `BSnoc` n - BCat{} -> b `BSnoc` n - BSnoc{} -> b `BSnoc` n - BCons{} -> b `BSnoc` n - -blockJoinHead :: n C O -> Block n O x -> Block n C x -blockJoinHead f (BlockOC b l) = BlockCC f b l -blockJoinHead f b = BlockCO f BNil `cat` b - -blockJoinTail :: Block n e O -> n O C -> Block n e C -blockJoinTail (BlockCO f b) t = BlockCC f b t -blockJoinTail b t = b `cat` BlockOC BNil t - -blockJoin :: n C O -> Block n O O -> n O C -> Block n C C -blockJoin f b t = BlockCC f b t - -blockAppend :: Block n e O -> Block n O x -> Block n e x -blockAppend = cat - - --- Taking apart - -firstNode :: Block n C x -> n C O -firstNode (BlockCO n _) = n -firstNode (BlockCC n _ _) = n - -lastNode :: Block n x C -> n O C -lastNode (BlockOC _ n) = n -lastNode (BlockCC _ _ n) = n - -blockSplitHead :: Block n C x -> (n C O, Block n O x) -blockSplitHead (BlockCO n b) = (n, b) -blockSplitHead (BlockCC n b t) = (n, BlockOC b t) - -blockSplitTail :: Block n e C -> (Block n e O, n O C) -blockSplitTail (BlockOC b n) = (b, n) -blockSplitTail (BlockCC f b t) = (BlockCO f b, t) - --- | Split a closed block into its entry node, open middle block, and --- exit node. -blockSplit :: Block n C C -> (n C O, Block n O O, n O C) -blockSplit (BlockCC f b t) = (f, b, t) - -blockToList :: Block n O O -> [n O O] -blockToList b = go b [] - where go :: Block n O O -> [n O O] -> [n O O] - go BNil r = r - go (BMiddle n) r = n : r - go (BCat b1 b2) r = go b1 $! go b2 r - go (BSnoc b1 n) r = go b1 (n:r) - go (BCons n b1) r = n : go b1 r - -blockFromList :: [n O O] -> Block n O O -blockFromList = foldr BCons BNil - --- Modifying - -replaceFirstNode :: Block n C x -> n C O -> Block n C x -replaceFirstNode (BlockCO _ b) f = BlockCO f b -replaceFirstNode (BlockCC _ b n) f = BlockCC f b n - -replaceLastNode :: Block n x C -> n O C -> Block n x C -replaceLastNode (BlockOC b _) n = BlockOC b n -replaceLastNode (BlockCC l b _) n = BlockCC l b n - --- ----------------------------------------------------------------------------- --- General concatenation - -cat :: Block n e O -> Block n O x -> Block n e x -cat x y = case x of - BNil -> y - - BlockCO l b1 -> case y of - BlockOC b2 n -> (BlockCC l $! (b1 `cat` b2)) n - BNil -> x - BMiddle _ -> BlockCO l $! (b1 `cat` y) - BCat{} -> BlockCO l $! (b1 `cat` y) - BSnoc{} -> BlockCO l $! (b1 `cat` y) - BCons{} -> BlockCO l $! (b1 `cat` y) - - BMiddle n -> case y of - BlockOC b2 n2 -> (BlockOC $! (x `cat` b2)) n2 - BNil -> x - BMiddle{} -> BCons n y - BCat{} -> BCons n y - BSnoc{} -> BCons n y - BCons{} -> BCons n y - - BCat{} -> case y of - BlockOC b3 n2 -> (BlockOC $! (x `cat` b3)) n2 - BNil -> x - BMiddle n -> BSnoc x n - BCat{} -> BCat x y - BSnoc{} -> BCat x y - BCons{} -> BCat x y - - BSnoc{} -> case y of - BlockOC b2 n2 -> (BlockOC $! (x `cat` b2)) n2 - BNil -> x - BMiddle n -> BSnoc x n - BCat{} -> BCat x y - BSnoc{} -> BCat x y - BCons{} -> BCat x y - - - BCons{} -> case y of - BlockOC b2 n2 -> (BlockOC $! (x `cat` b2)) n2 - BNil -> x - BMiddle n -> BSnoc x n - BCat{} -> BCat x y - BSnoc{} -> BCat x y - BCons{} -> BCat x y - - --- ----------------------------------------------------------------------------- --- Mapping - --- | map a function over the nodes of a 'Block' -mapBlock :: (forall e x. 
n e x -> n' e x) -> Block n e x -> Block n' e x -mapBlock f (BlockCO n b ) = BlockCO (f n) (mapBlock f b) -mapBlock f (BlockOC b n) = BlockOC (mapBlock f b) (f n) -mapBlock f (BlockCC n b m) = BlockCC (f n) (mapBlock f b) (f m) -mapBlock _ BNil = BNil -mapBlock f (BMiddle n) = BMiddle (f n) -mapBlock f (BCat b1 b2) = BCat (mapBlock f b1) (mapBlock f b2) -mapBlock f (BSnoc b n) = BSnoc (mapBlock f b) (f n) -mapBlock f (BCons n b) = BCons (f n) (mapBlock f b) - --- | A strict 'mapBlock' -mapBlock' :: (forall e x. n e x -> n' e x) -> (Block n e x -> Block n' e x) -mapBlock' f = mapBlock3' (f, f, f) - --- | map over a block, with different functions to apply to first nodes, --- middle nodes and last nodes respectively. The map is strict. --- -mapBlock3' :: forall n n' e x . - ( n C O -> n' C O - , n O O -> n' O O, - n O C -> n' O C) - -> Block n e x -> Block n' e x -mapBlock3' (f, m, l) b = go b - where go :: forall e x . Block n e x -> Block n' e x - go (BlockOC b y) = (BlockOC $! go b) $! l y - go (BlockCO x b) = (BlockCO $! f x) $! (go b) - go (BlockCC x b y) = ((BlockCC $! f x) $! go b) $! (l y) - go BNil = BNil - go (BMiddle n) = BMiddle $! m n - go (BCat x y) = (BCat $! go x) $! (go y) - go (BSnoc x n) = (BSnoc $! go x) $! (m n) - go (BCons n x) = (BCons $! m n) $! (go x) - --- ----------------------------------------------------------------------------- --- Folding - - --- | Fold a function over every node in a block, forward or backward. --- The fold function must be polymorphic in the shape of the nodes. -foldBlockNodesF3 :: forall n a b c . - ( n C O -> a -> b - , n O O -> b -> b - , n O C -> b -> c) - -> (forall e x . Block n e x -> IndexedCO e a b -> IndexedCO x c b) -foldBlockNodesF :: forall n a . - (forall e x . n e x -> a -> a) - -> (forall e x . Block n e x -> IndexedCO e a a -> IndexedCO x a a) -foldBlockNodesB3 :: forall n a b c . - ( n C O -> b -> c - , n O O -> b -> b - , n O C -> a -> b) - -> (forall e x . Block n e x -> IndexedCO x a b -> IndexedCO e c b) -foldBlockNodesB :: forall n a . - (forall e x . n e x -> a -> a) - -> (forall e x . Block n e x -> IndexedCO x a a -> IndexedCO e a a) - -foldBlockNodesF3 (ff, fm, fl) = block - where block :: forall e x . Block n e x -> IndexedCO e a b -> IndexedCO x c b - block (BlockCO f b ) = ff f `cat` block b - block (BlockCC f b l) = ff f `cat` block b `cat` fl l - block (BlockOC b l) = block b `cat` fl l - block BNil = id - block (BMiddle node) = fm node - block (b1 `BCat` b2) = block b1 `cat` block b2 - block (b1 `BSnoc` n) = block b1 `cat` fm n - block (n `BCons` b2) = fm n `cat` block b2 - cat :: forall a b c. (a -> b) -> (b -> c) -> a -> c - cat f f' = f' . f - -foldBlockNodesF f = foldBlockNodesF3 (f, f, f) - -foldBlockNodesB3 (ff, fm, fl) = block - where block :: forall e x . Block n e x -> IndexedCO x a b -> IndexedCO e c b - block (BlockCO f b ) = ff f `cat` block b - block (BlockCC f b l) = ff f `cat` block b `cat` fl l - block (BlockOC b l) = block b `cat` fl l - block BNil = id - block (BMiddle node) = fm node - block (b1 `BCat` b2) = block b1 `cat` block b2 - block (b1 `BSnoc` n) = block b1 `cat` fm n - block (n `BCons` b2) = fm n `cat` block b2 - cat :: forall a b c. (b -> c) -> (a -> b) -> a -> c - cat f f' = f . 
f' - -foldBlockNodesB f = foldBlockNodesB3 (f, f, f) - diff --git a/compiler/cmm/Hoopl/Collections.hs b/compiler/cmm/Hoopl/Collections.hs deleted file mode 100644 index 4c5516be79..0000000000 --- a/compiler/cmm/Hoopl/Collections.hs +++ /dev/null @@ -1,177 +0,0 @@ -{-# LANGUAGE DeriveFunctor #-} -{-# LANGUAGE DeriveTraversable #-} -{-# LANGUAGE DeriveFoldable #-} -{-# LANGUAGE TypeFamilies #-} -{-# LANGUAGE GeneralizedNewtypeDeriving #-} - -module Hoopl.Collections - ( IsSet(..) - , setInsertList, setDeleteList, setUnions - , IsMap(..) - , mapInsertList, mapDeleteList, mapUnions - , UniqueMap, UniqueSet - ) where - -import GhcPrelude - -import qualified Data.IntMap.Strict as M -import qualified Data.IntSet as S - -import Data.List (foldl1') - -class IsSet set where - type ElemOf set - - setNull :: set -> Bool - setSize :: set -> Int - setMember :: ElemOf set -> set -> Bool - - setEmpty :: set - setSingleton :: ElemOf set -> set - setInsert :: ElemOf set -> set -> set - setDelete :: ElemOf set -> set -> set - - setUnion :: set -> set -> set - setDifference :: set -> set -> set - setIntersection :: set -> set -> set - setIsSubsetOf :: set -> set -> Bool - setFilter :: (ElemOf set -> Bool) -> set -> set - - setFoldl :: (b -> ElemOf set -> b) -> b -> set -> b - setFoldr :: (ElemOf set -> b -> b) -> b -> set -> b - - setElems :: set -> [ElemOf set] - setFromList :: [ElemOf set] -> set - --- Helper functions for IsSet class -setInsertList :: IsSet set => [ElemOf set] -> set -> set -setInsertList keys set = foldl' (flip setInsert) set keys - -setDeleteList :: IsSet set => [ElemOf set] -> set -> set -setDeleteList keys set = foldl' (flip setDelete) set keys - -setUnions :: IsSet set => [set] -> set -setUnions [] = setEmpty -setUnions sets = foldl1' setUnion sets - - -class IsMap map where - type KeyOf map - - mapNull :: map a -> Bool - mapSize :: map a -> Int - mapMember :: KeyOf map -> map a -> Bool - mapLookup :: KeyOf map -> map a -> Maybe a - mapFindWithDefault :: a -> KeyOf map -> map a -> a - - mapEmpty :: map a - mapSingleton :: KeyOf map -> a -> map a - mapInsert :: KeyOf map -> a -> map a -> map a - mapInsertWith :: (a -> a -> a) -> KeyOf map -> a -> map a -> map a - mapDelete :: KeyOf map -> map a -> map a - mapAlter :: (Maybe a -> Maybe a) -> KeyOf map -> map a -> map a - mapAdjust :: (a -> a) -> KeyOf map -> map a -> map a - - mapUnion :: map a -> map a -> map a - mapUnionWithKey :: (KeyOf map -> a -> a -> a) -> map a -> map a -> map a - mapDifference :: map a -> map a -> map a - mapIntersection :: map a -> map a -> map a - mapIsSubmapOf :: Eq a => map a -> map a -> Bool - - mapMap :: (a -> b) -> map a -> map b - mapMapWithKey :: (KeyOf map -> a -> b) -> map a -> map b - mapFoldl :: (b -> a -> b) -> b -> map a -> b - mapFoldr :: (a -> b -> b) -> b -> map a -> b - mapFoldlWithKey :: (b -> KeyOf map -> a -> b) -> b -> map a -> b - mapFoldMapWithKey :: Monoid m => (KeyOf map -> a -> m) -> map a -> m - mapFilter :: (a -> Bool) -> map a -> map a - mapFilterWithKey :: (KeyOf map -> a -> Bool) -> map a -> map a - - - mapElems :: map a -> [a] - mapKeys :: map a -> [KeyOf map] - mapToList :: map a -> [(KeyOf map, a)] - mapFromList :: [(KeyOf map, a)] -> map a - mapFromListWith :: (a -> a -> a) -> [(KeyOf map,a)] -> map a - --- Helper functions for IsMap class -mapInsertList :: IsMap map => [(KeyOf map, a)] -> map a -> map a -mapInsertList assocs map = foldl' (flip (uncurry mapInsert)) map assocs - -mapDeleteList :: IsMap map => [KeyOf map] -> map a -> map a -mapDeleteList keys map = foldl' (flip 
mapDelete) map keys - -mapUnions :: IsMap map => [map a] -> map a -mapUnions [] = mapEmpty -mapUnions maps = foldl1' mapUnion maps - ------------------------------------------------------------------------------ --- Basic instances ------------------------------------------------------------------------------ - -newtype UniqueSet = US S.IntSet deriving (Eq, Ord, Show, Semigroup, Monoid) - -instance IsSet UniqueSet where - type ElemOf UniqueSet = Int - - setNull (US s) = S.null s - setSize (US s) = S.size s - setMember k (US s) = S.member k s - - setEmpty = US S.empty - setSingleton k = US (S.singleton k) - setInsert k (US s) = US (S.insert k s) - setDelete k (US s) = US (S.delete k s) - - setUnion (US x) (US y) = US (S.union x y) - setDifference (US x) (US y) = US (S.difference x y) - setIntersection (US x) (US y) = US (S.intersection x y) - setIsSubsetOf (US x) (US y) = S.isSubsetOf x y - setFilter f (US s) = US (S.filter f s) - - setFoldl k z (US s) = S.foldl' k z s - setFoldr k z (US s) = S.foldr k z s - - setElems (US s) = S.elems s - setFromList ks = US (S.fromList ks) - -newtype UniqueMap v = UM (M.IntMap v) - deriving (Eq, Ord, Show, Functor, Foldable, Traversable) - -instance IsMap UniqueMap where - type KeyOf UniqueMap = Int - - mapNull (UM m) = M.null m - mapSize (UM m) = M.size m - mapMember k (UM m) = M.member k m - mapLookup k (UM m) = M.lookup k m - mapFindWithDefault def k (UM m) = M.findWithDefault def k m - - mapEmpty = UM M.empty - mapSingleton k v = UM (M.singleton k v) - mapInsert k v (UM m) = UM (M.insert k v m) - mapInsertWith f k v (UM m) = UM (M.insertWith f k v m) - mapDelete k (UM m) = UM (M.delete k m) - mapAlter f k (UM m) = UM (M.alter f k m) - mapAdjust f k (UM m) = UM (M.adjust f k m) - - mapUnion (UM x) (UM y) = UM (M.union x y) - mapUnionWithKey f (UM x) (UM y) = UM (M.unionWithKey f x y) - mapDifference (UM x) (UM y) = UM (M.difference x y) - mapIntersection (UM x) (UM y) = UM (M.intersection x y) - mapIsSubmapOf (UM x) (UM y) = M.isSubmapOf x y - - mapMap f (UM m) = UM (M.map f m) - mapMapWithKey f (UM m) = UM (M.mapWithKey f m) - mapFoldl k z (UM m) = M.foldl' k z m - mapFoldr k z (UM m) = M.foldr k z m - mapFoldlWithKey k z (UM m) = M.foldlWithKey' k z m - mapFoldMapWithKey f (UM m) = M.foldMapWithKey f m - mapFilter f (UM m) = UM (M.filter f m) - mapFilterWithKey f (UM m) = UM (M.filterWithKey f m) - - mapElems (UM m) = M.elems m - mapKeys (UM m) = M.keys m - mapToList (UM m) = M.toList m - mapFromList assocs = UM (M.fromList assocs) - mapFromListWith f assocs = UM (M.fromListWith f assocs) diff --git a/compiler/cmm/Hoopl/Dataflow.hs b/compiler/cmm/Hoopl/Dataflow.hs deleted file mode 100644 index 9762a84e20..0000000000 --- a/compiler/cmm/Hoopl/Dataflow.hs +++ /dev/null @@ -1,441 +0,0 @@ -{-# LANGUAGE BangPatterns #-} -{-# LANGUAGE DataKinds #-} -{-# LANGUAGE GADTs #-} -{-# LANGUAGE MultiParamTypeClasses #-} -{-# LANGUAGE RankNTypes #-} -{-# LANGUAGE ScopedTypeVariables #-} -{-# LANGUAGE TypeFamilies #-} - --- --- Copyright (c) 2010, João Dias, Simon Marlow, Simon Peyton Jones, --- and Norman Ramsey --- --- Modifications copyright (c) The University of Glasgow 2012 --- --- This module is a specialised and optimised version of --- Compiler.Hoopl.Dataflow in the hoopl package. In particular it is --- specialised to the UniqSM monad. --- - -module Hoopl.Dataflow - ( C, O, Block - , lastNode, entryLabel - , foldNodesBwdOO - , foldRewriteNodesBwdOO - , DataflowLattice(..), OldFact(..), NewFact(..), JoinedFact(..) 
- , TransferFun, RewriteFun - , Fact, FactBase - , getFact, mkFactBase - , analyzeCmmFwd, analyzeCmmBwd - , rewriteCmmBwd - , changedIf - , joinOutFacts - , joinFacts - ) -where - -import GhcPrelude - -import Cmm -import UniqSupply - -import Data.Array -import Data.Maybe -import Data.IntSet (IntSet) -import qualified Data.IntSet as IntSet - -import Hoopl.Block -import Hoopl.Graph -import Hoopl.Collections -import Hoopl.Label - -type family Fact (x :: Extensibility) f :: * -type instance Fact C f = FactBase f -type instance Fact O f = f - -newtype OldFact a = OldFact a - -newtype NewFact a = NewFact a - --- | The result of joining OldFact and NewFact. -data JoinedFact a - = Changed !a -- ^ Result is different than OldFact. - | NotChanged !a -- ^ Result is the same as OldFact. - -getJoined :: JoinedFact a -> a -getJoined (Changed a) = a -getJoined (NotChanged a) = a - -changedIf :: Bool -> a -> JoinedFact a -changedIf True = Changed -changedIf False = NotChanged - -type JoinFun a = OldFact a -> NewFact a -> JoinedFact a - -data DataflowLattice a = DataflowLattice - { fact_bot :: a - , fact_join :: JoinFun a - } - -data Direction = Fwd | Bwd - -type TransferFun f = CmmBlock -> FactBase f -> FactBase f - --- | Function for rewrtiting and analysis combined. To be used with --- @rewriteCmm@. --- --- Currently set to work with @UniqSM@ monad, but we could probably abstract --- that away (if we do that, we might want to specialize the fixpoint algorithms --- to the particular monads through SPECIALIZE). -type RewriteFun f = CmmBlock -> FactBase f -> UniqSM (CmmBlock, FactBase f) - -analyzeCmmBwd, analyzeCmmFwd - :: DataflowLattice f - -> TransferFun f - -> CmmGraph - -> FactBase f - -> FactBase f -analyzeCmmBwd = analyzeCmm Bwd -analyzeCmmFwd = analyzeCmm Fwd - -analyzeCmm - :: Direction - -> DataflowLattice f - -> TransferFun f - -> CmmGraph - -> FactBase f - -> FactBase f -analyzeCmm dir lattice transfer cmmGraph initFact = - {-# SCC analyzeCmm #-} - let entry = g_entry cmmGraph - hooplGraph = g_graph cmmGraph - blockMap = - case hooplGraph of - GMany NothingO bm NothingO -> bm - in fixpointAnalysis dir lattice transfer entry blockMap initFact - --- Fixpoint algorithm. -fixpointAnalysis - :: forall f. - Direction - -> DataflowLattice f - -> TransferFun f - -> Label - -> LabelMap CmmBlock - -> FactBase f - -> FactBase f -fixpointAnalysis direction lattice do_block entry blockmap = loop start - where - -- Sorting the blocks helps to minimize the number of times we need to - -- process blocks. For instance, for forward analysis we want to look at - -- blocks in reverse postorder. Also, see comments for sortBlocks. - blocks = sortBlocks direction entry blockmap - num_blocks = length blocks - block_arr = {-# SCC "block_arr" #-} listArray (0, num_blocks - 1) blocks - start = {-# SCC "start" #-} IntSet.fromDistinctAscList - [0 .. num_blocks - 1] - dep_blocks = {-# SCC "dep_blocks" #-} mkDepBlocks direction blocks - join = fact_join lattice - - loop - :: IntHeap -- ^ Worklist, i.e., blocks to process - -> FactBase f -- ^ Current result (increases monotonically) - -> FactBase f - loop todo !fbase1 | Just (index, todo1) <- IntSet.minView todo = - let block = block_arr ! index - out_facts = {-# SCC "do_block" #-} do_block block fbase1 - -- For each of the outgoing edges, we join it with the current - -- information in fbase1 and (if something changed) we update it - -- and add the affected blocks to the worklist. 
- (todo2, fbase2) = {-# SCC "mapFoldWithKey" #-} - mapFoldlWithKey - (updateFact join dep_blocks) (todo1, fbase1) out_facts - in loop todo2 fbase2 - loop _ !fbase1 = fbase1 - -rewriteCmmBwd - :: DataflowLattice f - -> RewriteFun f - -> CmmGraph - -> FactBase f - -> UniqSM (CmmGraph, FactBase f) -rewriteCmmBwd = rewriteCmm Bwd - -rewriteCmm - :: Direction - -> DataflowLattice f - -> RewriteFun f - -> CmmGraph - -> FactBase f - -> UniqSM (CmmGraph, FactBase f) -rewriteCmm dir lattice rwFun cmmGraph initFact = {-# SCC rewriteCmm #-} do - let entry = g_entry cmmGraph - hooplGraph = g_graph cmmGraph - blockMap1 = - case hooplGraph of - GMany NothingO bm NothingO -> bm - (blockMap2, facts) <- - fixpointRewrite dir lattice rwFun entry blockMap1 initFact - return (cmmGraph {g_graph = GMany NothingO blockMap2 NothingO}, facts) - -fixpointRewrite - :: forall f. - Direction - -> DataflowLattice f - -> RewriteFun f - -> Label - -> LabelMap CmmBlock - -> FactBase f - -> UniqSM (LabelMap CmmBlock, FactBase f) -fixpointRewrite dir lattice do_block entry blockmap = loop start blockmap - where - -- Sorting the blocks helps to minimize the number of times we need to - -- process blocks. For instance, for forward analysis we want to look at - -- blocks in reverse postorder. Also, see comments for sortBlocks. - blocks = sortBlocks dir entry blockmap - num_blocks = length blocks - block_arr = {-# SCC "block_arr_rewrite" #-} - listArray (0, num_blocks - 1) blocks - start = {-# SCC "start_rewrite" #-} - IntSet.fromDistinctAscList [0 .. num_blocks - 1] - dep_blocks = {-# SCC "dep_blocks_rewrite" #-} mkDepBlocks dir blocks - join = fact_join lattice - - loop - :: IntHeap -- ^ Worklist, i.e., blocks to process - -> LabelMap CmmBlock -- ^ Rewritten blocks. - -> FactBase f -- ^ Current facts. - -> UniqSM (LabelMap CmmBlock, FactBase f) - loop todo !blocks1 !fbase1 - | Just (index, todo1) <- IntSet.minView todo = do - -- Note that we use the *original* block here. This is important. - -- We're optimistically rewriting blocks even before reaching the fixed - -- point, which means that the rewrite might be incorrect. So if the - -- facts change, we need to rewrite the original block again (taking - -- into account the new facts). - let block = block_arr ! index - (new_block, out_facts) <- {-# SCC "do_block_rewrite" #-} - do_block block fbase1 - let blocks2 = mapInsert (entryLabel new_block) new_block blocks1 - (todo2, fbase2) = {-# SCC "mapFoldWithKey_rewrite" #-} - mapFoldlWithKey - (updateFact join dep_blocks) (todo1, fbase1) out_facts - loop todo2 blocks2 fbase2 - loop _ !blocks1 !fbase1 = return (blocks1, fbase1) - - -{- -Note [Unreachable blocks] -~~~~~~~~~~~~~~~~~~~~~~~~~ -A block that is not in the domain of tfb_fbase is "currently unreachable". -A currently-unreachable block is not even analyzed. Reason: consider -constant prop and this graph, with entry point L1: - L1: x:=3; goto L4 - L2: x:=4; goto L4 - L4: if x>3 goto L2 else goto L5 -Here L2 is actually unreachable, but if we process it with bottom input fact, -we'll propagate (x=4) to L4, and nuke the otherwise-good rewriting of L4. - -* If a currently-unreachable block is not analyzed, then its rewritten - graph will not be accumulated in tfb_rg. And that is good: - unreachable blocks simply do not appear in the output. - -* Note that clients must be careful to provide a fact (even if bottom) - for each entry point. Otherwise useful blocks may be garbage collected. 
- -* Note that updateFact must set the change-flag if a label goes from - not-in-fbase to in-fbase, even if its fact is bottom. In effect the - real fact lattice is - UNR - bottom - the points above bottom - -* Even if the fact is going from UNR to bottom, we still call the - client's fact_join function because it might give the client - some useful debugging information. - -* All of this only applies for *forward* ixpoints. For the backward - case we must treat every block as reachable; it might finish with a - 'return', and therefore have no successors, for example. --} - - ------------------------------------------------------------------------------ --- Pieces that are shared by fixpoint and fixpoint_anal ------------------------------------------------------------------------------ - --- | Sort the blocks into the right order for analysis. This means reverse --- postorder for a forward analysis. For the backward one, we simply reverse --- that (see Note [Backward vs forward analysis]). -sortBlocks - :: NonLocal n - => Direction -> Label -> LabelMap (Block n C C) -> [Block n C C] -sortBlocks direction entry blockmap = - case direction of - Fwd -> fwd - Bwd -> reverse fwd - where - fwd = revPostorderFrom blockmap entry - --- Note [Backward vs forward analysis] --- --- The forward and backward cases are not dual. In the forward case, the entry --- points are known, and one simply traverses the body blocks from those points. --- In the backward case, something is known about the exit points, but a --- backward analysis must also include reachable blocks that don't reach the --- exit, as in a procedure that loops forever and has side effects.) --- For instance, let E be the entry and X the exit blocks (arrows indicate --- control flow) --- E -> X --- E -> B --- B -> C --- C -> B --- We do need to include B and C even though they're unreachable in the --- *reverse* graph (that we could use for backward analysis): --- E <- X --- E <- B --- B <- C --- C <- B --- So when sorting the blocks for the backward analysis, we simply take the --- reverse of what is used for the forward one. - - --- | Construct a mapping from a @Label@ to the block indexes that should be --- re-analyzed if the facts at that @Label@ change. --- --- Note that we're considering here the entry point of the block, so if the --- facts change at the entry: --- * for a backward analysis we need to re-analyze all the predecessors, but --- * for a forward analysis, we only need to re-analyze the current block --- (and that will in turn propagate facts into its successors). -mkDepBlocks :: Direction -> [CmmBlock] -> LabelMap IntSet -mkDepBlocks Fwd blocks = go blocks 0 mapEmpty - where - go [] !_ !dep_map = dep_map - go (b:bs) !n !dep_map = - go bs (n + 1) $ mapInsert (entryLabel b) (IntSet.singleton n) dep_map -mkDepBlocks Bwd blocks = go blocks 0 mapEmpty - where - go [] !_ !dep_map = dep_map - go (b:bs) !n !dep_map = - let insert m l = mapInsertWith IntSet.union l (IntSet.singleton n) m - in go bs (n + 1) $ foldl' insert dep_map (successors b) - --- | After some new facts have been generated by analysing a block, we --- fold this function over them to generate (a) a list of block --- indices to (re-)analyse, and (b) the new FactBase. 
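To make the shape of this worklist concrete, here is a minimal, self-contained sketch of the same loop for a toy "reachability" analysis over integer-labelled blocks. It uses only Data.IntMap and Data.IntSet; the one-bit Bool lattice, the transfer function and all names are invented for illustration and are not GHC's DataflowLattice, FactBase or CmmBlock. The real updateFact, which performs the join-and-enqueue step, follows below.

  import qualified Data.IntMap.Strict as IM
  import qualified Data.IntSet        as IS

  -- A toy CFG: each block index maps to its successor indices.
  type CFG      = IM.IntMap [Int]
  -- A toy lattice: "is this block reachable?" (False is bottom).
  type Fact     = Bool
  type FactBase = IM.IntMap Fact

  -- Dependency map, as in mkDepBlocks for a forward analysis: if the
  -- fact at a label changes, re-analyse exactly that block.
  deps :: CFG -> IM.IntMap IS.IntSet
  deps = IM.mapWithKey (\l _ -> IS.singleton l)

  -- One transfer step: propagate the block's own fact to its successors.
  transfer :: CFG -> Int -> FactBase -> [(Int, Fact)]
  transfer cfg l fb =
    [ (s, IM.findWithDefault False l fb) | s <- IM.findWithDefault [] l cfg ]

  -- The worklist loop, shaped like 'loop' above: pop the smallest index,
  -- run the transfer, join the out-facts, and enqueue the dependents of
  -- any label whose fact changed.
  fixpoint :: CFG -> FactBase -> FactBase
  fixpoint cfg = go (IS.fromList (IM.keys cfg))
    where
      dep_blocks = deps cfg
      go todo fb = case IS.minView todo of
        Nothing         -> fb
        Just (l, todo1) ->
          let outs = transfer cfg l fb
              step (td, f) (lbl, new) =
                let old     = IM.findWithDefault False lbl f
                    joined  = old || new        -- join on the Bool lattice
                    changed = joined /= old
                    td'
                      | changed   = td `IS.union`
                                    IM.findWithDefault IS.empty lbl dep_blocks
                      | otherwise = td
                in (td', if changed then IM.insert lbl joined f else f)
              (todo2, fb2) = foldl step (todo1, fb) outs
          in go todo2 fb2

  main :: IO ()
  main = print (fixpoint (IM.fromList [(0,[1,2]),(1,[3]),(2,[3]),(3,[])])
                         (IM.singleton 0 True))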
-updateFact - :: JoinFun f - -> LabelMap IntSet - -> (IntHeap, FactBase f) - -> Label - -> f -- out fact - -> (IntHeap, FactBase f) -updateFact fact_join dep_blocks (todo, fbase) lbl new_fact - = case lookupFact lbl fbase of - Nothing -> - -- Note [No old fact] - let !z = mapInsert lbl new_fact fbase in (changed, z) - Just old_fact -> - case fact_join (OldFact old_fact) (NewFact new_fact) of - (NotChanged _) -> (todo, fbase) - (Changed f) -> let !z = mapInsert lbl f fbase in (changed, z) - where - changed = todo `IntSet.union` - mapFindWithDefault IntSet.empty lbl dep_blocks - -{- -Note [No old fact] - -We know that the new_fact is >= _|_, so we don't need to join. However, -if the new fact is also _|_, and we have already analysed its block, -we don't need to record a change. So there's a tradeoff here. It turns -out that always recording a change is faster. --} - ----------------------------------------------------------------- --- Utilities ----------------------------------------------------------------- - --- Fact lookup: the fact `orelse` bottom -getFact :: DataflowLattice f -> Label -> FactBase f -> f -getFact lat l fb = case lookupFact l fb of Just f -> f - Nothing -> fact_bot lat - --- | Returns the result of joining the facts from all the successors of the --- provided node or block. -joinOutFacts :: (NonLocal n) => DataflowLattice f -> n e C -> FactBase f -> f -joinOutFacts lattice nonLocal fact_base = foldl' join (fact_bot lattice) facts - where - join new old = getJoined $ fact_join lattice (OldFact old) (NewFact new) - facts = - [ fromJust fact - | s <- successors nonLocal - , let fact = lookupFact s fact_base - , isJust fact - ] - -joinFacts :: DataflowLattice f -> [f] -> f -joinFacts lattice facts = foldl' join (fact_bot lattice) facts - where - join new old = getJoined $ fact_join lattice (OldFact old) (NewFact new) - --- | Returns the joined facts for each label. -mkFactBase :: DataflowLattice f -> [(Label, f)] -> FactBase f -mkFactBase lattice = foldl' add mapEmpty - where - join = fact_join lattice - - add result (l, f1) = - let !newFact = - case mapLookup l result of - Nothing -> f1 - Just f2 -> getJoined $ join (OldFact f1) (NewFact f2) - in mapInsert l newFact result - --- | Folds backward over all nodes of an open-open block. --- Strict in the accumulator. -foldNodesBwdOO :: (CmmNode O O -> f -> f) -> Block CmmNode O O -> f -> f -foldNodesBwdOO funOO = go - where - go (BCat b1 b2) f = go b1 $! go b2 f - go (BSnoc h n) f = go h $! funOO n f - go (BCons n t) f = funOO n $! go t f - go (BMiddle n) f = funOO n f - go BNil f = f -{-# INLINABLE foldNodesBwdOO #-} - --- | Folds backward over all the nodes of an open-open block and allows --- rewriting them. The accumulator is both the block of nodes and @f@ (usually --- dataflow facts). --- Strict in both accumulated parts. -foldRewriteNodesBwdOO - :: forall f. 
- (CmmNode O O -> f -> UniqSM (Block CmmNode O O, f)) - -> Block CmmNode O O - -> f - -> UniqSM (Block CmmNode O O, f) -foldRewriteNodesBwdOO rewriteOO initBlock initFacts = go initBlock initFacts - where - go (BCons node1 block1) !fact1 = (rewriteOO node1 `comp` go block1) fact1 - go (BSnoc block1 node1) !fact1 = (go block1 `comp` rewriteOO node1) fact1 - go (BCat blockA1 blockB1) !fact1 = (go blockA1 `comp` go blockB1) fact1 - go (BMiddle node) !fact1 = rewriteOO node fact1 - go BNil !fact = return (BNil, fact) - - comp rew1 rew2 = \f1 -> do - (b, f2) <- rew2 f1 - (a, !f3) <- rew1 f2 - let !c = joinBlocksOO a b - return (c, f3) - {-# INLINE comp #-} -{-# INLINABLE foldRewriteNodesBwdOO #-} - -joinBlocksOO :: Block n O O -> Block n O O -> Block n O O -joinBlocksOO BNil b = b -joinBlocksOO b BNil = b -joinBlocksOO (BMiddle n) b = blockCons n b -joinBlocksOO b (BMiddle n) = blockSnoc b n -joinBlocksOO b1 b2 = BCat b1 b2 - -type IntHeap = IntSet diff --git a/compiler/cmm/Hoopl/Graph.hs b/compiler/cmm/Hoopl/Graph.hs deleted file mode 100644 index 992becb417..0000000000 --- a/compiler/cmm/Hoopl/Graph.hs +++ /dev/null @@ -1,186 +0,0 @@ -{-# LANGUAGE BangPatterns #-} -{-# LANGUAGE DataKinds #-} -{-# LANGUAGE FlexibleInstances #-} -{-# LANGUAGE GADTs #-} -{-# LANGUAGE RankNTypes #-} -{-# LANGUAGE ScopedTypeVariables #-} -{-# LANGUAGE TypeFamilies #-} -module Hoopl.Graph - ( Body - , Graph - , Graph'(..) - , NonLocal(..) - , addBlock - , bodyList - , emptyBody - , labelsDefined - , mapGraph - , mapGraphBlocks - , revPostorderFrom - ) where - - -import GhcPrelude -import Util - -import Hoopl.Label -import Hoopl.Block -import Hoopl.Collections - --- | A (possibly empty) collection of closed/closed blocks -type Body n = LabelMap (Block n C C) - --- | @Body@ abstracted over @block@ -type Body' block (n :: Extensibility -> Extensibility -> *) = LabelMap (block n C C) - -------------------------------- --- | Gives access to the anchor points for --- nonlocal edges as well as the edges themselves -class NonLocal thing where - entryLabel :: thing C x -> Label -- ^ The label of a first node or block - successors :: thing e C -> [Label] -- ^ Gives control-flow successors - -instance NonLocal n => NonLocal (Block n) where - entryLabel (BlockCO f _) = entryLabel f - entryLabel (BlockCC f _ _) = entryLabel f - - successors (BlockOC _ n) = successors n - successors (BlockCC _ _ n) = successors n - - -emptyBody :: Body' block n -emptyBody = mapEmpty - -bodyList :: Body' block n -> [(Label,block n C C)] -bodyList body = mapToList body - -addBlock - :: (NonLocal block, HasDebugCallStack) - => block C C -> LabelMap (block C C) -> LabelMap (block C C) -addBlock block body = mapAlter add lbl body - where - lbl = entryLabel block - add Nothing = Just block - add _ = error $ "duplicate label " ++ show lbl ++ " in graph" - - --- --------------------------------------------------------------------------- --- Graph - --- | A control-flow graph, which may take any of four shapes (O/O, --- O/C, C/O, C/C). A graph open at the entry has a single, --- distinguished, anonymous entry point; if a graph is closed at the --- entry, its entry point(s) are supplied by a context. -type Graph = Graph' Block - --- | @Graph'@ is abstracted over the block type, so that we can build --- graphs of annotated blocks for example (Compiler.Hoopl.Dataflow --- needs this). 
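As a reminder of how the Open/Closed indices are used here, the following stand-alone sketch shows the same shape-indexing trick on a toy node and block type. The constructor names (Label, Move, Branch, BFirst, ...) are invented for the example and are not the real Block or CmmNode constructors; the point is only that appending past a closed exit edge is a type error, which is exactly the property Graph' below inherits from its blocks.

  {-# LANGUAGE DataKinds, GADTs, KindSignatures #-}

  data Ext = Open | Closed

  -- A stand-in node type, indexed by its shape at entry and exit.
  data Ins (e :: Ext) (x :: Ext) where
    Label  :: String -> Ins 'Closed 'Open    -- entry point: closed on entry
    Move   :: String -> Ins 'Open   'Open    -- ordinary instruction
    Branch :: String -> Ins 'Open   'Closed  -- control transfer: closed on exit

  -- A block: a label, a run of open/open instructions, and a final jump.
  data Blk (e :: Ext) (x :: Ext) where
    BFirst :: Ins 'Closed 'Open -> Blk 'Closed 'Open
    BMid   :: Blk e 'Open -> Ins 'Open 'Open   -> Blk e 'Open
    BLast  :: Blk e 'Open -> Ins 'Open 'Closed -> Blk e 'Closed

  -- Extending a block is only possible while its exit is still open;
  -- "falling through into a label" is unrepresentable by construction.
  snoc :: Blk e 'Open -> Ins 'Open 'Open -> Blk e 'Open
  snoc = BMid

  close :: Blk e 'Open -> Ins 'Open 'Closed -> Blk e 'Closed
  close = BLast

  example :: Blk 'Closed 'Closed
  example = close (snoc (BFirst (Label "L1")) (Move "x := x + 1"))
                  (Branch "L2")

  render :: Blk e x -> String
  render (BFirst i)  = renderIns i
  render (BMid b i)  = render b ++ "; " ++ renderIns i
  render (BLast b i) = render b ++ "; " ++ renderIns i

  renderIns :: Ins e x -> String
  renderIns (Label s)  = s ++ ":"
  renderIns (Move s)   = s
  renderIns (Branch s) = "goto " ++ s

  main :: IO ()
  main = putStrLn (render example)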
-data Graph' block (n :: Extensibility -> Extensibility -> *) e x where - GNil :: Graph' block n O O - GUnit :: block n O O -> Graph' block n O O - GMany :: MaybeO e (block n O C) - -> Body' block n - -> MaybeO x (block n C O) - -> Graph' block n e x - - --- ----------------------------------------------------------------------------- --- Mapping over graphs - --- | Maps over all nodes in a graph. -mapGraph :: (forall e x. n e x -> n' e x) -> Graph n e x -> Graph n' e x -mapGraph f = mapGraphBlocks (mapBlock f) - --- | Function 'mapGraphBlocks' enables a change of representation of blocks, --- nodes, or both. It lifts a polymorphic block transform into a polymorphic --- graph transform. When the block representation stabilizes, a similar --- function should be provided for blocks. -mapGraphBlocks :: forall block n block' n' e x . - (forall e x . block n e x -> block' n' e x) - -> (Graph' block n e x -> Graph' block' n' e x) - -mapGraphBlocks f = map - where map :: Graph' block n e x -> Graph' block' n' e x - map GNil = GNil - map (GUnit b) = GUnit (f b) - map (GMany e b x) = GMany (fmap f e) (mapMap f b) (fmap f x) - --- ----------------------------------------------------------------------------- --- Extracting Labels from graphs - -labelsDefined :: forall block n e x . NonLocal (block n) => Graph' block n e x - -> LabelSet -labelsDefined GNil = setEmpty -labelsDefined (GUnit{}) = setEmpty -labelsDefined (GMany _ body x) = mapFoldlWithKey addEntry (exitLabel x) body - where addEntry :: forall a. LabelSet -> ElemOf LabelSet -> a -> LabelSet - addEntry labels label _ = setInsert label labels - exitLabel :: MaybeO x (block n C O) -> LabelSet - exitLabel NothingO = setEmpty - exitLabel (JustO b) = setSingleton (entryLabel b) - - ----------------------------------------------------------------- - --- | Returns a list of blocks reachable from the provided Labels in the reverse --- postorder. --- --- This is the most important traversal over this data structure. It drops --- unreachable code and puts blocks in an order that is good for solving forward --- dataflow problems quickly. The reverse order is good for solving backward --- dataflow problems quickly. The forward order is also reasonably good for --- emitting instructions, except that it will not usually exploit Forrest --- Baskett's trick of eliminating the unconditional branch from a loop. For --- that you would need a more serious analysis, probably based on dominators, to --- identify loop headers. --- --- For forward analyses we want reverse postorder visitation, consider: --- @ --- A -> [B,C] --- B -> D --- C -> D --- @ --- Postorder: [D, C, B, A] (or [D, B, C, A]) --- Reverse postorder: [A, B, C, D] (or [A, C, B, D]) --- This matters for, e.g., forward analysis, because we want to analyze *both* --- B and C before we analyze D. -revPostorderFrom - :: forall block. (NonLocal block) - => LabelMap (block C C) -> Label -> [block C C] -revPostorderFrom graph start = go start_worklist setEmpty [] - where - start_worklist = lookup_for_descend start Nil - - -- To compute the postorder we need to "visit" a block (mark as done) - -- *after* visiting all its successors. So we need to know whether we - -- already processed all successors of each block (and @NonLocal@ allows - -- arbitrary many successors). 
So we use an explicit stack with an extra bit - -- of information: - -- * @ConsTodo@ means to explore the block if it wasn't visited before - -- * @ConsMark@ means that all successors were already done and we can add - -- the block to the result. - -- - -- NOTE: We add blocks to the result list in postorder, but we *prepend* - -- them (i.e., we use @(:)@), which means that the final list is in reverse - -- postorder. - go :: DfsStack (block C C) -> LabelSet -> [block C C] -> [block C C] - go Nil !_ !result = result - go (ConsMark block rest) !wip_or_done !result = - go rest wip_or_done (block : result) - go (ConsTodo block rest) !wip_or_done !result - | entryLabel block `setMember` wip_or_done = go rest wip_or_done result - | otherwise = - let new_worklist = - foldr lookup_for_descend - (ConsMark block rest) - (successors block) - in go new_worklist (setInsert (entryLabel block) wip_or_done) result - - lookup_for_descend :: Label -> DfsStack (block C C) -> DfsStack (block C C) - lookup_for_descend label wl - | Just b <- mapLookup label graph = ConsTodo b wl - | otherwise = - error $ "Label that doesn't have a block?! " ++ show label - -data DfsStack a = ConsTodo a (DfsStack a) | ConsMark a (DfsStack a) | Nil diff --git a/compiler/cmm/Hoopl/Label.hs b/compiler/cmm/Hoopl/Label.hs deleted file mode 100644 index 2e75d97244..0000000000 --- a/compiler/cmm/Hoopl/Label.hs +++ /dev/null @@ -1,142 +0,0 @@ -{-# LANGUAGE DeriveFoldable #-} -{-# LANGUAGE DeriveFunctor #-} -{-# LANGUAGE DeriveTraversable #-} -{-# LANGUAGE TypeFamilies #-} -{-# LANGUAGE GeneralizedNewtypeDeriving #-} - -module Hoopl.Label - ( Label - , LabelMap - , LabelSet - , FactBase - , lookupFact - , mkHooplLabel - ) where - -import GhcPrelude - -import Outputable - --- TODO: This should really just use GHC's Unique and Uniq{Set,FM} -import Hoopl.Collections - -import Unique (Uniquable(..)) -import TrieMap - - ------------------------------------------------------------------------------ --- Label ------------------------------------------------------------------------------ - -newtype Label = Label { lblToUnique :: Int } - deriving (Eq, Ord) - -mkHooplLabel :: Int -> Label -mkHooplLabel = Label - -instance Show Label where - show (Label n) = "L" ++ show n - -instance Uniquable Label where - getUnique label = getUnique (lblToUnique label) - -instance Outputable Label where - ppr label = ppr (getUnique label) - ------------------------------------------------------------------------------ --- LabelSet - -newtype LabelSet = LS UniqueSet deriving (Eq, Ord, Show, Monoid, Semigroup) - -instance IsSet LabelSet where - type ElemOf LabelSet = Label - - setNull (LS s) = setNull s - setSize (LS s) = setSize s - setMember (Label k) (LS s) = setMember k s - - setEmpty = LS setEmpty - setSingleton (Label k) = LS (setSingleton k) - setInsert (Label k) (LS s) = LS (setInsert k s) - setDelete (Label k) (LS s) = LS (setDelete k s) - - setUnion (LS x) (LS y) = LS (setUnion x y) - setDifference (LS x) (LS y) = LS (setDifference x y) - setIntersection (LS x) (LS y) = LS (setIntersection x y) - setIsSubsetOf (LS x) (LS y) = setIsSubsetOf x y - setFilter f (LS s) = LS (setFilter (f . 
mkHooplLabel) s) - setFoldl k z (LS s) = setFoldl (\a v -> k a (mkHooplLabel v)) z s - setFoldr k z (LS s) = setFoldr (\v a -> k (mkHooplLabel v) a) z s - - setElems (LS s) = map mkHooplLabel (setElems s) - setFromList ks = LS (setFromList (map lblToUnique ks)) - ------------------------------------------------------------------------------ --- LabelMap - -newtype LabelMap v = LM (UniqueMap v) - deriving (Eq, Ord, Show, Functor, Foldable, Traversable) - -instance IsMap LabelMap where - type KeyOf LabelMap = Label - - mapNull (LM m) = mapNull m - mapSize (LM m) = mapSize m - mapMember (Label k) (LM m) = mapMember k m - mapLookup (Label k) (LM m) = mapLookup k m - mapFindWithDefault def (Label k) (LM m) = mapFindWithDefault def k m - - mapEmpty = LM mapEmpty - mapSingleton (Label k) v = LM (mapSingleton k v) - mapInsert (Label k) v (LM m) = LM (mapInsert k v m) - mapInsertWith f (Label k) v (LM m) = LM (mapInsertWith f k v m) - mapDelete (Label k) (LM m) = LM (mapDelete k m) - mapAlter f (Label k) (LM m) = LM (mapAlter f k m) - mapAdjust f (Label k) (LM m) = LM (mapAdjust f k m) - - mapUnion (LM x) (LM y) = LM (mapUnion x y) - mapUnionWithKey f (LM x) (LM y) = LM (mapUnionWithKey (f . mkHooplLabel) x y) - mapDifference (LM x) (LM y) = LM (mapDifference x y) - mapIntersection (LM x) (LM y) = LM (mapIntersection x y) - mapIsSubmapOf (LM x) (LM y) = mapIsSubmapOf x y - - mapMap f (LM m) = LM (mapMap f m) - mapMapWithKey f (LM m) = LM (mapMapWithKey (f . mkHooplLabel) m) - mapFoldl k z (LM m) = mapFoldl k z m - mapFoldr k z (LM m) = mapFoldr k z m - mapFoldlWithKey k z (LM m) = - mapFoldlWithKey (\a v -> k a (mkHooplLabel v)) z m - mapFoldMapWithKey f (LM m) = mapFoldMapWithKey (\k v -> f (mkHooplLabel k) v) m - mapFilter f (LM m) = LM (mapFilter f m) - mapFilterWithKey f (LM m) = LM (mapFilterWithKey (f . mkHooplLabel) m) - - mapElems (LM m) = mapElems m - mapKeys (LM m) = map mkHooplLabel (mapKeys m) - mapToList (LM m) = [(mkHooplLabel k, v) | (k, v) <- mapToList m] - mapFromList assocs = LM (mapFromList [(lblToUnique k, v) | (k, v) <- assocs]) - mapFromListWith f assocs = LM (mapFromListWith f [(lblToUnique k, v) | (k, v) <- assocs]) - ------------------------------------------------------------------------------ --- Instances - -instance Outputable LabelSet where - ppr = ppr . setElems - -instance Outputable a => Outputable (LabelMap a) where - ppr = ppr . mapToList - -instance TrieMap LabelMap where - type Key LabelMap = Label - emptyTM = mapEmpty - lookupTM k m = mapLookup k m - alterTM k f m = mapAlter f k m - foldTM k m z = mapFoldr k z m - mapTM f m = mapMap f m - ------------------------------------------------------------------------------ --- FactBase - -type FactBase f = LabelMap f - -lookupFact :: Label -> FactBase f -> Maybe f -lookupFact = mapLookup diff --git a/compiler/cmm/MkGraph.hs b/compiler/cmm/MkGraph.hs deleted file mode 100644 index c6e62435a2..0000000000 --- a/compiler/cmm/MkGraph.hs +++ /dev/null @@ -1,484 +0,0 @@ -{-# LANGUAGE BangPatterns, GADTs #-} - -module MkGraph - ( CmmAGraph, CmmAGraphScoped, CgStmt(..) - , (<*>), catAGraphs - , mkLabel, mkMiddle, mkLast, outOfLine - , lgraphOfAGraph, labelAGraph - - , stackStubExpr - , mkNop, mkAssign, mkStore - , mkUnsafeCall, mkFinalCall, mkCallReturnsTo - , mkJumpReturnsTo - , mkJump, mkJumpExtra - , mkRawJump - , mkCbranch, mkSwitch - , mkReturn, mkComment, mkCallEntry, mkBranch - , mkUnwind - , copyInOflow, copyOutOflow - , noExtraStack - , toCall, Transfer(..) 
- ) -where - -import GhcPrelude hiding ( (<*>) ) -- avoid importing (<*>) - -import BlockId -import Cmm -import CmmCallConv -import CmmSwitch (SwitchTargets) - -import Hoopl.Block -import Hoopl.Graph -import Hoopl.Label -import DynFlags -import FastString -import ForeignCall -import OrdList -import SMRep (ByteOff) -import UniqSupply -import Util -import Panic - - ------------------------------------------------------------------------------ --- Building Graphs - - --- | CmmAGraph is a chunk of code consisting of: --- --- * ordinary statements (assignments, stores etc.) --- * jumps --- * labels --- * out-of-line labelled blocks --- --- The semantics is that control falls through labels and out-of-line --- blocks. Everything after a jump up to the next label is by --- definition unreachable code, and will be discarded. --- --- Two CmmAGraphs can be stuck together with <*>, with the meaning that --- control flows from the first to the second. --- --- A 'CmmAGraph' can be turned into a 'CmmGraph' (closed at both ends) --- by providing a label for the entry point and a tick scope; see --- 'labelAGraph'. -type CmmAGraph = OrdList CgStmt --- | Unlabeled graph with tick scope -type CmmAGraphScoped = (CmmAGraph, CmmTickScope) - -data CgStmt - = CgLabel BlockId CmmTickScope - | CgStmt (CmmNode O O) - | CgLast (CmmNode O C) - | CgFork BlockId CmmAGraph CmmTickScope - -flattenCmmAGraph :: BlockId -> CmmAGraphScoped -> CmmGraph -flattenCmmAGraph id (stmts_t, tscope) = - CmmGraph { g_entry = id, - g_graph = GMany NothingO body NothingO } - where - body = foldr addBlock emptyBody $ flatten id stmts_t tscope [] - - -- - -- flatten: given an entry label and a CmmAGraph, make a list of blocks. - -- - -- NB. avoid the quadratic-append trap by passing in the tail of the - -- list. This is important for Very Long Functions (e.g. in T783). - -- - flatten :: Label -> CmmAGraph -> CmmTickScope -> [Block CmmNode C C] - -> [Block CmmNode C C] - flatten id g tscope blocks - = flatten1 (fromOL g) block' blocks - where !block' = blockJoinHead (CmmEntry id tscope) emptyBlock - -- - -- flatten0: we are outside a block at this point: any code before - -- the first label is unreachable, so just drop it. - -- - flatten0 :: [CgStmt] -> [Block CmmNode C C] -> [Block CmmNode C C] - flatten0 [] blocks = blocks - - flatten0 (CgLabel id tscope : stmts) blocks - = flatten1 stmts block blocks - where !block = blockJoinHead (CmmEntry id tscope) emptyBlock - - flatten0 (CgFork fork_id stmts_t tscope : rest) blocks - = flatten fork_id stmts_t tscope $ flatten0 rest blocks - - flatten0 (CgLast _ : stmts) blocks = flatten0 stmts blocks - flatten0 (CgStmt _ : stmts) blocks = flatten0 stmts blocks - - -- - -- flatten1: we have a partial block, collect statements until the - -- next last node to make a block, then call flatten0 to get the rest - -- of the blocks - -- - flatten1 :: [CgStmt] -> Block CmmNode C O - -> [Block CmmNode C C] -> [Block CmmNode C C] - - -- The current block falls through to the end of a function or fork: - -- this code should not be reachable, but it may be referenced by - -- other code that is not reachable. We'll remove it later with - -- dead-code analysis, but for now we have to keep the graph - -- well-formed, so we terminate the block with a branch to the - -- beginning of the current block. 
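The flattening scheme described in the comments above (control falls through labels, anything between a last node and the next label is unreachable and dropped, and a block left open at the end of the stream is closed with a branch back to its own start) can be seen in miniature in the following stand-alone sketch. The Stmt and ToyBlock types are made up for the example, not CgStmt and Block CmmNode; the real flatten1 equations follow.

  -- Toy statement stream, mirroring CgStmt: labels, straight-line
  -- statements, and block-ending transfers ("lasts").
  data Stmt
    = SLabel String   -- like CgLabel
    | SMid   String   -- like CgStmt
    | SLast  String   -- like CgLast
    deriving Show

  type ToyBlock = (String, [String], String)  -- entry label, middles, last

  flattenToy :: String -> [Stmt] -> [ToyBlock]
  flattenToy entry = open entry []
    where
      -- Inside a block with the given label and accumulated middles.
      open lbl acc (SMid s  : rest) = open lbl (acc ++ [s]) rest
      open lbl acc (SLast s : rest) = (lbl, acc, s) : closed rest
      -- A label while a block is open: fall through via an explicit branch.
      open lbl acc (SLabel l : rest) =
        (lbl, acc, "goto " ++ l) : open l [] rest
      -- Stream ended with the block still open: close it with a branch to
      -- its own start to keep the graph well-formed, as flatten1 does.
      open lbl acc [] = [(lbl, acc, "goto " ++ lbl)]

      -- Outside any block: drop statements until the next label.
      closed (SLabel l : rest) = open l [] rest
      closed (_        : rest) = closed rest
      closed []                = []

  main :: IO ()
  main = mapM_ print $
    flattenToy "entry"
      [ SMid "x := 1", SLast "return x"
      , SMid "dead"                      -- unreachable, silently dropped
      , SLabel "L2", SMid "y := 2", SLast "return y"
      ]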
- flatten1 [] block blocks - = blockJoinTail block (CmmBranch (entryLabel block)) : blocks - - flatten1 (CgLast stmt : stmts) block blocks - = block' : flatten0 stmts blocks - where !block' = blockJoinTail block stmt - - flatten1 (CgStmt stmt : stmts) block blocks - = flatten1 stmts block' blocks - where !block' = blockSnoc block stmt - - flatten1 (CgFork fork_id stmts_t tscope : rest) block blocks - = flatten fork_id stmts_t tscope $ flatten1 rest block blocks - - -- a label here means that we should start a new block, and the - -- current block should fall through to the new block. - flatten1 (CgLabel id tscp : stmts) block blocks - = blockJoinTail block (CmmBranch id) : - flatten1 stmts (blockJoinHead (CmmEntry id tscp) emptyBlock) blocks - - - ----------- AGraph manipulation - -(<*>) :: CmmAGraph -> CmmAGraph -> CmmAGraph -(<*>) = appOL - -catAGraphs :: [CmmAGraph] -> CmmAGraph -catAGraphs = concatOL - --- | creates a sequence "goto id; id:" as an AGraph -mkLabel :: BlockId -> CmmTickScope -> CmmAGraph -mkLabel bid scp = unitOL (CgLabel bid scp) - --- | creates an open AGraph from a given node -mkMiddle :: CmmNode O O -> CmmAGraph -mkMiddle middle = unitOL (CgStmt middle) - --- | creates a closed AGraph from a given node -mkLast :: CmmNode O C -> CmmAGraph -mkLast last = unitOL (CgLast last) - --- | A labelled code block; should end in a last node -outOfLine :: BlockId -> CmmAGraphScoped -> CmmAGraph -outOfLine l (c,s) = unitOL (CgFork l c s) - --- | allocate a fresh label for the entry point -lgraphOfAGraph :: CmmAGraphScoped -> UniqSM CmmGraph -lgraphOfAGraph g = do - u <- getUniqueM - return (labelAGraph (mkBlockId u) g) - --- | use the given BlockId as the label of the entry point -labelAGraph :: BlockId -> CmmAGraphScoped -> CmmGraph -labelAGraph lbl ag = flattenCmmAGraph lbl ag - ----------- No-ops -mkNop :: CmmAGraph -mkNop = nilOL - -mkComment :: FastString -> CmmAGraph -mkComment fs - -- SDM: generating all those comments takes time, this saved about 4% for me - | debugIsOn = mkMiddle $ CmmComment fs - | otherwise = nilOL - ----------- Assignment and store -mkAssign :: CmmReg -> CmmExpr -> CmmAGraph -mkAssign l (CmmReg r) | l == r = mkNop -mkAssign l r = mkMiddle $ CmmAssign l r - -mkStore :: CmmExpr -> CmmExpr -> CmmAGraph -mkStore l r = mkMiddle $ CmmStore l r - ----------- Control transfer -mkJump :: DynFlags -> Convention -> CmmExpr - -> [CmmExpr] - -> UpdFrameOffset - -> CmmAGraph -mkJump dflags conv e actuals updfr_off = - lastWithArgs dflags Jump Old conv actuals updfr_off $ - toCall e Nothing updfr_off 0 - --- | A jump where the caller says what the live GlobalRegs are. Used --- for low-level hand-written Cmm. 
-mkRawJump :: DynFlags -> CmmExpr -> UpdFrameOffset -> [GlobalReg] - -> CmmAGraph -mkRawJump dflags e updfr_off vols = - lastWithArgs dflags Jump Old NativeNodeCall [] updfr_off $ - \arg_space _ -> toCall e Nothing updfr_off 0 arg_space vols - - -mkJumpExtra :: DynFlags -> Convention -> CmmExpr -> [CmmExpr] - -> UpdFrameOffset -> [CmmExpr] - -> CmmAGraph -mkJumpExtra dflags conv e actuals updfr_off extra_stack = - lastWithArgsAndExtraStack dflags Jump Old conv actuals updfr_off extra_stack $ - toCall e Nothing updfr_off 0 - -mkCbranch :: CmmExpr -> BlockId -> BlockId -> Maybe Bool -> CmmAGraph -mkCbranch pred ifso ifnot likely = - mkLast (CmmCondBranch pred ifso ifnot likely) - -mkSwitch :: CmmExpr -> SwitchTargets -> CmmAGraph -mkSwitch e tbl = mkLast $ CmmSwitch e tbl - -mkReturn :: DynFlags -> CmmExpr -> [CmmExpr] -> UpdFrameOffset - -> CmmAGraph -mkReturn dflags e actuals updfr_off = - lastWithArgs dflags Ret Old NativeReturn actuals updfr_off $ - toCall e Nothing updfr_off 0 - -mkBranch :: BlockId -> CmmAGraph -mkBranch bid = mkLast (CmmBranch bid) - -mkFinalCall :: DynFlags - -> CmmExpr -> CCallConv -> [CmmExpr] -> UpdFrameOffset - -> CmmAGraph -mkFinalCall dflags f _ actuals updfr_off = - lastWithArgs dflags Call Old NativeDirectCall actuals updfr_off $ - toCall f Nothing updfr_off 0 - -mkCallReturnsTo :: DynFlags -> CmmExpr -> Convention -> [CmmExpr] - -> BlockId - -> ByteOff - -> UpdFrameOffset - -> [CmmExpr] - -> CmmAGraph -mkCallReturnsTo dflags f callConv actuals ret_lbl ret_off updfr_off extra_stack = do - lastWithArgsAndExtraStack dflags Call (Young ret_lbl) callConv actuals - updfr_off extra_stack $ - toCall f (Just ret_lbl) updfr_off ret_off - --- Like mkCallReturnsTo, but does not push the return address (it is assumed to be --- already on the stack). -mkJumpReturnsTo :: DynFlags -> CmmExpr -> Convention -> [CmmExpr] - -> BlockId - -> ByteOff - -> UpdFrameOffset - -> CmmAGraph -mkJumpReturnsTo dflags f callConv actuals ret_lbl ret_off updfr_off = do - lastWithArgs dflags JumpRet (Young ret_lbl) callConv actuals updfr_off $ - toCall f (Just ret_lbl) updfr_off ret_off - -mkUnsafeCall :: ForeignTarget -> [CmmFormal] -> [CmmActual] -> CmmAGraph -mkUnsafeCall t fs as = mkMiddle $ CmmUnsafeForeignCall t fs as - --- | Construct a 'CmmUnwind' node for the given register and unwinding --- expression. -mkUnwind :: GlobalReg -> CmmExpr -> CmmAGraph -mkUnwind r e = mkMiddle $ CmmUnwind [(r, Just e)] - --------------------------------------------------------------------------- - - - - --- Why are we inserting extra blocks that simply branch to the successors? --- Because in addition to the branch instruction, @mkBranch@ will insert --- a necessary adjustment to the stack pointer. - - --- For debugging purposes, we can stub out dead stack slots: -stackStubExpr :: Width -> CmmExpr -stackStubExpr w = CmmLit (CmmInt 0 w) - --- When we copy in parameters, we usually want to put overflow --- parameters on the stack, but sometimes we want to pass the --- variables in their spill slots. Therefore, for copying arguments --- and results, we provide different functions to pass the arguments --- in an overflow area and to pass them in spill slots. 
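The division of labour sketched in that comment boils down to: hand out argument registers while they last, then place the remaining ("overflow") arguments at increasing byte offsets in a stack area. The stand-alone toy below shows only that idea; the register names and sizes in the example are invented, and it ignores everything the real assignArgumentsPos has to care about (separate register classes for floats and vectors, the info-table slot, spill-slot reuse), so it is an illustration rather than GHC's calling convention. copyInOflow and copyIn below do the real work.

  -- Assign call arguments to parameter locations: registers first, then
  -- an overflow area on the stack at increasing byte offsets.
  data Loc = InReg String | OnStack Int   -- byte offset in the overflow area
    deriving Show

  assignArgs
    :: [String]                -- available argument registers
    -> Int                     -- first free byte offset in the overflow area
    -> [(String, Int)]         -- arguments with their sizes in bytes
    -> (Int, [(String, Loc)])  -- stack bytes used and each argument's home
  assignArgs regs off0 = go regs off0
    where
      go _      off []               = (off, [])
      go (r:rs) off ((a, _) : args)  =
        let (off', rest) = go rs off args
        in  (off', (a, InReg r) : rest)
      go []     off ((a, sz) : args) =
        let (off', rest) = go [] (off + sz) args
        in  (off', (a, OnStack off) : rest)

  main :: IO ()
  main = print (assignArgs ["R1", "R2"] 0
                  [("x", 8), ("y", 8), ("z", 8), ("w", 4)])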
-copyInOflow :: DynFlags -> Convention -> Area - -> [CmmFormal] - -> [CmmFormal] - -> (Int, [GlobalReg], CmmAGraph) - -copyInOflow dflags conv area formals extra_stk - = (offset, gregs, catAGraphs $ map mkMiddle nodes) - where (offset, gregs, nodes) = copyIn dflags conv area formals extra_stk - --- Return the number of bytes used for copying arguments, as well as the --- instructions to copy the arguments. -copyIn :: DynFlags -> Convention -> Area - -> [CmmFormal] - -> [CmmFormal] - -> (ByteOff, [GlobalReg], [CmmNode O O]) -copyIn dflags conv area formals extra_stk - = (stk_size, [r | (_, RegisterParam r) <- args], map ci (stk_args ++ args)) - where - -- See Note [Width of parameters] - ci (reg, RegisterParam r@(VanillaReg {})) = - let local = CmmLocal reg - global = CmmReg (CmmGlobal r) - width = cmmRegWidth dflags local - expr - | width == wordWidth dflags = global - | width < wordWidth dflags = - CmmMachOp (MO_XX_Conv (wordWidth dflags) width) [global] - | otherwise = panic "Parameter width greater than word width" - - in CmmAssign local expr - - -- Non VanillaRegs - ci (reg, RegisterParam r) = - CmmAssign (CmmLocal reg) (CmmReg (CmmGlobal r)) - - ci (reg, StackParam off) - | isBitsType $ localRegType reg - , typeWidth (localRegType reg) < wordWidth dflags = - let - stack_slot = (CmmLoad (CmmStackSlot area off) (cmmBits $ wordWidth dflags)) - local = CmmLocal reg - width = cmmRegWidth dflags local - expr = CmmMachOp (MO_XX_Conv (wordWidth dflags) width) [stack_slot] - in CmmAssign local expr - - | otherwise = - CmmAssign (CmmLocal reg) (CmmLoad (CmmStackSlot area off) ty) - where ty = localRegType reg - - init_offset = widthInBytes (wordWidth dflags) -- infotable - - (stk_off, stk_args) = assignStack dflags init_offset localRegType extra_stk - - (stk_size, args) = assignArgumentsPos dflags stk_off conv - localRegType formals - --- Factoring out the common parts of the copyout functions yielded something --- more complicated: - -data Transfer = Call | JumpRet | Jump | Ret deriving Eq - -copyOutOflow :: DynFlags -> Convention -> Transfer -> Area -> [CmmExpr] - -> UpdFrameOffset - -> [CmmExpr] -- extra stack args - -> (Int, [GlobalReg], CmmAGraph) - --- Generate code to move the actual parameters into the locations --- required by the calling convention. This includes a store for the --- return address. --- --- The argument layout function ignores the pointer to the info table, --- so we slot that in here. When copying-out to a young area, we set --- the info table for return and adjust the offsets of the other --- parameters. If this is a call instruction, we adjust the offsets --- of the other parameters. 
-copyOutOflow dflags conv transfer area actuals updfr_off extra_stack_stuff - = (stk_size, regs, graph) - where - (regs, graph) = foldr co ([], mkNop) (setRA ++ args ++ stack_params) - - -- See Note [Width of parameters] - co (v, RegisterParam r@(VanillaReg {})) (rs, ms) = - let width = cmmExprWidth dflags v - value - | width == wordWidth dflags = v - | width < wordWidth dflags = - CmmMachOp (MO_XX_Conv width (wordWidth dflags)) [v] - | otherwise = panic "Parameter width greater than word width" - - in (r:rs, mkAssign (CmmGlobal r) value <*> ms) - - -- Non VanillaRegs - co (v, RegisterParam r) (rs, ms) = - (r:rs, mkAssign (CmmGlobal r) v <*> ms) - - -- See Note [Width of parameters] - co (v, StackParam off) (rs, ms) - = (rs, mkStore (CmmStackSlot area off) (value v) <*> ms) - - width v = cmmExprWidth dflags v - value v - | isBitsType $ cmmExprType dflags v - , width v < wordWidth dflags = - CmmMachOp (MO_XX_Conv (width v) (wordWidth dflags)) [v] - | otherwise = v - - (setRA, init_offset) = - case area of - Young id -> -- Generate a store instruction for - -- the return address if making a call - case transfer of - Call -> - ([(CmmLit (CmmBlock id), StackParam init_offset)], - widthInBytes (wordWidth dflags)) - JumpRet -> - ([], - widthInBytes (wordWidth dflags)) - _other -> - ([], 0) - Old -> ([], updfr_off) - - (extra_stack_off, stack_params) = - assignStack dflags init_offset (cmmExprType dflags) extra_stack_stuff - - args :: [(CmmExpr, ParamLocation)] -- The argument and where to put it - (stk_size, args) = assignArgumentsPos dflags extra_stack_off conv - (cmmExprType dflags) actuals - - --- Note [Width of parameters] --- --- Consider passing a small (< word width) primitive like Int8# to a function. --- It's actually non-trivial to do this without extending/narrowing: --- * Global registers are considered to have native word width (i.e., 64-bits on --- x86-64), so CmmLint would complain if we assigned an 8-bit parameter to a --- global register. --- * Same problem exists with LLVM IR. --- * Lowering gets harder since on x86-32 not every register exposes its lower --- 8 bits (e.g., for %eax we can use %al, but there isn't a corresponding --- 8-bit register for %edi). So we would either need to extend/narrow anyway, --- or complicate the calling convention. --- * Passing a small integer in a stack slot, which has native word width, --- requires extending to word width when writing to the stack and narrowing --- when reading off the stack (see #16258). --- So instead, we always extend every parameter smaller than native word width --- in copyOutOflow and then truncate it back to the expected width in copyIn. --- Note that we do this in cmm using MO_XX_Conv to avoid requiring --- zero-/sign-extending - it's up to a backend to handle this in a most --- efficient way (e.g., a simple register move or a smaller size store). --- This convention (of ignoring the upper bits) is different from some C ABIs, --- e.g. all PowerPC ELF ABIs, that require sign or zero extending parameters. 
--- --- There was some discussion about this on this PR: --- https://github.com/ghc-proposals/ghc-proposals/pull/74 - - -mkCallEntry :: DynFlags -> Convention -> [CmmFormal] -> [CmmFormal] - -> (Int, [GlobalReg], CmmAGraph) -mkCallEntry dflags conv formals extra_stk - = copyInOflow dflags conv Old formals extra_stk - -lastWithArgs :: DynFlags -> Transfer -> Area -> Convention -> [CmmExpr] - -> UpdFrameOffset - -> (ByteOff -> [GlobalReg] -> CmmAGraph) - -> CmmAGraph -lastWithArgs dflags transfer area conv actuals updfr_off last = - lastWithArgsAndExtraStack dflags transfer area conv actuals - updfr_off noExtraStack last - -lastWithArgsAndExtraStack :: DynFlags - -> Transfer -> Area -> Convention -> [CmmExpr] - -> UpdFrameOffset -> [CmmExpr] - -> (ByteOff -> [GlobalReg] -> CmmAGraph) - -> CmmAGraph -lastWithArgsAndExtraStack dflags transfer area conv actuals updfr_off - extra_stack last = - copies <*> last outArgs regs - where - (outArgs, regs, copies) = copyOutOflow dflags conv transfer area actuals - updfr_off extra_stack - - -noExtraStack :: [CmmExpr] -noExtraStack = [] - -toCall :: CmmExpr -> Maybe BlockId -> UpdFrameOffset -> ByteOff - -> ByteOff -> [GlobalReg] - -> CmmAGraph -toCall e cont updfr_off res_space arg_space regs = - mkLast $ CmmCall e cont regs arg_space res_space updfr_off diff --git a/compiler/cmm/PprC.hs b/compiler/cmm/PprC.hs deleted file mode 100644 index d94bc01e03..0000000000 --- a/compiler/cmm/PprC.hs +++ /dev/null @@ -1,1380 +0,0 @@ -{-# LANGUAGE CPP, DeriveFunctor, GADTs, PatternSynonyms #-} - ------------------------------------------------------------------------------ --- --- Pretty-printing of Cmm as C, suitable for feeding gcc --- --- (c) The University of Glasgow 2004-2006 --- --- Print Cmm as real C, for -fvia-C --- --- See wiki:commentary/compiler/backends/ppr-c --- --- This is simpler than the old PprAbsC, because Cmm is "macro-expanded" --- relative to the old AbstractC, and many oddities/decorations have --- disappeared from the data type. --- --- This code generator is only supported in unregisterised mode. --- ------------------------------------------------------------------------------ - -module PprC ( - writeC - ) where - -#include "HsVersions.h" - --- Cmm stuff -import GhcPrelude - -import BlockId -import CLabel -import ForeignCall -import Cmm hiding (pprBBlock) -import PprCmm () -- For Outputable instances -import Hoopl.Block -import Hoopl.Collections -import Hoopl.Graph -import CmmUtils -import CmmSwitch - --- Utils -import CPrim -import DynFlags -import FastString -import Outputable -import GHC.Platform -import UniqSet -import UniqFM -import Unique -import Util - --- The rest -import Data.ByteString (ByteString) -import qualified Data.ByteString as BS -import Control.Monad.ST -import Data.Bits -import Data.Char -import Data.List -import Data.Map (Map) -import Data.Word -import System.IO -import qualified Data.Map as Map -import Control.Monad (ap) -import qualified Data.Array.Unsafe as U ( castSTUArray ) -import Data.Array.ST - --- -------------------------------------------------------------------------- --- Top level - -writeC :: DynFlags -> Handle -> RawCmmGroup -> IO () -writeC dflags handle cmm = printForC dflags handle (pprC cmm $$ blankLine) - --- -------------------------------------------------------------------------- --- Now do some real work --- --- for fun, we could call cmmToCmm over the tops... 
--- - -pprC :: RawCmmGroup -> SDoc -pprC tops = vcat $ intersperse blankLine $ map pprTop tops - --- --- top level procs --- -pprTop :: RawCmmDecl -> SDoc -pprTop (CmmProc infos clbl _in_live_regs graph) = - - (case mapLookup (g_entry graph) infos of - Nothing -> empty - Just (Statics info_clbl info_dat) -> - pprDataExterns info_dat $$ - pprWordArray info_is_in_rodata info_clbl info_dat) $$ - (vcat [ - blankLine, - extern_decls, - (if (externallyVisibleCLabel clbl) - then mkFN_ else mkIF_) (ppr clbl) <+> lbrace, - nest 8 temp_decls, - vcat (map pprBBlock blocks), - rbrace ] - ) - where - -- info tables are always in .rodata - info_is_in_rodata = True - blocks = toBlockListEntryFirst graph - (temp_decls, extern_decls) = pprTempAndExternDecls blocks - - --- Chunks of static data. - --- We only handle (a) arrays of word-sized things and (b) strings. - -pprTop (CmmData section (Statics lbl [CmmString str])) = - pprExternDecl lbl $$ - hcat [ - pprLocalness lbl, pprConstness (isSecConstant section), text "char ", ppr lbl, - text "[] = ", pprStringInCStyle str, semi - ] - -pprTop (CmmData section (Statics lbl [CmmUninitialised size])) = - pprExternDecl lbl $$ - hcat [ - pprLocalness lbl, pprConstness (isSecConstant section), text "char ", ppr lbl, - brackets (int size), semi - ] - -pprTop (CmmData section (Statics lbl lits)) = - pprDataExterns lits $$ - pprWordArray (isSecConstant section) lbl lits - --- -------------------------------------------------------------------------- --- BasicBlocks are self-contained entities: they always end in a jump. --- --- Like nativeGen/AsmCodeGen, we could probably reorder blocks to turn --- as many jumps as possible into fall throughs. --- - -pprBBlock :: CmmBlock -> SDoc -pprBBlock block = - nest 4 (pprBlockId (entryLabel block) <> colon) $$ - nest 8 (vcat (map pprStmt (blockToList nodes)) $$ pprStmt last) - where - (_, nodes, last) = blockSplit block - --- -------------------------------------------------------------------------- --- Info tables. Just arrays of words. --- See codeGen/ClosureInfo, and nativeGen/PprMach - -pprWordArray :: Bool -> CLabel -> [CmmStatic] -> SDoc -pprWordArray is_ro lbl ds - = sdocWithDynFlags $ \dflags -> - -- TODO: align closures only - pprExternDecl lbl $$ - hcat [ pprLocalness lbl, pprConstness is_ro, text "StgWord" - , space, ppr lbl, text "[]" - -- See Note [StgWord alignment] - , pprAlignment (wordWidth dflags) - , text "= {" ] - $$ nest 8 (commafy (pprStatics dflags ds)) - $$ text "};" - -pprAlignment :: Width -> SDoc -pprAlignment words = - text "__attribute__((aligned(" <> int (widthInBytes words) <> text ")))" - --- Note [StgWord alignment] --- C codegen builds static closures as StgWord C arrays (pprWordArray). --- Their real C type is 'StgClosure'. Macros like UNTAG_CLOSURE assume --- pointers to 'StgClosure' are aligned at pointer size boundary: --- 4 byte boundary on 32 systems --- and 8 bytes on 64-bit systems --- see TAG_MASK and TAG_BITS definition and usage. --- --- It's a reasonable assumption also known as natural alignment. --- Although some architectures have different alignment rules. --- One of known exceptions is m68k (#11395, comment:16) where: --- __alignof__(StgWord) == 2, sizeof(StgWord) == 4 --- --- Thus we explicitly increase alignment by using --- __attribute__((aligned(4))) --- declaration. 
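-- Taken together, pprWordArray and pprAlignment above (with pprLocalness and
-- pprConstness just below) emit declarations of roughly the following shape.
-- A plain-String sketch (wordArrayDeclSketch is an invented name; the label,
-- payload and 8-byte word size in the example are made up, and the real code
-- also emits an extern declaration and lays the payload out over several
-- lines):

wordArrayDeclSketch :: Bool -> Bool -> String -> Int -> [String] -> String
wordArrayDeclSketch is_local is_ro lbl word_bytes payload =
     (if is_local then "static " else "")
  ++ (if is_ro    then "const "  else "")
  ++ "StgWord " ++ lbl
  ++ "[] __attribute__((aligned(" ++ show word_bytes ++ "))) = {"
  ++ commas payload ++ "};"
  where
    commas []     = ""
    commas [x]    = x
    commas (x:xs) = x ++ "," ++ commas xs

-- e.g. wordArrayDeclSketch True True "Foo_closure" 8 ["0x0U","(W_)&Foo_info"]
--   == "static const StgWord Foo_closure[] __attribute__((aligned(8))) = {0x0U,(W_)&Foo_info};"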
- --- --- has to be static, if it isn't globally visible --- -pprLocalness :: CLabel -> SDoc -pprLocalness lbl | not $ externallyVisibleCLabel lbl = text "static " - | otherwise = empty - -pprConstness :: Bool -> SDoc -pprConstness is_ro | is_ro = text "const " - | otherwise = empty - --- -------------------------------------------------------------------------- --- Statements. --- - -pprStmt :: CmmNode e x -> SDoc - -pprStmt stmt = - sdocWithDynFlags $ \dflags -> - case stmt of - CmmEntry{} -> empty - CmmComment _ -> empty -- (hang (text "/*") 3 (ftext s)) $$ ptext (sLit "*/") - -- XXX if the string contains "*/", we need to fix it - -- XXX we probably want to emit these comments when - -- some debugging option is on. They can get quite - -- large. - - CmmTick _ -> empty - CmmUnwind{} -> empty - - CmmAssign dest src -> pprAssign dflags dest src - - CmmStore dest src - | typeWidth rep == W64 && wordWidth dflags /= W64 - -> (if isFloatType rep then text "ASSIGN_DBL" - else ptext (sLit ("ASSIGN_Word64"))) <> - parens (mkP_ <> pprExpr1 dest <> comma <> pprExpr src) <> semi - - | otherwise - -> hsep [ pprExpr (CmmLoad dest rep), equals, pprExpr src <> semi ] - where - rep = cmmExprType dflags src - - CmmUnsafeForeignCall target@(ForeignTarget fn conv) results args -> - fnCall - where - (res_hints, arg_hints) = foreignTargetHints target - hresults = zip results res_hints - hargs = zip args arg_hints - - ForeignConvention cconv _ _ ret = conv - - cast_fn = parens (cCast (pprCFunType (char '*') cconv hresults hargs) fn) - - -- See wiki:commentary/compiler/backends/ppr-c#prototypes - fnCall = - case fn of - CmmLit (CmmLabel lbl) - | StdCallConv <- cconv -> - pprCall (ppr lbl) cconv hresults hargs - -- stdcall functions must be declared with - -- a function type, otherwise the C compiler - -- doesn't add the @n suffix to the label. We - -- can't add the @n suffix ourselves, because - -- it isn't valid C. - | CmmNeverReturns <- ret -> - pprCall cast_fn cconv hresults hargs <> semi - | not (isMathFun lbl) -> - pprForeignCall (ppr lbl) cconv hresults hargs - _ -> - pprCall cast_fn cconv hresults hargs <> semi - -- for a dynamic call, no declaration is necessary. - - CmmUnsafeForeignCall (PrimTarget MO_Touch) _results _args -> empty - CmmUnsafeForeignCall (PrimTarget (MO_Prefetch_Data _)) _results _args -> empty - - CmmUnsafeForeignCall target@(PrimTarget op) results args -> - fn_call - where - cconv = CCallConv - fn = pprCallishMachOp_for_C op - - (res_hints, arg_hints) = foreignTargetHints target - hresults = zip results res_hints - hargs = zip args arg_hints - - fn_call - -- The mem primops carry an extra alignment arg. - -- We could maybe emit an alignment directive using this info. - -- We also need to cast mem primops to prevent conflicts with GCC - -- builtins (see bug #5967). 
- | Just _align <- machOpMemcpyishAlign op - = (text ";EFF_(" <> fn <> char ')' <> semi) $$ - pprForeignCall fn cconv hresults hargs - | otherwise - = pprCall fn cconv hresults hargs - - CmmBranch ident -> pprBranch ident - CmmCondBranch expr yes no _ -> pprCondBranch expr yes no - CmmCall { cml_target = expr } -> mkJMP_ (pprExpr expr) <> semi - CmmSwitch arg ids -> sdocWithDynFlags $ \dflags -> - pprSwitch dflags arg ids - - _other -> pprPanic "PprC.pprStmt" (ppr stmt) - -type Hinted a = (a, ForeignHint) - -pprForeignCall :: SDoc -> CCallConv -> [Hinted CmmFormal] -> [Hinted CmmActual] - -> SDoc -pprForeignCall fn cconv results args = fn_call - where - fn_call = braces ( - pprCFunType (char '*' <> text "ghcFunPtr") cconv results args <> semi - $$ text "ghcFunPtr" <+> equals <+> cast_fn <> semi - $$ pprCall (text "ghcFunPtr") cconv results args <> semi - ) - cast_fn = parens (parens (pprCFunType (char '*') cconv results args) <> fn) - -pprCFunType :: SDoc -> CCallConv -> [Hinted CmmFormal] -> [Hinted CmmActual] -> SDoc -pprCFunType ppr_fn cconv ress args - = sdocWithDynFlags $ \dflags -> - let res_type [] = text "void" - res_type [(one, hint)] = machRepHintCType (localRegType one) hint - res_type _ = panic "pprCFunType: only void or 1 return value supported" - - arg_type (expr, hint) = machRepHintCType (cmmExprType dflags expr) hint - in res_type ress <+> - parens (ccallConvAttribute cconv <> ppr_fn) <> - parens (commafy (map arg_type args)) - --- --------------------------------------------------------------------- --- unconditional branches -pprBranch :: BlockId -> SDoc -pprBranch ident = text "goto" <+> pprBlockId ident <> semi - - --- --------------------------------------------------------------------- --- conditional branches to local labels -pprCondBranch :: CmmExpr -> BlockId -> BlockId -> SDoc -pprCondBranch expr yes no - = hsep [ text "if" , parens(pprExpr expr) , - text "goto", pprBlockId yes <> semi, - text "else goto", pprBlockId no <> semi ] - --- --------------------------------------------------------------------- --- a local table branch --- --- we find the fall-through cases --- -pprSwitch :: DynFlags -> CmmExpr -> SwitchTargets -> SDoc -pprSwitch dflags e ids - = (hang (text "switch" <+> parens ( pprExpr e ) <+> lbrace) - 4 (vcat ( map caseify pairs ) $$ def)) $$ rbrace - where - (pairs, mbdef) = switchTargetsFallThrough ids - - -- fall through case - caseify (ix:ixs, ident) = vcat (map do_fallthrough ixs) $$ final_branch ix - where - do_fallthrough ix = - hsep [ text "case" , pprHexVal ix (wordWidth dflags) <> colon , - text "/* fall through */" ] - - final_branch ix = - hsep [ text "case" , pprHexVal ix (wordWidth dflags) <> colon , - text "goto" , (pprBlockId ident) <> semi ] - - caseify (_ , _ ) = panic "pprSwitch: switch with no cases!" - - def | Just l <- mbdef = text "default: goto" <+> pprBlockId l <> semi - | otherwise = empty - --- --------------------------------------------------------------------- --- Expressions. --- - --- C Types: the invariant is that the C expression generated by --- --- pprExpr e --- --- has a type in C which is also given by --- --- machRepCType (cmmExprType e) --- --- (similar invariants apply to the rest of the pretty printer). 
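-- Relating back to pprSwitch above: within one group of switch targets the
-- first index carries the "goto" and the remaining indices are printed as
-- fall-through cases.  A standalone, plain-String sketch of that layout
-- (caseifySketch is an invented name; the real caseify produces SDoc and
-- prints indices with pprHexVal):

import Numeric (showHex)

caseifySketch :: ([Integer], String) -> [String]
caseifySketch (ix:ixs, lbl) =
     [ "case 0x" ++ showHex i "" ++ ": /* fall through */" | i <- ixs ]
  ++ [ "case 0x" ++ showHex ix "" ++ ": goto " ++ lbl ++ ";" ]
caseifySketch ([], _) = error "switch with no cases"

-- e.g. caseifySketch ([1,2,3], "_c5")
--   == [ "case 0x2: /* fall through */"
--      , "case 0x3: /* fall through */"
--      , "case 0x1: goto _c5;" ]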
- -pprExpr :: CmmExpr -> SDoc -pprExpr e = case e of - CmmLit lit -> pprLit lit - - - CmmLoad e ty -> sdocWithDynFlags $ \dflags -> pprLoad dflags e ty - CmmReg reg -> pprCastReg reg - CmmRegOff reg 0 -> pprCastReg reg - - -- CmmRegOff is an alias of MO_Add - CmmRegOff reg i -> sdocWithDynFlags $ \dflags -> - pprCastReg reg <> char '+' <> - pprHexVal (fromIntegral i) (wordWidth dflags) - - CmmMachOp mop args -> pprMachOpApp mop args - - CmmStackSlot _ _ -> panic "pprExpr: CmmStackSlot not supported!" - - -pprLoad :: DynFlags -> CmmExpr -> CmmType -> SDoc -pprLoad dflags e ty - | width == W64, wordWidth dflags /= W64 - = (if isFloatType ty then text "PK_DBL" - else text "PK_Word64") - <> parens (mkP_ <> pprExpr1 e) - - | otherwise - = case e of - CmmReg r | isPtrReg r && width == wordWidth dflags && not (isFloatType ty) - -> char '*' <> pprAsPtrReg r - - CmmRegOff r 0 | isPtrReg r && width == wordWidth dflags && not (isFloatType ty) - -> char '*' <> pprAsPtrReg r - - CmmRegOff r off | isPtrReg r && width == wordWidth dflags - , off `rem` wORD_SIZE dflags == 0 && not (isFloatType ty) - -- ToDo: check that the offset is a word multiple? - -- (For tagging to work, I had to avoid unaligned loads. --ARY) - -> pprAsPtrReg r <> brackets (ppr (off `shiftR` wordShift dflags)) - - _other -> cLoad e ty - where - width = typeWidth ty - -pprExpr1 :: CmmExpr -> SDoc -pprExpr1 (CmmLit lit) = pprLit1 lit -pprExpr1 e@(CmmReg _reg) = pprExpr e -pprExpr1 other = parens (pprExpr other) - --- -------------------------------------------------------------------------- --- MachOp applications - -pprMachOpApp :: MachOp -> [CmmExpr] -> SDoc - -pprMachOpApp op args - | isMulMayOfloOp op - = text "mulIntMayOflo" <> parens (commafy (map pprExpr args)) - where isMulMayOfloOp (MO_U_MulMayOflo _) = True - isMulMayOfloOp (MO_S_MulMayOflo _) = True - isMulMayOfloOp _ = False - -pprMachOpApp mop args - | Just ty <- machOpNeedsCast mop - = ty <> parens (pprMachOpApp' mop args) - | otherwise - = pprMachOpApp' mop args - --- Comparisons in C have type 'int', but we want type W_ (this is what --- resultRepOfMachOp says). The other C operations inherit their type --- from their operands, so no casting is required. 
-machOpNeedsCast :: MachOp -> Maybe SDoc -machOpNeedsCast mop - | isComparisonMachOp mop = Just mkW_ - | otherwise = Nothing - -pprMachOpApp' :: MachOp -> [CmmExpr] -> SDoc -pprMachOpApp' mop args - = case args of - -- dyadic - [x,y] -> pprArg x <+> pprMachOp_for_C mop <+> pprArg y - - -- unary - [x] -> pprMachOp_for_C mop <> parens (pprArg x) - - _ -> panic "PprC.pprMachOp : machop with wrong number of args" - - where - -- Cast needed for signed integer ops - pprArg e | signedOp mop = sdocWithDynFlags $ \dflags -> - cCast (machRep_S_CType (typeWidth (cmmExprType dflags e))) e - | needsFCasts mop = sdocWithDynFlags $ \dflags -> - cCast (machRep_F_CType (typeWidth (cmmExprType dflags e))) e - | otherwise = pprExpr1 e - needsFCasts (MO_F_Eq _) = False - needsFCasts (MO_F_Ne _) = False - needsFCasts (MO_F_Neg _) = True - needsFCasts (MO_F_Quot _) = True - needsFCasts mop = floatComparison mop - --- -------------------------------------------------------------------------- --- Literals - -pprLit :: CmmLit -> SDoc -pprLit lit = case lit of - CmmInt i rep -> pprHexVal i rep - - CmmFloat f w -> parens (machRep_F_CType w) <> str - where d = fromRational f :: Double - str | isInfinite d && d < 0 = text "-INFINITY" - | isInfinite d = text "INFINITY" - | isNaN d = text "NAN" - | otherwise = text (show d) - -- these constants come from - -- see #1861 - - CmmVec {} -> panic "PprC printing vector literal" - - CmmBlock bid -> mkW_ <> pprCLabelAddr (infoTblLbl bid) - CmmHighStackMark -> panic "PprC printing high stack mark" - CmmLabel clbl -> mkW_ <> pprCLabelAddr clbl - CmmLabelOff clbl i -> mkW_ <> pprCLabelAddr clbl <> char '+' <> int i - CmmLabelDiffOff clbl1 _ i _ -- non-word widths not supported via C - -- WARNING: - -- * the lit must occur in the info table clbl2 - -- * clbl1 must be an SRT, a slow entry point or a large bitmap - -> mkW_ <> pprCLabelAddr clbl1 <> char '+' <> int i - - where - pprCLabelAddr lbl = char '&' <> ppr lbl - -pprLit1 :: CmmLit -> SDoc -pprLit1 lit@(CmmLabelOff _ _) = parens (pprLit lit) -pprLit1 lit@(CmmLabelDiffOff _ _ _ _) = parens (pprLit lit) -pprLit1 lit@(CmmFloat _ _) = parens (pprLit lit) -pprLit1 other = pprLit other - --- --------------------------------------------------------------------------- --- Static data - -pprStatics :: DynFlags -> [CmmStatic] -> [SDoc] -pprStatics _ [] = [] -pprStatics dflags (CmmStaticLit (CmmFloat f W32) : rest) - -- odd numbers of floats are padded to a word by mkVirtHeapOffsetsWithPadding - | wORD_SIZE dflags == 8, CmmStaticLit (CmmInt 0 W32) : rest' <- rest - = pprLit1 (floatToWord dflags f) : pprStatics dflags rest' - -- adjacent floats aren't padded but combined into a single word - | wORD_SIZE dflags == 8, CmmStaticLit (CmmFloat g W32) : rest' <- rest - = pprLit1 (floatPairToWord dflags f g) : pprStatics dflags rest' - | wORD_SIZE dflags == 4 - = pprLit1 (floatToWord dflags f) : pprStatics dflags rest - | otherwise - = pprPanic "pprStatics: float" (vcat (map ppr' rest)) - where ppr' (CmmStaticLit l) = sdocWithDynFlags $ \dflags -> - ppr (cmmLitType dflags l) - ppr' _other = text "bad static!" 
-pprStatics dflags (CmmStaticLit (CmmFloat f W64) : rest) - = map pprLit1 (doubleToWords dflags f) ++ pprStatics dflags rest - -pprStatics dflags (CmmStaticLit (CmmInt i W64) : rest) - | wordWidth dflags == W32 - = if wORDS_BIGENDIAN dflags - then pprStatics dflags (CmmStaticLit (CmmInt q W32) : - CmmStaticLit (CmmInt r W32) : rest) - else pprStatics dflags (CmmStaticLit (CmmInt r W32) : - CmmStaticLit (CmmInt q W32) : rest) - where r = i .&. 0xffffffff - q = i `shiftR` 32 -pprStatics dflags (CmmStaticLit (CmmInt a W32) : - CmmStaticLit (CmmInt b W32) : rest) - | wordWidth dflags == W64 - = if wORDS_BIGENDIAN dflags - then pprStatics dflags (CmmStaticLit (CmmInt ((shiftL a 32) .|. b) W64) : - rest) - else pprStatics dflags (CmmStaticLit (CmmInt ((shiftL b 32) .|. a) W64) : - rest) -pprStatics dflags (CmmStaticLit (CmmInt a W16) : - CmmStaticLit (CmmInt b W16) : rest) - | wordWidth dflags == W32 - = if wORDS_BIGENDIAN dflags - then pprStatics dflags (CmmStaticLit (CmmInt ((shiftL a 16) .|. b) W32) : - rest) - else pprStatics dflags (CmmStaticLit (CmmInt ((shiftL b 16) .|. a) W32) : - rest) -pprStatics dflags (CmmStaticLit (CmmInt _ w) : _) - | w /= wordWidth dflags - = pprPanic "pprStatics: cannot emit a non-word-sized static literal" (ppr w) -pprStatics dflags (CmmStaticLit lit : rest) - = pprLit1 lit : pprStatics dflags rest -pprStatics _ (other : _) - = pprPanic "pprStatics: other" (pprStatic other) - -pprStatic :: CmmStatic -> SDoc -pprStatic s = case s of - - CmmStaticLit lit -> nest 4 (pprLit lit) - CmmUninitialised i -> nest 4 (mkC_ <> brackets (int i)) - - -- these should be inlined, like the old .hc - CmmString s' -> nest 4 (mkW_ <> parens(pprStringInCStyle s')) - - --- --------------------------------------------------------------------------- --- Block Ids - -pprBlockId :: BlockId -> SDoc -pprBlockId b = char '_' <> ppr (getUnique b) - --- -------------------------------------------------------------------------- --- Print a MachOp in a way suitable for emitting via C. --- - -pprMachOp_for_C :: MachOp -> SDoc - -pprMachOp_for_C mop = case mop of - - -- Integer operations - MO_Add _ -> char '+' - MO_Sub _ -> char '-' - MO_Eq _ -> text "==" - MO_Ne _ -> text "!=" - MO_Mul _ -> char '*' - - MO_S_Quot _ -> char '/' - MO_S_Rem _ -> char '%' - MO_S_Neg _ -> char '-' - - MO_U_Quot _ -> char '/' - MO_U_Rem _ -> char '%' - - -- & Floating-point operations - MO_F_Add _ -> char '+' - MO_F_Sub _ -> char '-' - MO_F_Neg _ -> char '-' - MO_F_Mul _ -> char '*' - MO_F_Quot _ -> char '/' - - -- Signed comparisons - MO_S_Ge _ -> text ">=" - MO_S_Le _ -> text "<=" - MO_S_Gt _ -> char '>' - MO_S_Lt _ -> char '<' - - -- & Unsigned comparisons - MO_U_Ge _ -> text ">=" - MO_U_Le _ -> text "<=" - MO_U_Gt _ -> char '>' - MO_U_Lt _ -> char '<' - - -- & Floating-point comparisons - MO_F_Eq _ -> text "==" - MO_F_Ne _ -> text "!=" - MO_F_Ge _ -> text ">=" - MO_F_Le _ -> text "<=" - MO_F_Gt _ -> char '>' - MO_F_Lt _ -> char '<' - - -- Bitwise operations. Not all of these may be supported at all - -- sizes, and only integral MachReps are valid. - MO_And _ -> char '&' - MO_Or _ -> char '|' - MO_Xor _ -> char '^' - MO_Not _ -> char '~' - MO_Shl _ -> text "<<" - MO_U_Shr _ -> text ">>" -- unsigned shift right - MO_S_Shr _ -> text ">>" -- signed shift right - --- Conversions. Some of these will be NOPs, but never those that convert --- between ints and floats. --- Floating-point conversions use the signed variant. 
--- We won't know to generate (void*) casts here, but maybe from --- context elsewhere - --- noop casts - MO_UU_Conv from to | from == to -> empty - MO_UU_Conv _from to -> parens (machRep_U_CType to) - - MO_SS_Conv from to | from == to -> empty - MO_SS_Conv _from to -> parens (machRep_S_CType to) - - MO_XX_Conv from to | from == to -> empty - MO_XX_Conv _from to -> parens (machRep_U_CType to) - - MO_FF_Conv from to | from == to -> empty - MO_FF_Conv _from to -> parens (machRep_F_CType to) - - MO_SF_Conv _from to -> parens (machRep_F_CType to) - MO_FS_Conv _from to -> parens (machRep_S_CType to) - - MO_S_MulMayOflo _ -> pprTrace "offending mop:" - (text "MO_S_MulMayOflo") - (panic $ "PprC.pprMachOp_for_C: MO_S_MulMayOflo" - ++ " should have been handled earlier!") - MO_U_MulMayOflo _ -> pprTrace "offending mop:" - (text "MO_U_MulMayOflo") - (panic $ "PprC.pprMachOp_for_C: MO_U_MulMayOflo" - ++ " should have been handled earlier!") - - MO_V_Insert {} -> pprTrace "offending mop:" - (text "MO_V_Insert") - (panic $ "PprC.pprMachOp_for_C: MO_V_Insert" - ++ " should have been handled earlier!") - MO_V_Extract {} -> pprTrace "offending mop:" - (text "MO_V_Extract") - (panic $ "PprC.pprMachOp_for_C: MO_V_Extract" - ++ " should have been handled earlier!") - - MO_V_Add {} -> pprTrace "offending mop:" - (text "MO_V_Add") - (panic $ "PprC.pprMachOp_for_C: MO_V_Add" - ++ " should have been handled earlier!") - MO_V_Sub {} -> pprTrace "offending mop:" - (text "MO_V_Sub") - (panic $ "PprC.pprMachOp_for_C: MO_V_Sub" - ++ " should have been handled earlier!") - MO_V_Mul {} -> pprTrace "offending mop:" - (text "MO_V_Mul") - (panic $ "PprC.pprMachOp_for_C: MO_V_Mul" - ++ " should have been handled earlier!") - - MO_VS_Quot {} -> pprTrace "offending mop:" - (text "MO_VS_Quot") - (panic $ "PprC.pprMachOp_for_C: MO_VS_Quot" - ++ " should have been handled earlier!") - MO_VS_Rem {} -> pprTrace "offending mop:" - (text "MO_VS_Rem") - (panic $ "PprC.pprMachOp_for_C: MO_VS_Rem" - ++ " should have been handled earlier!") - MO_VS_Neg {} -> pprTrace "offending mop:" - (text "MO_VS_Neg") - (panic $ "PprC.pprMachOp_for_C: MO_VS_Neg" - ++ " should have been handled earlier!") - - MO_VU_Quot {} -> pprTrace "offending mop:" - (text "MO_VU_Quot") - (panic $ "PprC.pprMachOp_for_C: MO_VU_Quot" - ++ " should have been handled earlier!") - MO_VU_Rem {} -> pprTrace "offending mop:" - (text "MO_VU_Rem") - (panic $ "PprC.pprMachOp_for_C: MO_VU_Rem" - ++ " should have been handled earlier!") - - MO_VF_Insert {} -> pprTrace "offending mop:" - (text "MO_VF_Insert") - (panic $ "PprC.pprMachOp_for_C: MO_VF_Insert" - ++ " should have been handled earlier!") - MO_VF_Extract {} -> pprTrace "offending mop:" - (text "MO_VF_Extract") - (panic $ "PprC.pprMachOp_for_C: MO_VF_Extract" - ++ " should have been handled earlier!") - - MO_VF_Add {} -> pprTrace "offending mop:" - (text "MO_VF_Add") - (panic $ "PprC.pprMachOp_for_C: MO_VF_Add" - ++ " should have been handled earlier!") - MO_VF_Sub {} -> pprTrace "offending mop:" - (text "MO_VF_Sub") - (panic $ "PprC.pprMachOp_for_C: MO_VF_Sub" - ++ " should have been handled earlier!") - MO_VF_Neg {} -> pprTrace "offending mop:" - (text "MO_VF_Neg") - (panic $ "PprC.pprMachOp_for_C: MO_VF_Neg" - ++ " should have been handled earlier!") - MO_VF_Mul {} -> pprTrace "offending mop:" - (text "MO_VF_Mul") - (panic $ "PprC.pprMachOp_for_C: MO_VF_Mul" - ++ " should have been handled earlier!") - MO_VF_Quot {} -> pprTrace "offending mop:" - (text "MO_VF_Quot") - (panic $ "PprC.pprMachOp_for_C: MO_VF_Quot" - ++ 
" should have been handled earlier!") - - MO_AlignmentCheck {} -> panic "-falignment-santisation not supported by unregisterised backend" - -signedOp :: MachOp -> Bool -- Argument type(s) are signed ints -signedOp (MO_S_Quot _) = True -signedOp (MO_S_Rem _) = True -signedOp (MO_S_Neg _) = True -signedOp (MO_S_Ge _) = True -signedOp (MO_S_Le _) = True -signedOp (MO_S_Gt _) = True -signedOp (MO_S_Lt _) = True -signedOp (MO_S_Shr _) = True -signedOp (MO_SS_Conv _ _) = True -signedOp (MO_SF_Conv _ _) = True -signedOp _ = False - -floatComparison :: MachOp -> Bool -- comparison between float args -floatComparison (MO_F_Eq _) = True -floatComparison (MO_F_Ne _) = True -floatComparison (MO_F_Ge _) = True -floatComparison (MO_F_Le _) = True -floatComparison (MO_F_Gt _) = True -floatComparison (MO_F_Lt _) = True -floatComparison _ = False - --- --------------------------------------------------------------------- --- tend to be implemented by foreign calls - -pprCallishMachOp_for_C :: CallishMachOp -> SDoc - -pprCallishMachOp_for_C mop - = case mop of - MO_F64_Pwr -> text "pow" - MO_F64_Sin -> text "sin" - MO_F64_Cos -> text "cos" - MO_F64_Tan -> text "tan" - MO_F64_Sinh -> text "sinh" - MO_F64_Cosh -> text "cosh" - MO_F64_Tanh -> text "tanh" - MO_F64_Asin -> text "asin" - MO_F64_Acos -> text "acos" - MO_F64_Atanh -> text "atanh" - MO_F64_Asinh -> text "asinh" - MO_F64_Acosh -> text "acosh" - MO_F64_Atan -> text "atan" - MO_F64_Log -> text "log" - MO_F64_Log1P -> text "log1p" - MO_F64_Exp -> text "exp" - MO_F64_ExpM1 -> text "expm1" - MO_F64_Sqrt -> text "sqrt" - MO_F64_Fabs -> text "fabs" - MO_F32_Pwr -> text "powf" - MO_F32_Sin -> text "sinf" - MO_F32_Cos -> text "cosf" - MO_F32_Tan -> text "tanf" - MO_F32_Sinh -> text "sinhf" - MO_F32_Cosh -> text "coshf" - MO_F32_Tanh -> text "tanhf" - MO_F32_Asin -> text "asinf" - MO_F32_Acos -> text "acosf" - MO_F32_Atan -> text "atanf" - MO_F32_Asinh -> text "asinhf" - MO_F32_Acosh -> text "acoshf" - MO_F32_Atanh -> text "atanhf" - MO_F32_Log -> text "logf" - MO_F32_Log1P -> text "log1pf" - MO_F32_Exp -> text "expf" - MO_F32_ExpM1 -> text "expm1f" - MO_F32_Sqrt -> text "sqrtf" - MO_F32_Fabs -> text "fabsf" - MO_ReadBarrier -> text "load_load_barrier" - MO_WriteBarrier -> text "write_barrier" - MO_Memcpy _ -> text "memcpy" - MO_Memset _ -> text "memset" - MO_Memmove _ -> text "memmove" - MO_Memcmp _ -> text "memcmp" - (MO_BSwap w) -> ptext (sLit $ bSwapLabel w) - (MO_BRev w) -> ptext (sLit $ bRevLabel w) - (MO_PopCnt w) -> ptext (sLit $ popCntLabel w) - (MO_Pext w) -> ptext (sLit $ pextLabel w) - (MO_Pdep w) -> ptext (sLit $ pdepLabel w) - (MO_Clz w) -> ptext (sLit $ clzLabel w) - (MO_Ctz w) -> ptext (sLit $ ctzLabel w) - (MO_AtomicRMW w amop) -> ptext (sLit $ atomicRMWLabel w amop) - (MO_Cmpxchg w) -> ptext (sLit $ cmpxchgLabel w) - (MO_AtomicRead w) -> ptext (sLit $ atomicReadLabel w) - (MO_AtomicWrite w) -> ptext (sLit $ atomicWriteLabel w) - (MO_UF_Conv w) -> ptext (sLit $ word2FloatLabel w) - - MO_S_Mul2 {} -> unsupported - MO_S_QuotRem {} -> unsupported - MO_U_QuotRem {} -> unsupported - MO_U_QuotRem2 {} -> unsupported - MO_Add2 {} -> unsupported - MO_AddWordC {} -> unsupported - MO_SubWordC {} -> unsupported - MO_AddIntC {} -> unsupported - MO_SubIntC {} -> unsupported - MO_U_Mul2 {} -> unsupported - MO_Touch -> unsupported - (MO_Prefetch_Data _ ) -> unsupported - --- we could support prefetch via "__builtin_prefetch" - --- Not adding it for now - where unsupported = panic ("pprCallishMachOp_for_C: " ++ show mop - ++ " not supported!") - --- 
--------------------------------------------------------------------- --- Useful #defines --- - -mkJMP_, mkFN_, mkIF_ :: SDoc -> SDoc - -mkJMP_ i = text "JMP_" <> parens i -mkFN_ i = text "FN_" <> parens i -- externally visible function -mkIF_ i = text "IF_" <> parens i -- locally visible - --- from includes/Stg.h --- -mkC_,mkW_,mkP_ :: SDoc - -mkC_ = text "(C_)" -- StgChar -mkW_ = text "(W_)" -- StgWord -mkP_ = text "(P_)" -- StgWord* - --- --------------------------------------------------------------------- --- --- Assignments --- --- Generating assignments is what we're all about, here --- -pprAssign :: DynFlags -> CmmReg -> CmmExpr -> SDoc - --- dest is a reg, rhs is a reg -pprAssign _ r1 (CmmReg r2) - | isPtrReg r1 && isPtrReg r2 - = hcat [ pprAsPtrReg r1, equals, pprAsPtrReg r2, semi ] - --- dest is a reg, rhs is a CmmRegOff -pprAssign dflags r1 (CmmRegOff r2 off) - | isPtrReg r1 && isPtrReg r2 && (off `rem` wORD_SIZE dflags == 0) - = hcat [ pprAsPtrReg r1, equals, pprAsPtrReg r2, op, int off', semi ] - where - off1 = off `shiftR` wordShift dflags - - (op,off') | off >= 0 = (char '+', off1) - | otherwise = (char '-', -off1) - --- dest is a reg, rhs is anything. --- We can't cast the lvalue, so we have to cast the rhs if necessary. Casting --- the lvalue elicits a warning from new GCC versions (3.4+). -pprAssign _ r1 r2 - | isFixedPtrReg r1 = mkAssign (mkP_ <> pprExpr1 r2) - | Just ty <- strangeRegType r1 = mkAssign (parens ty <> pprExpr1 r2) - | otherwise = mkAssign (pprExpr r2) - where mkAssign x = if r1 == CmmGlobal BaseReg - then text "ASSIGN_BaseReg" <> parens x <> semi - else pprReg r1 <> text " = " <> x <> semi - --- --------------------------------------------------------------------- --- Registers - -pprCastReg :: CmmReg -> SDoc -pprCastReg reg - | isStrangeTypeReg reg = mkW_ <> pprReg reg - | otherwise = pprReg reg - --- True if (pprReg reg) will give an expression with type StgPtr. We --- need to take care with pointer arithmetic on registers with type --- StgPtr. -isFixedPtrReg :: CmmReg -> Bool -isFixedPtrReg (CmmLocal _) = False -isFixedPtrReg (CmmGlobal r) = isFixedPtrGlobalReg r - --- True if (pprAsPtrReg reg) will give an expression with type StgPtr --- JD: THIS IS HORRIBLE AND SHOULD BE RENAMED, AT THE VERY LEAST. --- THE GARBAGE WITH THE VNonGcPtr HELPS MATCH THE OLD CODE GENERATOR'S OUTPUT; --- I'M NOT SURE IF IT SHOULD REALLY STAY THAT WAY. -isPtrReg :: CmmReg -> Bool -isPtrReg (CmmLocal _) = False -isPtrReg (CmmGlobal (VanillaReg _ VGcPtr)) = True -- if we print via pprAsPtrReg -isPtrReg (CmmGlobal (VanillaReg _ VNonGcPtr)) = False -- if we print via pprAsPtrReg -isPtrReg (CmmGlobal reg) = isFixedPtrGlobalReg reg - --- True if this global reg has type StgPtr -isFixedPtrGlobalReg :: GlobalReg -> Bool -isFixedPtrGlobalReg Sp = True -isFixedPtrGlobalReg Hp = True -isFixedPtrGlobalReg HpLim = True -isFixedPtrGlobalReg SpLim = True -isFixedPtrGlobalReg _ = False - --- True if in C this register doesn't have the type given by --- (machRepCType (cmmRegType reg)), so it has to be cast. 
-isStrangeTypeReg :: CmmReg -> Bool -isStrangeTypeReg (CmmLocal _) = False -isStrangeTypeReg (CmmGlobal g) = isStrangeTypeGlobal g - -isStrangeTypeGlobal :: GlobalReg -> Bool -isStrangeTypeGlobal CCCS = True -isStrangeTypeGlobal CurrentTSO = True -isStrangeTypeGlobal CurrentNursery = True -isStrangeTypeGlobal BaseReg = True -isStrangeTypeGlobal r = isFixedPtrGlobalReg r - -strangeRegType :: CmmReg -> Maybe SDoc -strangeRegType (CmmGlobal CCCS) = Just (text "struct CostCentreStack_ *") -strangeRegType (CmmGlobal CurrentTSO) = Just (text "struct StgTSO_ *") -strangeRegType (CmmGlobal CurrentNursery) = Just (text "struct bdescr_ *") -strangeRegType (CmmGlobal BaseReg) = Just (text "struct StgRegTable_ *") -strangeRegType _ = Nothing - --- pprReg just prints the register name. --- -pprReg :: CmmReg -> SDoc -pprReg r = case r of - CmmLocal local -> pprLocalReg local - CmmGlobal global -> pprGlobalReg global - -pprAsPtrReg :: CmmReg -> SDoc -pprAsPtrReg (CmmGlobal (VanillaReg n gcp)) - = WARN( gcp /= VGcPtr, ppr n ) char 'R' <> int n <> text ".p" -pprAsPtrReg other_reg = pprReg other_reg - -pprGlobalReg :: GlobalReg -> SDoc -pprGlobalReg gr = case gr of - VanillaReg n _ -> char 'R' <> int n <> text ".w" - -- pprGlobalReg prints a VanillaReg as a .w regardless - -- Example: R1.w = R1.w & (-0x8UL); - -- JMP_(*R1.p); - FloatReg n -> char 'F' <> int n - DoubleReg n -> char 'D' <> int n - LongReg n -> char 'L' <> int n - Sp -> text "Sp" - SpLim -> text "SpLim" - Hp -> text "Hp" - HpLim -> text "HpLim" - CCCS -> text "CCCS" - CurrentTSO -> text "CurrentTSO" - CurrentNursery -> text "CurrentNursery" - HpAlloc -> text "HpAlloc" - BaseReg -> text "BaseReg" - EagerBlackholeInfo -> text "stg_EAGER_BLACKHOLE_info" - GCEnter1 -> text "stg_gc_enter_1" - GCFun -> text "stg_gc_fun" - other -> panic $ "pprGlobalReg: Unsupported register: " ++ show other - -pprLocalReg :: LocalReg -> SDoc -pprLocalReg (LocalReg uniq _) = char '_' <> ppr uniq - --- ----------------------------------------------------------------------------- --- Foreign Calls - -pprCall :: SDoc -> CCallConv -> [Hinted CmmFormal] -> [Hinted CmmActual] -> SDoc -pprCall ppr_fn cconv results args - | not (is_cishCC cconv) - = panic $ "pprCall: unknown calling convention" - - | otherwise - = - ppr_assign results (ppr_fn <> parens (commafy (map pprArg args))) <> semi - where - ppr_assign [] rhs = rhs - ppr_assign [(one,hint)] rhs - = pprLocalReg one <> text " = " - <> pprUnHint hint (localRegType one) <> rhs - ppr_assign _other _rhs = panic "pprCall: multiple results" - - pprArg (expr, AddrHint) - = cCast (text "void *") expr - -- see comment by machRepHintCType below - pprArg (expr, SignedHint) - = sdocWithDynFlags $ \dflags -> - cCast (machRep_S_CType $ typeWidth $ cmmExprType dflags expr) expr - pprArg (expr, _other) - = pprExpr expr - - pprUnHint AddrHint rep = parens (machRepCType rep) - pprUnHint SignedHint rep = parens (machRepCType rep) - pprUnHint _ _ = empty - --- Currently we only have these two calling conventions, but this might --- change in the future... -is_cishCC :: CCallConv -> Bool -is_cishCC CCallConv = True -is_cishCC CApiConv = True -is_cishCC StdCallConv = True -is_cishCC PrimCallConv = False -is_cishCC JavaScriptCallConv = False - --- --------------------------------------------------------------------- --- Find and print local and external declarations for a list of --- Cmm statements. --- -pprTempAndExternDecls :: [CmmBlock] -> (SDoc{-temps-}, SDoc{-externs-}) -pprTempAndExternDecls stmts - = (pprUFM (getUniqSet temps) (vcat . 
map pprTempDecl), - vcat (map pprExternDecl (Map.keys lbls))) - where (temps, lbls) = runTE (mapM_ te_BB stmts) - -pprDataExterns :: [CmmStatic] -> SDoc -pprDataExterns statics - = vcat (map pprExternDecl (Map.keys lbls)) - where (_, lbls) = runTE (mapM_ te_Static statics) - -pprTempDecl :: LocalReg -> SDoc -pprTempDecl l@(LocalReg _ rep) - = hcat [ machRepCType rep, space, pprLocalReg l, semi ] - -pprExternDecl :: CLabel -> SDoc -pprExternDecl lbl - -- do not print anything for "known external" things - | not (needsCDecl lbl) = empty - | Just sz <- foreignLabelStdcallInfo lbl = stdcall_decl sz - | otherwise = - hcat [ visibility, label_type lbl , lparen, ppr lbl, text ");" - -- occasionally useful to see label type - -- , text "/* ", pprDebugCLabel lbl, text " */" - ] - where - label_type lbl | isBytesLabel lbl = text "B_" - | isForeignLabel lbl && isCFunctionLabel lbl - = text "FF_" - | isCFunctionLabel lbl = text "F_" - | isStaticClosureLabel lbl = text "C_" - -- generic .rodata labels - | isSomeRODataLabel lbl = text "RO_" - -- generic .data labels (common case) - | otherwise = text "RW_" - - visibility - | externallyVisibleCLabel lbl = char 'E' - | otherwise = char 'I' - - -- If the label we want to refer to is a stdcall function (on Windows) then - -- we must generate an appropriate prototype for it, so that the C compiler will - -- add the @n suffix to the label (#2276) - stdcall_decl sz = sdocWithDynFlags $ \dflags -> - text "extern __attribute__((stdcall)) void " <> ppr lbl - <> parens (commafy (replicate (sz `quot` wORD_SIZE dflags) (machRep_U_CType (wordWidth dflags)))) - <> semi - -type TEState = (UniqSet LocalReg, Map CLabel ()) -newtype TE a = TE { unTE :: TEState -> (a, TEState) } deriving (Functor) - -instance Applicative TE where - pure a = TE $ \s -> (a, s) - (<*>) = ap - -instance Monad TE where - TE m >>= k = TE $ \s -> case m s of (a, s') -> unTE (k a) s' - -te_lbl :: CLabel -> TE () -te_lbl lbl = TE $ \(temps,lbls) -> ((), (temps, Map.insert lbl () lbls)) - -te_temp :: LocalReg -> TE () -te_temp r = TE $ \(temps,lbls) -> ((), (addOneToUniqSet temps r, lbls)) - -runTE :: TE () -> TEState -runTE (TE m) = snd (m (emptyUniqSet, Map.empty)) - -te_Static :: CmmStatic -> TE () -te_Static (CmmStaticLit lit) = te_Lit lit -te_Static _ = return () - -te_BB :: CmmBlock -> TE () -te_BB block = mapM_ te_Stmt (blockToList mid) >> te_Stmt last - where (_, mid, last) = blockSplit block - -te_Lit :: CmmLit -> TE () -te_Lit (CmmLabel l) = te_lbl l -te_Lit (CmmLabelOff l _) = te_lbl l -te_Lit (CmmLabelDiffOff l1 _ _ _) = te_lbl l1 -te_Lit _ = return () - -te_Stmt :: CmmNode e x -> TE () -te_Stmt (CmmAssign r e) = te_Reg r >> te_Expr e -te_Stmt (CmmStore l r) = te_Expr l >> te_Expr r -te_Stmt (CmmUnsafeForeignCall target rs es) - = do te_Target target - mapM_ te_temp rs - mapM_ te_Expr es -te_Stmt (CmmCondBranch e _ _ _) = te_Expr e -te_Stmt (CmmSwitch e _) = te_Expr e -te_Stmt (CmmCall { cml_target = e }) = te_Expr e -te_Stmt _ = return () - -te_Target :: ForeignTarget -> TE () -te_Target (ForeignTarget e _) = te_Expr e -te_Target (PrimTarget{}) = return () - -te_Expr :: CmmExpr -> TE () -te_Expr (CmmLit lit) = te_Lit lit -te_Expr (CmmLoad e _) = te_Expr e -te_Expr (CmmReg r) = te_Reg r -te_Expr (CmmMachOp _ es) = mapM_ te_Expr es -te_Expr (CmmRegOff r _) = te_Reg r -te_Expr (CmmStackSlot _ _) = panic "te_Expr: CmmStackSlot not supported!" 
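-- The TE monad above is just a hand-rolled state monad that accumulates every
-- LocalReg and CLabel mentioned by a group of blocks, so that
-- pprTempAndExternDecls can declare them up front.  A minimal standalone model
-- of the same idea, using mtl's State with toy String registers and labels
-- (all names below are invented):

import Control.Monad.State
import qualified Data.Map as Map
import qualified Data.Set as Set

type Temp     = String                   -- stands in for LocalReg
type Label    = String                   -- stands in for CLabel
type TEState' = (Set.Set Temp, Map.Map Label ())

teTemp :: Temp -> State TEState' ()
teTemp t = modify $ \(ts, ls) -> (Set.insert t ts, ls)

teLbl :: Label -> State TEState' ()
teLbl l = modify $ \(ts, ls) -> (ts, Map.insert l () ls)

-- e.g. execState (mapM_ teTemp ["_c1","_c1","_c2"] >> teLbl "stg_gc_enter_1")
--                (Set.empty, Map.empty)
--   == (Set.fromList ["_c1","_c2"], Map.fromList [("stg_gc_enter_1",())])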
- -te_Reg :: CmmReg -> TE () -te_Reg (CmmLocal l) = te_temp l -te_Reg _ = return () - - --- --------------------------------------------------------------------- --- C types for MachReps - -cCast :: SDoc -> CmmExpr -> SDoc -cCast ty expr = parens ty <> pprExpr1 expr - -cLoad :: CmmExpr -> CmmType -> SDoc -cLoad expr rep - = sdocWithPlatform $ \platform -> - if bewareLoadStoreAlignment (platformArch platform) - then let decl = machRepCType rep <+> text "x" <> semi - struct = text "struct" <+> braces (decl) - packed_attr = text "__attribute__((packed))" - cast = parens (struct <+> packed_attr <> char '*') - in parens (cast <+> pprExpr1 expr) <> text "->x" - else char '*' <> parens (cCast (machRepPtrCType rep) expr) - where -- On these platforms, unaligned loads are known to cause problems - bewareLoadStoreAlignment ArchAlpha = True - bewareLoadStoreAlignment ArchMipseb = True - bewareLoadStoreAlignment ArchMipsel = True - bewareLoadStoreAlignment (ArchARM {}) = True - bewareLoadStoreAlignment ArchARM64 = True - bewareLoadStoreAlignment ArchSPARC = True - bewareLoadStoreAlignment ArchSPARC64 = True - -- Pessimistically assume that they will also cause problems - -- on unknown arches - bewareLoadStoreAlignment ArchUnknown = True - bewareLoadStoreAlignment _ = False - -isCmmWordType :: DynFlags -> CmmType -> Bool --- True of GcPtrReg/NonGcReg of native word size -isCmmWordType dflags ty = not (isFloatType ty) - && typeWidth ty == wordWidth dflags - --- This is for finding the types of foreign call arguments. For a pointer --- argument, we always cast the argument to (void *), to avoid warnings from --- the C compiler. -machRepHintCType :: CmmType -> ForeignHint -> SDoc -machRepHintCType _ AddrHint = text "void *" -machRepHintCType rep SignedHint = machRep_S_CType (typeWidth rep) -machRepHintCType rep _other = machRepCType rep - -machRepPtrCType :: CmmType -> SDoc -machRepPtrCType r - = sdocWithDynFlags $ \dflags -> - if isCmmWordType dflags r then text "P_" - else machRepCType r <> char '*' - -machRepCType :: CmmType -> SDoc -machRepCType ty | isFloatType ty = machRep_F_CType w - | otherwise = machRep_U_CType w - where - w = typeWidth ty - -machRep_F_CType :: Width -> SDoc -machRep_F_CType W32 = text "StgFloat" -- ToDo: correct? -machRep_F_CType W64 = text "StgDouble" -machRep_F_CType _ = panic "machRep_F_CType" - -machRep_U_CType :: Width -> SDoc -machRep_U_CType w - = sdocWithDynFlags $ \dflags -> - case w of - _ | w == wordWidth dflags -> text "W_" - W8 -> text "StgWord8" - W16 -> text "StgWord16" - W32 -> text "StgWord32" - W64 -> text "StgWord64" - _ -> panic "machRep_U_CType" - -machRep_S_CType :: Width -> SDoc -machRep_S_CType w - = sdocWithDynFlags $ \dflags -> - case w of - _ | w == wordWidth dflags -> text "I_" - W8 -> text "StgInt8" - W16 -> text "StgInt16" - W32 -> text "StgInt32" - W64 -> text "StgInt64" - _ -> panic "machRep_S_CType" - - --- --------------------------------------------------------------------- --- print strings as valid C strings - -pprStringInCStyle :: ByteString -> SDoc -pprStringInCStyle s = doubleQuotes (text (concatMap charToC (BS.unpack s))) - --- --------------------------------------------------------------------------- --- Initialising static objects with floating-point numbers. We can't --- just emit the floating point number, because C will cast it to an int --- by rounding it. We want the actual bit-representation of the float. 
--- --- Consider a concrete C example: --- double d = 2.5e-10; --- float f = 2.5e-10f; --- --- int * i2 = &d; printf ("i2: %08X %08X\n", i2[0], i2[1]); --- long long * l = &d; printf (" l: %016llX\n", l[0]); --- int * i = &f; printf (" i: %08X\n", i[0]); --- Result on 64-bit LE (x86_64): --- i2: E826D695 3DF12E0B --- l: 3DF12E0BE826D695 --- i: 2F89705F --- Result on 32-bit BE (m68k): --- i2: 3DF12E0B E826D695 --- l: 3DF12E0BE826D695 --- i: 2F89705F --- --- The trick here is to notice that binary representation does not --- change much: only Word32 values get swapped on LE hosts / targets. - --- This is a hack to turn the floating point numbers into ints that we --- can safely initialise to static locations. - -castFloatToWord32Array :: STUArray s Int Float -> ST s (STUArray s Int Word32) -castFloatToWord32Array = U.castSTUArray - -castDoubleToWord64Array :: STUArray s Int Double -> ST s (STUArray s Int Word64) -castDoubleToWord64Array = U.castSTUArray - -floatToWord :: DynFlags -> Rational -> CmmLit -floatToWord dflags r - = runST (do - arr <- newArray_ ((0::Int),0) - writeArray arr 0 (fromRational r) - arr' <- castFloatToWord32Array arr - w32 <- readArray arr' 0 - return (CmmInt (toInteger w32 `shiftL` wo) (wordWidth dflags)) - ) - where wo | wordWidth dflags == W64 - , wORDS_BIGENDIAN dflags = 32 - | otherwise = 0 - -floatPairToWord :: DynFlags -> Rational -> Rational -> CmmLit -floatPairToWord dflags r1 r2 - = runST (do - arr <- newArray_ ((0::Int),1) - writeArray arr 0 (fromRational r1) - writeArray arr 1 (fromRational r2) - arr' <- castFloatToWord32Array arr - w32_1 <- readArray arr' 0 - w32_2 <- readArray arr' 1 - return (pprWord32Pair w32_1 w32_2) - ) - where pprWord32Pair w32_1 w32_2 - | wORDS_BIGENDIAN dflags = - CmmInt ((shiftL i1 32) .|. i2) W64 - | otherwise = - CmmInt ((shiftL i2 32) .|. i1) W64 - where i1 = toInteger w32_1 - i2 = toInteger w32_2 - -doubleToWords :: DynFlags -> Rational -> [CmmLit] -doubleToWords dflags r - = runST (do - arr <- newArray_ ((0::Int),1) - writeArray arr 0 (fromRational r) - arr' <- castDoubleToWord64Array arr - w64 <- readArray arr' 0 - return (pprWord64 w64) - ) - where targetWidth = wordWidth dflags - targetBE = wORDS_BIGENDIAN dflags - pprWord64 w64 - | targetWidth == W64 = - [ CmmInt (toInteger w64) targetWidth ] - | targetWidth == W32 = - [ CmmInt (toInteger targetW1) targetWidth - , CmmInt (toInteger targetW2) targetWidth - ] - | otherwise = panic "doubleToWords.pprWord64" - where (targetW1, targetW2) - | targetBE = (wHi, wLo) - | otherwise = (wLo, wHi) - wHi = w64 `shiftR` 32 - wLo = w64 .&. 0xFFFFffff - --- --------------------------------------------------------------------------- --- Utils - -wordShift :: DynFlags -> Int -wordShift dflags = widthInLog (wordWidth dflags) - -commafy :: [SDoc] -> SDoc -commafy xs = hsep $ punctuate comma xs - --- Print in C hex format: 0x13fa -pprHexVal :: Integer -> Width -> SDoc -pprHexVal w rep - | w < 0 = parens (char '-' <> - text "0x" <> intToDoc (-w) <> repsuffix rep) - | otherwise = text "0x" <> intToDoc w <> repsuffix rep - where - -- type suffix for literals: - -- Integer literals are unsigned in Cmm/C. We explicitly cast to - -- signed values for doing signed operations, but at all other - -- times values are unsigned. This also helps eliminate occasional - -- warnings about integer overflow from gcc. 
- - repsuffix W64 = sdocWithDynFlags $ \dflags -> - if cINT_SIZE dflags == 8 then char 'U' - else if cLONG_SIZE dflags == 8 then text "UL" - else if cLONG_LONG_SIZE dflags == 8 then text "ULL" - else panic "pprHexVal: Can't find a 64-bit type" - repsuffix _ = char 'U' - - intToDoc :: Integer -> SDoc - intToDoc i = case truncInt i of - 0 -> char '0' - v -> go v - - -- We need to truncate value as Cmm backend does not drop - -- redundant bits to ease handling of negative values. - -- Thus the following Cmm code on 64-bit arch, like amd64: - -- CInt v; - -- v = {something}; - -- if (v == %lobits32(-1)) { ... - -- leads to the following C code: - -- StgWord64 v = (StgWord32)({something}); - -- if (v == 0xFFFFffffFFFFffffU) { ... - -- Such code is incorrect as it promotes both operands to StgWord64 - -- and the whole condition is always false. - truncInt :: Integer -> Integer - truncInt i = - case rep of - W8 -> i `rem` (2^(8 :: Int)) - W16 -> i `rem` (2^(16 :: Int)) - W32 -> i `rem` (2^(32 :: Int)) - W64 -> i `rem` (2^(64 :: Int)) - _ -> panic ("pprHexVal/truncInt: C backend can't encode " - ++ show rep ++ " literals") - - go 0 = empty - go w' = go q <> dig - where - (q,r) = w' `quotRem` 16 - dig | r < 10 = char (chr (fromInteger r + ord '0')) - | otherwise = char (chr (fromInteger r - 10 + ord 'a')) diff --git a/compiler/cmm/PprCmm.hs b/compiler/cmm/PprCmm.hs deleted file mode 100644 index 397a666022..0000000000 --- a/compiler/cmm/PprCmm.hs +++ /dev/null @@ -1,309 +0,0 @@ -{-# LANGUAGE GADTs, TypeFamilies, FlexibleContexts, FlexibleInstances #-} -{-# OPTIONS_GHC -fno-warn-orphans #-} - ----------------------------------------------------------------------------- --- --- Pretty-printing of Cmm as (a superset of) C-- --- --- (c) The University of Glasgow 2004-2006 --- ------------------------------------------------------------------------------ --- --- This is where we walk over CmmNode emitting an external representation, --- suitable for parsing, in a syntax strongly reminiscent of C--. This --- is the "External Core" for the Cmm layer. --- --- As such, this should be a well-defined syntax: we want it to look nice. --- Thus, we try wherever possible to use syntax defined in [1], --- "The C-- Reference Manual", http://www.cs.tufts.edu/~nr/c--/index.html. We --- differ slightly, in some cases. For one, we use I8 .. I64 for types, rather --- than C--'s bits8 .. bits64. --- --- We try to ensure that all information available in the abstract --- syntax is reproduced, or reproducible, in the concrete syntax. --- Data that is not in printed out can be reconstructed according to --- conventions used in the pretty printer. There are at least two such --- cases: --- 1) if a value has wordRep type, the type is not appended in the --- output. --- 2) MachOps that operate over wordRep type are printed in a --- C-style, rather than as their internal MachRep name. --- --- These conventions produce much more readable Cmm output. 
--- --- A useful example pass over Cmm is in nativeGen/MachCodeGen.hs - -module PprCmm - ( module PprCmmDecl - , module PprCmmExpr - ) -where - -import GhcPrelude hiding (succ) - -import CLabel -import Cmm -import CmmUtils -import CmmSwitch -import DynFlags -import FastString -import Outputable -import PprCmmDecl -import PprCmmExpr -import Util - -import BasicTypes -import Hoopl.Block -import Hoopl.Graph - -------------------------------------------------- --- Outputable instances - -instance Outputable CmmStackInfo where - ppr = pprStackInfo - -instance Outputable CmmTopInfo where - ppr = pprTopInfo - - -instance Outputable (CmmNode e x) where - ppr = pprNode - -instance Outputable Convention where - ppr = pprConvention - -instance Outputable ForeignConvention where - ppr = pprForeignConvention - -instance Outputable ForeignTarget where - ppr = pprForeignTarget - -instance Outputable CmmReturnInfo where - ppr = pprReturnInfo - -instance Outputable (Block CmmNode C C) where - ppr = pprBlock -instance Outputable (Block CmmNode C O) where - ppr = pprBlock -instance Outputable (Block CmmNode O C) where - ppr = pprBlock -instance Outputable (Block CmmNode O O) where - ppr = pprBlock - -instance Outputable (Graph CmmNode e x) where - ppr = pprGraph - -instance Outputable CmmGraph where - ppr = pprCmmGraph - ----------------------------------------------------------- --- Outputting types Cmm contains - -pprStackInfo :: CmmStackInfo -> SDoc -pprStackInfo (StackInfo {arg_space=arg_space, updfr_space=updfr_space}) = - text "arg_space: " <> ppr arg_space <+> - text "updfr_space: " <> ppr updfr_space - -pprTopInfo :: CmmTopInfo -> SDoc -pprTopInfo (TopInfo {info_tbls=info_tbl, stack_info=stack_info}) = - vcat [text "info_tbls: " <> ppr info_tbl, - text "stack_info: " <> ppr stack_info] - ----------------------------------------------------------- --- Outputting blocks and graphs - -pprBlock :: IndexedCO x SDoc SDoc ~ SDoc - => Block CmmNode e x -> IndexedCO e SDoc SDoc -pprBlock block - = foldBlockNodesB3 ( ($$) . ppr - , ($$) . (nest 4) . ppr - , ($$) . (nest 4) . ppr - ) - block - empty - -pprGraph :: Graph CmmNode e x -> SDoc -pprGraph GNil = empty -pprGraph (GUnit block) = ppr block -pprGraph (GMany entry body exit) - = text "{" - $$ nest 2 (pprMaybeO entry $$ (vcat $ map ppr $ bodyToBlockList body) $$ pprMaybeO exit) - $$ text "}" - where pprMaybeO :: Outputable (Block CmmNode e x) - => MaybeO ex (Block CmmNode e x) -> SDoc - pprMaybeO NothingO = empty - pprMaybeO (JustO block) = ppr block - -pprCmmGraph :: CmmGraph -> SDoc -pprCmmGraph g - = text "{" <> text "offset" - $$ nest 2 (vcat $ map ppr blocks) - $$ text "}" - where blocks = revPostorder g - -- revPostorder has the side-effect of discarding unreachable code, - -- so pretty-printed Cmm will omit any unreachable blocks. This can - -- sometimes be confusing. 
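-- A tiny standalone illustration of the point above (plain adjacency lists in
-- a Data.Map instead of a CmmGraph; revPostorderFrom is an invented name): a
-- reverse-postorder walk starts from the entry block, so any block with no
-- path from the entry is never visited and hence never printed.

import qualified Data.Map as Map
import qualified Data.Set as Set

revPostorderFrom :: Ord a => Map.Map a [a] -> a -> [a]
revPostorderFrom succs entry = snd (dfs (Set.empty, []) entry)
  where
    dfs st@(seen, acc) n
      | n `Set.member` seen = st
      | otherwise =
          let (seen', acc') =
                foldl dfs (Set.insert n seen, acc)
                          (Map.findWithDefault [] n succs)
          in (seen', n : acc')   -- prepend at finish time: reverse postorder

-- e.g. revPostorderFrom (Map.fromList [("A",["B"]),("B",[]),("dead",["B"])]) "A"
--   == ["A","B"]                -- the unreachable "dead" block never appears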
- ---------------------------------------------- --- Outputting CmmNode and types which it contains - -pprConvention :: Convention -> SDoc -pprConvention (NativeNodeCall {}) = text "" -pprConvention (NativeDirectCall {}) = text "" -pprConvention (NativeReturn {}) = text "" -pprConvention Slow = text "" -pprConvention GC = text "" - -pprForeignConvention :: ForeignConvention -> SDoc -pprForeignConvention (ForeignConvention c args res ret) = - doubleQuotes (ppr c) <+> text "arg hints: " <+> ppr args <+> text " result hints: " <+> ppr res <+> ppr ret - -pprReturnInfo :: CmmReturnInfo -> SDoc -pprReturnInfo CmmMayReturn = empty -pprReturnInfo CmmNeverReturns = text "never returns" - -pprForeignTarget :: ForeignTarget -> SDoc -pprForeignTarget (ForeignTarget fn c) = ppr c <+> ppr_target fn - where - ppr_target :: CmmExpr -> SDoc - ppr_target t@(CmmLit _) = ppr t - ppr_target fn' = parens (ppr fn') - -pprForeignTarget (PrimTarget op) - -- HACK: We're just using a ForeignLabel to get this printed, the label - -- might not really be foreign. - = ppr - (CmmLabel (mkForeignLabel - (mkFastString (show op)) - Nothing ForeignLabelInThisPackage IsFunction)) - -pprNode :: CmmNode e x -> SDoc -pprNode node = pp_node <+> pp_debug - where - pp_node :: SDoc - pp_node = sdocWithDynFlags $ \dflags -> case node of - -- label: - CmmEntry id tscope -> lbl <> colon <+> - (sdocWithDynFlags $ \dflags -> - ppUnless (gopt Opt_SuppressTicks dflags) (text "//" <+> ppr tscope)) - where - lbl = if gopt Opt_SuppressUniques dflags - then text "_lbl_" - else ppr id - - -- // text - CmmComment s -> text "//" <+> ftext s - - -- //tick bla<...> - CmmTick t -> ppUnless (gopt Opt_SuppressTicks dflags) $ - text "//tick" <+> ppr t - - -- unwind reg = expr; - CmmUnwind regs -> - text "unwind " - <> commafy (map (\(r,e) -> ppr r <+> char '=' <+> ppr e) regs) <> semi - - -- reg = expr; - CmmAssign reg expr -> ppr reg <+> equals <+> ppr expr <> semi - - -- rep[lv] = expr; - CmmStore lv expr -> rep <> brackets(ppr lv) <+> equals <+> ppr expr <> semi - where - rep = sdocWithDynFlags $ \dflags -> - ppr ( cmmExprType dflags expr ) - - -- call "ccall" foo(x, y)[r1, r2]; - -- ToDo ppr volatile - CmmUnsafeForeignCall target results args -> - hsep [ ppUnless (null results) $ - parens (commafy $ map ppr results) <+> equals, - text "call", - ppr target <> parens (commafy $ map ppr args) <> semi] - - -- goto label; - CmmBranch ident -> text "goto" <+> ppr ident <> semi - - -- if (expr) goto t; else goto f; - CmmCondBranch expr t f l -> - hsep [ text "if" - , parens(ppr expr) - , case l of - Nothing -> empty - Just b -> parens (text "likely:" <+> ppr b) - , text "goto" - , ppr t <> semi - , text "else goto" - , ppr f <> semi - ] - - CmmSwitch expr ids -> - hang (hsep [ text "switch" - , range - , if isTrivialCmmExpr expr - then ppr expr - else parens (ppr expr) - , text "{" - ]) - 4 (vcat (map ppCase cases) $$ def) $$ rbrace - where - (cases, mbdef) = switchTargetsFallThrough ids - ppCase (is,l) = hsep - [ text "case" - , commafy $ map integer is - , text ": goto" - , ppr l <> semi - ] - def | Just l <- mbdef = hsep - [ text "default:" - , braces (text "goto" <+> ppr l <> semi) - ] - | otherwise = empty - - range = brackets $ hsep [integer lo, text "..", integer hi] - where (lo,hi) = switchTargetsRange ids - - CmmCall tgt k regs out res updfr_off -> - hcat [ text "call", space - , pprFun tgt, parens (interpp'SP regs), space - , returns <+> - text "args: " <> ppr out <> comma <+> - text "res: " <> ppr res <> comma <+> - text "upd: " <> ppr 
updfr_off - , semi ] - where pprFun f@(CmmLit _) = ppr f - pprFun f = parens (ppr f) - - returns - | Just r <- k = text "returns to" <+> ppr r <> comma - | otherwise = empty - - CmmForeignCall {tgt=t, res=rs, args=as, succ=s, ret_args=a, ret_off=u, intrbl=i} -> - hcat $ if i then [text "interruptible", space] else [] ++ - [ text "foreign call", space - , ppr t, text "(...)", space - , text "returns to" <+> ppr s - <+> text "args:" <+> parens (ppr as) - <+> text "ress:" <+> parens (ppr rs) - , text "ret_args:" <+> ppr a - , text "ret_off:" <+> ppr u - , semi ] - - pp_debug :: SDoc - pp_debug = - if not debugIsOn then empty - else case node of - CmmEntry {} -> empty -- Looks terrible with text " // CmmEntry" - CmmComment {} -> empty -- Looks also terrible with text " // CmmComment" - CmmTick {} -> empty - CmmUnwind {} -> text " // CmmUnwind" - CmmAssign {} -> text " // CmmAssign" - CmmStore {} -> text " // CmmStore" - CmmUnsafeForeignCall {} -> text " // CmmUnsafeForeignCall" - CmmBranch {} -> text " // CmmBranch" - CmmCondBranch {} -> text " // CmmCondBranch" - CmmSwitch {} -> text " // CmmSwitch" - CmmCall {} -> text " // CmmCall" - CmmForeignCall {} -> text " // CmmForeignCall" - - commafy :: [SDoc] -> SDoc - commafy xs = hsep $ punctuate comma xs diff --git a/compiler/cmm/PprCmmDecl.hs b/compiler/cmm/PprCmmDecl.hs deleted file mode 100644 index e54abdc8b6..0000000000 --- a/compiler/cmm/PprCmmDecl.hs +++ /dev/null @@ -1,169 +0,0 @@ ----------------------------------------------------------------------------- --- --- Pretty-printing of common Cmm types --- --- (c) The University of Glasgow 2004-2006 --- ------------------------------------------------------------------------------ - --- --- This is where we walk over Cmm emitting an external representation, --- suitable for parsing, in a syntax strongly reminiscent of C--. This --- is the "External Core" for the Cmm layer. --- --- As such, this should be a well-defined syntax: we want it to look nice. --- Thus, we try wherever possible to use syntax defined in [1], --- "The C-- Reference Manual", http://www.cs.tufts.edu/~nr/c--/index.html. We --- differ slightly, in some cases. For one, we use I8 .. I64 for types, rather --- than C--'s bits8 .. bits64. --- --- We try to ensure that all information available in the abstract --- syntax is reproduced, or reproducible, in the concrete syntax. --- Data that is not in printed out can be reconstructed according to --- conventions used in the pretty printer. There are at least two such --- cases: --- 1) if a value has wordRep type, the type is not appended in the --- output. --- 2) MachOps that operate over wordRep type are printed in a --- C-style, rather than as their internal MachRep name. --- --- These conventions produce much more readable Cmm output. 
--- --- A useful example pass over Cmm is in nativeGen/MachCodeGen.hs --- - -{-# OPTIONS_GHC -fno-warn-orphans #-} -module PprCmmDecl - ( writeCmms, pprCmms, pprCmmGroup, pprSection, pprStatic - ) -where - -import GhcPrelude - -import PprCmmExpr -import Cmm - -import DynFlags -import Outputable -import FastString - -import Data.List -import System.IO - -import qualified Data.ByteString as BS - - -pprCmms :: (Outputable info, Outputable g) - => [GenCmmGroup CmmStatics info g] -> SDoc -pprCmms cmms = pprCode CStyle (vcat (intersperse separator $ map ppr cmms)) - where - separator = space $$ text "-------------------" $$ space - -writeCmms :: (Outputable info, Outputable g) - => DynFlags -> Handle -> [GenCmmGroup CmmStatics info g] -> IO () -writeCmms dflags handle cmms = printForC dflags handle (pprCmms cmms) - ------------------------------------------------------------------------------ - -instance (Outputable d, Outputable info, Outputable i) - => Outputable (GenCmmDecl d info i) where - ppr t = pprTop t - -instance Outputable CmmStatics where - ppr = pprStatics - -instance Outputable CmmStatic where - ppr = pprStatic - -instance Outputable CmmInfoTable where - ppr = pprInfoTable - - ------------------------------------------------------------------------------ - -pprCmmGroup :: (Outputable d, Outputable info, Outputable g) - => GenCmmGroup d info g -> SDoc -pprCmmGroup tops - = vcat $ intersperse blankLine $ map pprTop tops - --- -------------------------------------------------------------------------- --- Top level `procedure' blocks. --- -pprTop :: (Outputable d, Outputable info, Outputable i) - => GenCmmDecl d info i -> SDoc - -pprTop (CmmProc info lbl live graph) - - = vcat [ ppr lbl <> lparen <> rparen <+> lbrace <+> text "// " <+> ppr live - , nest 8 $ lbrace <+> ppr info $$ rbrace - , nest 4 $ ppr graph - , rbrace ] - --- -------------------------------------------------------------------------- --- We follow [1], 4.5 --- --- section "data" { ... } --- -pprTop (CmmData section ds) = - (hang (pprSection section <+> lbrace) 4 (ppr ds)) - $$ rbrace - --- -------------------------------------------------------------------------- --- Info tables. - -pprInfoTable :: CmmInfoTable -> SDoc -pprInfoTable (CmmInfoTable { cit_lbl = lbl, cit_rep = rep - , cit_prof = prof_info - , cit_srt = srt }) - = vcat [ text "label: " <> ppr lbl - , text "rep: " <> ppr rep - , case prof_info of - NoProfilingInfo -> empty - ProfilingInfo ct cd -> - vcat [ text "type: " <> text (show (BS.unpack ct)) - , text "desc: " <> text (show (BS.unpack cd)) ] - , text "srt: " <> ppr srt ] - -instance Outputable ForeignHint where - ppr NoHint = empty - ppr SignedHint = quotes(text "signed") --- ppr AddrHint = quotes(text "address") --- Temp Jan08 - ppr AddrHint = (text "PtrHint") - --- -------------------------------------------------------------------------- --- Static data. 
--- Strings are printed as C strings, and we print them as I8[], --- following C-- --- -pprStatics :: CmmStatics -> SDoc -pprStatics (Statics lbl ds) = vcat ((ppr lbl <> colon) : map ppr ds) - -pprStatic :: CmmStatic -> SDoc -pprStatic s = case s of - CmmStaticLit lit -> nest 4 $ text "const" <+> pprLit lit <> semi - CmmUninitialised i -> nest 4 $ text "I8" <> brackets (int i) - CmmString s' -> nest 4 $ text "I8[]" <+> text (show s') - --- -------------------------------------------------------------------------- --- data sections --- -pprSection :: Section -> SDoc -pprSection (Section t suffix) = - section <+> doubleQuotes (pprSectionType t <+> char '.' <+> ppr suffix) - where - section = text "section" - -pprSectionType :: SectionType -> SDoc -pprSectionType s = doubleQuotes (ptext t) - where - t = case s of - Text -> sLit "text" - Data -> sLit "data" - ReadOnlyData -> sLit "readonly" - ReadOnlyData16 -> sLit "readonly16" - RelocatableReadOnlyData - -> sLit "relreadonly" - UninitialisedData -> sLit "uninitialised" - CString -> sLit "cstring" - OtherSection s' -> sLit s' -- Not actually a literal though. diff --git a/compiler/cmm/PprCmmExpr.hs b/compiler/cmm/PprCmmExpr.hs deleted file mode 100644 index 7bf73f1ca6..0000000000 --- a/compiler/cmm/PprCmmExpr.hs +++ /dev/null @@ -1,286 +0,0 @@ ----------------------------------------------------------------------------- --- --- Pretty-printing of common Cmm types --- --- (c) The University of Glasgow 2004-2006 --- ------------------------------------------------------------------------------ - --- --- This is where we walk over Cmm emitting an external representation, --- suitable for parsing, in a syntax strongly reminiscent of C--. This --- is the "External Core" for the Cmm layer. --- --- As such, this should be a well-defined syntax: we want it to look nice. --- Thus, we try wherever possible to use syntax defined in [1], --- "The C-- Reference Manual", http://www.cs.tufts.edu/~nr/c--/index.html. We --- differ slightly, in some cases. For one, we use I8 .. I64 for types, rather --- than C--'s bits8 .. bits64. --- --- We try to ensure that all information available in the abstract --- syntax is reproduced, or reproducible, in the concrete syntax. --- Data that is not in printed out can be reconstructed according to --- conventions used in the pretty printer. There are at least two such --- cases: --- 1) if a value has wordRep type, the type is not appended in the --- output. --- 2) MachOps that operate over wordRep type are printed in a --- C-style, rather than as their internal MachRep name. --- --- These conventions produce much more readable Cmm output. 
--- --- A useful example pass over Cmm is in nativeGen/MachCodeGen.hs --- - -{-# OPTIONS_GHC -fno-warn-orphans #-} -module PprCmmExpr - ( pprExpr, pprLit - ) -where - -import GhcPrelude - -import CmmExpr - -import Outputable -import DynFlags - -import Data.Maybe -import Numeric ( fromRat ) - ------------------------------------------------------------------------------ - -instance Outputable CmmExpr where - ppr e = pprExpr e - -instance Outputable CmmReg where - ppr e = pprReg e - -instance Outputable CmmLit where - ppr l = pprLit l - -instance Outputable LocalReg where - ppr e = pprLocalReg e - -instance Outputable Area where - ppr e = pprArea e - -instance Outputable GlobalReg where - ppr e = pprGlobalReg e - --- -------------------------------------------------------------------------- --- Expressions --- - -pprExpr :: CmmExpr -> SDoc -pprExpr e - = sdocWithDynFlags $ \dflags -> - case e of - CmmRegOff reg i -> - pprExpr (CmmMachOp (MO_Add rep) - [CmmReg reg, CmmLit (CmmInt (fromIntegral i) rep)]) - where rep = typeWidth (cmmRegType dflags reg) - CmmLit lit -> pprLit lit - _other -> pprExpr1 e - --- Here's the precedence table from CmmParse.y: --- %nonassoc '>=' '>' '<=' '<' '!=' '==' --- %left '|' --- %left '^' --- %left '&' --- %left '>>' '<<' --- %left '-' '+' --- %left '/' '*' '%' --- %right '~' - --- We just cope with the common operators for now, the rest will get --- a default conservative behaviour. - --- %nonassoc '>=' '>' '<=' '<' '!=' '==' -pprExpr1, pprExpr7, pprExpr8 :: CmmExpr -> SDoc -pprExpr1 (CmmMachOp op [x,y]) | Just doc <- infixMachOp1 op - = pprExpr7 x <+> doc <+> pprExpr7 y -pprExpr1 e = pprExpr7 e - -infixMachOp1, infixMachOp7, infixMachOp8 :: MachOp -> Maybe SDoc - -infixMachOp1 (MO_Eq _) = Just (text "==") -infixMachOp1 (MO_Ne _) = Just (text "!=") -infixMachOp1 (MO_Shl _) = Just (text "<<") -infixMachOp1 (MO_U_Shr _) = Just (text ">>") -infixMachOp1 (MO_U_Ge _) = Just (text ">=") -infixMachOp1 (MO_U_Le _) = Just (text "<=") -infixMachOp1 (MO_U_Gt _) = Just (char '>') -infixMachOp1 (MO_U_Lt _) = Just (char '<') -infixMachOp1 _ = Nothing - --- %left '-' '+' -pprExpr7 (CmmMachOp (MO_Add rep1) [x, CmmLit (CmmInt i rep2)]) | i < 0 - = pprExpr7 (CmmMachOp (MO_Sub rep1) [x, CmmLit (CmmInt (negate i) rep2)]) -pprExpr7 (CmmMachOp op [x,y]) | Just doc <- infixMachOp7 op - = pprExpr7 x <+> doc <+> pprExpr8 y -pprExpr7 e = pprExpr8 e - -infixMachOp7 (MO_Add _) = Just (char '+') -infixMachOp7 (MO_Sub _) = Just (char '-') -infixMachOp7 _ = Nothing - --- %left '/' '*' '%' -pprExpr8 (CmmMachOp op [x,y]) | Just doc <- infixMachOp8 op - = pprExpr8 x <+> doc <+> pprExpr9 y -pprExpr8 e = pprExpr9 e - -infixMachOp8 (MO_U_Quot _) = Just (char '/') -infixMachOp8 (MO_Mul _) = Just (char '*') -infixMachOp8 (MO_U_Rem _) = Just (char '%') -infixMachOp8 _ = Nothing - -pprExpr9 :: CmmExpr -> SDoc -pprExpr9 e = - case e of - CmmLit lit -> pprLit1 lit - CmmLoad expr rep -> ppr rep <> brackets (ppr expr) - CmmReg reg -> ppr reg - CmmRegOff reg off -> parens (ppr reg <+> char '+' <+> int off) - CmmStackSlot a off -> parens (ppr a <+> char '+' <+> int off) - CmmMachOp mop args -> genMachOp mop args - -genMachOp :: MachOp -> [CmmExpr] -> SDoc -genMachOp mop args - | Just doc <- infixMachOp mop = case args of - -- dyadic - [x,y] -> pprExpr9 x <+> doc <+> pprExpr9 y - - -- unary - [x] -> doc <> pprExpr9 x - - _ -> pprTrace "PprCmm.genMachOp: machop with strange number of args" - (pprMachOp mop <+> - parens (hcat $ punctuate comma (map pprExpr args))) - empty - - | isJust (infixMachOp1 mop) - || 
isJust (infixMachOp7 mop) - || isJust (infixMachOp8 mop) = parens (pprExpr (CmmMachOp mop args)) - - | otherwise = char '%' <> ppr_op <> parens (commafy (map pprExpr args)) - where ppr_op = text (map (\c -> if c == ' ' then '_' else c) - (show mop)) - -- replace spaces in (show mop) with underscores, - --- --- Unsigned ops on the word size of the machine get nice symbols. --- All else get dumped in their ugly format. --- -infixMachOp :: MachOp -> Maybe SDoc -infixMachOp mop - = case mop of - MO_And _ -> Just $ char '&' - MO_Or _ -> Just $ char '|' - MO_Xor _ -> Just $ char '^' - MO_Not _ -> Just $ char '~' - MO_S_Neg _ -> Just $ char '-' -- there is no unsigned neg :) - _ -> Nothing - --- -------------------------------------------------------------------------- --- Literals. --- To minimise line noise we adopt the convention that if the literal --- has the natural machine word size, we do not append the type --- -pprLit :: CmmLit -> SDoc -pprLit lit = sdocWithDynFlags $ \dflags -> - case lit of - CmmInt i rep -> - hcat [ (if i < 0 then parens else id)(integer i) - , ppUnless (rep == wordWidth dflags) $ - space <> dcolon <+> ppr rep ] - - CmmFloat f rep -> hsep [ double (fromRat f), dcolon, ppr rep ] - CmmVec lits -> char '<' <> commafy (map pprLit lits) <> char '>' - CmmLabel clbl -> ppr clbl - CmmLabelOff clbl i -> ppr clbl <> ppr_offset i - CmmLabelDiffOff clbl1 clbl2 i _ -> ppr clbl1 <> char '-' - <> ppr clbl2 <> ppr_offset i - CmmBlock id -> ppr id - CmmHighStackMark -> text "" - -pprLit1 :: CmmLit -> SDoc -pprLit1 lit@(CmmLabelOff {}) = parens (pprLit lit) -pprLit1 lit = pprLit lit - -ppr_offset :: Int -> SDoc -ppr_offset i - | i==0 = empty - | i>=0 = char '+' <> int i - | otherwise = char '-' <> int (-i) - --- -------------------------------------------------------------------------- --- Registers, whether local (temps) or global --- -pprReg :: CmmReg -> SDoc -pprReg r - = case r of - CmmLocal local -> pprLocalReg local - CmmGlobal global -> pprGlobalReg global - --- --- We only print the type of the local reg if it isn't wordRep --- -pprLocalReg :: LocalReg -> SDoc -pprLocalReg (LocalReg uniq rep) = sdocWithDynFlags $ \dflags -> --- = ppr rep <> char '_' <> ppr uniq --- Temp Jan08 - char '_' <> pprUnique dflags uniq <> - (if isWord32 rep -- && not (isGcPtrType rep) -- Temp Jan08 -- sigh - then dcolon <> ptr <> ppr rep - else dcolon <> ptr <> ppr rep) - where - pprUnique dflags unique = - if gopt Opt_SuppressUniques dflags - then text "_locVar_" - else ppr unique - ptr = empty - --if isGcPtrType rep - -- then doubleQuotes (text "ptr") - -- else empty - --- Stack areas -pprArea :: Area -> SDoc -pprArea Old = text "old" -pprArea (Young id) = hcat [ text "young<", ppr id, text ">" ] - --- needs to be kept in syn with CmmExpr.hs.GlobalReg --- -pprGlobalReg :: GlobalReg -> SDoc -pprGlobalReg gr - = case gr of - VanillaReg n _ -> char 'R' <> int n --- Temp Jan08 --- VanillaReg n VNonGcPtr -> char 'R' <> int n --- VanillaReg n VGcPtr -> char 'P' <> int n - FloatReg n -> char 'F' <> int n - DoubleReg n -> char 'D' <> int n - LongReg n -> char 'L' <> int n - XmmReg n -> text "XMM" <> int n - YmmReg n -> text "YMM" <> int n - ZmmReg n -> text "ZMM" <> int n - Sp -> text "Sp" - SpLim -> text "SpLim" - Hp -> text "Hp" - HpLim -> text "HpLim" - MachSp -> text "MachSp" - UnwindReturnReg-> text "UnwindReturnReg" - CCCS -> text "CCCS" - CurrentTSO -> text "CurrentTSO" - CurrentNursery -> text "CurrentNursery" - HpAlloc -> text "HpAlloc" - EagerBlackholeInfo -> text "stg_EAGER_BLACKHOLE_info" - 
GCEnter1 -> text "stg_gc_enter_1" - GCFun -> text "stg_gc_fun" - BaseReg -> text "BaseReg" - PicBaseReg -> text "PicBaseReg" - ------------------------------------------------------------------------------ - -commafy :: [SDoc] -> SDoc -commafy xs = fsep $ punctuate comma xs diff --git a/compiler/cmm/SMRep.hs b/compiler/cmm/SMRep.hs deleted file mode 100644 index fe4ed58bfe..0000000000 --- a/compiler/cmm/SMRep.hs +++ /dev/null @@ -1,563 +0,0 @@ --- (c) The University of Glasgow 2006 --- (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 --- --- Storage manager representation of closures - -{-# LANGUAGE CPP,GeneralizedNewtypeDeriving #-} - -module SMRep ( - -- * Words and bytes - WordOff, ByteOff, - wordsToBytes, bytesToWordsRoundUp, - roundUpToWords, roundUpTo, - - StgWord, fromStgWord, toStgWord, - StgHalfWord, fromStgHalfWord, toStgHalfWord, - halfWordSize, halfWordSizeInBits, - - -- * Closure representation - SMRep(..), -- CmmInfo sees the rep; no one else does - IsStatic, - ClosureTypeInfo(..), ArgDescr(..), Liveness, - ConstrDescription, - - -- ** Construction - mkHeapRep, blackHoleRep, indStaticRep, mkStackRep, mkRTSRep, arrPtrsRep, - smallArrPtrsRep, arrWordsRep, - - -- ** Predicates - isStaticRep, isConRep, isThunkRep, isFunRep, isStaticNoCafCon, - isStackRep, - - -- ** Size-related things - heapClosureSizeW, - fixedHdrSizeW, arrWordsHdrSize, arrWordsHdrSizeW, arrPtrsHdrSize, - arrPtrsHdrSizeW, profHdrSize, thunkHdrSize, nonHdrSize, nonHdrSizeW, - smallArrPtrsHdrSize, smallArrPtrsHdrSizeW, hdrSize, hdrSizeW, - fixedHdrSize, - - -- ** RTS closure types - rtsClosureType, rET_SMALL, rET_BIG, - aRG_GEN, aRG_GEN_BIG, - - -- ** Arrays - card, cardRoundUp, cardTableSizeB, cardTableSizeW - ) where - -import GhcPrelude - -import BasicTypes( ConTagZ ) -import DynFlags -import Outputable -import GHC.Platform -import FastString - -import Data.Word -import Data.Bits -import Data.ByteString (ByteString) - -{- -************************************************************************ -* * - Words and bytes -* * -************************************************************************ --} - --- | Word offset, or word count -type WordOff = Int - --- | Byte offset, or byte count -type ByteOff = Int - --- | Round up the given byte count to the next byte count that's a --- multiple of the machine's word size. -roundUpToWords :: DynFlags -> ByteOff -> ByteOff -roundUpToWords dflags n = roundUpTo n (wORD_SIZE dflags) - --- | Round up @base@ to a multiple of @size@. -roundUpTo :: ByteOff -> ByteOff -> ByteOff -roundUpTo base size = (base + (size - 1)) .&. (complement (size - 1)) - --- | Convert the given number of words to a number of bytes. --- --- This function morally has type @WordOff -> ByteOff@, but uses @Num --- a@ to allow for overloading. -wordsToBytes :: Num a => DynFlags -> a -> a -wordsToBytes dflags n = fromIntegral (wORD_SIZE dflags) * n -{-# SPECIALIZE wordsToBytes :: DynFlags -> Int -> Int #-} -{-# SPECIALIZE wordsToBytes :: DynFlags -> Word -> Word #-} -{-# SPECIALIZE wordsToBytes :: DynFlags -> Integer -> Integer #-} - --- | First round the given byte count up to a multiple of the --- machine's word size and then convert the result to words. -bytesToWordsRoundUp :: DynFlags -> ByteOff -> WordOff -bytesToWordsRoundUp dflags n = (n + word_size - 1) `quot` word_size - where word_size = wORD_SIZE dflags --- StgWord is a type representing an StgWord on the target platform. 
--- A Word64 is large enough to hold a Word for either a 32bit or 64bit platform -newtype StgWord = StgWord Word64 - deriving (Eq, Bits) - -fromStgWord :: StgWord -> Integer -fromStgWord (StgWord i) = toInteger i - -toStgWord :: DynFlags -> Integer -> StgWord -toStgWord dflags i - = case platformWordSize (targetPlatform dflags) of - -- These conversions mean that things like toStgWord (-1) - -- do the right thing - PW4 -> StgWord (fromIntegral (fromInteger i :: Word32)) - PW8 -> StgWord (fromInteger i) - -instance Outputable StgWord where - ppr (StgWord i) = integer (toInteger i) - --- - --- A Word32 is large enough to hold half a Word for either a 32bit or --- 64bit platform -newtype StgHalfWord = StgHalfWord Word32 - deriving Eq - -fromStgHalfWord :: StgHalfWord -> Integer -fromStgHalfWord (StgHalfWord w) = toInteger w - -toStgHalfWord :: DynFlags -> Integer -> StgHalfWord -toStgHalfWord dflags i - = case platformWordSize (targetPlatform dflags) of - -- These conversions mean that things like toStgHalfWord (-1) - -- do the right thing - PW4 -> StgHalfWord (fromIntegral (fromInteger i :: Word16)) - PW8 -> StgHalfWord (fromInteger i :: Word32) - -instance Outputable StgHalfWord where - ppr (StgHalfWord w) = integer (toInteger w) - --- | Half word size in bytes -halfWordSize :: DynFlags -> ByteOff -halfWordSize dflags = platformWordSizeInBytes (targetPlatform dflags) `div` 2 - -halfWordSizeInBits :: DynFlags -> Int -halfWordSizeInBits dflags = platformWordSizeInBits (targetPlatform dflags) `div` 2 - -{- -************************************************************************ -* * -\subsubsection[SMRep-datatype]{@SMRep@---storage manager representation} -* * -************************************************************************ --} - --- | A description of the layout of a closure. Corresponds directly --- to the closure types in includes/rts/storage/ClosureTypes.h. -data SMRep - = HeapRep -- GC routines consult sizes in info tbl - IsStatic - !WordOff -- # ptr words - !WordOff -- # non-ptr words INCLUDING SLOP (see mkHeapRep below) - ClosureTypeInfo -- type-specific info - - | ArrayPtrsRep - !WordOff -- # ptr words - !WordOff -- # card table words - - | SmallArrayPtrsRep - !WordOff -- # ptr words - - | ArrayWordsRep - !WordOff -- # bytes expressed in words, rounded up - - | StackRep -- Stack frame (RET_SMALL or RET_BIG) - Liveness - - | RTSRep -- The RTS needs to declare info tables with specific - Int -- type tags, so this form lets us override the default - SMRep -- tag for an SMRep. - --- | True <=> This is a static closure. Affects how we garbage-collect it. --- Static closure have an extra static link field at the end. --- Constructors do not have a static variant; see Note [static constructors] -type IsStatic = Bool - --- From an SMRep you can get to the closure type defined in --- includes/rts/storage/ClosureTypes.h. Described by the function --- rtsClosureType below. - -data ClosureTypeInfo - = Constr ConTagZ ConstrDescription - | Fun FunArity ArgDescr - | Thunk - | ThunkSelector SelectorOffset - | BlackHole - | IndStatic - -type ConstrDescription = ByteString -- result of dataConIdentity -type FunArity = Int -type SelectorOffset = Int - -------------------------- --- We represent liveness bitmaps as a Bitmap (whose internal --- representation really is a bitmap). These are pinned onto case return --- vectors to indicate the state of the stack for the garbage collector. 
--- --- In the compiled program, liveness bitmaps that fit inside a single --- word (StgWord) are stored as a single word, while larger bitmaps are --- stored as a pointer to an array of words. - -type Liveness = [Bool] -- One Bool per word; True <=> non-ptr or dead - -- False <=> ptr - -------------------------- --- An ArgDescr describes the argument pattern of a function - -data ArgDescr - = ArgSpec -- Fits one of the standard patterns - !Int -- RTS type identifier ARG_P, ARG_N, ... - - | ArgGen -- General case - Liveness -- Details about the arguments - - ------------------------------------------------------------------------------ --- Construction - -mkHeapRep :: DynFlags -> IsStatic -> WordOff -> WordOff -> ClosureTypeInfo - -> SMRep -mkHeapRep dflags is_static ptr_wds nonptr_wds cl_type_info - = HeapRep is_static - ptr_wds - (nonptr_wds + slop_wds) - cl_type_info - where - slop_wds - | is_static = 0 - | otherwise = max 0 (minClosureSize dflags - (hdr_size + payload_size)) - - hdr_size = closureTypeHdrSize dflags cl_type_info - payload_size = ptr_wds + nonptr_wds - -mkRTSRep :: Int -> SMRep -> SMRep -mkRTSRep = RTSRep - -mkStackRep :: [Bool] -> SMRep -mkStackRep liveness = StackRep liveness - -blackHoleRep :: SMRep -blackHoleRep = HeapRep False 0 0 BlackHole - -indStaticRep :: SMRep -indStaticRep = HeapRep True 1 0 IndStatic - -arrPtrsRep :: DynFlags -> WordOff -> SMRep -arrPtrsRep dflags elems = ArrayPtrsRep elems (cardTableSizeW dflags elems) - -smallArrPtrsRep :: WordOff -> SMRep -smallArrPtrsRep elems = SmallArrayPtrsRep elems - -arrWordsRep :: DynFlags -> ByteOff -> SMRep -arrWordsRep dflags bytes = ArrayWordsRep (bytesToWordsRoundUp dflags bytes) - ------------------------------------------------------------------------------ --- Predicates - -isStaticRep :: SMRep -> IsStatic -isStaticRep (HeapRep is_static _ _ _) = is_static -isStaticRep (RTSRep _ rep) = isStaticRep rep -isStaticRep _ = False - -isStackRep :: SMRep -> Bool -isStackRep StackRep{} = True -isStackRep (RTSRep _ rep) = isStackRep rep -isStackRep _ = False - -isConRep :: SMRep -> Bool -isConRep (HeapRep _ _ _ Constr{}) = True -isConRep _ = False - -isThunkRep :: SMRep -> Bool -isThunkRep (HeapRep _ _ _ Thunk) = True -isThunkRep (HeapRep _ _ _ ThunkSelector{}) = True -isThunkRep (HeapRep _ _ _ BlackHole) = True -isThunkRep (HeapRep _ _ _ IndStatic) = True -isThunkRep _ = False - -isFunRep :: SMRep -> Bool -isFunRep (HeapRep _ _ _ Fun{}) = True -isFunRep _ = False - -isStaticNoCafCon :: SMRep -> Bool --- This should line up exactly with CONSTR_NOCAF below --- See Note [Static NoCaf constructors] -isStaticNoCafCon (HeapRep _ 0 _ Constr{}) = True -isStaticNoCafCon _ = False - - ------------------------------------------------------------------------------ --- Size-related things - -fixedHdrSize :: DynFlags -> ByteOff -fixedHdrSize dflags = wordsToBytes dflags (fixedHdrSizeW dflags) - --- | Size of a closure header (StgHeader in includes/rts/storage/Closures.h) -fixedHdrSizeW :: DynFlags -> WordOff -fixedHdrSizeW dflags = sTD_HDR_SIZE dflags + profHdrSize dflags - --- | Size of the profiling part of a closure header --- (StgProfHeader in includes/rts/storage/Closures.h) -profHdrSize :: DynFlags -> WordOff -profHdrSize dflags - | gopt Opt_SccProfilingOn dflags = pROF_HDR_SIZE dflags - | otherwise = 0 - --- | The garbage collector requires that every closure is at least as --- big as this. 
-minClosureSize :: DynFlags -> WordOff -minClosureSize dflags = fixedHdrSizeW dflags + mIN_PAYLOAD_SIZE dflags - -arrWordsHdrSize :: DynFlags -> ByteOff -arrWordsHdrSize dflags - = fixedHdrSize dflags + sIZEOF_StgArrBytes_NoHdr dflags - -arrWordsHdrSizeW :: DynFlags -> WordOff -arrWordsHdrSizeW dflags = - fixedHdrSizeW dflags + - (sIZEOF_StgArrBytes_NoHdr dflags `quot` wORD_SIZE dflags) - -arrPtrsHdrSize :: DynFlags -> ByteOff -arrPtrsHdrSize dflags - = fixedHdrSize dflags + sIZEOF_StgMutArrPtrs_NoHdr dflags - -arrPtrsHdrSizeW :: DynFlags -> WordOff -arrPtrsHdrSizeW dflags = - fixedHdrSizeW dflags + - (sIZEOF_StgMutArrPtrs_NoHdr dflags `quot` wORD_SIZE dflags) - -smallArrPtrsHdrSize :: DynFlags -> ByteOff -smallArrPtrsHdrSize dflags - = fixedHdrSize dflags + sIZEOF_StgSmallMutArrPtrs_NoHdr dflags - -smallArrPtrsHdrSizeW :: DynFlags -> WordOff -smallArrPtrsHdrSizeW dflags = - fixedHdrSizeW dflags + - (sIZEOF_StgSmallMutArrPtrs_NoHdr dflags `quot` wORD_SIZE dflags) - --- Thunks have an extra header word on SMP, so the update doesn't --- splat the payload. -thunkHdrSize :: DynFlags -> WordOff -thunkHdrSize dflags = fixedHdrSizeW dflags + smp_hdr - where smp_hdr = sIZEOF_StgSMPThunkHeader dflags `quot` wORD_SIZE dflags - -hdrSize :: DynFlags -> SMRep -> ByteOff -hdrSize dflags rep = wordsToBytes dflags (hdrSizeW dflags rep) - -hdrSizeW :: DynFlags -> SMRep -> WordOff -hdrSizeW dflags (HeapRep _ _ _ ty) = closureTypeHdrSize dflags ty -hdrSizeW dflags (ArrayPtrsRep _ _) = arrPtrsHdrSizeW dflags -hdrSizeW dflags (SmallArrayPtrsRep _) = smallArrPtrsHdrSizeW dflags -hdrSizeW dflags (ArrayWordsRep _) = arrWordsHdrSizeW dflags -hdrSizeW _ _ = panic "SMRep.hdrSizeW" - -nonHdrSize :: DynFlags -> SMRep -> ByteOff -nonHdrSize dflags rep = wordsToBytes dflags (nonHdrSizeW rep) - -nonHdrSizeW :: SMRep -> WordOff -nonHdrSizeW (HeapRep _ p np _) = p + np -nonHdrSizeW (ArrayPtrsRep elems ct) = elems + ct -nonHdrSizeW (SmallArrayPtrsRep elems) = elems -nonHdrSizeW (ArrayWordsRep words) = words -nonHdrSizeW (StackRep bs) = length bs -nonHdrSizeW (RTSRep _ rep) = nonHdrSizeW rep - --- | The total size of the closure, in words. -heapClosureSizeW :: DynFlags -> SMRep -> WordOff -heapClosureSizeW dflags (HeapRep _ p np ty) - = closureTypeHdrSize dflags ty + p + np -heapClosureSizeW dflags (ArrayPtrsRep elems ct) - = arrPtrsHdrSizeW dflags + elems + ct -heapClosureSizeW dflags (SmallArrayPtrsRep elems) - = smallArrPtrsHdrSizeW dflags + elems -heapClosureSizeW dflags (ArrayWordsRep words) - = arrWordsHdrSizeW dflags + words -heapClosureSizeW _ _ = panic "SMRep.heapClosureSize" - -closureTypeHdrSize :: DynFlags -> ClosureTypeInfo -> WordOff -closureTypeHdrSize dflags ty = case ty of - Thunk -> thunkHdrSize dflags - ThunkSelector{} -> thunkHdrSize dflags - BlackHole -> thunkHdrSize dflags - IndStatic -> thunkHdrSize dflags - _ -> fixedHdrSizeW dflags - -- All thunks use thunkHdrSize, even if they are non-updatable. - -- this is because we don't have separate closure types for - -- updatable vs. non-updatable thunks, so the GC can't tell the - -- difference. If we ever have significant numbers of non- - -- updatable thunks, it might be worth fixing this. 
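
-- A minimal sketch of the size arithmetic above, assuming 64-bit words,
-- profiling off and a non-thunk closure, so the fixed header is a single
-- info-pointer word. The real functions read these constants from DynFlags;
-- everything below is hard-coded purely for illustration and is not part of
-- the original module.

module ClosureSizeSketch where

type WordOff = Int   -- word count, as in the module above

fixedHdrWords :: WordOff
fixedHdrWords = 1    -- assumption: info pointer only, no profiling header

-- Total words for a non-thunk heap closure with p pointer fields and
-- np non-pointer fields: header plus payload, mirroring heapClosureSizeW.
heapClosureWords :: WordOff -> WordOff -> WordOff
heapClosureWords p np = fixedHdrWords + p + np

-- Example: a constructor with 2 pointers and 1 non-pointer word occupies
-- 1 + 2 + 1 = 4 words, i.e. 32 bytes with 8-byte words.
main :: IO ()
main = print (heapClosureWords 2 1, 8 * heapClosureWords 2 1)
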
- --- --------------------------------------------------------------------------- --- Arrays - --- | The byte offset into the card table of the card for a given element -card :: DynFlags -> Int -> Int -card dflags i = i `shiftR` mUT_ARR_PTRS_CARD_BITS dflags - --- | Convert a number of elements to a number of cards, rounding up -cardRoundUp :: DynFlags -> Int -> Int -cardRoundUp dflags i = - card dflags (i + ((1 `shiftL` mUT_ARR_PTRS_CARD_BITS dflags) - 1)) - --- | The size of a card table, in bytes -cardTableSizeB :: DynFlags -> Int -> ByteOff -cardTableSizeB dflags elems = cardRoundUp dflags elems - --- | The size of a card table, in words -cardTableSizeW :: DynFlags -> Int -> WordOff -cardTableSizeW dflags elems = - bytesToWordsRoundUp dflags (cardTableSizeB dflags elems) - ------------------------------------------------------------------------------ --- deriving the RTS closure type from an SMRep - -#include "../includes/rts/storage/ClosureTypes.h" -#include "../includes/rts/storage/FunTypes.h" --- Defines CONSTR, CONSTR_1_0 etc - --- | Derives the RTS closure type from an 'SMRep' -rtsClosureType :: SMRep -> Int -rtsClosureType rep - = case rep of - RTSRep ty _ -> ty - - -- See Note [static constructors] - HeapRep _ 1 0 Constr{} -> CONSTR_1_0 - HeapRep _ 0 1 Constr{} -> CONSTR_0_1 - HeapRep _ 2 0 Constr{} -> CONSTR_2_0 - HeapRep _ 1 1 Constr{} -> CONSTR_1_1 - HeapRep _ 0 2 Constr{} -> CONSTR_0_2 - HeapRep _ 0 _ Constr{} -> CONSTR_NOCAF - -- See Note [Static NoCaf constructors] - HeapRep _ _ _ Constr{} -> CONSTR - - HeapRep False 1 0 Fun{} -> FUN_1_0 - HeapRep False 0 1 Fun{} -> FUN_0_1 - HeapRep False 2 0 Fun{} -> FUN_2_0 - HeapRep False 1 1 Fun{} -> FUN_1_1 - HeapRep False 0 2 Fun{} -> FUN_0_2 - HeapRep False _ _ Fun{} -> FUN - - HeapRep False 1 0 Thunk -> THUNK_1_0 - HeapRep False 0 1 Thunk -> THUNK_0_1 - HeapRep False 2 0 Thunk -> THUNK_2_0 - HeapRep False 1 1 Thunk -> THUNK_1_1 - HeapRep False 0 2 Thunk -> THUNK_0_2 - HeapRep False _ _ Thunk -> THUNK - - HeapRep False _ _ ThunkSelector{} -> THUNK_SELECTOR - - HeapRep True _ _ Fun{} -> FUN_STATIC - HeapRep True _ _ Thunk -> THUNK_STATIC - HeapRep False _ _ BlackHole -> BLACKHOLE - HeapRep False _ _ IndStatic -> IND_STATIC - - _ -> panic "rtsClosureType" - --- We export these ones -rET_SMALL, rET_BIG, aRG_GEN, aRG_GEN_BIG :: Int -rET_SMALL = RET_SMALL -rET_BIG = RET_BIG -aRG_GEN = ARG_GEN -aRG_GEN_BIG = ARG_GEN_BIG - -{- -Note [static constructors] -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -We used to have a CONSTR_STATIC closure type, and each constructor had -two info tables: one with CONSTR (or CONSTR_1_0 etc.), and one with -CONSTR_STATIC. - -This distinction was removed, because when copying a data structure -into a compact region, we must copy static constructors into the -compact region too. If we didn't do this, we would need to track the -references from the compact region out to the static constructors, -because they might (indirectly) refer to CAFs. - -Since static constructors will be copied to the heap, if we wanted to -use different info tables for static and dynamic constructors, we -would have to switch the info pointer when copying the constructor -into the compact region, which means we would need an extra field of -the static info table to point to the dynamic one. - -However, since the distinction between static and dynamic closure -types is never actually needed (other than for assertions), we can -just drop the distinction and use the same info table for both. 
- -The GC *does* need to distinguish between static and dynamic closures, -but it does this using the HEAP_ALLOCED() macro which checks whether -the address of the closure resides within the dynamic heap. -HEAP_ALLOCED() doesn't read the closure's info table. - -Note [Static NoCaf constructors] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -If we know that a top-level binding 'x' is not Caffy (ie no CAFs are -reachable from 'x'), then a statically allocated constructor (Just x) -is also not Caffy, and the garbage collector need not follow its -argument fields. Exploiting this would require two static info tables -for Just, for the two cases where the argument was Caffy or non-Caffy. - -Currently we don't do this; instead we treat nullary constructors -as non-Caffy, and the others as potentially Caffy. - - -************************************************************************ -* * - Pretty printing of SMRep and friends -* * -************************************************************************ --} - -instance Outputable ClosureTypeInfo where - ppr = pprTypeInfo - -instance Outputable SMRep where - ppr (HeapRep static ps nps tyinfo) - = hang (header <+> lbrace) 2 (ppr tyinfo <+> rbrace) - where - header = text "HeapRep" - <+> if static then text "static" else empty - <+> pp_n "ptrs" ps <+> pp_n "nonptrs" nps - pp_n :: String -> Int -> SDoc - pp_n _ 0 = empty - pp_n s n = int n <+> text s - - ppr (ArrayPtrsRep size _) = text "ArrayPtrsRep" <+> ppr size - - ppr (SmallArrayPtrsRep size) = text "SmallArrayPtrsRep" <+> ppr size - - ppr (ArrayWordsRep words) = text "ArrayWordsRep" <+> ppr words - - ppr (StackRep bs) = text "StackRep" <+> ppr bs - - ppr (RTSRep ty rep) = text "tag:" <> ppr ty <+> ppr rep - -instance Outputable ArgDescr where - ppr (ArgSpec n) = text "ArgSpec" <+> ppr n - ppr (ArgGen ls) = text "ArgGen" <+> ppr ls - -pprTypeInfo :: ClosureTypeInfo -> SDoc -pprTypeInfo (Constr tag descr) - = text "Con" <+> - braces (sep [ text "tag:" <+> ppr tag - , text "descr:" <> text (show descr) ]) - -pprTypeInfo (Fun arity args) - = text "Fun" <+> - braces (sep [ text "arity:" <+> ppr arity - , ptext (sLit ("fun_type:")) <+> ppr args ]) - -pprTypeInfo (ThunkSelector offset) - = text "ThunkSel" <+> ppr offset - -pprTypeInfo Thunk = text "Thunk" -pprTypeInfo BlackHole = text "BlackHole" -pprTypeInfo IndStatic = text "IndStatic" diff --git a/compiler/cmm/cmm-notes b/compiler/cmm/cmm-notes deleted file mode 100644 index 699f218257..0000000000 --- a/compiler/cmm/cmm-notes +++ /dev/null @@ -1,184 +0,0 @@ -More notes (Aug 11) -~~~~~~~~~~~~~~~~~~ -* CmmInfo.cmmToRawCmm expands info tables to their representations - (needed for .cmm files as well as the code generators) - -* Why is FCode a lazy monad? That makes it inefficient. - We want laziness to get code out one procedure at a time, - but not at the instruction level. - UPDATE (31/5/2016): FCode is strict since 09afcc9b. - -Things we did - * Remove CmmCvt.graphToZgraph (Conversion from old to new Cmm reps) - * Remove HscMain.optionallyConvertAndOrCPS (converted old Cmm to - new, ran pipeline, and converted back) - * Remove CmmDecl. Put its types in Cmm. Import Cmm into OldCmm - so it can get those types. - - -More notes (June 11) -~~~~~~~~~~~~~~~~~~~~ - -* In CmmContFlowOpts.branchChainElim, can a single block be the - successor of two calls? 
- -* Check in ClosureInfo: - -- NB: Results here should line up with the results of SMRep.rtsClosureType - -More notes (May 11) -~~~~~~~~~~~~~~~~~~~ -In CmmNode, consider splitting CmmCall into two: call and jump - -Notes on new codegen (Aug 10) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Things to do: - - Proc points pass all arguments on the stack, adding more code and - slowing down things a lot. We either need to fix this or even better - would be to get rid of proc points. - - - Sort out Label, LabelMap, LabelSet versus BlockId, BlockEnv, BlockSet - dichotomy. Mostly this means global replace, but we also need to make - Label an instance of Outputable (probably in the Outputable module). - - EZY: We should use Label, since that's the terminology Hoopl uses. - - - AsmCodeGen has a generic Cmm optimiser; move this into new pipeline - EZY (2011-04-16): The mini-inliner has been generalized and ported, - but the constant folding and other optimizations need to still be - ported. - - - AsmCodeGen has post-native-cg branch eliminator (shortCutBranches); - we ultimately want to share this with the Cmm branch eliminator. - - - At the moment, references to global registers like Hp are "lowered" - late (in CgUtils.fixStgRegisters). We should do this early, in the - new native codegen, much in the way that we lower calling conventions. - Might need to be a bit sophisticated about aliasing. - - - Move to new Cmm rep: - * Make native CG consume New Cmm; - * Convert Old Cmm->New Cmm to keep old path alive - * Produce New Cmm when reading in .cmm files - - - Top-level SRT threading is a bit ugly - - - See "CAFs" below; we want to totally refactor the way SRTs are calculated - - - Garbage-collect https://gitlab.haskell.org/ghc/ghc/wikis/commentary/compiler/cps - moving good stuff into - https://gitlab.haskell.org/ghc/ghc/wikis/commentary/compiler/new-code-gen-pipeline - - - Currently AsmCodeGen top level calls AsmCodeGen.cmmToCmm, which is a small - C-- optimiser. It has quite a lot of boilerplate folding code in AsmCodeGen - (cmmBlockConFold, cmmStmtConFold, cmmExprConFold), before calling out to - CmmOpt. ToDo: see what optimisations are being done; and do them before - AsmCodeGen. - - - If we stick CAF and stack liveness info on a LastCall node (not LastRet/Jump) - then all CAF and stack liveness stuff be completed before we split - into separate C procedures. - - Short term: - compute and attach liveness into LastCall - right at end, split, cvt to old rep - [must split before cvt, because old rep is not expressive enough] - - Longer term: - when old rep disappears, - move the whole splitting game into the C back end *only* - (guided by the procpoint set) - ----------------------------------------------------- - Proc-points ----------------------------------------------------- - -Consider this program, which has a diamond control flow, -with a call on one branch - fn(p,x) { - h() - if b then { ... f(x) ...; q=5; goto J } - else { ...; q=7; goto J } - J: ..p...q... - } -then the join point J is a "proc-point". So, is 'p' passed to J -as a parameter? Or, if 'p' was saved on the stack anyway, perhaps -to keep it alive across the call to h(), maybe 'p' gets communicated -to J that way. This is an awkward choice. (We think that we currently -never pass variables to join points via arguments.) - -Furthermore, there is *no way* to pass q to J in a register (other -than a parameter register). - -What we want is to do register allocation across the whole caboodle. 
-Then we could drop all the code that deals with the above awkward -decisions about spilling variables across proc-points. - -Note that J doesn't need an info table. - -What we really want is for each LastCall (not LastJump/Ret) -to have an info table. Note that ProcPoints that are not successors -of calls don't need an info table. - -Figuring out proc-points -~~~~~~~~~~~~~~~~~~~~~~~~ -Proc-points are identified by -CmmProcPoint.minimalProcPointSet/extendPPSet Although there isn't -that much code, JD thinks that it could be done much more nicely using -a dominator analysis, using the Dataflow Engine. - ----------------------------------------------------- - CAFs ----------------------------------------------------- - -* The code for a procedure f may refer to either the *closure* - or the *entry point* of another top-level procedure g. - If f is live, then so is g. f's SRT must include g's closure. - -* The CLabel for the entry-point/closure reveals whether g is - a CAF (or refers to CAFs). See the IdLabel constructor of CLabel. - -* The CAF-ness of the original top-level definitions is figured out - (by GHC.Iface.Tidy) before we generate C--. This CafInfo is only set for - top-level Ids; nested bindings stay with MayHaveCafRefs. - -* Currently an SRT contains (only) pointers to (top-level) closures. - -* Consider this Core code - f = \x -> let g = \y -> ...x...y...h1... - in ...h2...g... - and suppose that h1, h2 have IdInfo of MayHaveCafRefs. - Therefore, so will f, But g will not (since it's nested). - - This generates C-- roughly like this: - f_closure: .word f_entry - f_entry() [info-tbl-for-f] { ...jump g_entry...jump h2... } - g_entry() [info-tbl-for-g] { ...jump h1... } - - Note that there is no top-level closure for g (only an info table). - This fact (whether or not there is a top-level closure) is recorded - in the InfoTable attached to the CmmProc for f, g - INVARIANT: - Any out-of-Group references to an IdLabel goes to - a Proc whose InfoTable says "I have a top-level closure". - Equivalently: - A CmmProc whose InfoTable says "I do not have a top-level - closure" is referred to only from its own Group. - -* So: info-tbl-for-f must have an SRT that keeps h1,h2 alive - info-tbl-for-g must have an SRT that keeps h1 (only) alive - - But if we just look for the free CAF refs, we get: - f h2 (only) - g h1 - - So we need to do a transitive closure thing to flesh out - f's keep-alive refs to include h1. - -* The SRT info is the C_SRT field of Cmm.ClosureTypeInfo in a - CmmInfoTable attached to each CmmProc. CmmPipeline.toTops actually does - the attaching, right at the end of the pipeline. The C_SRT part - gives offsets within a single, shared table of closure pointers. - -* DECIDED: we can generate SRTs based on the final Cmm program - without knowledge of how it is generated. 
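
The SRT discussion above ends with the observation that keep-alive sets must
be closed transitively: f refers to h2 directly and to h1 only through g, yet
f's SRT must keep both alive. Below is a minimal sketch of that closure
computation over an invented example, using a plain Data.Map representation
rather than GHC's actual SRT machinery (which lives in GHC.Cmm.Info.Build);
the names f, g, h1, h2 are the ones from the note.

-- Given direct references per top-level procedure, collect every CAF
-- reachable from it, so f ends up keeping h1 alive through g.
module SrtSketch where

import qualified Data.Map as M
import qualified Data.Set as S

type Name = String

-- Direct references from each procedure to other top-level things.
directRefs :: M.Map Name (S.Set Name)
directRefs = M.fromList
  [ ("f", S.fromList ["g", "h2"])
  , ("g", S.fromList ["h1"])
  ]

-- Which referents are CAFs (and therefore must appear in an SRT).
isCaf :: Name -> Bool
isCaf n = n `elem` ["h1", "h2"]

-- Transitively collect every CAF reachable from a procedure.
cafRefs :: Name -> S.Set Name
cafRefs = go S.empty
  where
    go seen n
      | n `S.member` seen = S.empty
      | otherwise =
          let here = M.findWithDefault S.empty n directRefs
              cafs = S.filter isCaf here
              rest = S.unions (map (go (S.insert n seen)) (S.toList here))
          in cafs `S.union` rest

main :: IO ()
main = print (cafRefs "f", cafRefs "g")

Running main prints (fromList ["h1","h2"], fromList ["h1"]), matching the
keep-alive sets the note says the info tables for f and g must have.
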
diff --git a/compiler/deSugar/Coverage.hs b/compiler/deSugar/Coverage.hs index 75eeb07570..d94f640f84 100644 --- a/compiler/deSugar/Coverage.hs +++ b/compiler/deSugar/Coverage.hs @@ -41,7 +41,7 @@ import TyCon import BasicTypes import MonadUtils import Maybes -import CLabel +import GHC.Cmm.CLabel import Util import Data.Time diff --git a/compiler/deSugar/DsForeign.hs b/compiler/deSugar/DsForeign.hs index 0a3755e94b..cdf58e709e 100644 --- a/compiler/deSugar/DsForeign.hs +++ b/compiler/deSugar/DsForeign.hs @@ -37,8 +37,8 @@ import Coercion import TcEnv import TcType -import CmmExpr -import CmmUtils +import GHC.Cmm.Expr +import GHC.Cmm.Utils import HscTypes import ForeignCall import TysWiredIn diff --git a/compiler/ghc.cabal.in b/compiler/ghc.cabal.in index 640f325c03..ddcf2aeacb 100644 --- a/compiler/ghc.cabal.in +++ b/compiler/ghc.cabal.in @@ -203,7 +203,7 @@ Library DataCon PatSyn Demand - Debug + GHC.Cmm.DebugBlock Exception FieldLabel GhcMonad @@ -240,42 +240,42 @@ Library VarEnv VarSet UnVarGraph - BlockId - CLabel - Cmm - CmmBuildInfoTables - CmmPipeline - CmmCallConv - CmmCommonBlockElim - CmmImplementSwitchPlans - CmmContFlowOpt - CmmExpr - CmmInfo - CmmLex - CmmLint - CmmLive - CmmMachOp - CmmMonad - CmmSwitch - CmmNode - CmmOpt - CmmParse - CmmProcPoint - CmmSink - CmmType - CmmUtils - CmmLayoutStack + GHC.Cmm.BlockId + GHC.Cmm.CLabel + GHC.Cmm + GHC.Cmm.Info.Build + GHC.Cmm.Pipeline + GHC.Cmm.CallConv + GHC.Cmm.CommonBlockElim + GHC.Cmm.Switch.Implement + GHC.Cmm.ContFlowOpt + GHC.Cmm.Expr + GHC.Cmm.Info + GHC.Cmm.Lexer + GHC.Cmm.Lint + GHC.Cmm.Liveness + GHC.Cmm.MachOp + GHC.Cmm.Monad + GHC.Cmm.Switch + GHC.Cmm.Node + GHC.Cmm.Opt + GHC.Cmm.Parser + GHC.Cmm.ProcPoint + GHC.Cmm.Sink + GHC.Cmm.Type + GHC.Cmm.Utils + GHC.Cmm.LayoutStack CliOption EnumSet GhcNameVersion FileSettings - MkGraph + GHC.Cmm.Graph PprBase - PprC - PprCmm - PprCmmDecl - PprCmmExpr - Bitmap + GHC.CmmToC + GHC.Cmm.Ppr + GHC.Cmm.Ppr.Decl + GHC.Cmm.Ppr.Expr + GHC.Data.Bitmap GHC.Platform.Regs GHC.Platform.ARM GHC.Platform.ARM64 @@ -303,7 +303,7 @@ Library GHC.StgToCmm.Ticky GHC.StgToCmm.Utils GHC.StgToCmm.ExtCode - SMRep + GHC.Runtime.Layout CoreArity CoreFVs CoreLint @@ -576,11 +576,11 @@ Library UniqMap UniqSet Util - Hoopl.Block - Hoopl.Collections - Hoopl.Dataflow - Hoopl.Graph - Hoopl.Label + GHC.Cmm.Dataflow + GHC.Cmm.Dataflow.Block + GHC.Cmm.Dataflow.Collections + GHC.Cmm.Dataflow.Graph + GHC.Cmm.Dataflow.Label Exposed-Modules: AsmCodeGen diff --git a/compiler/ghci/ByteCodeAsm.hs b/compiler/ghci/ByteCodeAsm.hs index 82de14346e..801cdc7068 100644 --- a/compiler/ghci/ByteCodeAsm.hs +++ b/compiler/ghci/ByteCodeAsm.hs @@ -30,7 +30,7 @@ import Literal import TyCon import FastString import GHC.StgToCmm.Layout ( ArgRep(..) 
) -import SMRep +import GHC.Runtime.Layout import DynFlags import Outputable import GHC.Platform diff --git a/compiler/ghci/ByteCodeGen.hs b/compiler/ghci/ByteCodeGen.hs index 2e24bf540c..186d094bff 100644 --- a/compiler/ghci/ByteCodeGen.hs +++ b/compiler/ghci/ByteCodeGen.hs @@ -50,8 +50,8 @@ import FastString import Panic import GHC.StgToCmm.Closure ( NonVoid(..), fromNonVoid, nonVoidIds ) import GHC.StgToCmm.Layout -import SMRep hiding (WordOff, ByteOff, wordsToBytes) -import Bitmap +import GHC.Runtime.Layout hiding (WordOff, ByteOff, wordsToBytes) +import GHC.Data.Bitmap import OrdList import Maybes import VarEnv diff --git a/compiler/ghci/ByteCodeInstr.hs b/compiler/ghci/ByteCodeInstr.hs index b0db198037..9cdd297dbd 100644 --- a/compiler/ghci/ByteCodeInstr.hs +++ b/compiler/ghci/ByteCodeInstr.hs @@ -28,7 +28,7 @@ import Literal import DataCon import VarSet import PrimOp -import SMRep +import GHC.Runtime.Layout import Data.Word import GHC.Stack.CCS (CostCentre) diff --git a/compiler/ghci/RtClosureInspect.hs b/compiler/ghci/RtClosureInspect.hs index 96df8b547c..a523ae07bf 100644 --- a/compiler/ghci/RtClosureInspect.hs +++ b/compiler/ghci/RtClosureInspect.hs @@ -58,7 +58,7 @@ import DynFlags import Outputable as Ppr import GHC.Char import GHC.Exts.Heap -import SMRep ( roundUpTo ) +import GHC.Runtime.Layout ( roundUpTo ) import Control.Monad import Data.Maybe diff --git a/compiler/llvmGen/LlvmCodeGen.hs b/compiler/llvmGen/LlvmCodeGen.hs index 0fc7e76e58..8bff8fd6e5 100644 --- a/compiler/llvmGen/LlvmCodeGen.hs +++ b/compiler/llvmGen/LlvmCodeGen.hs @@ -18,9 +18,9 @@ import LlvmCodeGen.Regs import LlvmMangler import GHC.StgToCmm.CgUtils ( fixStgRegisters ) -import Cmm -import Hoopl.Collections -import PprCmm +import GHC.Cmm +import GHC.Cmm.Dataflow.Collections +import GHC.Cmm.Ppr import BufWrite import DynFlags diff --git a/compiler/llvmGen/LlvmCodeGen/Base.hs b/compiler/llvmGen/LlvmCodeGen/Base.hs index ce9f22052f..165f733af4 100644 --- a/compiler/llvmGen/LlvmCodeGen/Base.hs +++ b/compiler/llvmGen/LlvmCodeGen/Base.hs @@ -44,12 +44,12 @@ import GhcPrelude import Llvm import LlvmCodeGen.Regs -import CLabel +import GHC.Cmm.CLabel import GHC.Platform.Regs ( activeStgRegs ) import DynFlags import FastString -import Cmm hiding ( succ ) -import CmmUtils ( regsOverlap ) +import GHC.Cmm hiding ( succ ) +import GHC.Cmm.Utils (regsOverlap) import Outputable as Outp import GHC.Platform import UniqFM diff --git a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs index bfaf7706d1..f9b10679ef 100644 --- a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs +++ b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs @@ -13,16 +13,16 @@ import Llvm import LlvmCodeGen.Base import LlvmCodeGen.Regs -import BlockId +import GHC.Cmm.BlockId import GHC.Platform.Regs ( activeStgRegs ) -import CLabel -import Cmm -import PprCmm -import CmmUtils -import CmmSwitch -import Hoopl.Block -import Hoopl.Graph -import Hoopl.Collections +import GHC.Cmm.CLabel +import GHC.Cmm +import GHC.Cmm.Ppr as PprCmm +import GHC.Cmm.Utils +import GHC.Cmm.Switch +import GHC.Cmm.Dataflow.Block +import GHC.Cmm.Dataflow.Graph +import GHC.Cmm.Dataflow.Collections import DynFlags import FastString diff --git a/compiler/llvmGen/LlvmCodeGen/Data.hs b/compiler/llvmGen/LlvmCodeGen/Data.hs index 4c07f8ee8f..46fb1afbcd 100644 --- a/compiler/llvmGen/LlvmCodeGen/Data.hs +++ b/compiler/llvmGen/LlvmCodeGen/Data.hs @@ -14,9 +14,9 @@ import GhcPrelude import Llvm import LlvmCodeGen.Base -import BlockId -import CLabel -import Cmm +import GHC.Cmm.BlockId 
+import GHC.Cmm.CLabel +import GHC.Cmm import DynFlags import GHC.Platform diff --git a/compiler/llvmGen/LlvmCodeGen/Ppr.hs b/compiler/llvmGen/LlvmCodeGen/Ppr.hs index 3f29133e59..5fcc72f25a 100644 --- a/compiler/llvmGen/LlvmCodeGen/Ppr.hs +++ b/compiler/llvmGen/LlvmCodeGen/Ppr.hs @@ -15,8 +15,8 @@ import Llvm import LlvmCodeGen.Base import LlvmCodeGen.Data -import CLabel -import Cmm +import GHC.Cmm.CLabel +import GHC.Cmm import FastString import Outputable diff --git a/compiler/llvmGen/LlvmCodeGen/Regs.hs b/compiler/llvmGen/LlvmCodeGen/Regs.hs index 8cdf3c6869..4b1a15674e 100644 --- a/compiler/llvmGen/LlvmCodeGen/Regs.hs +++ b/compiler/llvmGen/LlvmCodeGen/Regs.hs @@ -15,7 +15,7 @@ import GhcPrelude import Llvm -import CmmExpr +import GHC.Cmm.Expr import DynFlags import FastString import Outputable ( panic ) diff --git a/compiler/main/CodeOutput.hs b/compiler/main/CodeOutput.hs index 2b9770c78e..6656a4f4d8 100644 --- a/compiler/main/CodeOutput.hs +++ b/compiler/main/CodeOutput.hs @@ -18,10 +18,10 @@ import LlvmCodeGen ( llvmCodeGen ) import UniqSupply ( mkSplitUniqSupply ) import Finder ( mkStubPaths ) -import PprC ( writeC ) -import CmmLint ( cmmLint ) +import GHC.CmmToC ( writeC ) +import GHC.Cmm.Lint ( cmmLint ) import Packages -import Cmm ( RawCmmGroup ) +import GHC.Cmm ( RawCmmGroup ) import HscTypes import DynFlags import Stream ( Stream ) diff --git a/compiler/main/Hooks.hs b/compiler/main/Hooks.hs index d5ced7d5a0..8caebfc556 100644 --- a/compiler/main/Hooks.hs +++ b/compiler/main/Hooks.hs @@ -50,7 +50,7 @@ import TyCon import CostCentre import GHC.Stg.Syntax import Stream -import Cmm +import GHC.Cmm import GHC.Hs.Extension import Data.Maybe diff --git a/compiler/main/HscMain.hs b/compiler/main/HscMain.hs index ffb9b3ced9..1c27542270 100644 --- a/compiler/main/HscMain.hs +++ b/compiler/main/HscMain.hs @@ -133,11 +133,11 @@ import CostCentre import ProfInit import TyCon import Name -import Cmm -import CmmParse ( parseCmmFile ) -import CmmBuildInfoTables -import CmmPipeline -import CmmInfo +import GHC.Cmm +import GHC.Cmm.Parser ( parseCmmFile ) +import GHC.Cmm.Info.Build +import GHC.Cmm.Pipeline +import GHC.Cmm.Info import CodeOutput import InstEnv import FamInstEnv diff --git a/compiler/main/StaticPtrTable.hs b/compiler/main/StaticPtrTable.hs index 4f67ba0190..dfc54799d7 100644 --- a/compiler/main/StaticPtrTable.hs +++ b/compiler/main/StaticPtrTable.hs @@ -124,7 +124,7 @@ Here is a running example: import GhcPrelude -import CLabel +import GHC.Cmm.CLabel import CoreSyn import CoreUtils (collectMakeStaticArgs) import DataCon diff --git a/compiler/nativeGen/AsmCodeGen.hs b/compiler/nativeGen/AsmCodeGen.hs index 556c943dc2..021fbae195 100644 --- a/compiler/nativeGen/AsmCodeGen.hs +++ b/compiler/nativeGen/AsmCodeGen.hs @@ -67,18 +67,18 @@ import Reg import NCGMonad import CFG import Dwarf -import Debug +import GHC.Cmm.DebugBlock -import BlockId +import GHC.Cmm.BlockId import GHC.StgToCmm.CgUtils ( fixStgRegisters ) -import Cmm -import CmmUtils -import Hoopl.Collections -import Hoopl.Label -import Hoopl.Block -import CmmOpt ( cmmMachOpFold ) -import PprCmm -import CLabel +import GHC.Cmm +import GHC.Cmm.Utils +import GHC.Cmm.Dataflow.Collections +import GHC.Cmm.Dataflow.Label +import GHC.Cmm.Dataflow.Block +import GHC.Cmm.Opt ( cmmMachOpFold ) +import GHC.Cmm.Ppr +import GHC.Cmm.CLabel import UniqFM import UniqSupply @@ -826,7 +826,7 @@ computeUnwinding _ ncgImpl (CmmProc _ _ _ (ListGraph blks)) = -- relevant register writes within a procedure. 
-- -- However, the only unwinding information that we care about in GHC is for - -- Sp. The fact that CmmLayoutStack already ensures that we have unwind + -- Sp. The fact that GHC.Cmm.LayoutStack already ensures that we have unwind -- information at the beginning of every block means that there is no need -- to perform this sort of push-down. mapFromList [ (blk_lbl, extractUnwindPoints ncgImpl instrs) diff --git a/compiler/nativeGen/BlockLayout.hs b/compiler/nativeGen/BlockLayout.hs index 5e81316ab3..3f74065e4e 100644 --- a/compiler/nativeGen/BlockLayout.hs +++ b/compiler/nativeGen/BlockLayout.hs @@ -20,10 +20,10 @@ import Instruction import NCGMonad import CFG -import BlockId -import Cmm -import Hoopl.Collections -import Hoopl.Label +import GHC.Cmm.BlockId +import GHC.Cmm +import GHC.Cmm.Dataflow.Collections +import GHC.Cmm.Dataflow.Label import DynFlags (gopt, GeneralFlag(..), DynFlags, backendMaintainsCfg) import UniqFM @@ -35,7 +35,7 @@ import Outputable import Maybes -- DEBUGGING ONLY ---import Debug +--import GHC.Cmm.DebugBlock --import Debug.Trace import ListSetOps (removeDups) diff --git a/compiler/nativeGen/CFG.hs b/compiler/nativeGen/CFG.hs index 4dc5f9ccb3..90573221f8 100644 --- a/compiler/nativeGen/CFG.hs +++ b/compiler/nativeGen/CFG.hs @@ -46,15 +46,15 @@ where import GhcPrelude -import BlockId -import Cmm +import GHC.Cmm.BlockId +import GHC.Cmm as Cmm -import CmmUtils -import CmmSwitch -import Hoopl.Collections -import Hoopl.Label -import Hoopl.Block -import qualified Hoopl.Graph as G +import GHC.Cmm.Utils +import GHC.Cmm.Switch +import GHC.Cmm.Dataflow.Collections +import GHC.Cmm.Dataflow.Label +import GHC.Cmm.Dataflow.Block +import qualified GHC.Cmm.Dataflow.Graph as G import Util import Digraph @@ -74,11 +74,10 @@ import Data.Bifunctor import Outputable -- DEBUGGING ONLY ---import Debug --- import Debug.Trace +--import GHC.Cmm.DebugBlock --import OrdList ---import Debug.Trace -import PprCmm () -- For Outputable instances +--import GHC.Cmm.DebugBlock.Trace +import GHC.Cmm.Ppr () -- For Outputable instances import qualified DynFlags as D import Data.List @@ -250,7 +249,7 @@ filterEdges f cfg = {- Note [Updating the CFG during shortcutting] See Note [What is shortcutting] in the control flow optimization -code (CmmContFlowOpt.hs) for a slightly more in depth explanation on shortcutting. +code (GHC.Cmm.ContFlowOpt) for a slightly more in depth explanation on shortcutting. In the native backend we shortcut jumps at the assembly level. (AsmCodeGen.hs) This means we remove blocks containing only one jump from the code diff --git a/compiler/nativeGen/CPrim.hs b/compiler/nativeGen/CPrim.hs index 17e5cda845..344e62d53c 100644 --- a/compiler/nativeGen/CPrim.hs +++ b/compiler/nativeGen/CPrim.hs @@ -16,8 +16,8 @@ module CPrim import GhcPrelude -import CmmType -import CmmMachOp +import GHC.Cmm.Type +import GHC.Cmm.MachOp import Outputable popCntLabel :: Width -> String diff --git a/compiler/nativeGen/Dwarf.hs b/compiler/nativeGen/Dwarf.hs index 33f1c5b2f7..a64df287f5 100644 --- a/compiler/nativeGen/Dwarf.hs +++ b/compiler/nativeGen/Dwarf.hs @@ -4,11 +4,11 @@ module Dwarf ( import GhcPrelude -import CLabel -import CmmExpr ( GlobalReg(..) ) +import GHC.Cmm.CLabel +import GHC.Cmm.Expr ( GlobalReg(..) ) import Config ( cProjectName, cProjectVersion ) import CoreSyn ( Tickish(..) 
) -import Debug +import GHC.Cmm.DebugBlock import DynFlags import Module import Outputable @@ -28,8 +28,8 @@ import qualified Data.Map as Map import System.FilePath import System.Directory ( getCurrentDirectory ) -import qualified Hoopl.Label as H -import qualified Hoopl.Collections as H +import qualified GHC.Cmm.Dataflow.Label as H +import qualified GHC.Cmm.Dataflow.Collections as H -- | Generate DWARF/debug information dwarfGen :: DynFlags -> ModLocation -> UniqSupply -> [DebugBlock] diff --git a/compiler/nativeGen/Dwarf/Types.hs b/compiler/nativeGen/Dwarf/Types.hs index a6ba596f35..df578e2671 100644 --- a/compiler/nativeGen/Dwarf/Types.hs +++ b/compiler/nativeGen/Dwarf/Types.hs @@ -24,9 +24,9 @@ module Dwarf.Types import GhcPrelude -import Debug -import CLabel -import CmmExpr ( GlobalReg(..) ) +import GHC.Cmm.DebugBlock +import GHC.Cmm.CLabel +import GHC.Cmm.Expr ( GlobalReg(..) ) import Encoding import FastString import Outputable diff --git a/compiler/nativeGen/Format.hs b/compiler/nativeGen/Format.hs index 745d1e7b65..d7b6f6b868 100644 --- a/compiler/nativeGen/Format.hs +++ b/compiler/nativeGen/Format.hs @@ -22,7 +22,7 @@ where import GhcPrelude -import Cmm +import GHC.Cmm import Outputable -- It looks very like the old MachRep, but it's now of purely local diff --git a/compiler/nativeGen/Instruction.hs b/compiler/nativeGen/Instruction.hs index 4f18a45c16..150bd8adba 100644 --- a/compiler/nativeGen/Instruction.hs +++ b/compiler/nativeGen/Instruction.hs @@ -18,11 +18,11 @@ import GhcPrelude import Reg -import BlockId -import Hoopl.Collections -import Hoopl.Label +import GHC.Cmm.BlockId +import GHC.Cmm.Dataflow.Collections +import GHC.Cmm.Dataflow.Label import DynFlags -import Cmm hiding (topInfoTable) +import GHC.Cmm hiding (topInfoTable) import GHC.Platform -- | Holds a list of source and destination registers used by a diff --git a/compiler/nativeGen/NCGMonad.hs b/compiler/nativeGen/NCGMonad.hs index e1bb927d0b..b963623535 100644 --- a/compiler/nativeGen/NCGMonad.hs +++ b/compiler/nativeGen/NCGMonad.hs @@ -49,11 +49,11 @@ import Reg import Format import TargetReg -import BlockId -import Hoopl.Collections -import Hoopl.Label -import CLabel ( CLabel ) -import Debug +import GHC.Cmm.BlockId +import GHC.Cmm.Dataflow.Collections +import GHC.Cmm.Dataflow.Label +import GHC.Cmm.CLabel ( CLabel ) +import GHC.Cmm.DebugBlock import FastString ( FastString ) import UniqFM import UniqSupply @@ -65,7 +65,7 @@ import Control.Monad ( ap ) import Instruction import Outputable (SDoc, pprPanic, ppr) -import Cmm (RawCmmDecl, CmmStatics) +import GHC.Cmm (RawCmmDecl, CmmStatics) import CFG data NcgImpl statics instr jumpDest = NcgImpl { diff --git a/compiler/nativeGen/PIC.hs b/compiler/nativeGen/PIC.hs index 760ba7925d..e4aba00596 100644 --- a/compiler/nativeGen/PIC.hs +++ b/compiler/nativeGen/PIC.hs @@ -60,14 +60,14 @@ import Reg import NCGMonad -import Hoopl.Collections -import Cmm -import CLabel ( CLabel, ForeignLabelSource(..), pprCLabel, +import GHC.Cmm.Dataflow.Collections +import GHC.Cmm +import GHC.Cmm.CLabel ( CLabel, ForeignLabelSource(..), pprCLabel, mkDynamicLinkerLabel, DynamicLinkerLabelInfo(..), dynamicLinkerLabelInfo, mkPicBaseLabel, labelDynamic, externallyVisibleCLabel ) -import CLabel ( mkForeignLabel ) +import GHC.Cmm.CLabel ( mkForeignLabel ) import BasicTypes diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs index e669630956..4d9a38b9de 100644 --- a/compiler/nativeGen/PPC/CodeGen.hs +++ b/compiler/nativeGen/PPC/CodeGen.hs @@ -42,14 +42,14 @@ import 
TargetReg import GHC.Platform -- Our intermediate code: -import BlockId -import PprCmm ( pprExpr ) -import Cmm -import CmmUtils -import CmmSwitch -import CLabel -import Hoopl.Block -import Hoopl.Graph +import GHC.Cmm.BlockId +import GHC.Cmm.Ppr ( pprExpr ) +import GHC.Cmm +import GHC.Cmm.Utils +import GHC.Cmm.Switch +import GHC.Cmm.CLabel +import GHC.Cmm.Dataflow.Block +import GHC.Cmm.Dataflow.Graph -- The rest: import OrdList diff --git a/compiler/nativeGen/PPC/Instr.hs b/compiler/nativeGen/PPC/Instr.hs index 69aa954485..d19282fee6 100644 --- a/compiler/nativeGen/PPC/Instr.hs +++ b/compiler/nativeGen/PPC/Instr.hs @@ -33,14 +33,14 @@ import RegClass import Reg import GHC.Platform.Regs -import BlockId -import Hoopl.Collections -import Hoopl.Label +import GHC.Cmm.BlockId +import GHC.Cmm.Dataflow.Collections +import GHC.Cmm.Dataflow.Label import DynFlags -import Cmm -import CmmInfo +import GHC.Cmm +import GHC.Cmm.Info import FastString -import CLabel +import GHC.Cmm.CLabel import Outputable import GHC.Platform import UniqFM (listToUFM, lookupUFM) diff --git a/compiler/nativeGen/PPC/Ppr.hs b/compiler/nativeGen/PPC/Ppr.hs index ea0b36fb64..9669076bef 100644 --- a/compiler/nativeGen/PPC/Ppr.hs +++ b/compiler/nativeGen/PPC/Ppr.hs @@ -21,13 +21,13 @@ import Reg import RegClass import TargetReg -import Cmm hiding (topInfoTable) -import Hoopl.Collections -import Hoopl.Label +import GHC.Cmm hiding (topInfoTable) +import GHC.Cmm.Dataflow.Collections +import GHC.Cmm.Dataflow.Label -import BlockId -import CLabel -import PprCmmExpr () -- For Outputable instances +import GHC.Cmm.BlockId +import GHC.Cmm.CLabel +import GHC.Cmm.Ppr.Expr () -- For Outputable instances import Unique ( pprUniqueAlways, getUnique ) import GHC.Platform diff --git a/compiler/nativeGen/PPC/RegInfo.hs b/compiler/nativeGen/PPC/RegInfo.hs index 5ed0ccded3..e99a69313e 100644 --- a/compiler/nativeGen/PPC/RegInfo.hs +++ b/compiler/nativeGen/PPC/RegInfo.hs @@ -23,9 +23,9 @@ import GhcPrelude import PPC.Instr -import BlockId -import Cmm -import CLabel +import GHC.Cmm.BlockId +import GHC.Cmm +import GHC.Cmm.CLabel import Unique import Outputable (ppr, text, Outputable, (<>)) diff --git a/compiler/nativeGen/PPC/Regs.hs b/compiler/nativeGen/PPC/Regs.hs index b0087901a8..66aa006311 100644 --- a/compiler/nativeGen/PPC/Regs.hs +++ b/compiler/nativeGen/PPC/Regs.hs @@ -55,8 +55,8 @@ import Reg import RegClass import Format -import Cmm -import CLabel ( CLabel ) +import GHC.Cmm +import GHC.Cmm.CLabel ( CLabel ) import Unique import GHC.Platform.Regs diff --git a/compiler/nativeGen/PprBase.hs b/compiler/nativeGen/PprBase.hs index 48e9e26ae4..c5574b35f0 100644 --- a/compiler/nativeGen/PprBase.hs +++ b/compiler/nativeGen/PprBase.hs @@ -23,8 +23,8 @@ where import GhcPrelude import AsmUtils -import CLabel -import Cmm +import GHC.Cmm.CLabel +import GHC.Cmm import DynFlags import FastString import Outputable diff --git a/compiler/nativeGen/RegAlloc/Graph/Coalesce.hs b/compiler/nativeGen/RegAlloc/Graph/Coalesce.hs index 5ca2412c73..f42ff9450a 100644 --- a/compiler/nativeGen/RegAlloc/Graph/Coalesce.hs +++ b/compiler/nativeGen/RegAlloc/Graph/Coalesce.hs @@ -9,7 +9,7 @@ import RegAlloc.Liveness import Instruction import Reg -import Cmm +import GHC.Cmm import Bag import Digraph import UniqFM diff --git a/compiler/nativeGen/RegAlloc/Graph/Spill.hs b/compiler/nativeGen/RegAlloc/Graph/Spill.hs index 22a88c02c0..9ffb51ee29 100644 --- a/compiler/nativeGen/RegAlloc/Graph/Spill.hs +++ b/compiler/nativeGen/RegAlloc/Graph/Spill.hs @@ -12,9 +12,9 @@ import GhcPrelude 
 import RegAlloc.Liveness
 import Instruction
 import Reg
-import Cmm hiding (RegSet)
-import BlockId
-import Hoopl.Collections
+import GHC.Cmm hiding (RegSet)
+import GHC.Cmm.BlockId
+import GHC.Cmm.Dataflow.Collections
 
 import MonadUtils
 import State
diff --git a/compiler/nativeGen/RegAlloc/Graph/SpillClean.hs b/compiler/nativeGen/RegAlloc/Graph/SpillClean.hs
index 79dbf63a66..bd8b449cbb 100644
--- a/compiler/nativeGen/RegAlloc/Graph/SpillClean.hs
+++ b/compiler/nativeGen/RegAlloc/Graph/SpillClean.hs
@@ -35,15 +35,15 @@ import RegAlloc.Liveness
 import Instruction
 import Reg
 
-import BlockId
-import Cmm
+import GHC.Cmm.BlockId
+import GHC.Cmm
 import UniqSet
 import UniqFM
 import Unique
 import State
 import Outputable
 import GHC.Platform
-import Hoopl.Collections
+import GHC.Cmm.Dataflow.Collections
 
 import Data.List
 import Data.Maybe
diff --git a/compiler/nativeGen/RegAlloc/Graph/SpillCost.hs b/compiler/nativeGen/RegAlloc/Graph/SpillCost.hs
index 42de5503ba..4870bf5269 100644
--- a/compiler/nativeGen/RegAlloc/Graph/SpillCost.hs
+++ b/compiler/nativeGen/RegAlloc/Graph/SpillCost.hs
@@ -22,9 +22,9 @@ import Reg
 
 import GraphBase
 
-import Hoopl.Collections (mapLookup)
-import Hoopl.Label
-import Cmm
+import GHC.Cmm.Dataflow.Collections (mapLookup)
+import GHC.Cmm.Dataflow.Label
+import GHC.Cmm
 import UniqFM
 import UniqSet
 import Digraph (flattenSCCs)
diff --git a/compiler/nativeGen/RegAlloc/Linear/Base.hs b/compiler/nativeGen/RegAlloc/Linear/Base.hs
index ad0fafb3ed..3c6965c1dd 100644
--- a/compiler/nativeGen/RegAlloc/Linear/Base.hs
+++ b/compiler/nativeGen/RegAlloc/Linear/Base.hs
@@ -28,7 +28,7 @@ import Outputable
 import Unique
 import UniqFM
 import UniqSupply
-import BlockId
+import GHC.Cmm.BlockId
 
 
 -- | Used to store the register assignment on entry to a basic block.
diff --git a/compiler/nativeGen/RegAlloc/Linear/JoinToTargets.hs b/compiler/nativeGen/RegAlloc/Linear/JoinToTargets.hs
index 546d48af21..c21ab1bea1 100644
--- a/compiler/nativeGen/RegAlloc/Linear/JoinToTargets.hs
+++ b/compiler/nativeGen/RegAlloc/Linear/JoinToTargets.hs
@@ -18,8 +18,8 @@ import RegAlloc.Liveness
 import Instruction
 import Reg
 
-import BlockId
-import Hoopl.Collections
+import GHC.Cmm.BlockId
+import GHC.Cmm.Dataflow.Collections
 import Digraph
 import DynFlags
 import Outputable
diff --git a/compiler/nativeGen/RegAlloc/Linear/Main.hs b/compiler/nativeGen/RegAlloc/Linear/Main.hs
index eac9194c6a..bccffb208c 100644
--- a/compiler/nativeGen/RegAlloc/Linear/Main.hs
+++ b/compiler/nativeGen/RegAlloc/Linear/Main.hs
@@ -119,9 +119,9 @@ import RegAlloc.Liveness
 import Instruction
 import Reg
 
-import BlockId
-import Hoopl.Collections
-import Cmm hiding (RegSet)
+import GHC.Cmm.BlockId
+import GHC.Cmm.Dataflow.Collections
+import GHC.Cmm hiding (RegSet)
 
 import Digraph
 import DynFlags
@@ -777,7 +777,7 @@ allocateRegsAndSpill reading keep spills alloc (r:rs)
                 -- NOTE: if the input to the NCG contains some
                 -- unreachable blocks with junk code, this panic
                 -- might be triggered. Make sure you only feed
-                -- sensible code into the NCG. In CmmPipeline we
+                -- sensible code into the NCG. In GHC.Cmm.Pipeline we
                 -- call removeUnreachableBlocks at the end for this
                 -- reason.
 
diff --git a/compiler/nativeGen/RegAlloc/Linear/State.hs b/compiler/nativeGen/RegAlloc/Linear/State.hs
index 43b8f6c129..d24690f04c 100644
--- a/compiler/nativeGen/RegAlloc/Linear/State.hs
+++ b/compiler/nativeGen/RegAlloc/Linear/State.hs
@@ -44,7 +44,7 @@ import RegAlloc.Linear.Base
 import RegAlloc.Liveness
 import Instruction
 import Reg
-import BlockId
+import GHC.Cmm.BlockId
 
 import DynFlags
 import Unique
diff --git a/compiler/nativeGen/RegAlloc/Liveness.hs b/compiler/nativeGen/RegAlloc/Liveness.hs
index a5a9b503cd..c39ee4895a 100644
--- a/compiler/nativeGen/RegAlloc/Liveness.hs
+++ b/compiler/nativeGen/RegAlloc/Liveness.hs
@@ -40,11 +40,11 @@ import GhcPrelude
 import Reg
 import Instruction
 
-import BlockId
+import GHC.Cmm.BlockId
 import CFG
-import Hoopl.Collections
-import Hoopl.Label
-import Cmm hiding (RegSet, emptyRegSet)
+import GHC.Cmm.Dataflow.Collections
+import GHC.Cmm.Dataflow.Label
+import GHC.Cmm hiding (RegSet, emptyRegSet)
 
 import Digraph
 import DynFlags
diff --git a/compiler/nativeGen/SPARC/CodeGen.hs b/compiler/nativeGen/SPARC/CodeGen.hs
index 46b29d0a03..d8cda40d1a 100644
--- a/compiler/nativeGen/SPARC/CodeGen.hs
+++ b/compiler/nativeGen/SPARC/CodeGen.hs
@@ -39,15 +39,15 @@ import Format
 import NCGMonad ( NatM, getNewRegNat, getNewLabelNat )
 
 -- Our intermediate code:
-import BlockId
-import Cmm
-import CmmUtils
-import CmmSwitch
-import Hoopl.Block
-import Hoopl.Graph
+import GHC.Cmm.BlockId
+import GHC.Cmm
+import GHC.Cmm.Utils
+import GHC.Cmm.Switch
+import GHC.Cmm.Dataflow.Block
+import GHC.Cmm.Dataflow.Graph
 import PIC
 import Reg
-import CLabel
+import GHC.Cmm.CLabel
 import CPrim
 
 -- The rest:
diff --git a/compiler/nativeGen/SPARC/CodeGen/Amode.hs b/compiler/nativeGen/SPARC/CodeGen/Amode.hs
index 33e3f535da..5351fc054b 100644
--- a/compiler/nativeGen/SPARC/CodeGen/Amode.hs
+++ b/compiler/nativeGen/SPARC/CodeGen/Amode.hs
@@ -16,7 +16,7 @@ import SPARC.Base
 import NCGMonad
 import Format
 
-import Cmm
+import GHC.Cmm
 
 import OrdList
 
diff --git a/compiler/nativeGen/SPARC/CodeGen/Base.hs b/compiler/nativeGen/SPARC/CodeGen/Base.hs
index 8a2f2f5a08..4497e1bd5d 100644
--- a/compiler/nativeGen/SPARC/CodeGen/Base.hs
+++ b/compiler/nativeGen/SPARC/CodeGen/Base.hs
@@ -24,8 +24,8 @@ import Reg
 import GHC.Platform.Regs
 import DynFlags
 
-import Cmm
-import PprCmmExpr () -- For Outputable instances
+import GHC.Cmm
+import GHC.Cmm.Ppr.Expr () -- For Outputable instances
 import GHC.Platform
 
 import Outputable
diff --git a/compiler/nativeGen/SPARC/CodeGen/CondCode.hs b/compiler/nativeGen/SPARC/CodeGen/CondCode.hs
index e6b2e174b6..892cbb1a8f 100644
--- a/compiler/nativeGen/SPARC/CodeGen/CondCode.hs
+++ b/compiler/nativeGen/SPARC/CodeGen/CondCode.hs
@@ -18,7 +18,7 @@ import SPARC.Base
 import NCGMonad
 import Format
 
-import Cmm
+import GHC.Cmm
 
 import OrdList
 import Outputable
diff --git a/compiler/nativeGen/SPARC/CodeGen/Expand.hs b/compiler/nativeGen/SPARC/CodeGen/Expand.hs
index 237311956e..ba7577602f 100644
--- a/compiler/nativeGen/SPARC/CodeGen/Expand.hs
+++ b/compiler/nativeGen/SPARC/CodeGen/Expand.hs
@@ -14,7 +14,7 @@ import SPARC.Regs
 import Instruction
 import Reg
 import Format
-import Cmm
+import GHC.Cmm
 
 import Outputable
 
diff --git a/compiler/nativeGen/SPARC/CodeGen/Gen32.hs b/compiler/nativeGen/SPARC/CodeGen/Gen32.hs
index a7a1f60416..a4f6214edc 100644
--- a/compiler/nativeGen/SPARC/CodeGen/Gen32.hs
+++ b/compiler/nativeGen/SPARC/CodeGen/Gen32.hs
@@ -23,7 +23,7 @@ import NCGMonad
 import Format
 import Reg
 
-import Cmm
+import GHC.Cmm
 
 import Control.Monad (liftM)
 import DynFlags
diff --git a/compiler/nativeGen/SPARC/CodeGen/Gen32.hs-boot b/compiler/nativeGen/SPARC/CodeGen/Gen32.hs-boot
index 43632c676d..1dbd2d3612 100644
--- a/compiler/nativeGen/SPARC/CodeGen/Gen32.hs-boot
+++ b/compiler/nativeGen/SPARC/CodeGen/Gen32.hs-boot
@@ -10,7 +10,7 @@ import SPARC.CodeGen.Base
 import NCGMonad
 import Reg
 
-import Cmm
+import GHC.Cmm
 
 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
 getRegister :: CmmExpr -> NatM Register
diff --git a/compiler/nativeGen/SPARC/CodeGen/Gen64.hs b/compiler/nativeGen/SPARC/CodeGen/Gen64.hs
index 18df9e19a3..a267cd22ab 100644
--- a/compiler/nativeGen/SPARC/CodeGen/Gen64.hs
+++ b/compiler/nativeGen/SPARC/CodeGen/Gen64.hs
@@ -22,7 +22,7 @@ import Instruction
 import Format
 import Reg
 
-import Cmm
+import GHC.Cmm
 
 import DynFlags
 import OrdList
diff --git a/compiler/nativeGen/SPARC/CodeGen/Sanity.hs b/compiler/nativeGen/SPARC/CodeGen/Sanity.hs
index 7f9bfed229..b60c958a73 100644
--- a/compiler/nativeGen/SPARC/CodeGen/Sanity.hs
+++ b/compiler/nativeGen/SPARC/CodeGen/Sanity.hs
@@ -12,7 +12,7 @@ import SPARC.Instr
 import SPARC.Ppr () -- For Outputable instances
 import Instruction
 
-import Cmm
+import GHC.Cmm
 
 import Outputable
 
diff --git a/compiler/nativeGen/SPARC/Imm.hs b/compiler/nativeGen/SPARC/Imm.hs
index bd2d4ab131..78b6612bbf 100644
--- a/compiler/nativeGen/SPARC/Imm.hs
+++ b/compiler/nativeGen/SPARC/Imm.hs
@@ -9,8 +9,8 @@ where
 
 import GhcPrelude
 
-import Cmm
-import CLabel
+import GHC.Cmm
+import GHC.Cmm.CLabel
 
 import Outputable
 
diff --git a/compiler/nativeGen/SPARC/Instr.hs b/compiler/nativeGen/SPARC/Instr.hs
index c26cfcc4a0..43edfc61f4 100644
--- a/compiler/nativeGen/SPARC/Instr.hs
+++ b/compiler/nativeGen/SPARC/Instr.hs
@@ -38,11 +38,11 @@ import RegClass
 import Reg
 import Format
 
-import CLabel
+import GHC.Cmm.CLabel
 import GHC.Platform.Regs
-import BlockId
+import GHC.Cmm.BlockId
 import DynFlags
-import Cmm
+import GHC.Cmm
 import FastString
 import Outputable
 import GHC.Platform
diff --git a/compiler/nativeGen/SPARC/Ppr.hs b/compiler/nativeGen/SPARC/Ppr.hs
index 5c7d9fabbd..7e40f0d60b 100644
--- a/compiler/nativeGen/SPARC/Ppr.hs
+++ b/compiler/nativeGen/SPARC/Ppr.hs
@@ -37,12 +37,12 @@ import Reg
 import Format
 import PprBase
 
-import Cmm hiding (topInfoTable)
-import PprCmm() -- For Outputable instances
-import BlockId
-import CLabel
-import Hoopl.Label
-import Hoopl.Collections
+import GHC.Cmm hiding (topInfoTable)
+import GHC.Cmm.Ppr() -- For Outputable instances
+import GHC.Cmm.BlockId
+import GHC.Cmm.CLabel
+import GHC.Cmm.Dataflow.Label
+import GHC.Cmm.Dataflow.Collections
 
 import Unique ( pprUniqueAlways )
 import Outputable
diff --git a/compiler/nativeGen/SPARC/ShortcutJump.hs b/compiler/nativeGen/SPARC/ShortcutJump.hs
index e2a8a71572..02d51de30f 100644
--- a/compiler/nativeGen/SPARC/ShortcutJump.hs
+++ b/compiler/nativeGen/SPARC/ShortcutJump.hs
@@ -13,9 +13,9 @@ import GhcPrelude
 import SPARC.Instr
 import SPARC.Imm
 
-import CLabel
-import BlockId
-import Cmm
+import GHC.Cmm.CLabel
+import GHC.Cmm.BlockId
+import GHC.Cmm
 
 import Panic
 import Outputable
diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs
index 8cea28d920..14e7cb56ce 100644
--- a/compiler/nativeGen/X86/CodeGen.hs
+++ b/compiler/nativeGen/X86/CodeGen.hs
@@ -44,7 +44,7 @@ import X86.RegInfo
 
 import GHC.Platform.Regs
 import CPrim
-import Debug ( DebugBlock(..), UnwindPoint(..), UnwindTable
+import GHC.Cmm.DebugBlock ( DebugBlock(..), UnwindPoint(..), UnwindTable
                , UnwindExpr(UwReg), toUnwindExpr )
 import Instruction
 import PIC
@@ -59,16 +59,16 @@ import GHC.Platform
 
 -- Our intermediate code:
 import BasicTypes
-import BlockId
+import GHC.Cmm.BlockId
 import Module ( primUnitId )
-import CmmUtils
-import CmmSwitch
-import Cmm
-import Hoopl.Block
-import Hoopl.Collections
-import Hoopl.Graph
-import Hoopl.Label
-import CLabel
+import GHC.Cmm.Utils
+import GHC.Cmm.Switch
+import GHC.Cmm
+import GHC.Cmm.Dataflow.Block
+import GHC.Cmm.Dataflow.Collections
+import GHC.Cmm.Dataflow.Graph
+import GHC.Cmm.Dataflow.Label
+import GHC.Cmm.CLabel
 import CoreSyn ( Tickish(..) )
 import SrcLoc ( srcSpanFile, srcSpanStartLine, srcSpanStartCol )
 
@@ -360,7 +360,7 @@ stmtToInstrs bid stmt = do
       CmmBranch id -> return $ genBranch id
 
       --We try to arrange blocks such that the likely branch is the fallthrough
-      --in CmmContFlowOpt. So we can assume the condition is likely false here.
+      --in GHC.Cmm.ContFlowOpt. So we can assume the condition is likely false here.
       CmmCondBranch arg true false _ -> genCondBranch bid true false arg
       CmmSwitch arg ids -> do dflags <- getDynFlags
                               genSwitch dflags arg ids
diff --git a/compiler/nativeGen/X86/Instr.hs b/compiler/nativeGen/X86/Instr.hs
index 80a2c8b28e..4591464671 100644
--- a/compiler/nativeGen/X86/Instr.hs
+++ b/compiler/nativeGen/X86/Instr.hs
@@ -26,22 +26,22 @@ import RegClass
 import Reg
 import TargetReg
 
-import BlockId
-import Hoopl.Collections
-import Hoopl.Label
+import GHC.Cmm.BlockId
+import GHC.Cmm.Dataflow.Collections
+import GHC.Cmm.Dataflow.Label
 import GHC.Platform.Regs
-import Cmm
+import GHC.Cmm
 import FastString
 import Outputable
 import GHC.Platform
 import BasicTypes (Alignment)
-import CLabel
+import GHC.Cmm.CLabel
 import DynFlags
 import UniqSet
 import Unique
 import UniqSupply
-import Debug (UnwindTable)
+import GHC.Cmm.DebugBlock (UnwindTable)
 
 import Control.Monad
 import Data.Maybe (fromMaybe)
 
diff --git a/compiler/nativeGen/X86/Ppr.hs b/compiler/nativeGen/X86/Ppr.hs
index 76a806982e..d857a952ce 100644
--- a/compiler/nativeGen/X86/Ppr.hs
+++ b/compiler/nativeGen/X86/Ppr.hs
@@ -33,13 +33,13 @@ import Reg
 
 import PprBase
 
-import Hoopl.Collections
-import Hoopl.Label
+import GHC.Cmm.Dataflow.Collections
+import GHC.Cmm.Dataflow.Label
 import BasicTypes (Alignment, mkAlignment, alignmentBytes)
 import DynFlags
-import Cmm hiding (topInfoTable)
-import BlockId
-import CLabel
+import GHC.Cmm hiding (topInfoTable)
+import GHC.Cmm.BlockId
+import GHC.Cmm.CLabel
 import Unique ( pprUniqueAlways )
 import GHC.Platform
 import FastString
diff --git a/compiler/nativeGen/X86/Regs.hs b/compiler/nativeGen/X86/Regs.hs
index 24cdff89af..44f92017a1 100644
--- a/compiler/nativeGen/X86/Regs.hs
+++ b/compiler/nativeGen/X86/Regs.hs
@@ -55,8 +55,8 @@ import GHC.Platform.Regs
 import Reg
 import RegClass
 
-import Cmm
-import CLabel ( CLabel )
+import GHC.Cmm
+import GHC.Cmm.CLabel ( CLabel )
 import DynFlags
 import Outputable
 import GHC.Platform
diff --git a/compiler/prelude/PrimOp.hs b/compiler/prelude/PrimOp.hs
index c51304b85d..81d643fc66 100644
--- a/compiler/prelude/PrimOp.hs
+++ b/compiler/prelude/PrimOp.hs
@@ -30,7 +30,7 @@ import GhcPrelude
 import TysPrim
 import TysWiredIn
 
-import CmmType
+import GHC.Cmm.Type
 import Demand
 import Id ( Id, mkVanillaGlobalWithInfo )
 import IdInfo ( vanillaIdInfo, setCafInfo, CafInfo(NoCafRefs) )
diff --git a/compiler/profiling/ProfInit.hs b/compiler/profiling/ProfInit.hs
index 931299a655..f8dc8822ba 100644
--- a/compiler/profiling/ProfInit.hs
+++ b/compiler/profiling/ProfInit.hs
@@ -10,7 +10,7 @@ module ProfInit (profilingInitCode) where
 
 import GhcPrelude
 
-import CLabel
+import GHC.Cmm.CLabel
 import CostCentre
 import DynFlags
 import Outputable
diff --git a/ghc.mk b/ghc.mk
index 83a2853ddb..a7ebdfbdaa 100644
--- a/ghc.mk
+++ b/ghc.mk
@@ -1217,8 +1217,8 @@ sdist-ghc-prep-tree :
 # Add files generated by alex and happy.
 # These rules depend on sdist-ghc-prep-tree.
 
-$(eval $(call sdist-ghc-file,compiler,stage2,cmm,CmmLex,x))
-$(eval $(call sdist-ghc-file,compiler,stage2,cmm,CmmParse,y))
+$(eval $(call sdist-ghc-file,compiler,stage2,GHC,Cmm,Lexer,x))
+$(eval $(call sdist-ghc-file,compiler,stage2,GHC,Cmm,Parser,y))
 $(eval $(call sdist-ghc-file,compiler,stage2,parser,Lexer,x))
 $(eval $(call sdist-ghc-file,compiler,stage2,parser,Parser,y))
 $(eval $(call sdist-ghc-file,utils/hpc,dist-install,,HpcParser,y))
diff --git a/hadrian/src/Rules.hs b/hadrian/src/Rules.hs
index 3f6397fdcc..08f8b571f6 100644
--- a/hadrian/src/Rules.hs
+++ b/hadrian/src/Rules.hs
@@ -54,8 +54,8 @@ toolArgsTarget = do
     need [ root -/- dir -/- "Config.hs" ]
     need [ root -/- dir -/- "Parser.hs" ]
     need [ root -/- dir -/- "Lexer.hs" ]
-    need [ root -/- dir -/- "CmmParse.hs" ]
-    need [ root -/- dir -/- "CmmLex.hs" ]
+    need [ root -/- dir -/- "GHC" -/- "Cmm" -/- "Parser.hs" ]
+    need [ root -/- dir -/- "GHC" -/- "Cmm" -/- "Lexer.hs" ]
 
     -- Find out the arguments that are needed to load a module into the
     -- session
diff --git a/hadrian/src/Rules/SourceDist.hs b/hadrian/src/Rules/SourceDist.hs
index 8eb215d9ea..b6b41f1677 100644
--- a/hadrian/src/Rules/SourceDist.hs
+++ b/hadrian/src/Rules/SourceDist.hs
@@ -146,8 +146,8 @@ prepareTree dest = do
        -- files, which implements exactly the logic that we
        -- have for 'alexHappyFiles' above.
        alexHappyFiles =
-        [ (Stage0, compiler, "CmmParse.y", Just "cmm", "CmmParse.hs")
-        , (Stage0, compiler, "CmmLex.x", Just "cmm", "CmmLex.hs")
+        [ (Stage0, compiler, "Parser.y", Just ("GHC" -/- "Cmm"), "Parser.hs")
+        , (Stage0, compiler, "Lexer.x", Just ("GHC" -/- "Cmm"), "Lexer.hs")
         , (Stage0, compiler, "Parser.y", Just "parser", "Parser.hs")
         , (Stage0, compiler, "Lexer.x", Just "parser", "Lexer.hs")
         , (Stage0, hpcBin, "HpcParser.y", Nothing, "HpcParser.hs")
diff --git a/includes/Cmm.h b/includes/Cmm.h
index 546e81e8f6..4e2d1b1a22 100644
--- a/includes/Cmm.h
+++ b/includes/Cmm.h
@@ -7,7 +7,7 @@
  * making .cmm code a bit less error-prone to write, and a bit easier
  * on the eye for the reader.
  *
- * For the syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
+ * For the syntax of .cmm files, see the parser in ghc/compiler/GHC/Cmm/Parser.y.
  *
  * Accessing fields of structures defined in the RTS header files is
  * done via automatically-generated macros in DerivedConstants.h. For
@@ -469,7 +469,7 @@
 // Version of GC_PRIM for use in low-level Cmm. We can call
 // stg_gc_prim, because it takes one argument and therefore has a
 // platform-independent calling convention (Note [Syntax of .cmm
-// files] in CmmParse.y).
+// files] in GHC.Cmm.Parser).
 #define GC_PRIM_LL(fun) \
     R1 = fun; \
     jump stg_gc_prim [R1];
diff --git a/includes/CodeGen.Platform.hs b/includes/CodeGen.Platform.hs
index b108a61c0a..228e16e55c 100644
--- a/includes/CodeGen.Platform.hs
+++ b/includes/CodeGen.Platform.hs
@@ -1,5 +1,5 @@
-import CmmExpr
+import GHC.Cmm.Expr
 #if !(defined(MACHREGS_i386) || defined(MACHREGS_x86_64) \
     || defined(MACHREGS_sparc) || defined(MACHREGS_powerpc))
 import PlainPanic
 
diff --git a/rts/Apply.cmm b/rts/Apply.cmm
index dcfaa446f2..f23a507402 100644
--- a/rts/Apply.cmm
+++ b/rts/Apply.cmm
@@ -6,7 +6,7 @@
  *
  * This file is written in a subset of C--, extended with various
  * features specific to GHC. It is compiled by GHC directly. For the
- * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
+ * syntax of .cmm files, see the parser in ghc/compiler/GHC/Cmm/Parser.y.
  *
  * -------------------------------------------------------------------------- */
 
diff --git a/rts/Exception.cmm b/rts/Exception.cmm
index 334d0ef823..726489e191 100644
--- a/rts/Exception.cmm
+++ b/rts/Exception.cmm
@@ -6,7 +6,7 @@
  *
  * This file is written in a subset of C--, extended with various
  * features specific to GHC. It is compiled by GHC directly. For the
- * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
+ * syntax of .cmm files, see the parser in ghc/compiler/GHC/Cmm/Parser.y.
  *
  * ---------------------------------------------------------------------------*/
 
diff --git a/rts/HeapStackCheck.cmm b/rts/HeapStackCheck.cmm
index 85fb1cbef6..461cf13df1 100644
--- a/rts/HeapStackCheck.cmm
+++ b/rts/HeapStackCheck.cmm
@@ -6,7 +6,7 @@
  *
  * This file is written in a subset of C--, extended with various
  * features specific to GHC. It is compiled by GHC directly. For the
- * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
+ * syntax of .cmm files, see the parser in ghc/compiler/GHC/Cmm/Parser.y.
  *
 * ---------------------------------------------------------------------------*/
 
diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm
index 0486399b46..7f0b7d5d90 100644
--- a/rts/PrimOps.cmm
+++ b/rts/PrimOps.cmm
@@ -17,7 +17,7 @@
  *
  * This file is written in a subset of C--, extended with various
  * features specific to GHC. It is compiled by GHC directly. For the
- * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
+ * syntax of .cmm files, see the parser in ghc/compiler/GHC/Cmm/Parser.y.
  *
 * ---------------------------------------------------------------------------*/
 
diff --git a/rts/StgMiscClosures.cmm b/rts/StgMiscClosures.cmm
index 03ea91fcb6..42c7d98d58 100644
--- a/rts/StgMiscClosures.cmm
+++ b/rts/StgMiscClosures.cmm
@@ -6,7 +6,7 @@
  *
  * This file is written in a subset of C--, extended with various
  * features specific to GHC. It is compiled by GHC directly. For the
- * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
+ * syntax of .cmm files, see the parser in ghc/compiler/GHC/Cmm/Parser.y.
  *
 * --------------------------------------------------------------------------*/
 
diff --git a/rts/StgStartup.cmm b/rts/StgStartup.cmm
index 571e0637fc..122eace1f3 100644
--- a/rts/StgStartup.cmm
+++ b/rts/StgStartup.cmm
@@ -6,7 +6,7 @@
  *
  * This file is written in a subset of C--, extended with various
  * features specific to GHC. It is compiled by GHC directly. For the
- * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
+ * syntax of .cmm files, see the parser in ghc/compiler/GHC/Cmm/Parser.y.
  *
 * ---------------------------------------------------------------------------*/
 
diff --git a/rts/StgStdThunks.cmm b/rts/StgStdThunks.cmm
index 204cd1a04e..5239496be5 100644
--- a/rts/StgStdThunks.cmm
+++ b/rts/StgStdThunks.cmm
@@ -6,7 +6,7 @@
  *
  * This file is written in a subset of C--, extended with various
  * features specific to GHC. It is compiled by GHC directly. For the
- * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
+ * syntax of .cmm files, see the parser in ghc/compiler/GHC/Cmm/Parser.y.
  *
 * ---------------------------------------------------------------------------*/
 
diff --git a/rts/Updates.cmm b/rts/Updates.cmm
index 9d00fb8efb..d459607752 100644
--- a/rts/Updates.cmm
+++ b/rts/Updates.cmm
@@ -6,7 +6,7 @@
  *
  * This file is written in a subset of C--, extended with various
  * features specific to GHC. It is compiled by GHC directly. For the
- * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
+ * syntax of .cmm files, see the parser in ghc/compiler/GHC/Cmm/Parser.y.
  *
 * ---------------------------------------------------------------------------*/
 
diff --git a/testsuite/tests/cmm/should_run/HooplPostorder.hs b/testsuite/tests/cmm/should_run/HooplPostorder.hs
index 269efa4021..6171c7edf8 100644
--- a/testsuite/tests/cmm/should_run/HooplPostorder.hs
+++ b/testsuite/tests/cmm/should_run/HooplPostorder.hs
@@ -2,10 +2,10 @@
 {-# LANGUAGE KindSignatures #-}
 module Main where
 
-import Hoopl.Block
-import Hoopl.Collections
-import Hoopl.Graph
-import Hoopl.Label
+import GHC.Cmm.Dataflow.Block
+import GHC.Cmm.Dataflow.Collections
+import GHC.Cmm.Dataflow.Graph
+import GHC.Cmm.Dataflow.Label
 
 import Data.Maybe
 
diff --git a/testsuite/tests/codeGen/should_run/T13825-unit.hs b/testsuite/tests/codeGen/should_run/T13825-unit.hs
index 24fc463b91..85777bfe72 100644
--- a/testsuite/tests/codeGen/should_run/T13825-unit.hs
+++ b/testsuite/tests/codeGen/should_run/T13825-unit.hs
@@ -2,7 +2,7 @@ module Main where
 
 import DynFlags
 import GHC.Types.RepType
-import SMRep
+import GHC.Runtime.Layout
 import GHC.StgToCmm.Layout
 import GHC.StgToCmm.Closure
 import GHC
diff --git a/testsuite/tests/regalloc/regalloc_unit_tests.hs b/testsuite/tests/regalloc/regalloc_unit_tests.hs
index 5c6d9da624..cbd0361d15 100644
--- a/testsuite/tests/regalloc/regalloc_unit_tests.hs
+++ b/testsuite/tests/regalloc/regalloc_unit_tests.hs
@@ -26,13 +26,13 @@ import qualified X86.Instr
 import HscMain
 import GHC.StgToCmm.CgUtils
 import AsmCodeGen
-import CmmBuildInfoTables
-import CmmPipeline
-import CmmParse
-import CmmInfo
-import Cmm
+import GHC.Cmm.Info.Build
+import GHC.Cmm.Pipeline
+import GHC.Cmm.Parser
+import GHC.Cmm.Info
+import GHC.Cmm
 import Module
-import Debug
+import GHC.Cmm.DebugBlock
 import GHC
 import GhcMonad
 import UniqFM
-- 
cgit v1.2.1