summaryrefslogtreecommitdiff
path: root/compiler/GHC/CmmToAsm/Reg
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/GHC/CmmToAsm/Reg')
-rw-r--r--compiler/GHC/CmmToAsm/Reg/Graph.hs472
-rw-r--r--compiler/GHC/CmmToAsm/Reg/Graph/Base.hs163
-rw-r--r--compiler/GHC/CmmToAsm/Reg/Graph/Coalesce.hs99
-rw-r--r--compiler/GHC/CmmToAsm/Reg/Graph/Spill.hs382
-rw-r--r--compiler/GHC/CmmToAsm/Reg/Graph/SpillClean.hs616
-rw-r--r--compiler/GHC/CmmToAsm/Reg/Graph/SpillCost.hs317
-rw-r--r--compiler/GHC/CmmToAsm/Reg/Graph/Stats.hs346
-rw-r--r--compiler/GHC/CmmToAsm/Reg/Graph/TrivColorable.hs274
-rw-r--r--compiler/GHC/CmmToAsm/Reg/Graph/X86.hs161
-rw-r--r--compiler/GHC/CmmToAsm/Reg/Linear.hs920
-rw-r--r--compiler/GHC/CmmToAsm/Reg/Linear/Base.hs141
-rw-r--r--compiler/GHC/CmmToAsm/Reg/Linear/FreeRegs.hs89
-rw-r--r--compiler/GHC/CmmToAsm/Reg/Linear/JoinToTargets.hs378
-rw-r--r--compiler/GHC/CmmToAsm/Reg/Linear/PPC.hs60
-rw-r--r--compiler/GHC/CmmToAsm/Reg/Linear/SPARC.hs188
-rw-r--r--compiler/GHC/CmmToAsm/Reg/Linear/StackMap.hs61
-rw-r--r--compiler/GHC/CmmToAsm/Reg/Linear/State.hs184
-rw-r--r--compiler/GHC/CmmToAsm/Reg/Linear/Stats.hs87
-rw-r--r--compiler/GHC/CmmToAsm/Reg/Linear/X86.hs52
-rw-r--r--compiler/GHC/CmmToAsm/Reg/Linear/X86_64.hs53
-rw-r--r--compiler/GHC/CmmToAsm/Reg/Liveness.hs1025
-rw-r--r--compiler/GHC/CmmToAsm/Reg/Target.hs135
22 files changed, 6203 insertions, 0 deletions
diff --git a/compiler/GHC/CmmToAsm/Reg/Graph.hs b/compiler/GHC/CmmToAsm/Reg/Graph.hs
new file mode 100644
index 0000000000..6dfe84cf95
--- /dev/null
+++ b/compiler/GHC/CmmToAsm/Reg/Graph.hs
@@ -0,0 +1,472 @@
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+
+-- | Graph coloring register allocator.
+module GHC.CmmToAsm.Reg.Graph (
+ regAlloc
+) where
+import GhcPrelude
+
+import qualified GraphColor as Color
+import GHC.CmmToAsm.Reg.Liveness
+import GHC.CmmToAsm.Reg.Graph.Spill
+import GHC.CmmToAsm.Reg.Graph.SpillClean
+import GHC.CmmToAsm.Reg.Graph.SpillCost
+import GHC.CmmToAsm.Reg.Graph.Stats
+import GHC.CmmToAsm.Reg.Graph.TrivColorable
+import GHC.CmmToAsm.Instr
+import GHC.CmmToAsm.Reg.Target
+import GHC.Platform.Reg.Class
+import GHC.Platform.Reg
+
+import Bag
+import GHC.Driver.Session
+import Outputable
+import GHC.Platform
+import UniqFM
+import UniqSet
+import UniqSupply
+import Util (seqList)
+import GHC.CmmToAsm.CFG
+
+import Data.Maybe
+import Control.Monad
+
+
+-- | The maximum number of build\/spill cycles we'll allow.
+--
+-- It should only take 3 or 4 cycles for the allocator to converge.
+-- If it takes any longer than this it's probably in an infinite loop,
+-- so it's better just to bail out and report a bug.
+maxSpinCount :: Int
+maxSpinCount = 10
+
+
+-- | The top level of the graph coloring register allocator.
+regAlloc
+ :: (Outputable statics, Outputable instr, Instruction instr)
+ => DynFlags
+ -> UniqFM (UniqSet RealReg) -- ^ registers we can use for allocation
+ -> UniqSet Int -- ^ set of available spill slots.
+ -> Int -- ^ current number of spill slots
+ -> [LiveCmmDecl statics instr] -- ^ code annotated with liveness information.
+ -> Maybe CFG -- ^ CFG of basic blocks if available
+ -> UniqSM ( [NatCmmDecl statics instr]
+ , Maybe Int, [RegAllocStats statics instr] )
+ -- ^ code with registers allocated, additional stacks required
+ -- and stats for each stage of allocation
+
+regAlloc dflags regsFree slotsFree slotsCount code cfg
+ = do
+ -- TODO: the regClass function is currently hard coded to the default
+ -- target architecture. Would prefer to determine this from dflags.
+ -- There are other uses of targetRegClass later in this module.
+ let platform = targetPlatform dflags
+ triv = trivColorable platform
+ (targetVirtualRegSqueeze platform)
+ (targetRealRegSqueeze platform)
+
+ (code_final, debug_codeGraphs, slotsCount', _)
+ <- regAlloc_spin dflags 0
+ triv
+ regsFree slotsFree slotsCount [] code cfg
+
+ let needStack
+ | slotsCount == slotsCount'
+ = Nothing
+ | otherwise
+ = Just slotsCount'
+
+ return ( code_final
+ , needStack
+ , reverse debug_codeGraphs )
+
+
+-- | Perform solver iterations for the graph coloring allocator.
+--
+-- We extract a register conflict graph from the provided cmm code,
+-- and try to colour it. If that works then we use the solution to rewrite
+-- the code with real hregs. If coloring doesn't work we add spill code
+-- and try to colour it again. After `maxSpinCount` iterations we give up.
+--
+regAlloc_spin
+ :: forall instr statics.
+ (Instruction instr,
+ Outputable instr,
+ Outputable statics)
+ => DynFlags
+ -> Int -- ^ Number of solver iterations we've already performed.
+ -> Color.Triv VirtualReg RegClass RealReg
+ -- ^ Function for calculating whether a register is trivially
+ -- colourable.
+ -> UniqFM (UniqSet RealReg) -- ^ Free registers that we can allocate.
+ -> UniqSet Int -- ^ Free stack slots that we can use.
+ -> Int -- ^ Number of spill slots in use
+ -> [RegAllocStats statics instr] -- ^ Current regalloc stats to add to.
+ -> [LiveCmmDecl statics instr] -- ^ Liveness annotated code to allocate.
+ -> Maybe CFG
+ -> UniqSM ( [NatCmmDecl statics instr]
+ , [RegAllocStats statics instr]
+ , Int -- Slots in use
+ , Color.Graph VirtualReg RegClass RealReg)
+
+regAlloc_spin dflags spinCount triv regsFree slotsFree slotsCount debug_codeGraphs code cfg
+ = do
+ let platform = targetPlatform dflags
+
+ -- If any of these dump flags are turned on we want to hang on to
+ -- intermediate structures in the allocator - otherwise tell the
+ -- allocator to ditch them early so we don't end up creating space leaks.
+ let dump = or
+ [ dopt Opt_D_dump_asm_regalloc_stages dflags
+ , dopt Opt_D_dump_asm_stats dflags
+ , dopt Opt_D_dump_asm_conflicts dflags ]
+
+ -- Check that we're not running off down the garden path.
+ when (spinCount > maxSpinCount)
+ $ pprPanic "regAlloc_spin: max build/spill cycle count exceeded."
+ ( text "It looks like the register allocator is stuck in an infinite loop."
+ $$ text "max cycles = " <> int maxSpinCount
+ $$ text "regsFree = " <> (hcat $ punctuate space $ map ppr
+ $ nonDetEltsUniqSet $ unionManyUniqSets
+ $ nonDetEltsUFM regsFree)
+ -- This is non-deterministic but we do not
+ -- currently support deterministic code-generation.
+ -- See Note [Unique Determinism and code generation]
+ $$ text "slotsFree = " <> ppr (sizeUniqSet slotsFree))
+
+ -- Build the register conflict graph from the cmm code.
+ (graph :: Color.Graph VirtualReg RegClass RealReg)
+ <- {-# SCC "BuildGraph" #-} buildGraph code
+
+ -- VERY IMPORTANT:
+ -- We really do want the graph to be fully evaluated _before_ we
+ -- start coloring. If we don't do this now then when the call to
+ -- Color.colorGraph forces bits of it, the heap will be filled with
+ -- half evaluated pieces of graph and zillions of apply thunks.
+ seqGraph graph `seq` return ()
+
+ -- Build a map of the cost of spilling each instruction.
+ -- This is a lazy binding, so the map will only be computed if we
+ -- actually have to spill to the stack.
+ let spillCosts = foldl' plusSpillCostInfo zeroSpillCostInfo
+ $ map (slurpSpillCostInfo platform cfg) code
+
+ -- The function to choose regs to leave uncolored.
+ let spill = chooseSpill spillCosts
+
+ -- Record startup state in our log.
+ let stat1
+ = if spinCount == 0
+ then Just $ RegAllocStatsStart
+ { raLiveCmm = code
+ , raGraph = graph
+ , raSpillCosts = spillCosts }
+ else Nothing
+
+ -- Try and color the graph.
+ let (graph_colored, rsSpill, rmCoalesce)
+ = {-# SCC "ColorGraph" #-}
+ Color.colorGraph
+ (gopt Opt_RegsIterative dflags)
+ spinCount
+ regsFree triv spill graph
+
+ -- Rewrite registers in the code that have been coalesced.
+ let patchF reg
+ | RegVirtual vr <- reg
+ = case lookupUFM rmCoalesce vr of
+ Just vr' -> patchF (RegVirtual vr')
+ Nothing -> reg
+
+ | otherwise
+ = reg
+
+ let (code_coalesced :: [LiveCmmDecl statics instr])
+ = map (patchEraseLive patchF) code
+
+ -- Check whether we've found a coloring.
+ if isEmptyUniqSet rsSpill
+
+ -- Coloring was successful because no registers needed to be spilled.
+ then do
+ -- if -fasm-lint is turned on then validate the graph.
+ -- This checks for bugs in the graph allocator itself.
+ let graph_colored_lint =
+ if gopt Opt_DoAsmLinting dflags
+ then Color.validateGraph (text "")
+ True -- Require all nodes to be colored.
+ graph_colored
+ else graph_colored
+
+ -- Rewrite the code to use real hregs, using the colored graph.
+ let code_patched
+ = map (patchRegsFromGraph platform graph_colored_lint)
+ code_coalesced
+
+ -- Clean out unneeded SPILL/RELOAD meta instructions.
+ -- The spill code generator just spills the entire live range
+ -- of a vreg, but it might not need to be on the stack for
+ -- its entire lifetime.
+ let code_spillclean
+ = map (cleanSpills platform) code_patched
+
+ -- Strip off liveness information from the allocated code.
+ -- Also rewrite SPILL/RELOAD meta instructions into real machine
+ -- instructions along the way
+ let code_final
+ = map (stripLive dflags) code_spillclean
+
+ -- Record what happened in this stage for debugging
+ let stat
+ = RegAllocStatsColored
+ { raCode = code
+ , raGraph = graph
+ , raGraphColored = graph_colored_lint
+ , raCoalesced = rmCoalesce
+ , raCodeCoalesced = code_coalesced
+ , raPatched = code_patched
+ , raSpillClean = code_spillclean
+ , raFinal = code_final
+ , raSRMs = foldl' addSRM (0, 0, 0)
+ $ map countSRMs code_spillclean }
+
+ -- Bundle up all the register allocator statistics.
+ -- .. but make sure to drop them on the floor if they're not
+ -- needed, otherwise we'll get a space leak.
+ let statList =
+ if dump then [stat] ++ maybeToList stat1 ++ debug_codeGraphs
+ else []
+
+ -- Ensure all the statistics are evaluated, to avoid space leaks.
+ seqList statList (return ())
+
+ return ( code_final
+ , statList
+ , slotsCount
+ , graph_colored_lint)
+
+ -- Coloring was unsuccessful. We need to spill some register to the
+ -- stack, make a new graph, and try to color it again.
+ else do
+ -- if -fasm-lint is turned on then validate the graph
+ let graph_colored_lint =
+ if gopt Opt_DoAsmLinting dflags
+ then Color.validateGraph (text "")
+ False -- don't require nodes to be colored
+ graph_colored
+ else graph_colored
+
+ -- Spill uncolored regs to the stack.
+ (code_spilled, slotsFree', slotsCount', spillStats)
+ <- regSpill platform code_coalesced slotsFree slotsCount rsSpill
+
+ -- Recalculate liveness information.
+ -- NOTE: we have to reverse the SCCs here to get them back into
+ -- the reverse-dependency order required by computeLiveness.
+ -- If they're not in the correct order that function will panic.
+ code_relive <- mapM (regLiveness platform . reverseBlocksInTops)
+ code_spilled
+
+ -- Record what happened in this stage for debugging.
+ let stat =
+ RegAllocStatsSpill
+ { raCode = code
+ , raGraph = graph_colored_lint
+ , raCoalesced = rmCoalesce
+ , raSpillStats = spillStats
+ , raSpillCosts = spillCosts
+ , raSpilled = code_spilled }
+
+ -- Bundle up all the register allocator statistics.
+ -- .. but make sure to drop them on the floor if they're not
+ -- needed, otherwise we'll get a space leak.
+ let statList =
+ if dump
+ then [stat] ++ maybeToList stat1 ++ debug_codeGraphs
+ else []
+
+ -- Ensure all the statistics are evaluated, to avoid space leaks.
+ seqList statList (return ())
+
+ regAlloc_spin dflags (spinCount + 1) triv regsFree slotsFree'
+ slotsCount' statList code_relive cfg
+
+
+-- | Build a graph from the liveness and coalesce information in this code.
+buildGraph
+ :: Instruction instr
+ => [LiveCmmDecl statics instr]
+ -> UniqSM (Color.Graph VirtualReg RegClass RealReg)
+
+buildGraph code
+ = do
+ -- Slurp out the conflicts and reg->reg moves from this code.
+ let (conflictList, moveList) =
+ unzip $ map slurpConflicts code
+
+ -- Slurp out the spill/reload coalesces.
+ let moveList2 = map slurpReloadCoalesce code
+
+ -- Add the reg-reg conflicts to the graph.
+ let conflictBag = unionManyBags conflictList
+ let graph_conflict
+ = foldr graphAddConflictSet Color.initGraph conflictBag
+
+ -- Add the coalescences edges to the graph.
+ let moveBag
+ = unionBags (unionManyBags moveList2)
+ (unionManyBags moveList)
+
+ let graph_coalesce
+ = foldr graphAddCoalesce graph_conflict moveBag
+
+ return graph_coalesce
+
+
+-- | Add some conflict edges to the graph.
+-- Conflicts between virtual and real regs are recorded as exclusions.
+graphAddConflictSet
+ :: UniqSet Reg
+ -> Color.Graph VirtualReg RegClass RealReg
+ -> Color.Graph VirtualReg RegClass RealReg
+
+graphAddConflictSet set graph
+ = let virtuals = mkUniqSet
+ [ vr | RegVirtual vr <- nonDetEltsUniqSet set ]
+
+ graph1 = Color.addConflicts virtuals classOfVirtualReg graph
+
+ graph2 = foldr (\(r1, r2) -> Color.addExclusion r1 classOfVirtualReg r2)
+ graph1
+ [ (vr, rr)
+ | RegVirtual vr <- nonDetEltsUniqSet set
+ , RegReal rr <- nonDetEltsUniqSet set]
+ -- See Note [Unique Determinism and code generation]
+
+ in graph2
+
+
+-- | Add some coalesence edges to the graph
+-- Coalesences between virtual and real regs are recorded as preferences.
+graphAddCoalesce
+ :: (Reg, Reg)
+ -> Color.Graph VirtualReg RegClass RealReg
+ -> Color.Graph VirtualReg RegClass RealReg
+
+graphAddCoalesce (r1, r2) graph
+ | RegReal rr <- r1
+ , RegVirtual vr <- r2
+ = Color.addPreference (vr, classOfVirtualReg vr) rr graph
+
+ | RegReal rr <- r2
+ , RegVirtual vr <- r1
+ = Color.addPreference (vr, classOfVirtualReg vr) rr graph
+
+ | RegVirtual vr1 <- r1
+ , RegVirtual vr2 <- r2
+ = Color.addCoalesce
+ (vr1, classOfVirtualReg vr1)
+ (vr2, classOfVirtualReg vr2)
+ graph
+
+ -- We can't coalesce two real regs, but there could well be existing
+ -- hreg,hreg moves in the input code. We'll just ignore these
+ -- for coalescing purposes.
+ | RegReal _ <- r1
+ , RegReal _ <- r2
+ = graph
+
+#if __GLASGOW_HASKELL__ <= 810
+ | otherwise
+ = panic "graphAddCoalesce"
+#endif
+
+
+-- | Patch registers in code using the reg -> reg mapping in this graph.
+patchRegsFromGraph
+ :: (Outputable statics, Outputable instr, Instruction instr)
+ => Platform -> Color.Graph VirtualReg RegClass RealReg
+ -> LiveCmmDecl statics instr -> LiveCmmDecl statics instr
+
+patchRegsFromGraph platform graph code
+ = patchEraseLive patchF code
+ where
+ -- Function to lookup the hardreg for a virtual reg from the graph.
+ patchF reg
+ -- leave real regs alone.
+ | RegReal{} <- reg
+ = reg
+
+ -- this virtual has a regular node in the graph.
+ | RegVirtual vr <- reg
+ , Just node <- Color.lookupNode graph vr
+ = case Color.nodeColor node of
+ Just color -> RegReal color
+ Nothing -> RegVirtual vr
+
+ -- no node in the graph for this virtual, bad news.
+ | otherwise
+ = pprPanic "patchRegsFromGraph: register mapping failed."
+ ( text "There is no node in the graph for register "
+ <> ppr reg
+ $$ ppr code
+ $$ Color.dotGraph
+ (\_ -> text "white")
+ (trivColorable platform
+ (targetVirtualRegSqueeze platform)
+ (targetRealRegSqueeze platform))
+ graph)
+
+
+-----
+-- for when laziness just isn't what you wanted...
+-- We need to deepSeq the whole graph before trying to colour it to avoid
+-- space leaks.
+seqGraph :: Color.Graph VirtualReg RegClass RealReg -> ()
+seqGraph graph = seqNodes (nonDetEltsUFM (Color.graphMap graph))
+ -- See Note [Unique Determinism and code generation]
+
+seqNodes :: [Color.Node VirtualReg RegClass RealReg] -> ()
+seqNodes ns
+ = case ns of
+ [] -> ()
+ (n : ns) -> seqNode n `seq` seqNodes ns
+
+seqNode :: Color.Node VirtualReg RegClass RealReg -> ()
+seqNode node
+ = seqVirtualReg (Color.nodeId node)
+ `seq` seqRegClass (Color.nodeClass node)
+ `seq` seqMaybeRealReg (Color.nodeColor node)
+ `seq` (seqVirtualRegList (nonDetEltsUniqSet (Color.nodeConflicts node)))
+ `seq` (seqRealRegList (nonDetEltsUniqSet (Color.nodeExclusions node)))
+ `seq` (seqRealRegList (Color.nodePreference node))
+ `seq` (seqVirtualRegList (nonDetEltsUniqSet (Color.nodeCoalesce node)))
+ -- It's OK to use nonDetEltsUniqSet for seq
+
+seqVirtualReg :: VirtualReg -> ()
+seqVirtualReg reg = reg `seq` ()
+
+seqRealReg :: RealReg -> ()
+seqRealReg reg = reg `seq` ()
+
+seqRegClass :: RegClass -> ()
+seqRegClass c = c `seq` ()
+
+seqMaybeRealReg :: Maybe RealReg -> ()
+seqMaybeRealReg mr
+ = case mr of
+ Nothing -> ()
+ Just r -> seqRealReg r
+
+seqVirtualRegList :: [VirtualReg] -> ()
+seqVirtualRegList rs
+ = case rs of
+ [] -> ()
+ (r : rs) -> seqVirtualReg r `seq` seqVirtualRegList rs
+
+seqRealRegList :: [RealReg] -> ()
+seqRealRegList rs
+ = case rs of
+ [] -> ()
+ (r : rs) -> seqRealReg r `seq` seqRealRegList rs
diff --git a/compiler/GHC/CmmToAsm/Reg/Graph/Base.hs b/compiler/GHC/CmmToAsm/Reg/Graph/Base.hs
new file mode 100644
index 0000000000..95fa174415
--- /dev/null
+++ b/compiler/GHC/CmmToAsm/Reg/Graph/Base.hs
@@ -0,0 +1,163 @@
+
+-- | Utils for calculating the general worst, bound, squeese and free functions.
+--
+-- as per: "A Generalized Algorithm for Graph-Coloring Register Allocation"
+-- Michael Smith, Norman Ramsey, Glenn Holloway.
+-- PLDI 2004
+--
+-- These general versions are not used in GHC proper because they are too slow.
+-- Instead, hand written optimised versions are provided for each architecture
+-- in MachRegs*.hs
+--
+-- This code is here because we can test the architecture specific code against
+-- it.
+--
+module GHC.CmmToAsm.Reg.Graph.Base (
+ RegClass(..),
+ Reg(..),
+ RegSub(..),
+
+ worst,
+ bound,
+ squeese
+) where
+
+import GhcPrelude
+
+import UniqSet
+import UniqFM
+import Unique
+import MonadUtils (concatMapM)
+
+
+-- Some basic register classes.
+-- These aren't necessarily in 1-to-1 correspondence with the allocatable
+-- RegClasses in MachRegs.hs
+data RegClass
+ -- general purpose regs
+ = ClassG32 -- 32 bit GPRs
+ | ClassG16 -- 16 bit GPRs
+ | ClassG8 -- 8 bit GPRs
+
+ -- floating point regs
+ | ClassF64 -- 64 bit FPRs
+ deriving (Show, Eq, Enum)
+
+
+-- | A register of some class
+data Reg
+ -- a register of some class
+ = Reg RegClass Int
+
+ -- a sub-component of one of the other regs
+ | RegSub RegSub Reg
+ deriving (Show, Eq)
+
+
+-- | so we can put regs in UniqSets
+instance Uniquable Reg where
+ getUnique (Reg c i)
+ = mkRegSingleUnique
+ $ fromEnum c * 1000 + i
+
+ getUnique (RegSub s (Reg c i))
+ = mkRegSubUnique
+ $ fromEnum s * 10000 + fromEnum c * 1000 + i
+
+ getUnique (RegSub _ (RegSub _ _))
+ = error "RegArchBase.getUnique: can't have a sub-reg of a sub-reg."
+
+
+-- | A subcomponent of another register
+data RegSub
+ = SubL16 -- lowest 16 bits
+ | SubL8 -- lowest 8 bits
+ | SubL8H -- second lowest 8 bits
+ deriving (Show, Enum, Ord, Eq)
+
+
+-- | Worst case displacement
+--
+-- a node N of classN has some number of neighbors,
+-- all of which are from classC.
+--
+-- (worst neighbors classN classC) is the maximum number of potential
+-- colors for N that can be lost by coloring its neighbors.
+--
+-- This should be hand coded/cached for each particular architecture,
+-- because the compute time is very long..
+worst :: (RegClass -> UniqSet Reg)
+ -> (Reg -> UniqSet Reg)
+ -> Int -> RegClass -> RegClass -> Int
+
+worst regsOfClass regAlias neighbors classN classC
+ = let regAliasS regs = unionManyUniqSets
+ $ map regAlias
+ $ nonDetEltsUniqSet regs
+ -- This is non-deterministic but we do not
+ -- currently support deterministic code-generation.
+ -- See Note [Unique Determinism and code generation]
+
+ -- all the regs in classes N, C
+ regsN = regsOfClass classN
+ regsC = regsOfClass classC
+
+ -- all the possible subsets of c which have size < m
+ regsS = filter (\s -> sizeUniqSet s >= 1
+ && sizeUniqSet s <= neighbors)
+ $ powersetLS regsC
+
+ -- for each of the subsets of C, the regs which conflict
+ -- with possibilities for N
+ regsS_conflict
+ = map (\s -> intersectUniqSets regsN (regAliasS s)) regsS
+
+ in maximum $ map sizeUniqSet $ regsS_conflict
+
+
+-- | For a node N of classN and neighbors of classesC
+-- (bound classN classesC) is the maximum number of potential
+-- colors for N that can be lost by coloring its neighbors.
+bound :: (RegClass -> UniqSet Reg)
+ -> (Reg -> UniqSet Reg)
+ -> RegClass -> [RegClass] -> Int
+
+bound regsOfClass regAlias classN classesC
+ = let regAliasS regs = unionManyUniqSets
+ $ map regAlias
+ $ nonDetEltsUFM regs
+ -- See Note [Unique Determinism and code generation]
+
+ regsC_aliases
+ = unionManyUniqSets
+ $ map (regAliasS . getUniqSet . regsOfClass) classesC
+
+ overlap = intersectUniqSets (regsOfClass classN) regsC_aliases
+
+ in sizeUniqSet overlap
+
+
+-- | The total squeese on a particular node with a list of neighbors.
+--
+-- A version of this should be constructed for each particular architecture,
+-- possibly including uses of bound, so that aliased registers don't get
+-- counted twice, as per the paper.
+squeese :: (RegClass -> UniqSet Reg)
+ -> (Reg -> UniqSet Reg)
+ -> RegClass -> [(Int, RegClass)] -> Int
+
+squeese regsOfClass regAlias classN countCs
+ = sum
+ $ map (\(i, classC) -> worst regsOfClass regAlias i classN classC)
+ $ countCs
+
+
+-- | powerset (for lists)
+powersetL :: [a] -> [[a]]
+powersetL = concatMapM (\x -> [[],[x]])
+
+
+-- | powersetLS (list of sets)
+powersetLS :: Uniquable a => UniqSet a -> [UniqSet a]
+powersetLS s = map mkUniqSet $ powersetL $ nonDetEltsUniqSet s
+ -- See Note [Unique Determinism and code generation]
diff --git a/compiler/GHC/CmmToAsm/Reg/Graph/Coalesce.hs b/compiler/GHC/CmmToAsm/Reg/Graph/Coalesce.hs
new file mode 100644
index 0000000000..d223137dd0
--- /dev/null
+++ b/compiler/GHC/CmmToAsm/Reg/Graph/Coalesce.hs
@@ -0,0 +1,99 @@
+-- | Register coalescing.
+module GHC.CmmToAsm.Reg.Graph.Coalesce (
+ regCoalesce,
+ slurpJoinMovs
+) where
+import GhcPrelude
+
+import GHC.CmmToAsm.Reg.Liveness
+import GHC.CmmToAsm.Instr
+import GHC.Platform.Reg
+
+import GHC.Cmm
+import Bag
+import Digraph
+import UniqFM
+import UniqSet
+import UniqSupply
+
+
+-- | Do register coalescing on this top level thing
+--
+-- For Reg -> Reg moves, if the first reg dies at the same time the
+-- second reg is born then the mov only serves to join live ranges.
+-- The two regs can be renamed to be the same and the move instruction
+-- safely erased.
+regCoalesce
+ :: Instruction instr
+ => [LiveCmmDecl statics instr]
+ -> UniqSM [LiveCmmDecl statics instr]
+
+regCoalesce code
+ = do
+ let joins = foldl' unionBags emptyBag
+ $ map slurpJoinMovs code
+
+ let alloc = foldl' buildAlloc emptyUFM
+ $ bagToList joins
+
+ let patched = map (patchEraseLive (sinkReg alloc)) code
+
+ return patched
+
+
+-- | Add a v1 = v2 register renaming to the map.
+-- The register with the lowest lexical name is set as the
+-- canonical version.
+buildAlloc :: UniqFM Reg -> (Reg, Reg) -> UniqFM Reg
+buildAlloc fm (r1, r2)
+ = let rmin = min r1 r2
+ rmax = max r1 r2
+ in addToUFM fm rmax rmin
+
+
+-- | Determine the canonical name for a register by following
+-- v1 = v2 renamings in this map.
+sinkReg :: UniqFM Reg -> Reg -> Reg
+sinkReg fm r
+ = case lookupUFM fm r of
+ Nothing -> r
+ Just r' -> sinkReg fm r'
+
+
+-- | Slurp out mov instructions that only serve to join live ranges.
+--
+-- During a mov, if the source reg dies and the destination reg is
+-- born then we can rename the two regs to the same thing and
+-- eliminate the move.
+slurpJoinMovs
+ :: Instruction instr
+ => LiveCmmDecl statics instr
+ -> Bag (Reg, Reg)
+
+slurpJoinMovs live
+ = slurpCmm emptyBag live
+ where
+ slurpCmm rs CmmData{}
+ = rs
+
+ slurpCmm rs (CmmProc _ _ _ sccs)
+ = foldl' slurpBlock rs (flattenSCCs sccs)
+
+ slurpBlock rs (BasicBlock _ instrs)
+ = foldl' slurpLI rs instrs
+
+ slurpLI rs (LiveInstr _ Nothing) = rs
+ slurpLI rs (LiveInstr instr (Just live))
+ | Just (r1, r2) <- takeRegRegMoveInstr instr
+ , elementOfUniqSet r1 $ liveDieRead live
+ , elementOfUniqSet r2 $ liveBorn live
+
+ -- only coalesce movs between two virtuals for now,
+ -- else we end up with allocatable regs in the live
+ -- regs list..
+ , isVirtualReg r1 && isVirtualReg r2
+ = consBag (r1, r2) rs
+
+ | otherwise
+ = rs
+
diff --git a/compiler/GHC/CmmToAsm/Reg/Graph/Spill.hs b/compiler/GHC/CmmToAsm/Reg/Graph/Spill.hs
new file mode 100644
index 0000000000..a0e11433f7
--- /dev/null
+++ b/compiler/GHC/CmmToAsm/Reg/Graph/Spill.hs
@@ -0,0 +1,382 @@
+
+-- | When there aren't enough registers to hold all the vregs we have to spill
+-- some of those vregs to slots on the stack. This module is used to modify the
+-- code to use those slots.
+module GHC.CmmToAsm.Reg.Graph.Spill (
+ regSpill,
+ SpillStats(..),
+ accSpillSL
+) where
+import GhcPrelude
+
+import GHC.CmmToAsm.Reg.Liveness
+import GHC.CmmToAsm.Instr
+import GHC.Platform.Reg
+import GHC.Cmm hiding (RegSet)
+import GHC.Cmm.BlockId
+import GHC.Cmm.Dataflow.Collections
+
+import MonadUtils
+import State
+import Unique
+import UniqFM
+import UniqSet
+import UniqSupply
+import Outputable
+import GHC.Platform
+
+import Data.List
+import Data.Maybe
+import Data.IntSet (IntSet)
+import qualified Data.IntSet as IntSet
+
+
+-- | Spill all these virtual regs to stack slots.
+--
+-- Bumps the number of required stack slots if required.
+--
+--
+-- TODO: See if we can split some of the live ranges instead of just globally
+-- spilling the virtual reg. This might make the spill cleaner's job easier.
+--
+-- TODO: On CISCy x86 and x86_64 we don't necessarily have to add a mov instruction
+-- when making spills. If an instr is using a spilled virtual we may be able to
+-- address the spill slot directly.
+--
+regSpill
+ :: Instruction instr
+ => Platform
+ -> [LiveCmmDecl statics instr] -- ^ the code
+ -> UniqSet Int -- ^ available stack slots
+ -> Int -- ^ current number of spill slots.
+ -> UniqSet VirtualReg -- ^ the regs to spill
+ -> UniqSM
+ ([LiveCmmDecl statics instr]
+ -- code with SPILL and RELOAD meta instructions added.
+ , UniqSet Int -- left over slots
+ , Int -- slot count in use now.
+ , SpillStats ) -- stats about what happened during spilling
+
+regSpill platform code slotsFree slotCount regs
+
+ -- Not enough slots to spill these regs.
+ | sizeUniqSet slotsFree < sizeUniqSet regs
+ = -- pprTrace "Bumping slot count:" (ppr slotCount <> text " -> " <> ppr (slotCount+512)) $
+ let slotsFree' = (addListToUniqSet slotsFree [slotCount+1 .. slotCount+512])
+ in regSpill platform code slotsFree' (slotCount+512) regs
+
+ | otherwise
+ = do
+ -- Allocate a slot for each of the spilled regs.
+ let slots = take (sizeUniqSet regs) $ nonDetEltsUniqSet slotsFree
+ let regSlotMap = listToUFM
+ $ zip (nonDetEltsUniqSet regs) slots
+ -- This is non-deterministic but we do not
+ -- currently support deterministic code-generation.
+ -- See Note [Unique Determinism and code generation]
+
+ -- Grab the unique supply from the monad.
+ us <- getUniqueSupplyM
+
+ -- Run the spiller on all the blocks.
+ let (code', state') =
+ runState (mapM (regSpill_top platform regSlotMap) code)
+ (initSpillS us)
+
+ return ( code'
+ , minusUniqSet slotsFree (mkUniqSet slots)
+ , slotCount
+ , makeSpillStats state')
+
+
+-- | Spill some registers to stack slots in a top-level thing.
+regSpill_top
+ :: Instruction instr
+ => Platform
+ -> RegMap Int
+ -- ^ map of vregs to slots they're being spilled to.
+ -> LiveCmmDecl statics instr
+ -- ^ the top level thing.
+ -> SpillM (LiveCmmDecl statics instr)
+
+regSpill_top platform regSlotMap cmm
+ = case cmm of
+ CmmData{}
+ -> return cmm
+
+ CmmProc info label live sccs
+ | LiveInfo static firstId liveVRegsOnEntry liveSlotsOnEntry <- info
+ -> do
+ -- The liveVRegsOnEntry contains the set of vregs that are live
+ -- on entry to each basic block. If we spill one of those vregs
+ -- we remove it from that set and add the corresponding slot
+ -- number to the liveSlotsOnEntry set. The spill cleaner needs
+ -- this information to erase unneeded spill and reload instructions
+ -- after we've done a successful allocation.
+ let liveSlotsOnEntry' :: BlockMap IntSet
+ liveSlotsOnEntry'
+ = mapFoldlWithKey patchLiveSlot
+ liveSlotsOnEntry liveVRegsOnEntry
+
+ let info'
+ = LiveInfo static firstId
+ liveVRegsOnEntry
+ liveSlotsOnEntry'
+
+ -- Apply the spiller to all the basic blocks in the CmmProc.
+ sccs' <- mapM (mapSCCM (regSpill_block platform regSlotMap)) sccs
+
+ return $ CmmProc info' label live sccs'
+
+ where -- Given a BlockId and the set of registers live in it,
+ -- if registers in this block are being spilled to stack slots,
+ -- then record the fact that these slots are now live in those blocks
+ -- in the given slotmap.
+ patchLiveSlot
+ :: BlockMap IntSet -> BlockId -> RegSet -> BlockMap IntSet
+
+ patchLiveSlot slotMap blockId regsLive
+ = let
+ -- Slots that are already recorded as being live.
+ curSlotsLive = fromMaybe IntSet.empty
+ $ mapLookup blockId slotMap
+
+ moreSlotsLive = IntSet.fromList
+ $ catMaybes
+ $ map (lookupUFM regSlotMap)
+ $ nonDetEltsUniqSet regsLive
+ -- See Note [Unique Determinism and code generation]
+
+ slotMap'
+ = mapInsert blockId (IntSet.union curSlotsLive moreSlotsLive)
+ slotMap
+
+ in slotMap'
+
+
+-- | Spill some registers to stack slots in a basic block.
+regSpill_block
+ :: Instruction instr
+ => Platform
+ -> UniqFM Int -- ^ map of vregs to slots they're being spilled to.
+ -> LiveBasicBlock instr
+ -> SpillM (LiveBasicBlock instr)
+
+regSpill_block platform regSlotMap (BasicBlock i instrs)
+ = do instrss' <- mapM (regSpill_instr platform regSlotMap) instrs
+ return $ BasicBlock i (concat instrss')
+
+
+-- | Spill some registers to stack slots in a single instruction.
+-- If the instruction uses registers that need to be spilled, then it is
+-- prefixed (or postfixed) with the appropriate RELOAD or SPILL meta
+-- instructions.
+regSpill_instr
+ :: Instruction instr
+ => Platform
+ -> UniqFM Int -- ^ map of vregs to slots they're being spilled to.
+ -> LiveInstr instr
+ -> SpillM [LiveInstr instr]
+
+regSpill_instr _ _ li@(LiveInstr _ Nothing)
+ = do return [li]
+
+regSpill_instr platform regSlotMap
+ (LiveInstr instr (Just _))
+ = do
+ -- work out which regs are read and written in this instr
+ let RU rlRead rlWritten = regUsageOfInstr platform instr
+
+ -- sometimes a register is listed as being read more than once,
+ -- nub this so we don't end up inserting two lots of spill code.
+ let rsRead_ = nub rlRead
+ let rsWritten_ = nub rlWritten
+
+ -- if a reg is modified, it appears in both lists, want to undo this..
+ let rsRead = rsRead_ \\ rsWritten_
+ let rsWritten = rsWritten_ \\ rsRead_
+ let rsModify = intersect rsRead_ rsWritten_
+
+ -- work out if any of the regs being used are currently being spilled.
+ let rsSpillRead = filter (\r -> elemUFM r regSlotMap) rsRead
+ let rsSpillWritten = filter (\r -> elemUFM r regSlotMap) rsWritten
+ let rsSpillModify = filter (\r -> elemUFM r regSlotMap) rsModify
+
+ -- rewrite the instr and work out spill code.
+ (instr1, prepost1) <- mapAccumLM (spillRead regSlotMap) instr rsSpillRead
+ (instr2, prepost2) <- mapAccumLM (spillWrite regSlotMap) instr1 rsSpillWritten
+ (instr3, prepost3) <- mapAccumLM (spillModify regSlotMap) instr2 rsSpillModify
+
+ let (mPrefixes, mPostfixes) = unzip (prepost1 ++ prepost2 ++ prepost3)
+ let prefixes = concat mPrefixes
+ let postfixes = concat mPostfixes
+
+ -- final code
+ let instrs' = prefixes
+ ++ [LiveInstr instr3 Nothing]
+ ++ postfixes
+
+ return $ instrs'
+
+
+-- | Add a RELOAD meta instruction to load a value for an instruction that
+-- reads from a vreg that is being spilled.
+spillRead
+ :: Instruction instr
+ => UniqFM Int
+ -> instr
+ -> Reg
+ -> SpillM (instr, ([LiveInstr instr'], [LiveInstr instr']))
+
+spillRead regSlotMap instr reg
+ | Just slot <- lookupUFM regSlotMap reg
+ = do (instr', nReg) <- patchInstr reg instr
+
+ modify $ \s -> s
+ { stateSpillSL = addToUFM_C accSpillSL (stateSpillSL s) reg (reg, 0, 1) }
+
+ return ( instr'
+ , ( [LiveInstr (RELOAD slot nReg) Nothing]
+ , []) )
+
+ | otherwise = panic "RegSpill.spillRead: no slot defined for spilled reg"
+
+
+-- | Add a SPILL meta instruction to store a value for an instruction that
+-- writes to a vreg that is being spilled.
+spillWrite
+ :: Instruction instr
+ => UniqFM Int
+ -> instr
+ -> Reg
+ -> SpillM (instr, ([LiveInstr instr'], [LiveInstr instr']))
+
+spillWrite regSlotMap instr reg
+ | Just slot <- lookupUFM regSlotMap reg
+ = do (instr', nReg) <- patchInstr reg instr
+
+ modify $ \s -> s
+ { stateSpillSL = addToUFM_C accSpillSL (stateSpillSL s) reg (reg, 1, 0) }
+
+ return ( instr'
+ , ( []
+ , [LiveInstr (SPILL nReg slot) Nothing]))
+
+ | otherwise = panic "RegSpill.spillWrite: no slot defined for spilled reg"
+
+
+-- | Add both RELOAD and SPILL meta instructions for an instruction that
+-- both reads and writes to a vreg that is being spilled.
+spillModify
+ :: Instruction instr
+ => UniqFM Int
+ -> instr
+ -> Reg
+ -> SpillM (instr, ([LiveInstr instr'], [LiveInstr instr']))
+
+spillModify regSlotMap instr reg
+ | Just slot <- lookupUFM regSlotMap reg
+ = do (instr', nReg) <- patchInstr reg instr
+
+ modify $ \s -> s
+ { stateSpillSL = addToUFM_C accSpillSL (stateSpillSL s) reg (reg, 1, 1) }
+
+ return ( instr'
+ , ( [LiveInstr (RELOAD slot nReg) Nothing]
+ , [LiveInstr (SPILL nReg slot) Nothing]))
+
+ | otherwise = panic "RegSpill.spillModify: no slot defined for spilled reg"
+
+
+-- | Rewrite uses of this virtual reg in an instr to use a different
+-- virtual reg.
+patchInstr
+ :: Instruction instr
+ => Reg -> instr -> SpillM (instr, Reg)
+
+patchInstr reg instr
+ = do nUnique <- newUnique
+
+ -- The register we're rewriting is supposed to be virtual.
+ -- If it's not then something has gone horribly wrong.
+ let nReg
+ = case reg of
+ RegVirtual vr
+ -> RegVirtual (renameVirtualReg nUnique vr)
+
+ RegReal{}
+ -> panic "RegAlloc.Graph.Spill.patchIntr: not patching real reg"
+
+ let instr' = patchReg1 reg nReg instr
+ return (instr', nReg)
+
+
+patchReg1
+ :: Instruction instr
+ => Reg -> Reg -> instr -> instr
+
+patchReg1 old new instr
+ = let patchF r
+ | r == old = new
+ | otherwise = r
+ in patchRegsOfInstr instr patchF
+
+
+-- Spiller monad --------------------------------------------------------------
+-- | State monad for the spill code generator.
+type SpillM a
+ = State SpillS a
+
+-- | Spill code generator state.
+data SpillS
+ = SpillS
+ { -- | Unique supply for generating fresh vregs.
+ stateUS :: UniqSupply
+
+ -- | Spilled vreg vs the number of times it was loaded, stored.
+ , stateSpillSL :: UniqFM (Reg, Int, Int) }
+
+
+-- | Create a new spiller state.
+initSpillS :: UniqSupply -> SpillS
+initSpillS uniqueSupply
+ = SpillS
+ { stateUS = uniqueSupply
+ , stateSpillSL = emptyUFM }
+
+
+-- | Allocate a new unique in the spiller monad.
+newUnique :: SpillM Unique
+newUnique
+ = do us <- gets stateUS
+ case takeUniqFromSupply us of
+ (uniq, us')
+ -> do modify $ \s -> s { stateUS = us' }
+ return uniq
+
+
+-- | Add a spill/reload count to a stats record for a register.
+accSpillSL :: (Reg, Int, Int) -> (Reg, Int, Int) -> (Reg, Int, Int)
+accSpillSL (r1, s1, l1) (_, s2, l2)
+ = (r1, s1 + s2, l1 + l2)
+
+
+-- Spiller stats --------------------------------------------------------------
+-- | Spiller statistics.
+-- Tells us what registers were spilled.
+data SpillStats
+ = SpillStats
+ { spillStoreLoad :: UniqFM (Reg, Int, Int) }
+
+
+-- | Extract spiller statistics from the spiller state.
+makeSpillStats :: SpillS -> SpillStats
+makeSpillStats s
+ = SpillStats
+ { spillStoreLoad = stateSpillSL s }
+
+
+instance Outputable SpillStats where
+ ppr stats
+ = pprUFM (spillStoreLoad stats)
+ (vcat . map (\(r, s, l) -> ppr r <+> int s <+> int l))
diff --git a/compiler/GHC/CmmToAsm/Reg/Graph/SpillClean.hs b/compiler/GHC/CmmToAsm/Reg/Graph/SpillClean.hs
new file mode 100644
index 0000000000..6d14c7194b
--- /dev/null
+++ b/compiler/GHC/CmmToAsm/Reg/Graph/SpillClean.hs
@@ -0,0 +1,616 @@
+{-# LANGUAGE CPP #-}
+
+-- | Clean out unneeded spill\/reload instructions.
+--
+-- Handling of join points
+-- ~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- B1: B2:
+-- ... ...
+-- RELOAD SLOT(0), %r1 RELOAD SLOT(0), %r1
+-- ... A ... ... B ...
+-- jump B3 jump B3
+--
+-- B3: ... C ...
+-- RELOAD SLOT(0), %r1
+-- ...
+--
+-- The Plan
+-- ~~~~~~~~
+-- As long as %r1 hasn't been written to in A, B or C then we don't need
+-- the reload in B3.
+--
+-- What we really care about here is that on the entry to B3, %r1 will
+-- always have the same value that is in SLOT(0) (ie, %r1 is _valid_)
+--
+-- This also works if the reloads in B1\/B2 were spills instead, because
+-- spilling %r1 to a slot makes that slot have the same value as %r1.
+--
+module GHC.CmmToAsm.Reg.Graph.SpillClean (
+ cleanSpills
+) where
+import GhcPrelude
+
+import GHC.CmmToAsm.Reg.Liveness
+import GHC.CmmToAsm.Instr
+import GHC.Platform.Reg
+
+import GHC.Cmm.BlockId
+import GHC.Cmm
+import UniqSet
+import UniqFM
+import Unique
+import State
+import Outputable
+import GHC.Platform
+import GHC.Cmm.Dataflow.Collections
+
+import Data.List
+import Data.Maybe
+import Data.IntSet (IntSet)
+import qualified Data.IntSet as IntSet
+
+
+-- | The identification number of a spill slot.
+-- A value is stored in a spill slot when we don't have a free
+-- register to hold it.
+type Slot = Int
+
+
+-- | Clean out unneeded spill\/reloads from this top level thing.
+cleanSpills
+ :: Instruction instr
+ => Platform
+ -> LiveCmmDecl statics instr
+ -> LiveCmmDecl statics instr
+
+cleanSpills platform cmm
+ = evalState (cleanSpin platform 0 cmm) initCleanS
+
+
+-- | Do one pass of cleaning.
+cleanSpin
+ :: Instruction instr
+ => Platform
+ -> Int -- ^ Iteration number for the cleaner.
+ -> LiveCmmDecl statics instr -- ^ Liveness annotated code to clean.
+ -> CleanM (LiveCmmDecl statics instr)
+
+cleanSpin platform spinCount code
+ = do
+ -- Initialise count of cleaned spill and reload instructions.
+ modify $ \s -> s
+ { sCleanedSpillsAcc = 0
+ , sCleanedReloadsAcc = 0
+ , sReloadedBy = emptyUFM }
+
+ code_forward <- mapBlockTopM (cleanBlockForward platform) code
+ code_backward <- cleanTopBackward code_forward
+
+ -- During the cleaning of each block we collected information about
+ -- what regs were valid across each jump. Based on this, work out
+ -- whether it will be safe to erase reloads after join points for
+ -- the next pass.
+ collateJoinPoints
+
+ -- Remember how many spill and reload instructions we cleaned in this pass.
+ spills <- gets sCleanedSpillsAcc
+ reloads <- gets sCleanedReloadsAcc
+ modify $ \s -> s
+ { sCleanedCount = (spills, reloads) : sCleanedCount s }
+
+ -- If nothing was cleaned in this pass or the last one
+ -- then we're done and it's time to bail out.
+ cleanedCount <- gets sCleanedCount
+ if take 2 cleanedCount == [(0, 0), (0, 0)]
+ then return code
+
+ -- otherwise go around again
+ else cleanSpin platform (spinCount + 1) code_backward
+
+
+-------------------------------------------------------------------------------
+-- | Clean out unneeded reload instructions,
+-- while walking forward over the code.
+cleanBlockForward
+ :: Instruction instr
+ => Platform
+ -> LiveBasicBlock instr
+ -> CleanM (LiveBasicBlock instr)
+
+cleanBlockForward platform (BasicBlock blockId instrs)
+ = do
+ -- See if we have a valid association for the entry to this block.
+ jumpValid <- gets sJumpValid
+ let assoc = case lookupUFM jumpValid blockId of
+ Just assoc -> assoc
+ Nothing -> emptyAssoc
+
+ instrs_reload <- cleanForward platform blockId assoc [] instrs
+ return $ BasicBlock blockId instrs_reload
+
+
+
+-- | Clean out unneeded reload instructions.
+--
+-- Walking forwards across the code
+-- On a reload, if we know a reg already has the same value as a slot
+-- then we don't need to do the reload.
+--
+cleanForward
+ :: Instruction instr
+ => Platform
+ -> BlockId -- ^ the block that we're currently in
+ -> Assoc Store -- ^ two store locations are associated if
+ -- they have the same value
+ -> [LiveInstr instr] -- ^ acc
+ -> [LiveInstr instr] -- ^ instrs to clean (in backwards order)
+ -> CleanM [LiveInstr instr] -- ^ cleaned instrs (in forward order)
+
+cleanForward _ _ _ acc []
+ = return acc
+
+-- Rewrite live range joins via spill slots to just a spill and a reg-reg move
+-- hopefully the spill will also be cleaned in the next pass
+cleanForward platform blockId assoc acc (li1 : li2 : instrs)
+
+ | LiveInstr (SPILL reg1 slot1) _ <- li1
+ , LiveInstr (RELOAD slot2 reg2) _ <- li2
+ , slot1 == slot2
+ = do
+ modify $ \s -> s { sCleanedReloadsAcc = sCleanedReloadsAcc s + 1 }
+ cleanForward platform blockId assoc acc
+ $ li1 : LiveInstr (mkRegRegMoveInstr platform reg1 reg2) Nothing
+ : instrs
+
+cleanForward platform blockId assoc acc (li@(LiveInstr i1 _) : instrs)
+ | Just (r1, r2) <- takeRegRegMoveInstr i1
+ = if r1 == r2
+ -- Erase any left over nop reg reg moves while we're here
+ -- this will also catch any nop moves that the previous case
+ -- happens to add.
+ then cleanForward platform blockId assoc acc instrs
+
+ -- If r1 has the same value as some slots and we copy r1 to r2,
+ -- then r2 is now associated with those slots instead
+ else do let assoc' = addAssoc (SReg r1) (SReg r2)
+ $ delAssoc (SReg r2)
+ $ assoc
+
+ cleanForward platform blockId assoc' (li : acc) instrs
+
+
+cleanForward platform blockId assoc acc (li : instrs)
+
+ -- Update association due to the spill.
+ | LiveInstr (SPILL reg slot) _ <- li
+ = let assoc' = addAssoc (SReg reg) (SSlot slot)
+ $ delAssoc (SSlot slot)
+ $ assoc
+ in cleanForward platform blockId assoc' (li : acc) instrs
+
+ -- Clean a reload instr.
+ | LiveInstr (RELOAD{}) _ <- li
+ = do (assoc', mli) <- cleanReload platform blockId assoc li
+ case mli of
+ Nothing -> cleanForward platform blockId assoc' acc
+ instrs
+
+ Just li' -> cleanForward platform blockId assoc' (li' : acc)
+ instrs
+
+ -- Remember the association over a jump.
+ | LiveInstr instr _ <- li
+ , targets <- jumpDestsOfInstr instr
+ , not $ null targets
+ = do mapM_ (accJumpValid assoc) targets
+ cleanForward platform blockId assoc (li : acc) instrs
+
+ -- Writing to a reg changes its value.
+ | LiveInstr instr _ <- li
+ , RU _ written <- regUsageOfInstr platform instr
+ = let assoc' = foldr delAssoc assoc (map SReg $ nub written)
+ in cleanForward platform blockId assoc' (li : acc) instrs
+
+
+
+-- | Try and rewrite a reload instruction to something more pleasing
+cleanReload
+ :: Instruction instr
+ => Platform
+ -> BlockId
+ -> Assoc Store
+ -> LiveInstr instr
+ -> CleanM (Assoc Store, Maybe (LiveInstr instr))
+
+cleanReload platform blockId assoc li@(LiveInstr (RELOAD slot reg) _)
+
+ -- If the reg we're reloading already has the same value as the slot
+ -- then we can erase the instruction outright.
+ | elemAssoc (SSlot slot) (SReg reg) assoc
+ = do modify $ \s -> s { sCleanedReloadsAcc = sCleanedReloadsAcc s + 1 }
+ return (assoc, Nothing)
+
+ -- If we can find another reg with the same value as this slot then
+ -- do a move instead of a reload.
+ | Just reg2 <- findRegOfSlot assoc slot
+ = do modify $ \s -> s { sCleanedReloadsAcc = sCleanedReloadsAcc s + 1 }
+
+ let assoc' = addAssoc (SReg reg) (SReg reg2)
+ $ delAssoc (SReg reg)
+ $ assoc
+
+ return ( assoc'
+ , Just $ LiveInstr (mkRegRegMoveInstr platform reg2 reg) Nothing)
+
+ -- Gotta keep this instr.
+ | otherwise
+ = do -- Update the association.
+ let assoc'
+ = addAssoc (SReg reg) (SSlot slot)
+ -- doing the reload makes reg and slot the same value
+ $ delAssoc (SReg reg)
+ -- reg value changes on reload
+ $ assoc
+
+ -- Remember that this block reloads from this slot.
+ accBlockReloadsSlot blockId slot
+
+ return (assoc', Just li)
+
+cleanReload _ _ _ _
+ = panic "RegSpillClean.cleanReload: unhandled instr"
+
+
+-------------------------------------------------------------------------------
+-- | Clean out unneeded spill instructions,
+-- while walking backwards over the code.
+--
+-- If there were no reloads from a slot between a spill and the last one
+-- then the slot was never read and we don't need the spill.
+--
+-- SPILL r0 -> s1
+-- RELOAD s1 -> r2
+-- SPILL r3 -> s1 <--- don't need this spill
+-- SPILL r4 -> s1
+-- RELOAD s1 -> r5
+--
+-- Maintain a set of
+-- "slots which were spilled to but not reloaded from yet"
+--
+-- Walking backwards across the code:
+-- a) On a reload from a slot, remove it from the set.
+--
+-- b) On a spill from a slot
+-- If the slot is in set then we can erase the spill,
+-- because it won't be reloaded from until after the next spill.
+--
+-- otherwise
+-- keep the spill and add the slot to the set
+--
+-- TODO: This is mostly inter-block
+-- we should really be updating the noReloads set as we cross jumps also.
+--
+-- TODO: generate noReloads from liveSlotsOnEntry
+--
+cleanTopBackward
+ :: Instruction instr
+ => LiveCmmDecl statics instr
+ -> CleanM (LiveCmmDecl statics instr)
+
+cleanTopBackward cmm
+ = case cmm of
+ CmmData{}
+ -> return cmm
+
+ CmmProc info label live sccs
+ | LiveInfo _ _ _ liveSlotsOnEntry <- info
+ -> do sccs' <- mapM (mapSCCM (cleanBlockBackward liveSlotsOnEntry)) sccs
+ return $ CmmProc info label live sccs'
+
+
+cleanBlockBackward
+ :: Instruction instr
+ => BlockMap IntSet
+ -> LiveBasicBlock instr
+ -> CleanM (LiveBasicBlock instr)
+
+cleanBlockBackward liveSlotsOnEntry (BasicBlock blockId instrs)
+ = do instrs_spill <- cleanBackward liveSlotsOnEntry emptyUniqSet [] instrs
+ return $ BasicBlock blockId instrs_spill
+
+
+
+cleanBackward
+ :: Instruction instr
+ => BlockMap IntSet -- ^ Slots live on entry to each block
+ -> UniqSet Int -- ^ Slots that have been spilled, but not reloaded from
+ -> [LiveInstr instr] -- ^ acc
+ -> [LiveInstr instr] -- ^ Instrs to clean (in forwards order)
+ -> CleanM [LiveInstr instr] -- ^ Cleaned instrs (in backwards order)
+
+cleanBackward liveSlotsOnEntry noReloads acc lis
+ = do reloadedBy <- gets sReloadedBy
+ cleanBackward' liveSlotsOnEntry reloadedBy noReloads acc lis
+
+
+cleanBackward'
+ :: Instruction instr
+ => BlockMap IntSet
+ -> UniqFM [BlockId]
+ -> UniqSet Int
+ -> [LiveInstr instr]
+ -> [LiveInstr instr]
+ -> State CleanS [LiveInstr instr]
+
+cleanBackward' _ _ _ acc []
+ = return acc
+
+cleanBackward' liveSlotsOnEntry reloadedBy noReloads acc (li : instrs)
+
+ -- If nothing ever reloads from this slot then we don't need the spill.
+ | LiveInstr (SPILL _ slot) _ <- li
+ , Nothing <- lookupUFM reloadedBy (SSlot slot)
+ = do modify $ \s -> s { sCleanedSpillsAcc = sCleanedSpillsAcc s + 1 }
+ cleanBackward liveSlotsOnEntry noReloads acc instrs
+
+ | LiveInstr (SPILL _ slot) _ <- li
+ = if elementOfUniqSet slot noReloads
+
+ -- We can erase this spill because the slot won't be read until
+ -- after the next one
+ then do
+ modify $ \s -> s { sCleanedSpillsAcc = sCleanedSpillsAcc s + 1 }
+ cleanBackward liveSlotsOnEntry noReloads acc instrs
+
+ else do
+ -- This slot is being spilled to, but we haven't seen any reloads yet.
+ let noReloads' = addOneToUniqSet noReloads slot
+ cleanBackward liveSlotsOnEntry noReloads' (li : acc) instrs
+
+ -- if we reload from a slot then it's no longer unused
+ | LiveInstr (RELOAD slot _) _ <- li
+ , noReloads' <- delOneFromUniqSet noReloads slot
+ = cleanBackward liveSlotsOnEntry noReloads' (li : acc) instrs
+
+ -- If a slot is live in a jump target then assume it's reloaded there.
+ --
+ -- TODO: A real dataflow analysis would do a better job here.
+ -- If the target block _ever_ used the slot then we assume
+ -- it always does, but if those reloads are cleaned the slot
+ -- liveness map doesn't get updated.
+ | LiveInstr instr _ <- li
+ , targets <- jumpDestsOfInstr instr
+ = do
+ let slotsReloadedByTargets
+ = IntSet.unions
+ $ catMaybes
+ $ map (flip mapLookup liveSlotsOnEntry)
+ $ targets
+
+ let noReloads'
+ = foldl' delOneFromUniqSet noReloads
+ $ IntSet.toList slotsReloadedByTargets
+
+ cleanBackward liveSlotsOnEntry noReloads' (li : acc) instrs
+
+#if __GLASGOW_HASKELL__ <= 810
+ -- some other instruction
+ | otherwise
+ = cleanBackward liveSlotsOnEntry noReloads (li : acc) instrs
+#endif
+
+
+-- | Combine the associations from all the inward control flow edges.
+--
+collateJoinPoints :: CleanM ()
+collateJoinPoints
+ = modify $ \s -> s
+ { sJumpValid = mapUFM intersects (sJumpValidAcc s)
+ , sJumpValidAcc = emptyUFM }
+
+intersects :: [Assoc Store] -> Assoc Store
+intersects [] = emptyAssoc
+intersects assocs = foldl1' intersectAssoc assocs
+
+
+-- | See if we have a reg with the same value as this slot in the association table.
+findRegOfSlot :: Assoc Store -> Int -> Maybe Reg
+findRegOfSlot assoc slot
+ | close <- closeAssoc (SSlot slot) assoc
+ , Just (SReg reg) <- find isStoreReg $ nonDetEltsUniqSet close
+ -- See Note [Unique Determinism and code generation]
+ = Just reg
+
+ | otherwise
+ = Nothing
+
+
+-------------------------------------------------------------------------------
+-- | Cleaner monad.
+type CleanM
+ = State CleanS
+
+-- | Cleaner state.
+data CleanS
+ = CleanS
+ { -- | Regs which are valid at the start of each block.
+ sJumpValid :: UniqFM (Assoc Store)
+
+ -- | Collecting up what regs were valid across each jump.
+ -- in the next pass we can collate these and write the results
+ -- to sJumpValid.
+ , sJumpValidAcc :: UniqFM [Assoc Store]
+
+ -- | Map of (slot -> blocks which reload from this slot)
+ -- used to decide whether a slot spilled to will ever be
+ -- reloaded from on this path.
+ , sReloadedBy :: UniqFM [BlockId]
+
+ -- | Spills and reloads cleaned each pass (latest at front)
+ , sCleanedCount :: [(Int, Int)]
+
+ -- | Spills and reloads that have been cleaned in this pass so far.
+ , sCleanedSpillsAcc :: Int
+ , sCleanedReloadsAcc :: Int }
+
+
+-- | Construct the initial cleaner state.
+initCleanS :: CleanS
+initCleanS
+ = CleanS
+ { sJumpValid = emptyUFM
+ , sJumpValidAcc = emptyUFM
+
+ , sReloadedBy = emptyUFM
+
+ , sCleanedCount = []
+
+ , sCleanedSpillsAcc = 0
+ , sCleanedReloadsAcc = 0 }
+
+
+-- | Remember the associations before a jump.
+accJumpValid :: Assoc Store -> BlockId -> CleanM ()
+accJumpValid assocs target
+ = modify $ \s -> s {
+ sJumpValidAcc = addToUFM_C (++)
+ (sJumpValidAcc s)
+ target
+ [assocs] }
+
+
+accBlockReloadsSlot :: BlockId -> Slot -> CleanM ()
+accBlockReloadsSlot blockId slot
+ = modify $ \s -> s {
+ sReloadedBy = addToUFM_C (++)
+ (sReloadedBy s)
+ (SSlot slot)
+ [blockId] }
+
+
+-------------------------------------------------------------------------------
+-- A store location can be a stack slot or a register
+data Store
+ = SSlot Int
+ | SReg Reg
+
+
+-- | Check if this is a reg store.
+isStoreReg :: Store -> Bool
+isStoreReg ss
+ = case ss of
+ SSlot _ -> False
+ SReg _ -> True
+
+
+-- Spill cleaning is only done once all virtuals have been allocated to realRegs
+instance Uniquable Store where
+ getUnique (SReg r)
+ | RegReal (RealRegSingle i) <- r
+ = mkRegSingleUnique i
+
+ | RegReal (RealRegPair r1 r2) <- r
+ = mkRegPairUnique (r1 * 65535 + r2)
+
+ | otherwise
+ = error $ "RegSpillClean.getUnique: found virtual reg during spill clean,"
+ ++ "only real regs expected."
+
+ getUnique (SSlot i) = mkRegSubUnique i -- [SLPJ] I hope "SubUnique" is ok
+
+
+instance Outputable Store where
+ ppr (SSlot i) = text "slot" <> int i
+ ppr (SReg r) = ppr r
+
+
+-------------------------------------------------------------------------------
+-- Association graphs.
+-- In the spill cleaner, two store locations are associated if they are known
+-- to hold the same value.
+--
+type Assoc a = UniqFM (UniqSet a)
+
+-- | An empty association
+emptyAssoc :: Assoc a
+emptyAssoc = emptyUFM
+
+
+-- | Add an association between these two things.
+addAssoc :: Uniquable a
+ => a -> a -> Assoc a -> Assoc a
+
+addAssoc a b m
+ = let m1 = addToUFM_C unionUniqSets m a (unitUniqSet b)
+ m2 = addToUFM_C unionUniqSets m1 b (unitUniqSet a)
+ in m2
+
+
+-- | Delete all associations to a node.
+delAssoc :: (Uniquable a)
+ => a -> Assoc a -> Assoc a
+
+delAssoc a m
+ | Just aSet <- lookupUFM m a
+ , m1 <- delFromUFM m a
+ = nonDetFoldUniqSet (\x m -> delAssoc1 x a m) m1 aSet
+ -- It's OK to use nonDetFoldUniqSet here because deletion is commutative
+
+ | otherwise = m
+
+
+-- | Delete a single association edge (a -> b).
+delAssoc1 :: Uniquable a
+ => a -> a -> Assoc a -> Assoc a
+
+delAssoc1 a b m
+ | Just aSet <- lookupUFM m a
+ = addToUFM m a (delOneFromUniqSet aSet b)
+
+ | otherwise = m
+
+
+-- | Check if these two things are associated.
+elemAssoc :: (Uniquable a)
+ => a -> a -> Assoc a -> Bool
+
+elemAssoc a b m
+ = elementOfUniqSet b (closeAssoc a m)
+
+
+-- | Find the refl. trans. closure of the association from this point.
+closeAssoc :: (Uniquable a)
+ => a -> Assoc a -> UniqSet a
+
+closeAssoc a assoc
+ = closeAssoc' assoc emptyUniqSet (unitUniqSet a)
+ where
+ closeAssoc' assoc visited toVisit
+ = case nonDetEltsUniqSet toVisit of
+ -- See Note [Unique Determinism and code generation]
+
+ -- nothing else to visit, we're done
+ [] -> visited
+
+ (x:_)
+ -- we've already seen this node
+ | elementOfUniqSet x visited
+ -> closeAssoc' assoc visited (delOneFromUniqSet toVisit x)
+
+ -- haven't seen this node before,
+ -- remember to visit all its neighbors
+ | otherwise
+ -> let neighbors
+ = case lookupUFM assoc x of
+ Nothing -> emptyUniqSet
+ Just set -> set
+
+ in closeAssoc' assoc
+ (addOneToUniqSet visited x)
+ (unionUniqSets toVisit neighbors)
+
+-- | Intersect two associations.
+intersectAssoc :: Assoc a -> Assoc a -> Assoc a
+intersectAssoc a b
+ = intersectUFM_C (intersectUniqSets) a b
diff --git a/compiler/GHC/CmmToAsm/Reg/Graph/SpillCost.hs b/compiler/GHC/CmmToAsm/Reg/Graph/SpillCost.hs
new file mode 100644
index 0000000000..e3e456e98d
--- /dev/null
+++ b/compiler/GHC/CmmToAsm/Reg/Graph/SpillCost.hs
@@ -0,0 +1,317 @@
+{-# LANGUAGE ScopedTypeVariables, GADTs, BangPatterns #-}
+module GHC.CmmToAsm.Reg.Graph.SpillCost (
+ SpillCostRecord,
+ plusSpillCostRecord,
+ pprSpillCostRecord,
+
+ SpillCostInfo,
+ zeroSpillCostInfo,
+ plusSpillCostInfo,
+
+ slurpSpillCostInfo,
+ chooseSpill,
+
+ lifeMapFromSpillCostInfo
+) where
+import GhcPrelude
+
+import GHC.CmmToAsm.Reg.Liveness
+import GHC.CmmToAsm.Instr
+import GHC.Platform.Reg.Class
+import GHC.Platform.Reg
+
+import GraphBase
+
+import GHC.Cmm.Dataflow.Collections (mapLookup)
+import GHC.Cmm.Dataflow.Label
+import GHC.Cmm
+import UniqFM
+import UniqSet
+import Digraph (flattenSCCs)
+import Outputable
+import GHC.Platform
+import State
+import GHC.CmmToAsm.CFG
+
+import Data.List (nub, minimumBy)
+import Data.Maybe
+import Control.Monad (join)
+
+
+-- | Records the expected cost to spill some register.
+type SpillCostRecord
+ = ( VirtualReg -- register name
+ , Int -- number of writes to this reg
+ , Int -- number of reads from this reg
+ , Int) -- number of instrs this reg was live on entry to
+
+
+-- | Map of `SpillCostRecord`
+type SpillCostInfo
+ = UniqFM SpillCostRecord
+
+type SpillCostState = State (UniqFM SpillCostRecord) ()
+
+-- | An empty map of spill costs.
+zeroSpillCostInfo :: SpillCostInfo
+zeroSpillCostInfo = emptyUFM
+
+
+-- | Add two spill cost infos.
+plusSpillCostInfo :: SpillCostInfo -> SpillCostInfo -> SpillCostInfo
+plusSpillCostInfo sc1 sc2
+ = plusUFM_C plusSpillCostRecord sc1 sc2
+
+
+-- | Add two spill cost records.
+plusSpillCostRecord :: SpillCostRecord -> SpillCostRecord -> SpillCostRecord
+plusSpillCostRecord (r1, a1, b1, c1) (r2, a2, b2, c2)
+ | r1 == r2 = (r1, a1 + a2, b1 + b2, c1 + c2)
+ | otherwise = error "RegSpillCost.plusRegInt: regs don't match"
+
+
+-- | Slurp out information used for determining spill costs.
+--
+-- For each vreg, the number of times it was written to, read from,
+-- and the number of instructions it was live on entry to (lifetime)
+--
+slurpSpillCostInfo :: forall instr statics. (Outputable instr, Instruction instr)
+ => Platform
+ -> Maybe CFG
+ -> LiveCmmDecl statics instr
+ -> SpillCostInfo
+
+slurpSpillCostInfo platform cfg cmm
+ = execState (countCmm cmm) zeroSpillCostInfo
+ where
+ countCmm CmmData{} = return ()
+ countCmm (CmmProc info _ _ sccs)
+ = mapM_ (countBlock info freqMap)
+ $ flattenSCCs sccs
+ where
+ LiveInfo _ entries _ _ = info
+ freqMap = (fst . mkGlobalWeights (head entries)) <$> cfg
+
+ -- Lookup the regs that are live on entry to this block in
+ -- the info table from the CmmProc.
+ countBlock info freqMap (BasicBlock blockId instrs)
+ | LiveInfo _ _ blockLive _ <- info
+ , Just rsLiveEntry <- mapLookup blockId blockLive
+ , rsLiveEntry_virt <- takeVirtuals rsLiveEntry
+ = countLIs (ceiling $ blockFreq freqMap blockId) rsLiveEntry_virt instrs
+
+ | otherwise
+ = error "RegAlloc.SpillCost.slurpSpillCostInfo: bad block"
+
+
+ countLIs :: Int -> UniqSet VirtualReg -> [LiveInstr instr] -> SpillCostState
+ countLIs _ _ []
+ = return ()
+
+ -- Skip over comment and delta pseudo instrs.
+ countLIs scale rsLive (LiveInstr instr Nothing : lis)
+ | isMetaInstr instr
+ = countLIs scale rsLive lis
+
+ | otherwise
+ = pprPanic "RegSpillCost.slurpSpillCostInfo"
+ $ text "no liveness information on instruction " <> ppr instr
+
+ countLIs scale rsLiveEntry (LiveInstr instr (Just live) : lis)
+ = do
+ -- Increment the lifetime counts for regs live on entry to this instr.
+ mapM_ incLifetime $ nonDetEltsUniqSet rsLiveEntry
+ -- This is non-deterministic but we do not
+ -- currently support deterministic code-generation.
+ -- See Note [Unique Determinism and code generation]
+
+ -- Increment counts for what regs were read/written from.
+ let (RU read written) = regUsageOfInstr platform instr
+ mapM_ (incUses scale) $ catMaybes $ map takeVirtualReg $ nub read
+ mapM_ (incDefs scale) $ catMaybes $ map takeVirtualReg $ nub written
+
+ -- Compute liveness for entry to next instruction.
+ let liveDieRead_virt = takeVirtuals (liveDieRead live)
+ let liveDieWrite_virt = takeVirtuals (liveDieWrite live)
+ let liveBorn_virt = takeVirtuals (liveBorn live)
+
+ let rsLiveAcross
+ = rsLiveEntry `minusUniqSet` liveDieRead_virt
+
+ let rsLiveNext
+ = (rsLiveAcross `unionUniqSets` liveBorn_virt)
+ `minusUniqSet` liveDieWrite_virt
+
+ countLIs scale rsLiveNext lis
+
+ incDefs count reg = modify $ \s -> addToUFM_C plusSpillCostRecord s reg (reg, count, 0, 0)
+ incUses count reg = modify $ \s -> addToUFM_C plusSpillCostRecord s reg (reg, 0, count, 0)
+ incLifetime reg = modify $ \s -> addToUFM_C plusSpillCostRecord s reg (reg, 0, 0, 1)
+
+ blockFreq :: Maybe (LabelMap Double) -> Label -> Double
+ blockFreq freqs bid
+ | Just freq <- join (mapLookup bid <$> freqs)
+ = max 1.0 (10000 * freq)
+ | otherwise
+ = 1.0 -- Only if no cfg given
+
+-- | Take all the virtual registers from this set.
+takeVirtuals :: UniqSet Reg -> UniqSet VirtualReg
+takeVirtuals set = mkUniqSet
+ [ vr | RegVirtual vr <- nonDetEltsUniqSet set ]
+ -- See Note [Unique Determinism and code generation]
+
+
+-- | Choose a node to spill from this graph
+chooseSpill
+ :: SpillCostInfo
+ -> Graph VirtualReg RegClass RealReg
+ -> VirtualReg
+
+chooseSpill info graph
+ = let cost = spillCost_length info graph
+ node = minimumBy (\n1 n2 -> compare (cost $ nodeId n1) (cost $ nodeId n2))
+ $ nonDetEltsUFM $ graphMap graph
+ -- See Note [Unique Determinism and code generation]
+
+ in nodeId node
+
+
+-------------------------------------------------------------------------------
+-- | Chaitins spill cost function is:
+--
+-- cost = sum loadCost * freq (u) + sum storeCost * freq (d)
+-- u <- uses (v) d <- defs (v)
+--
+-- There are no loops in our code at the moment, so we can set the freq's to 1.
+--
+-- If we don't have live range splitting then Chaitins function performs badly
+-- if we have lots of nested live ranges and very few registers.
+--
+-- v1 v2 v3
+-- def v1 .
+-- use v1 .
+-- def v2 . .
+-- def v3 . . .
+-- use v1 . . .
+-- use v3 . . .
+-- use v2 . .
+-- use v1 .
+--
+-- defs uses degree cost
+-- v1: 1 3 3 1.5
+-- v2: 1 2 3 1.0
+-- v3: 1 1 3 0.666
+--
+-- v3 has the lowest cost, but if we only have 2 hardregs and we insert
+-- spill code for v3 then this isn't going to improve the colorability of
+-- the graph.
+--
+-- When compiling SHA1, which has very long basic blocks and some vregs
+-- with very long live ranges the allocator seems to try and spill from
+-- the inside out and eventually run out of stack slots.
+--
+-- Without live range splitting, it's better to spill from the outside
+-- in so set the cost of very long live ranges to zero
+--
+
+-- spillCost_chaitin
+-- :: SpillCostInfo
+-- -> Graph VirtualReg RegClass RealReg
+-- -> VirtualReg
+-- -> Float
+
+-- spillCost_chaitin info graph reg
+-- -- Spilling a live range that only lives for 1 instruction
+-- -- isn't going to help us at all - and we definitely want to avoid
+-- -- trying to re-spill previously inserted spill code.
+-- | lifetime <= 1 = 1/0
+
+-- -- It's unlikely that we'll find a reg for a live range this long
+-- -- better to spill it straight up and not risk trying to keep it around
+-- -- and have to go through the build/color cycle again.
+
+-- -- To facilitate this we scale down the spill cost of long ranges.
+-- -- This makes sure long ranges are still spilled first.
+-- -- But this way spill cost remains relevant for long live
+-- -- ranges.
+-- | lifetime >= 128
+-- = (spillCost / conflicts) / 10.0
+
+
+-- -- Otherwise revert to chaitin's regular cost function.
+-- | otherwise = (spillCost / conflicts)
+-- where
+-- !spillCost = fromIntegral (uses + defs) :: Float
+-- conflicts = fromIntegral (nodeDegree classOfVirtualReg graph reg)
+-- (_, defs, uses, lifetime)
+-- = fromMaybe (reg, 0, 0, 0) $ lookupUFM info reg
+
+
+-- Just spill the longest live range.
+spillCost_length
+ :: SpillCostInfo
+ -> Graph VirtualReg RegClass RealReg
+ -> VirtualReg
+ -> Float
+
+spillCost_length info _ reg
+ | lifetime <= 1 = 1/0
+ | otherwise = 1 / fromIntegral lifetime
+ where (_, _, _, lifetime)
+ = fromMaybe (reg, 0, 0, 0)
+ $ lookupUFM info reg
+
+
+-- | Extract a map of register lifetimes from a `SpillCostInfo`.
+lifeMapFromSpillCostInfo :: SpillCostInfo -> UniqFM (VirtualReg, Int)
+lifeMapFromSpillCostInfo info
+ = listToUFM
+ $ map (\(r, _, _, life) -> (r, (r, life)))
+ $ nonDetEltsUFM info
+ -- See Note [Unique Determinism and code generation]
+
+
+-- | Determine the degree (number of neighbors) of this node which
+-- have the same class.
+nodeDegree
+ :: (VirtualReg -> RegClass)
+ -> Graph VirtualReg RegClass RealReg
+ -> VirtualReg
+ -> Int
+
+nodeDegree classOfVirtualReg graph reg
+ | Just node <- lookupUFM (graphMap graph) reg
+
+ , virtConflicts
+ <- length
+ $ filter (\r -> classOfVirtualReg r == classOfVirtualReg reg)
+ $ nonDetEltsUniqSet
+ -- See Note [Unique Determinism and code generation]
+ $ nodeConflicts node
+
+ = virtConflicts + sizeUniqSet (nodeExclusions node)
+
+ | otherwise
+ = 0
+
+
+-- | Show a spill cost record, including the degree from the graph
+-- and final calculated spill cost.
+pprSpillCostRecord
+ :: (VirtualReg -> RegClass)
+ -> (Reg -> SDoc)
+ -> Graph VirtualReg RegClass RealReg
+ -> SpillCostRecord
+ -> SDoc
+
+pprSpillCostRecord regClass pprReg graph (reg, uses, defs, life)
+ = hsep
+ [ pprReg (RegVirtual reg)
+ , ppr uses
+ , ppr defs
+ , ppr life
+ , ppr $ nodeDegree regClass graph reg
+ , text $ show $ (fromIntegral (uses + defs)
+ / fromIntegral (nodeDegree regClass graph reg) :: Float) ]
+
diff --git a/compiler/GHC/CmmToAsm/Reg/Graph/Stats.hs b/compiler/GHC/CmmToAsm/Reg/Graph/Stats.hs
new file mode 100644
index 0000000000..05d2e814af
--- /dev/null
+++ b/compiler/GHC/CmmToAsm/Reg/Graph/Stats.hs
@@ -0,0 +1,346 @@
+{-# LANGUAGE BangPatterns, CPP #-}
+
+{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-}
+
+-- | Carries interesting info for debugging / profiling of the
+-- graph coloring register allocator.
+module GHC.CmmToAsm.Reg.Graph.Stats (
+ RegAllocStats (..),
+
+ pprStats,
+ pprStatsSpills,
+ pprStatsLifetimes,
+ pprStatsConflict,
+ pprStatsLifeConflict,
+
+ countSRMs, addSRM
+) where
+
+import GhcPrelude
+
+import qualified GraphColor as Color
+import GHC.CmmToAsm.Reg.Liveness
+import GHC.CmmToAsm.Reg.Graph.Spill
+import GHC.CmmToAsm.Reg.Graph.SpillCost
+import GHC.CmmToAsm.Reg.Graph.TrivColorable
+import GHC.CmmToAsm.Instr
+import GHC.Platform.Reg.Class
+import GHC.Platform.Reg
+import GHC.CmmToAsm.Reg.Target
+
+import Outputable
+import UniqFM
+import UniqSet
+import State
+
+-- | Holds interesting statistics from the register allocator.
+--   One record is produced per stage of the allocation pipeline,
+--   so a single compilation yields a list of these.
+data RegAllocStats statics instr
+
+ -- Information about the initial conflict graph.
+ = RegAllocStatsStart
+ { -- | Initial code, with liveness.
+ raLiveCmm :: [LiveCmmDecl statics instr]
+
+ -- | The initial, uncolored graph.
+ , raGraph :: Color.Graph VirtualReg RegClass RealReg
+
+ -- | Information to help choose which regs to spill.
+ , raSpillCosts :: SpillCostInfo }
+
+
+ -- Information about an intermediate graph.
+ -- This is one that we couldn't color, so we had to insert spill
+ -- code into the instruction stream.
+ | RegAllocStatsSpill
+ { -- | Code we tried to allocate registers for.
+ raCode :: [LiveCmmDecl statics instr]
+
+ -- | Partially colored graph.
+ , raGraph :: Color.Graph VirtualReg RegClass RealReg
+
+ -- | The regs that were coalesced.
+ , raCoalesced :: UniqFM VirtualReg
+
+ -- | Spiller stats.
+ , raSpillStats :: SpillStats
+
+ -- | Number of instructions each reg lives for.
+ , raSpillCosts :: SpillCostInfo
+
+ -- | Code with spill instructions added.
+ , raSpilled :: [LiveCmmDecl statics instr] }
+
+
+ -- A successful coloring.
+ | RegAllocStatsColored
+ { -- | Code we tried to allocate registers for.
+ raCode :: [LiveCmmDecl statics instr]
+
+ -- | Uncolored graph.
+ , raGraph :: Color.Graph VirtualReg RegClass RealReg
+
+ -- | Coalesced and colored graph.
+ , raGraphColored :: Color.Graph VirtualReg RegClass RealReg
+
+ -- | Regs that were coalesced.
+ , raCoalesced :: UniqFM VirtualReg
+
+ -- | Code with coalescings applied.
+ , raCodeCoalesced :: [LiveCmmDecl statics instr]
+
+ -- | Code with vregs replaced by hregs.
+ , raPatched :: [LiveCmmDecl statics instr]
+
+ -- | Code with unneeded spill\/reloads cleaned out.
+ , raSpillClean :: [LiveCmmDecl statics instr]
+
+ -- | Final code.
+ , raFinal :: [NatCmmDecl statics instr]
+
+ -- | Spill\/reload\/reg-reg moves present in this code.
+ , raSRMs :: (Int, Int, Int) }
+
+
+-- | Render the statistics for one allocator stage, one section per
+--   constructor. Conflict graphs are emitted in GraphViz dot format.
+instance (Outputable statics, Outputable instr)
+ => Outputable (RegAllocStats statics instr) where
+
+ ppr (s@RegAllocStatsStart{}) = sdocWithPlatform $ \platform ->
+ text "# Start"
+ $$ text "# Native code with liveness information."
+ $$ ppr (raLiveCmm s)
+ $$ text ""
+ $$ text "# Initial register conflict graph."
+ $$ Color.dotGraph
+ (targetRegDotColor platform)
+ (trivColorable platform
+ (targetVirtualRegSqueeze platform)
+ (targetRealRegSqueeze platform))
+ (raGraph s)
+
+
+ ppr (s@RegAllocStatsSpill{}) =
+ text "# Spill"
+
+ $$ text "# Code with liveness information."
+ $$ ppr (raCode s)
+ $$ text ""
+
+ -- Only print the coalescing section when something was coalesced.
+ $$ (if (not $ isNullUFM $ raCoalesced s)
+ then text "# Registers coalesced."
+ $$ pprUFMWithKeys (raCoalesced s) (vcat . map ppr)
+ $$ text ""
+ else empty)
+
+ $$ text "# Spills inserted."
+ $$ ppr (raSpillStats s)
+ $$ text ""
+
+ $$ text "# Code with spills inserted."
+ $$ ppr (raSpilled s)
+
+
+ ppr (s@RegAllocStatsColored { raSRMs = (spills, reloads, moves) })
+ = sdocWithPlatform $ \platform ->
+ text "# Colored"
+
+ $$ text "# Code with liveness information."
+ $$ ppr (raCode s)
+ $$ text ""
+
+ $$ text "# Register conflict graph (colored)."
+ $$ Color.dotGraph
+ (targetRegDotColor platform)
+ (trivColorable platform
+ (targetVirtualRegSqueeze platform)
+ (targetRealRegSqueeze platform))
+ (raGraphColored s)
+ $$ text ""
+
+ $$ (if (not $ isNullUFM $ raCoalesced s)
+ then text "# Registers coalesced."
+ $$ pprUFMWithKeys (raCoalesced s) (vcat . map ppr)
+ $$ text ""
+ else empty)
+
+ $$ text "# Native code after coalescings applied."
+ $$ ppr (raCodeCoalesced s)
+ $$ text ""
+
+ $$ text "# Native code after register allocation."
+ $$ ppr (raPatched s)
+ $$ text ""
+
+ $$ text "# Clean out unneeded spill/reloads."
+ $$ ppr (raSpillClean s)
+ $$ text ""
+
+ $$ text "# Final code, after rewriting spill/rewrite pseudo instrs."
+ $$ ppr (raFinal s)
+ $$ text ""
+ $$ text "# Score:"
+ $$ (text "# spills inserted: " <> int spills)
+ $$ (text "# reloads inserted: " <> int reloads)
+ $$ (text "# reg-reg moves remaining: " <> int moves)
+ $$ text ""
+
+
+-- | Do all the different analysis on this list of RegAllocStats,
+--   concatenating the spill, lifetime, conflict and scatter tables.
+pprStats
+ :: [RegAllocStats statics instr]
+ -> Color.Graph VirtualReg RegClass RealReg
+ -> SDoc
+
+pprStats stats graph
+ = let outSpills = pprStatsSpills stats
+ outLife = pprStatsLifetimes stats
+ outConflict = pprStatsConflict stats
+ outScatter = pprStatsLifeConflict stats graph
+
+ in vcat [outSpills, outLife, outConflict, outScatter]
+
+
+-- | Dump a table of how many spill loads \/ stores were inserted for each vreg.
+--   Only the final ('RegAllocStatsColored') stages contribute counts.
+pprStatsSpills
+ :: [RegAllocStats statics instr] -> SDoc
+
+pprStatsSpills stats
+ = let
+ finals = [ s | s@RegAllocStatsColored{} <- stats]
+
+ -- sum up how many stores\/loads\/reg-reg-moves were left in the code
+ total = foldl' addSRM (0, 0, 0)
+ $ map raSRMs finals
+
+ in ( text "-- spills-added-total"
+ $$ text "-- (stores, loads, reg_reg_moves_remaining)"
+ $$ ppr total
+ $$ text "")
+
+
+-- | Dump a table of how long vregs tend to live for in the initial code.
+--   Merges the spill-cost info from all 'RegAllocStatsStart' stages,
+--   then bins vregs by lifetime length.
+pprStatsLifetimes
+ :: [RegAllocStats statics instr] -> SDoc
+
+pprStatsLifetimes stats
+ = let info = foldl' plusSpillCostInfo zeroSpillCostInfo
+ [ raSpillCosts s
+ | s@RegAllocStatsStart{} <- stats ]
+
+ lifeBins = binLifetimeCount $ lifeMapFromSpillCostInfo info
+
+ in ( text "-- vreg-population-lifetimes"
+ $$ text "-- (instruction_count, number_of_vregs_that_lived_that_long)"
+ $$ pprUFM lifeBins (vcat . map ppr)
+ $$ text "\n")
+
+
+-- | Build a histogram of lifetimes: map each distinct lifetime length
+--   to (lifetime, number_of_vregs_with_that_lifetime).
+binLifetimeCount :: UniqFM (VirtualReg, Int) -> UniqFM (Int, Int)
+binLifetimeCount fm
+ = let lifes = map (\l -> (l, (l, 1)))
+ $ map snd
+ $ nonDetEltsUFM fm
+ -- See Note [Unique Determinism and code generation]
+
+ -- Collisions on the same lifetime just sum the counts.
+ in addListToUFM_C
+ (\(l1, c1) (_, c2) -> (l1, c1 + c2))
+ emptyUFM
+ lifes
+
+
+-- | Dump a table of how many conflicts vregs tend to have in the initial code.
+--   Histograms conflict counts across all initial-stage graphs,
+--   summing buckets with the same conflict count.
+pprStatsConflict
+ :: [RegAllocStats statics instr] -> SDoc
+
+pprStatsConflict stats
+ = let confMap = foldl' (plusUFM_C (\(c1, n1) (_, n2) -> (c1, n1 + n2)))
+ emptyUFM
+ $ map Color.slurpNodeConflictCount
+ [ raGraph s | s@RegAllocStatsStart{} <- stats ]
+
+ in ( text "-- vreg-conflicts"
+ $$ text "-- (conflict_count, number_of_vregs_that_had_that_many_conflicts)"
+ $$ pprUFM confMap (vcat . map ppr)
+ $$ text "\n")
+
+
+-- | For every vreg, dump how many conflicts it has, and its lifetime.
+-- Good for making a scatter plot.
+pprStatsLifeConflict
+ :: [RegAllocStats statics instr]
+ -> Color.Graph VirtualReg RegClass RealReg -- ^ global register conflict graph
+ -> SDoc
+
+pprStatsLifeConflict stats graph
+ = let lifeMap = lifeMapFromSpillCostInfo
+ $ foldl' plusSpillCostInfo zeroSpillCostInfo
+ $ [ raSpillCosts s | s@RegAllocStatsStart{} <- stats ]
+
+ -- NOTE(review): the `Just node = ...` binding is an incomplete
+ -- uni-pattern (the module disables -Wincomplete-uni-patterns for
+ -- this); it will crash lazily if a reg from the graph's own node
+ -- list were somehow absent, which cannot happen here since `r`
+ -- comes from graphMap itself.
+ scatter = map (\r -> let lifetime = case lookupUFM lifeMap r of
+ Just (_, l) -> l
+ Nothing -> 0
+ Just node = Color.lookupNode graph r
+ in parens $ hcat $ punctuate (text ", ")
+ [ doubleQuotes $ ppr $ Color.nodeId node
+ , ppr $ sizeUniqSet (Color.nodeConflicts node)
+ , ppr $ lifetime ])
+ $ map Color.nodeId
+ $ nonDetEltsUFM
+ -- See Note [Unique Determinism and code generation]
+ $ Color.graphMap graph
+
+ in ( text "-- vreg-conflict-lifetime"
+ $$ text "-- (vreg, vreg_conflicts, vreg_lifetime)"
+ $$ (vcat scatter)
+ $$ text "\n")
+
+
+-- | Count spill/reload/reg-reg moves.
+-- Lets us see how well the register allocator has done.
+-- Walks every block of the decl, accumulating (spills, reloads, moves)
+-- in a State monad.
+countSRMs
+ :: Instruction instr
+ => LiveCmmDecl statics instr -> (Int, Int, Int)
+
+countSRMs cmm
+ = execState (mapBlockTopM countSRM_block cmm) (0, 0, 0)
+
+
+-- | Count spill\/reload\/move instructions in one basic block,
+--   returning the block unchanged.
+countSRM_block
+ :: Instruction instr
+ => GenBasicBlock (LiveInstr instr)
+ -> State (Int, Int, Int) (GenBasicBlock (LiveInstr instr))
+
+countSRM_block (BasicBlock i instrs)
+ = do instrs' <- mapM countSRM_instr instrs
+ return $ BasicBlock i instrs'
+
+
+-- | Bump the appropriate counter for a single instruction:
+--   SPILL pseudo-instrs count as spills, RELOAD as reloads, and
+--   real reg-reg move instructions as moves. Everything else is
+--   passed through untouched.
+countSRM_instr
+ :: Instruction instr
+ => LiveInstr instr -> State (Int, Int, Int) (LiveInstr instr)
+
+countSRM_instr li
+ | LiveInstr SPILL{} _ <- li
+ = do modify $ \(s, r, m) -> (s + 1, r, m)
+ return li
+
+ | LiveInstr RELOAD{} _ <- li
+ = do modify $ \(s, r, m) -> (s, r + 1, m)
+ return li
+
+ | LiveInstr instr _ <- li
+ , Just _ <- takeRegRegMoveInstr instr
+ = do modify $ \(s, r, m) -> (s, r, m + 1)
+ return li
+
+ | otherwise
+ = return li
+
+
+-- | Add two (spill, reload, move) triples component-wise.
+--   The bang patterns keep the sums strict so that folding this over
+--   a long list (see 'pprStatsSpills') doesn't build up thunks.
+addSRM :: (Int, Int, Int) -> (Int, Int, Int) -> (Int, Int, Int)
+addSRM (s1, r1, m1) (s2, r2, m2)
+ = let !s = s1 + s2
+ !r = r1 + r2
+ !m = m1 + m2
+ in (s, r, m)
+
diff --git a/compiler/GHC/CmmToAsm/Reg/Graph/TrivColorable.hs b/compiler/GHC/CmmToAsm/Reg/Graph/TrivColorable.hs
new file mode 100644
index 0000000000..ec7c5ad13e
--- /dev/null
+++ b/compiler/GHC/CmmToAsm/Reg/Graph/TrivColorable.hs
@@ -0,0 +1,274 @@
+{-# LANGUAGE CPP #-}
+
+module GHC.CmmToAsm.Reg.Graph.TrivColorable (
+ trivColorable,
+)
+
+where
+
+#include "HsVersions.h"
+
+import GhcPrelude
+
+import GHC.Platform.Reg.Class
+import GHC.Platform.Reg
+
+import GraphBase
+
+import UniqSet
+import GHC.Platform
+import Panic
+
+-- trivColorable ---------------------------------------------------------------
+
+-- trivColorable function for the graph coloring allocator
+--
+-- This gets hammered by scanGraph during register allocation,
+-- so needs to be fairly efficient.
+--
+-- NOTE: This only works for architectures with just RcInteger and RcDouble
+-- (which are disjoint) ie. x86, x86_64 and ppc
+--
+-- The number of allocatable regs is hard coded in here so we can do
+-- a fast comparison in trivColorable.
+--
+-- It's ok if these numbers are _less_ than the actual number of free
+-- regs, but they can't be more or the register conflict
+-- graph won't color.
+--
+-- If the graph doesn't color then the allocator will panic, but it won't
+-- generate bad object code or anything nasty like that.
+--
+-- There is an allocatableRegsInClass :: RegClass -> Int, but doing
+-- the unboxing is too slow for us here.
+-- TODO: Is that still true? Could we use allocatableRegsInClass
+-- without losing performance now?
+--
+-- Look at includes/stg/MachRegs.h to get the numbers.
+--
+
+
+-- Disjoint registers ----------------------------------------------------------
+--
+-- The definition has been unfolded into individual cases for speed.
+-- Each architecture has a different register setup, so we use a
+-- different regSqueeze function for each.
+--
+-- | Accumulate the squeeze values of the registers in the set,
+--   starting from @count@ and bailing out early as soon as the
+--   running total reaches @maxCount@ (the lazy list conversion makes
+--   the early exit cheap — see Note [accSqueeze] below).
+accSqueeze
+ :: Int
+ -> Int
+ -> (reg -> Int)
+ -> UniqSet reg
+ -> Int
+
+accSqueeze count maxCount squeeze us = acc count (nonDetEltsUniqSet us)
+ -- See Note [Unique Determinism and code generation]
+ where acc count [] = count
+ acc count _ | count >= maxCount = count
+ acc count (r:rs) = acc (count + squeeze r) rs
+
+{- Note [accSqueeze]
+~~~~~~~~~~~~~~~~~~~~
+BL 2007/09
+Doing a nice fold over the UniqSet makes trivColorable use
+32% of total compile time and 42% of total alloc when compiling SHA1.hs from darcs.
+Therefore the UniqFM is made non-abstract and we use custom fold.
+
+MS 2010/04
+When converting UniqFM to use Data.IntMap, the fold cannot use UniqFM internal
+representation any more. But it is imperative that the accSqueeze stops
+the folding if the count gets greater or equal to maxCount. We thus convert
+UniqFM to a (lazy) list, do the fold and stop early if necessary, which was
+the most efficient variant tried. Benchmark compiling 10-times SHA1.hs follows.
+(original = previous implementation, folding = fold of the whole UFM,
+ lazyFold = the current implementation,
+ hackFold = using internal representation of Data.IntMap)
+
+ original folding hackFold lazyFold
+ -O -fasm (used everywhere) 31.509s 30.387s 30.791s 30.603s
+ 100.00% 96.44% 97.72% 97.12%
+ -fregs-graph 67.938s 74.875s 62.673s 64.679s
+ 100.00% 110.21% 92.25% 95.20%
+ -fregs-iterative 89.761s 143.913s 81.075s 86.912s
+ 100.00% 160.33% 90.32% 96.83%
+ -fnew-codegen 38.225s 37.142s 37.551s 37.119s
+ 100.00% 97.17% 98.24% 97.11%
+ -fnew-codegen -fregs-graph 91.786s 91.51s 87.368s 86.88s
+ 100.00% 99.70% 95.19% 94.65%
+ -fnew-codegen -fregs-iterative 206.72s 343.632s 194.694s 208.677s
+ 100.00% 166.23% 94.18% 100.95%
+-}
+
+-- | Test whether a node is trivially colorable: the node can always
+--   be given a color, no matter what its neighbors are assigned.
+--   True when the combined "squeeze" of its conflicting vregs and
+--   excluded real regs is below the (hard-coded, per-architecture)
+--   number of allocatable registers in its class.
+--   See the module header for why the counts are inlined here.
+trivColorable
+ :: Platform
+ -> (RegClass -> VirtualReg -> Int)
+ -> (RegClass -> RealReg -> Int)
+ -> Triv VirtualReg RegClass RealReg
+
+trivColorable platform virtualRegSqueeze realRegSqueeze RcInteger conflicts exclusions
+ | let cALLOCATABLE_REGS_INTEGER
+ = (case platformArch platform of
+ ArchX86 -> 3
+ ArchX86_64 -> 5
+ ArchPPC -> 16
+ ArchSPARC -> 14
+ ArchSPARC64 -> panic "trivColorable ArchSPARC64"
+ ArchPPC_64 _ -> 15
+ ArchARM _ _ _ -> panic "trivColorable ArchARM"
+ ArchARM64 -> panic "trivColorable ArchARM64"
+ ArchAlpha -> panic "trivColorable ArchAlpha"
+ ArchMipseb -> panic "trivColorable ArchMipseb"
+ ArchMipsel -> panic "trivColorable ArchMipsel"
+ ArchS390X -> panic "trivColorable ArchS390X"
+ ArchJavaScript-> panic "trivColorable ArchJavaScript"
+ ArchUnknown -> panic "trivColorable ArchUnknown")
+ -- Squeeze from conflicting vregs first; accSqueeze stops early
+ -- once the bound is reached, so the cheap test comes first.
+ , count2 <- accSqueeze 0 cALLOCATABLE_REGS_INTEGER
+ (virtualRegSqueeze RcInteger)
+ conflicts
+
+ , count3 <- accSqueeze count2 cALLOCATABLE_REGS_INTEGER
+ (realRegSqueeze RcInteger)
+ exclusions
+
+ = count3 < cALLOCATABLE_REGS_INTEGER
+
+trivColorable platform virtualRegSqueeze realRegSqueeze RcFloat conflicts exclusions
+ | let cALLOCATABLE_REGS_FLOAT
+ = (case platformArch platform of
+ -- On x86_64 and x86, Float and RcDouble
+ -- use the same registers,
+ -- so we only use RcDouble to represent the
+ -- register allocation problem on those types.
+ ArchX86 -> 0
+ ArchX86_64 -> 0
+ ArchPPC -> 0
+ ArchSPARC -> 22
+ ArchSPARC64 -> panic "trivColorable ArchSPARC64"
+ ArchPPC_64 _ -> 0
+ ArchARM _ _ _ -> panic "trivColorable ArchARM"
+ ArchARM64 -> panic "trivColorable ArchARM64"
+ ArchAlpha -> panic "trivColorable ArchAlpha"
+ ArchMipseb -> panic "trivColorable ArchMipseb"
+ ArchMipsel -> panic "trivColorable ArchMipsel"
+ ArchS390X -> panic "trivColorable ArchS390X"
+ ArchJavaScript-> panic "trivColorable ArchJavaScript"
+ ArchUnknown -> panic "trivColorable ArchUnknown")
+ , count2 <- accSqueeze 0 cALLOCATABLE_REGS_FLOAT
+ (virtualRegSqueeze RcFloat)
+ conflicts
+
+ , count3 <- accSqueeze count2 cALLOCATABLE_REGS_FLOAT
+ (realRegSqueeze RcFloat)
+ exclusions
+
+ = count3 < cALLOCATABLE_REGS_FLOAT
+
+trivColorable platform virtualRegSqueeze realRegSqueeze RcDouble conflicts exclusions
+ | let cALLOCATABLE_REGS_DOUBLE
+ = (case platformArch platform of
+ ArchX86 -> 8
+ -- in x86 32bit mode sse2 there are only
+ -- 8 XMM registers xmm0 ... xmm7
+ ArchX86_64 -> 10
+ -- in x86_64 there are 16 XMM registers
+ -- xmm0 .. xmm15, here 10 is a
+ -- "dont need to solve conflicts" count that
+ -- was chosen at some point in the past.
+ ArchPPC -> 26
+ ArchSPARC -> 11
+ ArchSPARC64 -> panic "trivColorable ArchSPARC64"
+ ArchPPC_64 _ -> 20
+ ArchARM _ _ _ -> panic "trivColorable ArchARM"
+ ArchARM64 -> panic "trivColorable ArchARM64"
+ ArchAlpha -> panic "trivColorable ArchAlpha"
+ ArchMipseb -> panic "trivColorable ArchMipseb"
+ ArchMipsel -> panic "trivColorable ArchMipsel"
+ ArchS390X -> panic "trivColorable ArchS390X"
+ ArchJavaScript-> panic "trivColorable ArchJavaScript"
+ ArchUnknown -> panic "trivColorable ArchUnknown")
+ , count2 <- accSqueeze 0 cALLOCATABLE_REGS_DOUBLE
+ (virtualRegSqueeze RcDouble)
+ conflicts
+
+ , count3 <- accSqueeze count2 cALLOCATABLE_REGS_DOUBLE
+ (realRegSqueeze RcDouble)
+ exclusions
+
+ = count3 < cALLOCATABLE_REGS_DOUBLE
+
+
+
+
+-- Specification Code ----------------------------------------------------------
+--
+-- The trivColorable function for each particular architecture should
+-- implement the following function, but faster.
+--
+
+{-
+trivColorable :: RegClass -> UniqSet Reg -> UniqSet Reg -> Bool
+trivColorable classN conflicts exclusions
+ = let
+
+ acc :: Reg -> (Int, Int) -> (Int, Int)
+ acc r (cd, cf)
+ = case regClass r of
+ RcInteger -> (cd+1, cf)
+ RcFloat -> (cd, cf+1)
+ _ -> panic "Regs.trivColorable: reg class not handled"
+
+ tmp = nonDetFoldUFM acc (0, 0) conflicts
+ (countInt, countFloat) = nonDetFoldUFM acc tmp exclusions
+
+ squeese = worst countInt classN RcInteger
+ + worst countFloat classN RcFloat
+
+ in squeese < allocatableRegsInClass classN
+
+-- | Worst case displacement
+-- node N of classN has n neighbors of class C.
+--
+-- We currently only have RcInteger and RcDouble, which don't conflict at all.
+-- This is a bit boring compared to what's in RegArchX86.
+--
+worst :: Int -> RegClass -> RegClass -> Int
+worst n classN classC
+ = case classN of
+ RcInteger
+ -> case classC of
+ RcInteger -> min n (allocatableRegsInClass RcInteger)
+ RcFloat -> 0
+
+ RcDouble
+ -> case classC of
+ RcFloat -> min n (allocatableRegsInClass RcFloat)
+ RcInteger -> 0
+
+-- allocatableRegs is allMachRegNos with the fixed-use regs removed.
+-- i.e., these are the regs for which we are prepared to allow the
+-- register allocator to attempt to map VRegs to.
+allocatableRegs :: [RegNo]
+allocatableRegs
+ = let isFree i = freeReg i
+ in filter isFree allMachRegNos
+
+
+-- | The number of regs in each class.
+-- We go via top level CAFs to ensure that we're not recomputing
+-- the length of these lists each time the fn is called.
+allocatableRegsInClass :: RegClass -> Int
+allocatableRegsInClass cls
+ = case cls of
+ RcInteger -> allocatableRegsInteger
+ RcFloat -> allocatableRegsFloat
+
+allocatableRegsInteger :: Int
+allocatableRegsInteger
+ = length $ filter (\r -> regClass r == RcInteger)
+ $ map RealReg allocatableRegs
+
+allocatableRegsFloat :: Int
+allocatableRegsFloat
+ = length $ filter (\r -> regClass r == RcFloat)
+ $ map RealReg allocatableRegs
+-}
diff --git a/compiler/GHC/CmmToAsm/Reg/Graph/X86.hs b/compiler/GHC/CmmToAsm/Reg/Graph/X86.hs
new file mode 100644
index 0000000000..0d4c56ba21
--- /dev/null
+++ b/compiler/GHC/CmmToAsm/Reg/Graph/X86.hs
@@ -0,0 +1,161 @@
+
+-- | A description of the register set of the X86.
+--
+-- This isn't used directly in GHC proper.
+--
+-- See RegArchBase.hs for the reference.
+-- See MachRegs.hs for the actual trivColorable function used in GHC.
+--
+module GHC.CmmToAsm.Reg.Graph.X86 (
+ classOfReg,
+ regsOfClass,
+ regName,
+ regAlias,
+ worst,
+ squeese,
+) where
+
+import GhcPrelude
+
+import GHC.CmmToAsm.Reg.Graph.Base (Reg(..), RegSub(..), RegClass(..))
+import UniqSet
+
+import qualified Data.Array as A
+
+
+-- | Determine the class of a register.
+--   Both 8-bit sub-registers (low byte and high byte) belong to the
+--   single 'ClassG8' class.
+classOfReg :: Reg -> RegClass
+classOfReg reg
+ = case reg of
+ Reg c _ -> c
+
+ RegSub SubL16 _ -> ClassG16
+ RegSub SubL8 _ -> ClassG8
+ RegSub SubL8H _ -> ClassG8
+
+
+-- | Determine all the regs that make up a certain class.
+--   ClassG8 is the union of the low-byte and high-byte sub-registers
+--   of eax..edx (only the first four 32-bit regs have 8-bit parts).
+regsOfClass :: RegClass -> UniqSet Reg
+regsOfClass c
+ = case c of
+ ClassG32
+ -> mkUniqSet [ Reg ClassG32 i
+ | i <- [0..7] ]
+
+ ClassG16
+ -> mkUniqSet [ RegSub SubL16 (Reg ClassG32 i)
+ | i <- [0..7] ]
+
+ ClassG8
+ -> unionUniqSets
+ (mkUniqSet [ RegSub SubL8 (Reg ClassG32 i) | i <- [0..3] ])
+ (mkUniqSet [ RegSub SubL8H (Reg ClassG32 i) | i <- [0..3] ])
+
+ ClassF64
+ -> mkUniqSet [ Reg ClassF64 i
+ | i <- [0..5] ]
+
+
+-- | Determine the common name of a reg
+-- returns Nothing if this reg is not part of the machine.
+--
+-- NOTE(review): the 'listArray' bounds below are one larger than the
+-- number of elements supplied, e.g. @(0,8)@ with 8 names — the extra
+-- slot is never indexed because of the @i <= 7@ guards, but the
+-- bounds should arguably be @(0,7)@ / @(0,3)@ to match; confirm
+-- before tightening.
+regName :: Reg -> Maybe String
+regName reg
+ = case reg of
+ Reg ClassG32 i
+ | i <= 7 ->
+ let names = A.listArray (0,8)
+ [ "eax", "ebx", "ecx", "edx"
+ , "ebp", "esi", "edi", "esp" ]
+ in Just $ names A.! i
+
+ RegSub SubL16 (Reg ClassG32 i)
+ | i <= 7 ->
+ let names = A.listArray (0,8)
+ [ "ax", "bx", "cx", "dx"
+ , "bp", "si", "di", "sp"]
+ in Just $ names A.! i
+
+ RegSub SubL8 (Reg ClassG32 i)
+ | i <= 3 ->
+ let names = A.listArray (0,4) [ "al", "bl", "cl", "dl"]
+ in Just $ names A.! i
+
+ RegSub SubL8H (Reg ClassG32 i)
+ | i <= 3 ->
+ let names = A.listArray (0,4) [ "ah", "bh", "ch", "dh"]
+ in Just $ names A.! i
+
+ _ -> Nothing
+
+
+-- | Which regs alias what other regs.
+--   The alias set of a register always includes the register itself.
+regAlias :: Reg -> UniqSet Reg
+regAlias reg
+ = case reg of
+
+ -- 32 bit regs alias all of the subregs
+ Reg ClassG32 i
+
+ -- for eax, ebx, ecx, edx (which have 8-bit subregs)
+ | i <= 3
+ -> mkUniqSet
+ $ [ Reg ClassG32 i, RegSub SubL16 reg
+ , RegSub SubL8 reg, RegSub SubL8H reg ]
+
+ -- for esi, edi, esp, ebp
+ | 4 <= i && i <= 7
+ -> mkUniqSet
+ $ [ Reg ClassG32 i, RegSub SubL16 reg ]
+
+ -- 16 bit subregs alias the whole reg
+ -- (same alias set as the full 32-bit register, so delegate)
+ RegSub SubL16 r@(Reg ClassG32 _)
+ -> regAlias r
+
+ -- 8 bit subregs alias the 32 and 16, but not the other 8 bit subreg
+ RegSub SubL8 r@(Reg ClassG32 _)
+ -> mkUniqSet $ [ r, RegSub SubL16 r, RegSub SubL8 r ]
+
+ RegSub SubL8H r@(Reg ClassG32 _)
+ -> mkUniqSet $ [ r, RegSub SubL16 r, RegSub SubL8H r ]
+
+ -- fp
+ Reg ClassF64 _
+ -> unitUniqSet reg
+
+ _ -> error "regAlias: invalid register"
+
+
+-- | Optimised versions of RegColorBase.{worst, squeese} specific to x86.
+--   @worst n classN classC@ is the worst-case number of registers of
+--   class @classN@ that can be made unusable by @n@ neighbors of
+--   class @classC@. Note that each ClassG8 neighbor can block up to
+--   two wider registers' worth of coloring freedom, hence the @n*2@.
+worst :: Int -> RegClass -> RegClass -> Int
+worst n classN classC
+ = case classN of
+ ClassG32
+ -> case classC of
+ ClassG32 -> min n 8
+ ClassG16 -> min n 8
+ ClassG8 -> min n 4
+ ClassF64 -> 0
+
+ ClassG16
+ -> case classC of
+ ClassG32 -> min n 8
+ ClassG16 -> min n 8
+ ClassG8 -> min n 4
+ ClassF64 -> 0
+
+ ClassG8
+ -> case classC of
+ ClassG32 -> min (n*2) 8
+ ClassG16 -> min (n*2) 8
+ ClassG8 -> min n 8
+ ClassF64 -> 0
+
+ ClassF64
+ -> case classC of
+ ClassF64 -> min n 6
+ _ -> 0
+
+-- | Total worst-case displacement of a node of class @classN@ given
+--   counts of neighbors per class: sum 'worst' over each entry.
+squeese :: RegClass -> [(Int, RegClass)] -> Int
+squeese classN countCs
+ = sum (map (\(i, classC) -> worst i classN classC) countCs)
+
diff --git a/compiler/GHC/CmmToAsm/Reg/Linear.hs b/compiler/GHC/CmmToAsm/Reg/Linear.hs
new file mode 100644
index 0000000000..9b263889d8
--- /dev/null
+++ b/compiler/GHC/CmmToAsm/Reg/Linear.hs
@@ -0,0 +1,920 @@
+{-# LANGUAGE BangPatterns, CPP, ScopedTypeVariables #-}
+
+{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-}
+
+-----------------------------------------------------------------------------
+--
+-- The register allocator
+--
+-- (c) The University of Glasgow 2004
+--
+-----------------------------------------------------------------------------
+
+{-
+The algorithm is roughly:
+
+ 1) Compute strongly connected components of the basic block list.
+
+ 2) Compute liveness (mapping from pseudo register to
+ point(s) of death?).
+
+ 3) Walk instructions in each basic block. We keep track of
+ (a) Free real registers (a bitmap?)
+ (b) Current assignment of temporaries to machine registers and/or
+ spill slots (call this the "assignment").
+ (c) Partial mapping from basic block ids to a virt-to-loc mapping.
+ When we first encounter a branch to a basic block,
+ we fill in its entry in this table with the current mapping.
+
+ For each instruction:
+ (a) For each temporary *read* by the instruction:
+ If the temporary does not have a real register allocation:
+ - Allocate a real register from the free list. If
+ the list is empty:
+ - Find a temporary to spill. Pick one that is
+ not used in this instruction (ToDo: not
+ used for a while...)
+ - generate a spill instruction
+ - If the temporary was previously spilled,
+ generate an instruction to read the temp from its spill loc.
+ (optimisation: if we can see that a real register is going to
+ be used soon, then don't use it for allocation).
+
+ (b) For each real register clobbered by this instruction:
+ If a temporary resides in it,
+ If the temporary is live after this instruction,
+ Move the temporary to another (non-clobbered & free) reg,
+ or spill it to memory. Mark the temporary as residing
+ in both memory and a register if it was spilled (it might
+ need to be read by this instruction).
+
+ (ToDo: this is wrong for jump instructions?)
+
+ We do this after step (a), because if we start with
+ movq v1, %rsi
+ which is an instruction that clobbers %rsi, if v1 currently resides
+ in %rsi we want to get
+ movq %rsi, %freereg
+ movq %rsi, %rsi -- will disappear
+ instead of
+ movq %rsi, %freereg
+ movq %freereg, %rsi
+
+ (c) Update the current assignment
+
+ (d) If the instruction is a branch:
+ if the destination block already has a register assignment,
+ Generate a new block with fixup code and redirect the
+ jump to the new block.
+ else,
+ Update the block id->assignment mapping with the current
+ assignment.
+
+ (e) Delete all register assignments for temps which are read
+ (only) and die here. Update the free register list.
+
+ (f) Mark all registers clobbered by this instruction as not free,
+ and mark temporaries which have been spilled due to clobbering
+ as in memory (step (a) marks them as in both mem & reg).
+
+ (g) For each temporary *written* by this instruction:
+ Allocate a real register as for (b), spilling something
+ else if necessary.
+ - except when updating the assignment, drop any memory
+ locations that the temporary was previously in, since
+ they will be no longer valid after this instruction.
+
+ (h) Delete all register assignments for temps which are
+ written and die here (there should rarely be any). Update
+ the free register list.
+
+ (i) Rewrite the instruction with the new mapping.
+
+ (j) For each spilled reg known to be now dead, re-add its stack slot
+ to the free list.
+
+-}
+
+module GHC.CmmToAsm.Reg.Linear (
+ regAlloc,
+ module GHC.CmmToAsm.Reg.Linear.Base,
+ module GHC.CmmToAsm.Reg.Linear.Stats
+ ) where
+
+#include "HsVersions.h"
+
+
+import GhcPrelude
+
+import GHC.CmmToAsm.Reg.Linear.State
+import GHC.CmmToAsm.Reg.Linear.Base
+import GHC.CmmToAsm.Reg.Linear.StackMap
+import GHC.CmmToAsm.Reg.Linear.FreeRegs
+import GHC.CmmToAsm.Reg.Linear.Stats
+import GHC.CmmToAsm.Reg.Linear.JoinToTargets
+import qualified GHC.CmmToAsm.Reg.Linear.PPC as PPC
+import qualified GHC.CmmToAsm.Reg.Linear.SPARC as SPARC
+import qualified GHC.CmmToAsm.Reg.Linear.X86 as X86
+import qualified GHC.CmmToAsm.Reg.Linear.X86_64 as X86_64
+import GHC.CmmToAsm.Reg.Target
+import GHC.CmmToAsm.Reg.Liveness
+import GHC.CmmToAsm.Instr
+import GHC.Platform.Reg
+
+import GHC.Cmm.BlockId
+import GHC.Cmm.Dataflow.Collections
+import GHC.Cmm hiding (RegSet)
+
+import Digraph
+import GHC.Driver.Session
+import Unique
+import UniqSet
+import UniqFM
+import UniqSupply
+import Outputable
+import GHC.Platform
+
+import Data.Maybe
+import Data.List
+import Control.Monad
+
+-- -----------------------------------------------------------------------------
+-- Top level of the register allocator
+
+-- | Top-level entry point of the linear register allocator.
+--   Returns the allocated proc, the number of extra stack slots
+--   needed beyond maxSpillSlots (if any), and allocation stats.
+regAlloc
+ :: (Outputable instr, Instruction instr)
+ => DynFlags
+ -> LiveCmmDecl statics instr
+ -> UniqSM ( NatCmmDecl statics instr
+ , Maybe Int -- number of extra stack slots required,
+ -- beyond maxSpillSlots
+ , Maybe RegAllocStats
+ )
+
+-- Data sections contain no instructions: pass them through unchanged.
+regAlloc _ (CmmData sec d)
+ = return
+ ( CmmData sec d
+ , Nothing
+ , Nothing )
+
+-- A proc with no blocks also needs no allocation.
+regAlloc _ (CmmProc (LiveInfo info _ _ _) lbl live [])
+ = return ( CmmProc info lbl live (ListGraph [])
+ , Nothing
+ , Nothing )
+
+regAlloc dflags (CmmProc static lbl live sccs)
+ | LiveInfo info entry_ids@(first_id:_) block_live _ <- static
+ = do
+ -- do register allocation on each component.
+ (final_blocks, stats, stack_use)
+ <- linearRegAlloc dflags entry_ids block_live sccs
+
+ -- make sure the block that was first in the input list
+ -- stays at the front of the output
+ let ((first':_), rest')
+ = partition ((== first_id) . blockId) final_blocks
+
+ let max_spill_slots = maxSpillSlots dflags
+ extra_stack
+ | stack_use > max_spill_slots
+ = Just (stack_use - max_spill_slots)
+ | otherwise
+ = Nothing
+
+ return ( CmmProc info lbl live (ListGraph (first' : rest'))
+ , extra_stack
+ , Just stats)
+
+-- bogus. to make non-exhaustive match warning go away.
+regAlloc _ (CmmProc _ _ _ _)
+ = panic "RegAllocLinear.regAlloc: no match"
+
+
+-- -----------------------------------------------------------------------------
+-- Linear sweep to allocate registers
+
+
+-- | Do register allocation on some basic blocks.
+-- But be careful to allocate a block in an SCC only if it has
+-- an entry in the block map or it is the first block.
+--
+-- Dispatches on the target architecture purely to pick the concrete
+-- FreeRegs representation; the algorithm itself is shared.
+linearRegAlloc
+ :: (Outputable instr, Instruction instr)
+ => DynFlags
+ -> [BlockId] -- ^ entry points
+ -> BlockMap RegSet
+ -- ^ live regs on entry to each basic block
+ -> [SCC (LiveBasicBlock instr)]
+ -- ^ instructions annotated with "deaths"
+ -> UniqSM ([NatBasicBlock instr], RegAllocStats, Int)
+
+linearRegAlloc dflags entry_ids block_live sccs
+ = case platformArch platform of
+ ArchX86 -> go $ (frInitFreeRegs platform :: X86.FreeRegs)
+ ArchX86_64 -> go $ (frInitFreeRegs platform :: X86_64.FreeRegs)
+ ArchS390X -> panic "linearRegAlloc ArchS390X"
+ ArchSPARC -> go $ (frInitFreeRegs platform :: SPARC.FreeRegs)
+ ArchSPARC64 -> panic "linearRegAlloc ArchSPARC64"
+ ArchPPC -> go $ (frInitFreeRegs platform :: PPC.FreeRegs)
+ ArchARM _ _ _ -> panic "linearRegAlloc ArchARM"
+ ArchARM64 -> panic "linearRegAlloc ArchARM64"
+ ArchPPC_64 _ -> go $ (frInitFreeRegs platform :: PPC.FreeRegs)
+ ArchAlpha -> panic "linearRegAlloc ArchAlpha"
+ ArchMipseb -> panic "linearRegAlloc ArchMipseb"
+ ArchMipsel -> panic "linearRegAlloc ArchMipsel"
+ ArchJavaScript -> panic "linearRegAlloc ArchJavaScript"
+ ArchUnknown -> panic "linearRegAlloc ArchUnknown"
+ where
+ go f = linearRegAlloc' dflags f entry_ids block_live sccs
+ platform = targetPlatform dflags
+
+-- | Run the allocator proper in the RegM monad, starting from an
+--   empty block assignment and stack map, and report the high-water
+--   mark of stack-slot usage.
+linearRegAlloc'
+ :: (FR freeRegs, Outputable instr, Instruction instr)
+ => DynFlags
+ -> freeRegs
+ -> [BlockId] -- ^ entry points
+ -> BlockMap RegSet -- ^ live regs on entry to each basic block
+ -> [SCC (LiveBasicBlock instr)] -- ^ instructions annotated with "deaths"
+ -> UniqSM ([NatBasicBlock instr], RegAllocStats, Int)
+
+linearRegAlloc' dflags initFreeRegs entry_ids block_live sccs
+ = do us <- getUniqueSupplyM
+ let (_, stack, stats, blocks) =
+ runR dflags mapEmpty initFreeRegs emptyRegMap (emptyStackMap dflags) us
+ $ linearRA_SCCs entry_ids block_live [] sccs
+ return (blocks, stats, getStackUse stack)
+
+
+-- | Allocate the strongly-connected components in order, threading
+--   an accumulator of finished blocks (kept reversed for cheap
+--   consing). Acyclic SCCs are processed directly; cyclic ones go
+--   through 'process', which iterates until assignments stabilise.
+linearRA_SCCs :: (FR freeRegs, Instruction instr, Outputable instr)
+ => [BlockId]
+ -> BlockMap RegSet
+ -> [NatBasicBlock instr]
+ -> [SCC (LiveBasicBlock instr)]
+ -> RegM freeRegs [NatBasicBlock instr]
+
+linearRA_SCCs _ _ blocksAcc []
+ = return $ reverse blocksAcc
+
+linearRA_SCCs entry_ids block_live blocksAcc (AcyclicSCC block : sccs)
+ = do blocks' <- processBlock block_live block
+ linearRA_SCCs entry_ids block_live
+ ((reverse blocks') ++ blocksAcc)
+ sccs
+
+linearRA_SCCs entry_ids block_live blocksAcc (CyclicSCC blocks : sccs)
+ = do
+ blockss' <- process entry_ids block_live blocks [] (return []) False
+ linearRA_SCCs entry_ids block_live
+ (reverse (concat blockss') ++ blocksAcc)
+ sccs
+
+{- from John Dias's patch 2008/10/16:
+ The linear-scan allocator sometimes allocates a block
+ before allocating one of its predecessors, which could lead to
+ inconsistent allocations. Make it so a block is only allocated
+ if a predecessor has set the "incoming" assignments for the block, or
+ if it's the procedure's entry block.
+
+ BL 2009/02: Careful. If the assignment for a block doesn't get set for
+ some reason then this function will loop. We should probably do some
+ more sanity checking to guard against this eventuality.
+-}
+
+-- | Iteratively allocate the blocks of a cyclic SCC. A block is only
+--   processed once a predecessor has recorded its incoming register
+--   assignment, or it is one of the procedure's entry blocks; the
+--   rest are deferred to the next round. The @madeProgress@ flag
+--   terminates the loop if a whole round defers everything, so
+--   unreachable blocks don't make us spin forever (see the comment
+--   above this function).
+process :: (FR freeRegs, Instruction instr, Outputable instr)
+ => [BlockId]
+ -> BlockMap RegSet
+ -> [GenBasicBlock (LiveInstr instr)]
+ -> [GenBasicBlock (LiveInstr instr)]
+ -> [[NatBasicBlock instr]]
+ -> Bool
+ -> RegM freeRegs [[NatBasicBlock instr]]
+
+process _ _ [] [] accum _
+ = return $ reverse accum
+
+process entry_ids block_live [] next_round accum madeProgress
+ | not madeProgress
+
+ {- BUGS: There are so many unreachable blocks in the code the warnings are overwhelming.
+ pprTrace "RegAlloc.Linear.Main.process: no progress made, bailing out."
+ ( text "Unreachable blocks:"
+ $$ vcat (map ppr next_round)) -}
+ = return $ reverse accum
+
+ | otherwise
+ = process entry_ids block_live
+ next_round [] accum False
+
+process entry_ids block_live (b@(BasicBlock id _) : blocks)
+ next_round accum madeProgress
+ = do
+ block_assig <- getBlockAssigR
+
+ -- Ready when a predecessor has set our incoming assignment,
+ -- or we are an entry block.
+ if isJust (mapLookup id block_assig)
+ || id `elem` entry_ids
+ then do
+ b' <- processBlock block_live b
+ process entry_ids block_live blocks
+ next_round (b' : accum) True
+
+ else process entry_ids block_live blocks
+ (b : next_round) accum madeProgress
+
+
+-- | Do register allocation on this basic block
+--
+processBlock
+ :: (FR freeRegs, Outputable instr, Instruction instr)
+ => BlockMap RegSet -- ^ live regs on entry to each basic block
+ -> LiveBasicBlock instr -- ^ block to do register allocation on
+ -> RegM freeRegs [NatBasicBlock instr] -- ^ block with registers allocated
+
+processBlock block_live (BasicBlock id instrs)
+ = do initBlock id block_live
+ (instrs', fixups)
+ <- linearRA block_live [] [] id instrs
+ return $ BasicBlock id instrs' : fixups
+
+
+-- | Load the freeregs and current reg assignment into the RegM state
+-- for the basic block with this BlockId.
+initBlock :: FR freeRegs
+ => BlockId -> BlockMap RegSet -> RegM freeRegs ()
+initBlock id block_live
+ = do dflags <- getDynFlags
+ let platform = targetPlatform dflags
+ block_assig <- getBlockAssigR
+ case mapLookup id block_assig of
+ -- no prior info about this block: we must consider
+ -- any fixed regs to be allocated, but we can ignore
+ -- virtual regs (presumably this is part of a loop,
+ -- and we'll iterate again). The assignment begins
+ -- empty.
+ Nothing
+ -> do -- pprTrace "initFreeRegs" (text $ show initFreeRegs) (return ())
+ case mapLookup id block_live of
+ Nothing ->
+ setFreeRegsR (frInitFreeRegs platform)
+ Just live ->
+ setFreeRegsR $ foldl' (flip $ frAllocateReg platform) (frInitFreeRegs platform)
+ [ r | RegReal r <- nonDetEltsUniqSet live ]
+ -- See Note [Unique Determinism and code generation]
+ setAssigR emptyRegMap
+
+ -- load info about register assignments leading into this block.
+ Just (freeregs, assig)
+ -> do setFreeRegsR freeregs
+ setAssigR assig
+
+
+-- | Do allocation for a sequence of instructions.
+linearRA
+ :: (FR freeRegs, Outputable instr, Instruction instr)
+ => BlockMap RegSet -- ^ map of what vregs are live on entry to each block.
+ -> [instr] -- ^ accumulator for instructions already processed.
+ -> [NatBasicBlock instr] -- ^ accumulator for blocks of fixup code.
+ -> BlockId -- ^ id of the current block, for debugging.
+ -> [LiveInstr instr] -- ^ liveness annotated instructions in this block.
+
+ -> RegM freeRegs
+ ( [instr] -- instructions after register allocation
+ , [NatBasicBlock instr]) -- fresh blocks of fixup code.
+
+
+linearRA _ accInstr accFixup _ []
+ = return
+ ( reverse accInstr -- instrs need to be returned in the correct order.
+ , accFixup) -- it doesn't matter what order the fixup blocks are returned in.
+
+
+linearRA block_live accInstr accFixups id (instr:instrs)
+ = do
+ (accInstr', new_fixups) <- raInsn block_live accInstr id instr
+
+ linearRA block_live accInstr' (new_fixups ++ accFixups) id instrs
+
+
+-- | Do allocation for a single instruction.
+raInsn
+ :: (FR freeRegs, Outputable instr, Instruction instr)
+ => BlockMap RegSet -- ^ map of what vregs are live on entry to each block.
+ -> [instr] -- ^ accumulator for instructions already processed.
+ -> BlockId -- ^ the id of the current block, for debugging
+ -> LiveInstr instr -- ^ the instr to have its regs allocated, with liveness info.
+ -> RegM freeRegs
+ ( [instr] -- new instructions
+ , [NatBasicBlock instr]) -- extra fixup blocks
+
+raInsn _ new_instrs _ (LiveInstr ii Nothing)
+ | Just n <- takeDeltaInstr ii
+ = do setDeltaR n
+ return (new_instrs, [])
+
+raInsn _ new_instrs _ (LiveInstr ii@(Instr i) Nothing)
+ | isMetaInstr ii
+ = return (i : new_instrs, [])
+
+
+raInsn block_live new_instrs id (LiveInstr (Instr instr) (Just live))
+ = do
+ assig <- getAssigR
+
+ -- If we have a reg->reg move between virtual registers, where the
+ -- src register is not live after this instruction, and the dst
+ -- register does not already have an assignment,
+ -- and the source register is assigned to a register, not to a spill slot,
+ -- then we can eliminate the instruction.
+ -- (we can't eliminate it if the source register is on the stack, because
+ -- we do not want to use one spill slot for different virtual registers)
+ case takeRegRegMoveInstr instr of
+ Just (src,dst) | src `elementOfUniqSet` (liveDieRead live),
+ isVirtualReg dst,
+ not (dst `elemUFM` assig),
+ isRealReg src || isInReg src assig -> do
+ case src of
+ (RegReal rr) -> setAssigR (addToUFM assig dst (InReg rr))
+ -- if src is a fixed reg, then we just map dest to this
+ -- reg in the assignment. src must be an allocatable reg,
+ -- otherwise it wouldn't be in r_dying.
+ _virt -> case lookupUFM assig src of
+ Nothing -> panic "raInsn"
+ Just loc ->
+ setAssigR (addToUFM (delFromUFM assig src) dst loc)
+
+ -- we have eliminated this instruction
+ {-
+ freeregs <- getFreeRegsR
+ assig <- getAssigR
+ pprTrace "raInsn" (text "ELIMINATED: " <> docToSDoc (pprInstr instr)
+ $$ ppr r_dying <+> ppr w_dying $$ text (show freeregs) $$ ppr assig) $ do
+ -}
+ return (new_instrs, [])
+
+ _ -> genRaInsn block_live new_instrs id instr
+ (nonDetEltsUniqSet $ liveDieRead live)
+ (nonDetEltsUniqSet $ liveDieWrite live)
+ -- See Note [Unique Determinism and code generation]
+
+raInsn _ _ _ instr
+ = pprPanic "raInsn" (text "no match for:" <> ppr instr)
+
+-- ToDo: what can we do about
+--
+-- R1 = x
+-- jump I64[x] // [R1]
+--
+-- where x is mapped to the same reg as R1. We want to coalesce x and
+-- R1, but the register allocator doesn't know whether x will be
+-- assigned to again later, in which case x and R1 should be in
+-- different registers. Right now we assume the worst, and the
+-- assignment to R1 will clobber x, so we'll spill x into another reg,
+-- generating another reg->reg move.
+
+
+isInReg :: Reg -> RegMap Loc -> Bool
+isInReg src assig | Just (InReg _) <- lookupUFM assig src = True
+ | otherwise = False
+
+
+genRaInsn :: (FR freeRegs, Instruction instr, Outputable instr)
+ => BlockMap RegSet
+ -> [instr]
+ -> BlockId
+ -> instr
+ -> [Reg]
+ -> [Reg]
+ -> RegM freeRegs ([instr], [NatBasicBlock instr])
+
+genRaInsn block_live new_instrs block_id instr r_dying w_dying = do
+ dflags <- getDynFlags
+ let platform = targetPlatform dflags
+ case regUsageOfInstr platform instr of { RU read written ->
+ do
+ let real_written = [ rr | (RegReal rr) <- written ]
+ let virt_written = [ vr | (RegVirtual vr) <- written ]
+
+ -- we don't need to do anything with real registers that are
+ -- only read by this instr. (the list is typically ~2 elements,
+ -- so using nub isn't a problem).
+ let virt_read = nub [ vr | (RegVirtual vr) <- read ]
+
+ -- debugging
+{- freeregs <- getFreeRegsR
+ assig <- getAssigR
+ pprDebugAndThen (defaultDynFlags Settings{ sTargetPlatform=platform } undefined) trace "genRaInsn"
+ (ppr instr
+ $$ text "r_dying = " <+> ppr r_dying
+ $$ text "w_dying = " <+> ppr w_dying
+ $$ text "virt_read = " <+> ppr virt_read
+ $$ text "virt_written = " <+> ppr virt_written
+ $$ text "freeregs = " <+> text (show freeregs)
+ $$ text "assig = " <+> ppr assig)
+ $ do
+-}
+
+ -- (a), (b) allocate real regs for all regs read by this instruction.
+ (r_spills, r_allocd) <-
+ allocateRegsAndSpill True{-reading-} virt_read [] [] virt_read
+
+ -- (c) save any temporaries which will be clobbered by this instruction
+ clobber_saves <- saveClobberedTemps real_written r_dying
+
+ -- (d) Update block map for new destinations
+ -- NB. do this before removing dead regs from the assignment, because
+ -- these dead regs might in fact be live in the jump targets (they're
+ -- only dead in the code that follows in the current basic block).
+ (fixup_blocks, adjusted_instr)
+ <- joinToTargets block_live block_id instr
+
+ -- Debugging - show places where the reg alloc inserted
+ -- assignment fixup blocks.
+ -- when (not $ null fixup_blocks) $
+ -- pprTrace "fixup_blocks" (ppr fixup_blocks) (return ())
+
+ -- (e) Delete all register assignments for temps which are read
+ -- (only) and die here. Update the free register list.
+ releaseRegs r_dying
+
+ -- (f) Mark regs which are clobbered as unallocatable
+ clobberRegs real_written
+
+ -- (g) Allocate registers for temporaries *written* (only)
+ (w_spills, w_allocd) <-
+ allocateRegsAndSpill False{-writing-} virt_written [] [] virt_written
+
+ -- (h) Release registers for temps which are written here and not
+ -- used again.
+ releaseRegs w_dying
+
+ let
+ -- (i) Patch the instruction
+ patch_map
+ = listToUFM
+ [ (t, RegReal r)
+ | (t, r) <- zip virt_read r_allocd
+ ++ zip virt_written w_allocd ]
+
+ patched_instr
+ = patchRegsOfInstr adjusted_instr patchLookup
+
+ patchLookup x
+ = case lookupUFM patch_map x of
+ Nothing -> x
+ Just y -> y
+
+
+ -- (j) free up stack slots for dead spilled regs
+ -- TODO (can't be bothered right now)
+
+ -- erase reg->reg moves where the source and destination are the same.
+ -- If the src temp didn't die in this instr but happened to be allocated
+ -- to the same real reg as the destination, then we can erase the move anyway.
+ let squashed_instr = case takeRegRegMoveInstr patched_instr of
+ Just (src, dst)
+ | src == dst -> []
+ _ -> [patched_instr]
+
+ let code = squashed_instr ++ w_spills ++ reverse r_spills
+ ++ clobber_saves ++ new_instrs
+
+-- pprTrace "patched-code" ((vcat $ map (docToSDoc . pprInstr) code)) $ do
+-- pprTrace "patched-fixup" ((ppr fixup_blocks)) $ do
+
+ return (code, fixup_blocks)
+
+ }
+
+-- -----------------------------------------------------------------------------
+-- releaseRegs
+
+releaseRegs :: FR freeRegs => [Reg] -> RegM freeRegs ()
+releaseRegs regs = do
+ dflags <- getDynFlags
+ let platform = targetPlatform dflags
+ assig <- getAssigR
+ free <- getFreeRegsR
+ let loop assig !free [] = do setAssigR assig; setFreeRegsR free; return ()
+ loop assig !free (RegReal rr : rs) = loop assig (frReleaseReg platform rr free) rs
+ loop assig !free (r:rs) =
+ case lookupUFM assig r of
+ Just (InBoth real _) -> loop (delFromUFM assig r)
+ (frReleaseReg platform real free) rs
+ Just (InReg real) -> loop (delFromUFM assig r)
+ (frReleaseReg platform real free) rs
+ _ -> loop (delFromUFM assig r) free rs
+ loop assig free regs
+
+
+-- -----------------------------------------------------------------------------
+-- Clobber real registers
+
+-- For each temp in a register that is going to be clobbered:
+-- - if the temp dies after this instruction, do nothing
+-- - otherwise, put it somewhere safe (another reg if possible,
+-- otherwise spill and record InBoth in the assignment).
+-- - for allocateRegs on the temps *read*,
+-- - clobbered regs are allocatable.
+--
+-- for allocateRegs on the temps *written*,
+-- - clobbered regs are not allocatable.
+--
+
+saveClobberedTemps
+ :: (Instruction instr, FR freeRegs)
+ => [RealReg] -- real registers clobbered by this instruction
+ -> [Reg] -- registers which are no longer live after this insn
+ -> RegM freeRegs [instr] -- return: instructions to spill any temps that will
+ -- be clobbered.
+
+saveClobberedTemps [] _
+ = return []
+
+saveClobberedTemps clobbered dying
+ = do
+ assig <- getAssigR
+ let to_spill
+ = [ (temp,reg)
+ | (temp, InReg reg) <- nonDetUFMToList assig
+ -- This is non-deterministic but we do not
+ -- currently support deterministic code-generation.
+ -- See Note [Unique Determinism and code generation]
+ , any (realRegsAlias reg) clobbered
+ , temp `notElem` map getUnique dying ]
+
+ (instrs,assig') <- clobber assig [] to_spill
+ setAssigR assig'
+ return instrs
+
+ where
+ clobber assig instrs []
+ = return (instrs, assig)
+
+ clobber assig instrs ((temp, reg) : rest)
+ = do dflags <- getDynFlags
+ let platform = targetPlatform dflags
+
+ freeRegs <- getFreeRegsR
+ let regclass = targetClassOfRealReg platform reg
+ freeRegs_thisClass = frGetFreeRegs platform regclass freeRegs
+
+ case filter (`notElem` clobbered) freeRegs_thisClass of
+
+ -- (1) we have a free reg of the right class that isn't
+ -- clobbered by this instruction; use it to save the
+ -- clobbered value.
+ (my_reg : _) -> do
+ setFreeRegsR (frAllocateReg platform my_reg freeRegs)
+
+ let new_assign = addToUFM assig temp (InReg my_reg)
+ let instr = mkRegRegMoveInstr platform
+ (RegReal reg) (RegReal my_reg)
+
+ clobber new_assign (instr : instrs) rest
+
+ -- (2) no free registers: spill the value
+ [] -> do
+ (spill, slot) <- spillR (RegReal reg) temp
+
+ -- record why this reg was spilled for profiling
+ recordSpill (SpillClobber temp)
+
+ let new_assign = addToUFM assig temp (InBoth reg slot)
+
+ clobber new_assign (spill : instrs) rest
+
+
+
+-- | Mark all these real regs as allocated,
+-- and kick out their vreg assignments.
+--
+clobberRegs :: FR freeRegs => [RealReg] -> RegM freeRegs ()
+clobberRegs []
+ = return ()
+
+clobberRegs clobbered
+ = do dflags <- getDynFlags
+ let platform = targetPlatform dflags
+
+ freeregs <- getFreeRegsR
+ setFreeRegsR $! foldl' (flip $ frAllocateReg platform) freeregs clobbered
+
+ assig <- getAssigR
+ setAssigR $! clobber assig (nonDetUFMToList assig)
+ -- This is non-deterministic but we do not
+ -- currently support deterministic code-generation.
+ -- See Note [Unique Determinism and code generation]
+
+ where
+ -- if the temp was InReg and clobbered, then we will have
+ -- saved it in saveClobberedTemps above. So the only case
+ -- we have to worry about here is InBoth. Note that this
+ -- also catches temps which were loaded up during allocation
+ -- of read registers, not just those saved in saveClobberedTemps.
+
+ clobber assig []
+ = assig
+
+ clobber assig ((temp, InBoth reg slot) : rest)
+ | any (realRegsAlias reg) clobbered
+ = clobber (addToUFM assig temp (InMem slot)) rest
+
+ clobber assig (_:rest)
+ = clobber assig rest
+
+-- -----------------------------------------------------------------------------
+-- allocateRegsAndSpill
+
+-- Why are we performing a spill?
+data SpillLoc = ReadMem StackSlot -- reading from register only in memory
+ | WriteNew -- writing to a new variable
+ | WriteMem -- writing to register only in memory
+-- Note that ReadNew is not valid, since you don't want to be reading
+-- from an uninitialized register. We also don't need the location of
+-- the register in memory, since that will be invalidated by the write.
+-- Technically, we could coalesce WriteNew and WriteMem into a single
+-- entry as well. -- EZY
+
+-- This function does several things:
+-- For each temporary referred to by this instruction,
+-- we allocate a real register (spilling another temporary if necessary).
+-- We load the temporary up from memory if necessary.
+-- We also update the register assignment in the process, and
+-- the list of free registers and free stack slots.
+
+allocateRegsAndSpill
+ :: (FR freeRegs, Outputable instr, Instruction instr)
+ => Bool -- True <=> reading (load up spilled regs)
+ -> [VirtualReg] -- don't push these out
+ -> [instr] -- spill insns
+ -> [RealReg] -- real registers allocated (accum.)
+ -> [VirtualReg] -- temps to allocate
+ -> RegM freeRegs ( [instr] , [RealReg])
+
+allocateRegsAndSpill _ _ spills alloc []
+ = return (spills, reverse alloc)
+
+allocateRegsAndSpill reading keep spills alloc (r:rs)
+ = do assig <- getAssigR
+ let doSpill = allocRegsAndSpill_spill reading keep spills alloc r rs assig
+ case lookupUFM assig r of
+ -- case (1a): already in a register
+ Just (InReg my_reg) ->
+ allocateRegsAndSpill reading keep spills (my_reg:alloc) rs
+
+ -- case (1b): already in a register (and memory)
+ -- NB1. if we're writing this register, update its assignment to be
+ -- InReg, because the memory value is no longer valid.
+ -- NB2. This is why we must process written registers here, even if they
+ -- are also read by the same instruction.
+ Just (InBoth my_reg _)
+ -> do when (not reading) (setAssigR (addToUFM assig r (InReg my_reg)))
+ allocateRegsAndSpill reading keep spills (my_reg:alloc) rs
+
+ -- Not already in a register, so we need to find a free one...
+ Just (InMem slot) | reading -> doSpill (ReadMem slot)
+ | otherwise -> doSpill WriteMem
+ Nothing | reading ->
+ pprPanic "allocateRegsAndSpill: Cannot read from uninitialized register" (ppr r)
+ -- NOTE: if the input to the NCG contains some
+ -- unreachable blocks with junk code, this panic
+ -- might be triggered. Make sure you only feed
+ -- sensible code into the NCG. In GHC.Cmm.Pipeline we
+ -- call removeUnreachableBlocks at the end for this
+ -- reason.
+
+ | otherwise -> doSpill WriteNew
+
+
+-- reading is redundant with reason, but we keep it around because it's
+-- convenient and it maintains the recursive structure of the allocator. -- EZY
+allocRegsAndSpill_spill :: (FR freeRegs, Instruction instr, Outputable instr)
+ => Bool
+ -> [VirtualReg]
+ -> [instr]
+ -> [RealReg]
+ -> VirtualReg
+ -> [VirtualReg]
+ -> UniqFM Loc
+ -> SpillLoc
+ -> RegM freeRegs ([instr], [RealReg])
+allocRegsAndSpill_spill reading keep spills alloc r rs assig spill_loc
+ = do dflags <- getDynFlags
+ let platform = targetPlatform dflags
+ freeRegs <- getFreeRegsR
+ let freeRegs_thisClass = frGetFreeRegs platform (classOfVirtualReg r) freeRegs
+
+ case freeRegs_thisClass of
+
+ -- case (2): we have a free register
+ (my_reg : _) ->
+ do spills' <- loadTemp r spill_loc my_reg spills
+
+ setAssigR (addToUFM assig r $! newLocation spill_loc my_reg)
+ setFreeRegsR $ frAllocateReg platform my_reg freeRegs
+
+ allocateRegsAndSpill reading keep spills' (my_reg : alloc) rs
+
+
+ -- case (3): we need to push something out to free up a register
+ [] ->
+ do let inRegOrBoth (InReg _) = True
+ inRegOrBoth (InBoth _ _) = True
+ inRegOrBoth _ = False
+ let candidates' =
+ flip delListFromUFM keep $
+ filterUFM inRegOrBoth $
+ assig
+ -- This is non-deterministic but we do not
+ -- currently support deterministic code-generation.
+ -- See Note [Unique Determinism and code generation]
+ let candidates = nonDetUFMToList candidates'
+
+ -- the vregs we could kick out that are already in a slot
+ let candidates_inBoth
+ = [ (temp, reg, mem)
+ | (temp, InBoth reg mem) <- candidates
+ , targetClassOfRealReg platform reg == classOfVirtualReg r ]
+
+ -- the vregs we could kick out that are only in a reg
+ -- this would require writing the reg to a new slot before using it.
+ let candidates_inReg
+ = [ (temp, reg)
+ | (temp, InReg reg) <- candidates
+ , targetClassOfRealReg platform reg == classOfVirtualReg r ]
+
+ let result
+
+ -- we have a temporary that is in both register and mem,
+ -- just free up its register for use.
+ | (temp, my_reg, slot) : _ <- candidates_inBoth
+ = do spills' <- loadTemp r spill_loc my_reg spills
+ let assig1 = addToUFM assig temp (InMem slot)
+ let assig2 = addToUFM assig1 r $! newLocation spill_loc my_reg
+
+ setAssigR assig2
+ allocateRegsAndSpill reading keep spills' (my_reg:alloc) rs
+
+ -- otherwise, we need to spill a temporary that currently
+ -- resides in a register.
+ | (temp_to_push_out, (my_reg :: RealReg)) : _
+ <- candidates_inReg
+ = do
+ (spill_insn, slot) <- spillR (RegReal my_reg) temp_to_push_out
+ let spill_store = (if reading then id else reverse)
+ [ -- COMMENT (fsLit "spill alloc")
+ spill_insn ]
+
+ -- record that this temp was spilled
+ recordSpill (SpillAlloc temp_to_push_out)
+
+ -- update the register assignment
+ let assig1 = addToUFM assig temp_to_push_out (InMem slot)
+ let assig2 = addToUFM assig1 r $! newLocation spill_loc my_reg
+ setAssigR assig2
+
+ -- if need be, load up a spilled temp into the reg we've just freed up.
+ spills' <- loadTemp r spill_loc my_reg spills
+
+ allocateRegsAndSpill reading keep
+ (spill_store ++ spills')
+ (my_reg:alloc) rs
+
+
+ -- there wasn't anything to spill, so we're screwed.
+ | otherwise
+ = pprPanic ("RegAllocLinear.allocRegsAndSpill: no spill candidates\n")
+ $ vcat
+ [ text "allocating vreg: " <> text (show r)
+ , text "assignment: " <> ppr assig
+ , text "freeRegs: " <> text (show freeRegs)
+ , text "initFreeRegs: " <> text (show (frInitFreeRegs platform `asTypeOf` freeRegs)) ]
+
+ result
+
+
+-- | Calculate a new location after a register has been loaded.
+newLocation :: SpillLoc -> RealReg -> Loc
+-- if the tmp was read from a slot, then now its in a reg as well
+newLocation (ReadMem slot) my_reg = InBoth my_reg slot
+-- writes will always result in only the register being available
+newLocation _ my_reg = InReg my_reg
+
+-- | Load up a spilled temporary if we need to (read from memory).
+loadTemp
+ :: (Instruction instr)
+ => VirtualReg -- the temp being loaded
+ -> SpillLoc -- the current location of this temp
+ -> RealReg -- the hreg to load the temp into
+ -> [instr]
+ -> RegM freeRegs [instr]
+
+loadTemp vreg (ReadMem slot) hreg spills
+ = do
+ insn <- loadR (RegReal hreg) slot
+ recordSpill (SpillLoad $ getUnique vreg)
+ return $ {- COMMENT (fsLit "spill load") : -} insn : spills
+
+loadTemp _ _ _ spills =
+ return spills
+
diff --git a/compiler/GHC/CmmToAsm/Reg/Linear/Base.hs b/compiler/GHC/CmmToAsm/Reg/Linear/Base.hs
new file mode 100644
index 0000000000..43dbab843b
--- /dev/null
+++ b/compiler/GHC/CmmToAsm/Reg/Linear/Base.hs
@@ -0,0 +1,141 @@
+
+-- | Put common type definitions here to break recursive module dependencies.
+
+module GHC.CmmToAsm.Reg.Linear.Base (
+ BlockAssignment,
+
+ Loc(..),
+ regsOfLoc,
+
+ -- for stats
+ SpillReason(..),
+ RegAllocStats(..),
+
+ -- the allocator monad
+ RA_State(..),
+)
+
+where
+
+import GhcPrelude
+
+import GHC.CmmToAsm.Reg.Linear.StackMap
+import GHC.CmmToAsm.Reg.Liveness
+import GHC.Platform.Reg
+
+import GHC.Driver.Session
+import Outputable
+import Unique
+import UniqFM
+import UniqSupply
+import GHC.Cmm.BlockId
+
+
+-- | Used to store the register assignment on entry to a basic block.
+-- We use this to handle join points, where multiple branch instructions
+-- target a particular label. We have to insert fixup code to make
+-- the register assignments from the different sources match up.
+--
+type BlockAssignment freeRegs
+ = BlockMap (freeRegs, RegMap Loc)
+
+
+-- | Where a vreg is currently stored
+-- A temporary can be marked as living in both a register and memory
+-- (InBoth), for example if it was recently loaded from a spill location.
+-- This makes it cheap to spill (no save instruction required), but we
+-- have to be careful to turn this into InReg if the value in the
+-- register is changed.
+
+-- This is also useful when a temporary is about to be clobbered. We
+-- save it in a spill location, but mark it as InBoth because the current
+-- instruction might still want to read it.
+--
+data Loc
+ -- | vreg is in a register
+ = InReg !RealReg
+
+ -- | vreg is held in a stack slot
+ | InMem {-# UNPACK #-} !StackSlot
+
+
+ -- | vreg is held in both a register and a stack slot
+ | InBoth !RealReg
+ {-# UNPACK #-} !StackSlot
+ deriving (Eq, Show, Ord)
+
+instance Outputable Loc where
+ ppr l = text (show l)
+
+
+-- | Get the reg numbers stored in this Loc.
+regsOfLoc :: Loc -> [RealReg]
+regsOfLoc (InReg r) = [r]
+regsOfLoc (InBoth r _) = [r]
+regsOfLoc (InMem _) = []
+
+
+-- | Reasons why instructions might be inserted by the spiller.
+-- Used when generating stats for -ddrop-asm-stats.
+--
+data SpillReason
+ -- | vreg was spilled to a slot so we could use its
+ -- current hreg for another vreg
+ = SpillAlloc !Unique
+
+ -- | vreg was moved because its hreg was clobbered
+ | SpillClobber !Unique
+
+ -- | vreg was loaded from a spill slot
+ | SpillLoad !Unique
+
+ -- | reg-reg move inserted during join to targets
+ | SpillJoinRR !Unique
+
+ -- | reg-mem move inserted during join to targets
+ | SpillJoinRM !Unique
+
+
+-- | Used to carry interesting stats out of the register allocator.
+data RegAllocStats
+ = RegAllocStats
+ { ra_spillInstrs :: UniqFM [Int]
+ , ra_fixupList :: [(BlockId,BlockId,BlockId)]
+ -- ^ (from,fixup,to) : We inserted fixup code between from and to
+ }
+
+
+-- | The register allocator state
+data RA_State freeRegs
+ = RA_State
+
+ {
+ -- | the current mapping from basic blocks to
+ -- the register assignments at the beginning of that block.
+ ra_blockassig :: BlockAssignment freeRegs
+
+ -- | free machine registers
+ , ra_freeregs :: !freeRegs
+
+ -- | assignment of temps to locations
+ , ra_assig :: RegMap Loc
+
+ -- | current stack delta
+ , ra_delta :: Int
+
+ -- | free stack slots for spilling
+ , ra_stack :: StackMap
+
+ -- | unique supply for generating names for join point fixup blocks.
+ , ra_us :: UniqSupply
+
+ -- | Record why things were spilled, for -ddump-asm-stats.
+ -- Just keep a list here instead of a map of regs -> reasons.
+ -- We don't want to slow down the allocator if we're not going to emit the stats.
+ , ra_spills :: [SpillReason]
+ , ra_DynFlags :: DynFlags
+
+ -- | (from,fixup,to) : We inserted fixup code between from and to
+ , ra_fixups :: [(BlockId,BlockId,BlockId)] }
+
+
diff --git a/compiler/GHC/CmmToAsm/Reg/Linear/FreeRegs.hs b/compiler/GHC/CmmToAsm/Reg/Linear/FreeRegs.hs
new file mode 100644
index 0000000000..0d72d8b6e9
--- /dev/null
+++ b/compiler/GHC/CmmToAsm/Reg/Linear/FreeRegs.hs
@@ -0,0 +1,89 @@
+{-# LANGUAGE CPP #-}
+
+module GHC.CmmToAsm.Reg.Linear.FreeRegs (
+ FR(..),
+ maxSpillSlots
+)
+
+#include "HsVersions.h"
+
+where
+
+import GhcPrelude
+
+import GHC.Platform.Reg
+import GHC.Platform.Reg.Class
+
+import GHC.Driver.Session
+import Panic
+import GHC.Platform
+
+-- -----------------------------------------------------------------------------
+-- The free register set
+-- This needs to be *efficient*
+-- Here's an inefficient 'executable specification' of the FreeRegs data type:
+--
+-- type FreeRegs = [RegNo]
+-- noFreeRegs = 0
+-- releaseReg n f = if n `elem` f then f else (n : f)
+-- initFreeRegs = allocatableRegs
+-- getFreeRegs cls f = filter ( (==cls) . regClass . RealReg ) f
+-- allocateReg f r = filter (/= r) f
+
+import qualified GHC.CmmToAsm.Reg.Linear.PPC as PPC
+import qualified GHC.CmmToAsm.Reg.Linear.SPARC as SPARC
+import qualified GHC.CmmToAsm.Reg.Linear.X86 as X86
+import qualified GHC.CmmToAsm.Reg.Linear.X86_64 as X86_64
+
+import qualified GHC.CmmToAsm.PPC.Instr as PPC.Instr
+import qualified GHC.CmmToAsm.SPARC.Instr as SPARC.Instr
+import qualified GHC.CmmToAsm.X86.Instr as X86.Instr
+
+class Show freeRegs => FR freeRegs where
+ frAllocateReg :: Platform -> RealReg -> freeRegs -> freeRegs
+ frGetFreeRegs :: Platform -> RegClass -> freeRegs -> [RealReg]
+ frInitFreeRegs :: Platform -> freeRegs
+ frReleaseReg :: Platform -> RealReg -> freeRegs -> freeRegs
+
+instance FR X86.FreeRegs where
+ frAllocateReg = \_ -> X86.allocateReg
+ frGetFreeRegs = X86.getFreeRegs
+ frInitFreeRegs = X86.initFreeRegs
+ frReleaseReg = \_ -> X86.releaseReg
+
+instance FR X86_64.FreeRegs where
+ frAllocateReg = \_ -> X86_64.allocateReg
+ frGetFreeRegs = X86_64.getFreeRegs
+ frInitFreeRegs = X86_64.initFreeRegs
+ frReleaseReg = \_ -> X86_64.releaseReg
+
+instance FR PPC.FreeRegs where
+ frAllocateReg = \_ -> PPC.allocateReg
+ frGetFreeRegs = \_ -> PPC.getFreeRegs
+ frInitFreeRegs = PPC.initFreeRegs
+ frReleaseReg = \_ -> PPC.releaseReg
+
+instance FR SPARC.FreeRegs where
+ frAllocateReg = SPARC.allocateReg
+ frGetFreeRegs = \_ -> SPARC.getFreeRegs
+ frInitFreeRegs = SPARC.initFreeRegs
+ frReleaseReg = SPARC.releaseReg
+
+maxSpillSlots :: DynFlags -> Int
+maxSpillSlots dflags
+ = case platformArch (targetPlatform dflags) of
+ ArchX86 -> X86.Instr.maxSpillSlots dflags
+ ArchX86_64 -> X86.Instr.maxSpillSlots dflags
+ ArchPPC -> PPC.Instr.maxSpillSlots dflags
+ ArchS390X -> panic "maxSpillSlots ArchS390X"
+ ArchSPARC -> SPARC.Instr.maxSpillSlots dflags
+ ArchSPARC64 -> panic "maxSpillSlots ArchSPARC64"
+ ArchARM _ _ _ -> panic "maxSpillSlots ArchARM"
+ ArchARM64 -> panic "maxSpillSlots ArchARM64"
+ ArchPPC_64 _ -> PPC.Instr.maxSpillSlots dflags
+ ArchAlpha -> panic "maxSpillSlots ArchAlpha"
+ ArchMipseb -> panic "maxSpillSlots ArchMipseb"
+ ArchMipsel -> panic "maxSpillSlots ArchMipsel"
+ ArchJavaScript-> panic "maxSpillSlots ArchJavaScript"
+ ArchUnknown -> panic "maxSpillSlots ArchUnknown"
+
diff --git a/compiler/GHC/CmmToAsm/Reg/Linear/JoinToTargets.hs b/compiler/GHC/CmmToAsm/Reg/Linear/JoinToTargets.hs
new file mode 100644
index 0000000000..b4ad1b948c
--- /dev/null
+++ b/compiler/GHC/CmmToAsm/Reg/Linear/JoinToTargets.hs
@@ -0,0 +1,378 @@
+{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-}
+
+-- | Handles joining of a jump instruction to its targets.
+
+-- The first time we encounter a jump to a particular basic block, we
+-- record the assignment of temporaries. The next time we encounter a
+-- jump to the same block, we compare our current assignment to the
+-- stored one. They might be different if spilling has occurred in one
+-- branch; so some fixup code will be required to match up the assignments.
+--
+module GHC.CmmToAsm.Reg.Linear.JoinToTargets (joinToTargets) where
+
+import GhcPrelude
+
+import GHC.CmmToAsm.Reg.Linear.State
+import GHC.CmmToAsm.Reg.Linear.Base
+import GHC.CmmToAsm.Reg.Linear.FreeRegs
+import GHC.CmmToAsm.Reg.Liveness
+import GHC.CmmToAsm.Instr
+import GHC.Platform.Reg
+
+import GHC.Cmm.BlockId
+import GHC.Cmm.Dataflow.Collections
+import Digraph
+import GHC.Driver.Session
+import Outputable
+import Unique
+import UniqFM
+import UniqSet
+
+-- | For a jump instruction at the end of a block, generate fixup code so its
+-- vregs are in the correct regs for its destination.
+--
+joinToTargets
+ :: (FR freeRegs, Instruction instr, Outputable instr)
+ => BlockMap RegSet -- ^ maps the unique of the blockid to the set of vregs
+ -- that are known to be live on the entry to each block.
+
+ -> BlockId -- ^ id of the current block
+ -> instr -- ^ branch instr on the end of the source block.
+
+ -> RegM freeRegs ([NatBasicBlock instr] -- fresh blocks of fixup code.
+ , instr) -- the original branch
+ -- instruction, but maybe
+ -- patched to jump
+ -- to a fixup block first.
+
+joinToTargets block_live id instr
+
+ -- we only need to worry about jump instructions.
+ | not $ isJumpishInstr instr
+ = return ([], instr)
+
+ | otherwise
+ = joinToTargets' block_live [] id instr (jumpDestsOfInstr instr)
+
+-----
+joinToTargets'
+ :: (FR freeRegs, Instruction instr, Outputable instr)
+ => BlockMap RegSet -- ^ maps the unique of the blockid to the set of vregs
+ -- that are known to be live on the entry to each block.
+
+ -> [NatBasicBlock instr] -- ^ acc blocks of fixup code.
+
+ -> BlockId -- ^ id of the current block
+ -> instr -- ^ branch instr on the end of the source block.
+
+ -> [BlockId] -- ^ branch destinations still to consider.
+
+ -> RegM freeRegs ([NatBasicBlock instr], instr)
+
+-- no more targets to consider. all done.
+joinToTargets' _ new_blocks _ instr []
+ = return (new_blocks, instr)
+
+-- handle a branch target.
+joinToTargets' block_live new_blocks block_id instr (dest:dests)
+ = do
+ -- get the map of where the vregs are stored on entry to each basic block.
+ block_assig <- getBlockAssigR
+
+ -- get the assignment on entry to the branch instruction.
+ assig <- getAssigR
+
+ -- adjust the current assignment to remove any vregs that are not live
+ -- on entry to the destination block.
+ let Just live_set = mapLookup dest block_live
+ let still_live uniq _ = uniq `elemUniqSet_Directly` live_set
+ let adjusted_assig = filterUFM_Directly still_live assig
+
+ -- and free up those registers which are now free.
+ let to_free =
+ [ r | (reg, loc) <- nonDetUFMToList assig
+ -- This is non-deterministic but we do not
+ -- currently support deterministic code-generation.
+ -- See Note [Unique Determinism and code generation]
+ , not (elemUniqSet_Directly reg live_set)
+ , r <- regsOfLoc loc ]
+
+ case mapLookup dest block_assig of
+ Nothing
+ -> joinToTargets_first
+ block_live new_blocks block_id instr dest dests
+ block_assig adjusted_assig to_free
+
+ Just (_, dest_assig)
+ -> joinToTargets_again
+ block_live new_blocks block_id instr dest dests
+ adjusted_assig dest_assig
+
+
+-- this is the first time we jumped to this block.
+joinToTargets_first :: (FR freeRegs, Instruction instr, Outputable instr)
+ => BlockMap RegSet
+ -> [NatBasicBlock instr]
+ -> BlockId
+ -> instr
+ -> BlockId
+ -> [BlockId]
+ -> BlockAssignment freeRegs
+ -> RegMap Loc
+ -> [RealReg]
+ -> RegM freeRegs ([NatBasicBlock instr], instr)
+joinToTargets_first block_live new_blocks block_id instr dest dests
+ block_assig src_assig
+ to_free
+
+ = do dflags <- getDynFlags
+ let platform = targetPlatform dflags
+
+ -- free up the regs that are not live on entry to this block.
+ freeregs <- getFreeRegsR
+ let freeregs' = foldl' (flip $ frReleaseReg platform) freeregs to_free
+
+ -- remember the current assignment on entry to this block.
+ setBlockAssigR (mapInsert dest (freeregs', src_assig) block_assig)
+
+ joinToTargets' block_live new_blocks block_id instr dests
+
+
+-- we've jumped to this block before
+joinToTargets_again :: (Instruction instr, FR freeRegs, Outputable instr)
+ => BlockMap RegSet
+ -> [NatBasicBlock instr]
+ -> BlockId
+ -> instr
+ -> BlockId
+ -> [BlockId]
+ -> UniqFM Loc
+ -> UniqFM Loc
+ -> RegM freeRegs ([NatBasicBlock instr], instr)
+joinToTargets_again
+ block_live new_blocks block_id instr dest dests
+ src_assig dest_assig
+
+ -- the assignments already match, no problem.
+ | nonDetUFMToList dest_assig == nonDetUFMToList src_assig
+ -- This is non-deterministic but we do not
+ -- currently support deterministic code-generation.
+ -- See Note [Unique Determinism and code generation]
+ = joinToTargets' block_live new_blocks block_id instr dests
+
+ -- assignments don't match, need fixup code
+ | otherwise
+ = do
+
+ -- make a graph of what things need to be moved where.
+ let graph = makeRegMovementGraph src_assig dest_assig
+
+ -- look for cycles in the graph. This can happen if regs need to be swapped.
+ -- Note that we depend on the fact that this function does a
+ -- bottom up traversal of the tree-like portions of the graph.
+ --
+ -- eg, if we have
+ -- R1 -> R2 -> R3
+ --
+ -- ie move value in R1 to R2 and value in R2 to R3.
+ --
+ -- We need to do the R2 -> R3 move before R1 -> R2.
+ --
+ let sccs = stronglyConnCompFromEdgedVerticesOrdR graph
+
+ -- debugging
+ {-
+ pprTrace
+ ("joinToTargets: making fixup code")
+ (vcat [ text " in block: " <> ppr block_id
+ , text " jmp instruction: " <> ppr instr
+ , text " src assignment: " <> ppr src_assig
+ , text " dest assignment: " <> ppr dest_assig
+ , text " movement graph: " <> ppr graph
+ , text " sccs of graph: " <> ppr sccs
+ , text ""])
+ (return ())
+ -}
+ delta <- getDeltaR
+ fixUpInstrs_ <- mapM (handleComponent delta instr) sccs
+ let fixUpInstrs = concat fixUpInstrs_
+
+ -- make a new basic block containing the fixup code.
+ -- At the end of the current block we will jump to the fixup one,
+ -- then that will jump to our original destination.
+ fixup_block_id <- mkBlockId <$> getUniqueR
+ let block = BasicBlock fixup_block_id
+ $ fixUpInstrs ++ mkJumpInstr dest
+
+ -- if we didn't need any fixups, then don't include the block
+ case fixUpInstrs of
+ [] -> joinToTargets' block_live new_blocks block_id instr dests
+
+ -- patch the original branch instruction so it goes to our
+ -- fixup block instead.
+ _ -> let instr' = patchJumpInstr instr
+ (\bid -> if bid == dest
+ then fixup_block_id
+ else bid) -- no change!
+
+ in do
+ {- --debugging
+ pprTrace "FixUpEdge info:"
+ (
+ text "inBlock:" <> ppr block_id $$
+ text "instr:" <> ppr instr $$
+ text "instr':" <> ppr instr' $$
+ text "fixup_block_id':" <>
+ ppr fixup_block_id $$
+ text "dest:" <> ppr dest
+ ) (return ())
+ -}
+ recordFixupBlock block_id fixup_block_id dest
+ joinToTargets' block_live (block : new_blocks)
+ block_id instr' dests
+
+
+-- | Construct a graph of register\/spill movements.
+--
+-- Cyclic components seem to occur only very rarely.
+--
+-- We cut some corners by not handling memory-to-memory moves.
+-- This shouldn't happen because every temporary gets its own stack slot.
+--
+makeRegMovementGraph :: RegMap Loc -> RegMap Loc -> [Node Loc Unique]
+makeRegMovementGraph adjusted_assig dest_assig
+ = [ node | (vreg, src) <- nonDetUFMToList adjusted_assig
+ -- This is non-deterministic but we do not
+ -- currently support deterministic code-generation.
+ -- See Note [Unique Determinism and code generation]
+ -- source reg might not be needed at the dest:
+ , Just loc <- [lookupUFM_Directly dest_assig vreg]
+ , node <- expandNode vreg src loc ]
+
+
+-- | Expand out the destination, so InBoth destinations turn into
+-- a combination of InReg and InMem.
+
+-- The InBoth handling is a little tricky here. If the destination is
+-- InBoth, then we must ensure that the value ends up in both locations.
+-- An InBoth destination must conflict with an InReg or InMem source, so
+-- we expand an InBoth destination as necessary.
+--
+-- An InBoth source is slightly different: we only care about the register
+-- that the source value is in, so that we can move it to the destinations.
+--
+expandNode
+ :: a
+ -> Loc -- ^ source of move
+ -> Loc -- ^ destination of move
+ -> [Node Loc a ]
+
+expandNode vreg loc@(InReg src) (InBoth dst mem)
+ | src == dst = [DigraphNode vreg loc [InMem mem]]
+ | otherwise = [DigraphNode vreg loc [InReg dst, InMem mem]]
+
+expandNode vreg loc@(InMem src) (InBoth dst mem)
+ | src == mem = [DigraphNode vreg loc [InReg dst]]
+ | otherwise = [DigraphNode vreg loc [InReg dst, InMem mem]]
+
+expandNode _ (InBoth _ src) (InMem dst)
+ | src == dst = [] -- guaranteed to be true
+
+expandNode _ (InBoth src _) (InReg dst)
+ | src == dst = []
+
+expandNode vreg (InBoth src _) dst
+ = expandNode vreg (InReg src) dst
+
+expandNode vreg src dst
+ | src == dst = []
+ | otherwise = [DigraphNode vreg src [dst]]
+
+
+-- | Generate fixup code for a particular component in the move graph
+-- This component tells us what values need to be moved to what
+-- destinations. We have eliminated any possibility of single-node
+-- cycles in expandNode above.
+--
+handleComponent
+ :: Instruction instr
+ => Int -> instr -> SCC (Node Loc Unique)
+ -> RegM freeRegs [instr]
+
+-- If the graph is acyclic then we won't get the swapping problem below.
+-- In this case we can just do the moves directly, and avoid having to
+-- go via a spill slot.
+--
+handleComponent delta _ (AcyclicSCC (DigraphNode vreg src dsts))
+ = mapM (makeMove delta vreg src) dsts
+
+
+-- Handle some cyclic moves.
+-- This can happen if we have two regs that need to be swapped.
+-- eg:
+-- vreg source loc dest loc
+-- (vreg1, InReg r1, [InReg r2])
+-- (vreg2, InReg r2, [InReg r1])
+--
+-- To avoid needing a temp register, we just spill all the source regs, then
+-- reload them into their destination regs.
+--
+-- Note that we cannot have cycles that involve memory locations as
+-- sources or as a single destination, because memory locations (stack slots)
+-- are allocated exclusively for a virtual register and therefore cannot
+-- require a fixup.
+--
+handleComponent delta instr
+ (CyclicSCC ((DigraphNode vreg (InReg sreg) ((InReg dreg: _))) : rest))
+ -- dest list may have more than one element, if the reg is also InMem.
+ = do
+ -- spill the source into its slot
+ (instrSpill, slot)
+ <- spillR (RegReal sreg) vreg
+
+ -- reload into destination reg
+ instrLoad <- loadR (RegReal dreg) slot
+
+ remainingFixUps <- mapM (handleComponent delta instr)
+ (stronglyConnCompFromEdgedVerticesOrdR rest)
+
+ -- make sure to do all the reloads after all the spills,
+ -- so we don't end up clobbering the source values.
+ return ([instrSpill] ++ concat remainingFixUps ++ [instrLoad])
+
+handleComponent _ _ (CyclicSCC _)
+ = panic "Register Allocator: handleComponent cyclic"
+
+
+-- | Move a vreg between these two locations.
+--
+makeMove
+ :: Instruction instr
+ => Int -- ^ current C stack delta.
+ -> Unique -- ^ unique of the vreg that we're moving.
+ -> Loc -- ^ source location.
+ -> Loc -- ^ destination location.
+ -> RegM freeRegs instr -- ^ move instruction.
+
+makeMove delta vreg src dst
+ = do dflags <- getDynFlags
+ let platform = targetPlatform dflags
+
+ case (src, dst) of
+ (InReg s, InReg d) ->
+ do recordSpill (SpillJoinRR vreg)
+ return $ mkRegRegMoveInstr platform (RegReal s) (RegReal d)
+ (InMem s, InReg d) ->
+ do recordSpill (SpillJoinRM vreg)
+ return $ mkLoadInstr dflags (RegReal d) delta s
+ (InReg s, InMem d) ->
+ do recordSpill (SpillJoinRM vreg)
+ return $ mkSpillInstr dflags (RegReal s) delta d
+ _ ->
+ -- we don't handle memory to memory moves.
+ -- they shouldn't happen because we don't share
+ -- stack slots between vregs.
+ panic ("makeMove " ++ show vreg ++ " (" ++ show src ++ ") ("
+ ++ show dst ++ ")"
+ ++ " we don't handle mem->mem moves.")
+
diff --git a/compiler/GHC/CmmToAsm/Reg/Linear/PPC.hs b/compiler/GHC/CmmToAsm/Reg/Linear/PPC.hs
new file mode 100644
index 0000000000..ce0a187647
--- /dev/null
+++ b/compiler/GHC/CmmToAsm/Reg/Linear/PPC.hs
@@ -0,0 +1,60 @@
+-- | Free regs map for PowerPC
+module GHC.CmmToAsm.Reg.Linear.PPC where
+
+import GhcPrelude
+
+import GHC.CmmToAsm.PPC.Regs
+import GHC.Platform.Reg.Class
+import GHC.Platform.Reg
+
+import Outputable
+import GHC.Platform
+
+import Data.Word
+import Data.Bits
+
+-- The PowerPC has 32 integer and 32 floating point registers.
+-- This is 32bit PowerPC, so Word64 is inefficient - two Word32s are much
+-- better.
+-- Note that when getFreeRegs scans for free registers, it starts at register
+-- 31 and counts down. This is a hack for the PowerPC - the higher-numbered
+-- registers are callee-saves, while the lower regs are caller-saves, so it
+-- makes sense to start at the high end.
+-- Apart from that, the code does nothing PowerPC-specific, so feel free to
+-- add your favourite platform to the #if (if you have 64 registers but only
+-- 32-bit words).
+
+data FreeRegs = FreeRegs !Word32 !Word32
+ deriving( Show ) -- The Show is used in an ASSERT
+
+noFreeRegs :: FreeRegs
+noFreeRegs = FreeRegs 0 0
+
+releaseReg :: RealReg -> FreeRegs -> FreeRegs
+releaseReg (RealRegSingle r) (FreeRegs g f)
+ | r > 31 = FreeRegs g (f .|. (1 `shiftL` (r - 32)))
+ | otherwise = FreeRegs (g .|. (1 `shiftL` r)) f
+
+releaseReg _ _
+ = panic "RegAlloc.Linear.PPC.releaseReg: bad reg"
+
+initFreeRegs :: Platform -> FreeRegs
+initFreeRegs platform = foldl' (flip releaseReg) noFreeRegs (allocatableRegs platform)
+
+getFreeRegs :: RegClass -> FreeRegs -> [RealReg] -- lazily
+getFreeRegs cls (FreeRegs g f)
+ | RcDouble <- cls = go f (0x80000000) 63
+ | RcInteger <- cls = go g (0x80000000) 31
+ | otherwise = pprPanic "RegAllocLinear.getFreeRegs: Bad register class" (ppr cls)
+ where
+ go _ 0 _ = []
+ go x m i | x .&. m /= 0 = RealRegSingle i : (go x (m `shiftR` 1) $! i-1)
+ | otherwise = go x (m `shiftR` 1) $! i-1
+
+allocateReg :: RealReg -> FreeRegs -> FreeRegs
+allocateReg (RealRegSingle r) (FreeRegs g f)
+ | r > 31 = FreeRegs g (f .&. complement (1 `shiftL` (r - 32)))
+ | otherwise = FreeRegs (g .&. complement (1 `shiftL` r)) f
+
+allocateReg _ _
+ = panic "RegAlloc.Linear.PPC.allocateReg: bad reg"
diff --git a/compiler/GHC/CmmToAsm/Reg/Linear/SPARC.hs b/compiler/GHC/CmmToAsm/Reg/Linear/SPARC.hs
new file mode 100644
index 0000000000..7fa85f0913
--- /dev/null
+++ b/compiler/GHC/CmmToAsm/Reg/Linear/SPARC.hs
@@ -0,0 +1,188 @@
+{-# LANGUAGE CPP #-}
+
+-- | Free regs map for SPARC
+module GHC.CmmToAsm.Reg.Linear.SPARC where
+
+import GhcPrelude
+
+import GHC.CmmToAsm.SPARC.Regs
+import GHC.Platform.Reg.Class
+import GHC.Platform.Reg
+
+import GHC.Platform.Regs
+import Outputable
+import GHC.Platform
+
+import Data.Word
+import Data.Bits
+
+
+--------------------------------------------------------------------------------
+-- SPARC is like PPC, except for twinning of floating point regs.
+-- When we allocate a double reg we must take an even numbered
+-- float reg, as well as the one after it.
+
+
+-- Holds bitmaps showing what registers are currently allocated.
+-- The float and double reg bitmaps overlap, but we only alloc
+-- float regs into the float map, and double regs into the double map.
+--
+-- Free regs have a bit set in the corresponding bitmap.
+--
+data FreeRegs
+ = FreeRegs
+ !Word32 -- int reg bitmap regs 0..31
+ !Word32 -- float reg bitmap regs 32..63
+ !Word32 -- double reg bitmap regs 32..63
+
+instance Show FreeRegs where
+ show = showFreeRegs
+
+-- | A reg map where no regs are free to be allocated.
+noFreeRegs :: FreeRegs
+noFreeRegs = FreeRegs 0 0 0
+
+
+-- | The initial set of free regs.
+initFreeRegs :: Platform -> FreeRegs
+initFreeRegs platform
+ = foldl' (flip $ releaseReg platform) noFreeRegs allocatableRegs
+
+
+-- | Get all the free registers of this class.
+getFreeRegs :: RegClass -> FreeRegs -> [RealReg] -- lazily
+getFreeRegs cls (FreeRegs g f d)
+ | RcInteger <- cls = map RealRegSingle $ go 1 g 1 0
+ | RcFloat <- cls = map RealRegSingle $ go 1 f 1 32
+ | RcDouble <- cls = map (\i -> RealRegPair i (i+1)) $ go 2 d 1 32
+#if __GLASGOW_HASKELL__ <= 810
+ | otherwise = pprPanic "RegAllocLinear.getFreeRegs: Bad register class " (ppr cls)
+#endif
+ where
+ go _ _ 0 _
+ = []
+
+ go step bitmap mask ix
+ | bitmap .&. mask /= 0
+ = ix : (go step bitmap (mask `shiftL` step) $! ix + step)
+
+ | otherwise
+ = go step bitmap (mask `shiftL` step) $! ix + step
+
+
+-- | Grab a register.
+allocateReg :: Platform -> RealReg -> FreeRegs -> FreeRegs
+allocateReg platform
+ reg@(RealRegSingle r)
+ (FreeRegs g f d)
+
+ -- refuse to allocate pinned (non-free) regs
+ | not $ freeReg platform r
+ = pprPanic "SPARC.FreeRegs.allocateReg: not allocating pinned reg" (ppr reg)
+
+ -- a general purpose reg
+ | r <= 31
+ = let mask = complement (bitMask r)
+ in FreeRegs
+ (g .&. mask)
+ f
+ d
+
+ -- a float reg
+ | r >= 32, r <= 63
+ = let mask = complement (bitMask (r - 32))
+
+ -- the mask of the double this FP reg aliases
+ maskLow = if r `mod` 2 == 0
+ then complement (bitMask (r - 32))
+ else complement (bitMask (r - 32 - 1))
+ in FreeRegs
+ g
+ (f .&. mask)
+ (d .&. maskLow)
+
+ | otherwise
+ = pprPanic "SPARC.FreeRegs.releaseReg: not allocating bad reg" (ppr reg)
+
+allocateReg _
+ reg@(RealRegPair r1 r2)
+ (FreeRegs g f d)
+
+ | r1 >= 32, r1 <= 63, r1 `mod` 2 == 0
+ , r2 >= 32, r2 <= 63
+ = let mask1 = complement (bitMask (r1 - 32))
+ mask2 = complement (bitMask (r2 - 32))
+ in
+ FreeRegs
+ g
+ ((f .&. mask1) .&. mask2)
+ (d .&. mask1)
+
+ | otherwise
+ = pprPanic "SPARC.FreeRegs.releaseReg: not allocating bad reg" (ppr reg)
+
+
+
+-- | Release a register from allocation.
+-- The register liveness information says that most regs die after a C call,
+-- but we still don't want to allocate to some of them.
+--
+releaseReg :: Platform -> RealReg -> FreeRegs -> FreeRegs
+releaseReg platform
+ reg@(RealRegSingle r)
+ regs@(FreeRegs g f d)
+
+ -- don't release pinned reg
+ | not $ freeReg platform r
+ = regs
+
+ -- a general purpose reg
+ | r <= 31
+ = let mask = bitMask r
+ in FreeRegs (g .|. mask) f d
+
+ -- a float reg
+ | r >= 32, r <= 63
+ = let mask = bitMask (r - 32)
+
+ -- the mask of the double this FP reg aliases
+ maskLow = if r `mod` 2 == 0
+ then bitMask (r - 32)
+ else bitMask (r - 32 - 1)
+ in FreeRegs
+ g
+ (f .|. mask)
+ (d .|. maskLow)
+
+ | otherwise
+ = pprPanic "SPARC.FreeRegs.releaseReg: not releasing bad reg" (ppr reg)
+
+releaseReg _
+ reg@(RealRegPair r1 r2)
+ (FreeRegs g f d)
+
+ | r1 >= 32, r1 <= 63, r1 `mod` 2 == 0
+ , r2 >= 32, r2 <= 63
+ = let mask1 = bitMask (r1 - 32)
+ mask2 = bitMask (r2 - 32)
+ in
+ FreeRegs
+ g
+ ((f .|. mask1) .|. mask2)
+ (d .|. mask1)
+
+ | otherwise
+ = pprPanic "SPARC.FreeRegs.releaseReg: not releasing bad reg" (ppr reg)
+
+
+
+bitMask :: Int -> Word32
+bitMask n = 1 `shiftL` n
+
+
+showFreeRegs :: FreeRegs -> String
+showFreeRegs regs
+ = "FreeRegs\n"
+ ++ " integer: " ++ (show $ getFreeRegs RcInteger regs) ++ "\n"
+ ++ " float: " ++ (show $ getFreeRegs RcFloat regs) ++ "\n"
+ ++ " double: " ++ (show $ getFreeRegs RcDouble regs) ++ "\n"
diff --git a/compiler/GHC/CmmToAsm/Reg/Linear/StackMap.hs b/compiler/GHC/CmmToAsm/Reg/Linear/StackMap.hs
new file mode 100644
index 0000000000..630b101fc7
--- /dev/null
+++ b/compiler/GHC/CmmToAsm/Reg/Linear/StackMap.hs
@@ -0,0 +1,61 @@
+
+-- | The assignment of virtual registers to stack slots
+
+-- We have lots of stack slots. Memory-to-memory moves are a pain on most
+-- architectures. Therefore, we avoid having to generate memory-to-memory moves
+-- by simply giving every virtual register its own stack slot.
+
+-- The StackMap stack map keeps track of virtual register - stack slot
+-- associations and of which stack slots are still free. Once it has been
+-- associated, a stack slot is never "freed" or removed from the StackMap again;
+-- it remains associated until we are done with the current CmmProc.
+--
+module GHC.CmmToAsm.Reg.Linear.StackMap (
+ StackSlot,
+ StackMap(..),
+ emptyStackMap,
+ getStackSlotFor,
+ getStackUse
+)
+
+where
+
+import GhcPrelude
+
+import GHC.Driver.Session
+import UniqFM
+import Unique
+
+
+-- | Identifier for a stack slot.
+type StackSlot = Int
+
+data StackMap
+ = StackMap
+ { -- | The slots that are still available to be allocated.
+ stackMapNextFreeSlot :: !Int
+
+ -- | Assignment of vregs to stack slots.
+ , stackMapAssignment :: UniqFM StackSlot }
+
+
+-- | An empty stack map, with all slots available.
+emptyStackMap :: DynFlags -> StackMap
+emptyStackMap _ = StackMap 0 emptyUFM
+
+
+-- | If this vreg unique already has a stack assignment then return the slot number,
+-- otherwise allocate a new slot, and update the map.
+--
+getStackSlotFor :: StackMap -> Unique -> (StackMap, Int)
+
+getStackSlotFor fs@(StackMap _ reserved) reg
+ | Just slot <- lookupUFM reserved reg = (fs, slot)
+
+getStackSlotFor (StackMap freeSlot reserved) reg =
+ (StackMap (freeSlot+1) (addToUFM reserved reg freeSlot), freeSlot)
+
+-- | Return the number of stack slots that were allocated
+getStackUse :: StackMap -> Int
+getStackUse (StackMap freeSlot _) = freeSlot
+
diff --git a/compiler/GHC/CmmToAsm/Reg/Linear/State.hs b/compiler/GHC/CmmToAsm/Reg/Linear/State.hs
new file mode 100644
index 0000000000..a167cc7e00
--- /dev/null
+++ b/compiler/GHC/CmmToAsm/Reg/Linear/State.hs
@@ -0,0 +1,184 @@
+{-# LANGUAGE CPP, PatternSynonyms, DeriveFunctor #-}
+
+#if !defined(GHC_LOADED_INTO_GHCI)
+{-# LANGUAGE UnboxedTuples #-}
+#endif
+
+-- | State monad for the linear register allocator.
+
+-- Here we keep all the state that the register allocator keeps track
+-- of as it walks the instructions in a basic block.
+
+module GHC.CmmToAsm.Reg.Linear.State (
+ RA_State(..),
+ RegM,
+ runR,
+
+ spillR,
+ loadR,
+
+ getFreeRegsR,
+ setFreeRegsR,
+
+ getAssigR,
+ setAssigR,
+
+ getBlockAssigR,
+ setBlockAssigR,
+
+ setDeltaR,
+ getDeltaR,
+
+ getUniqueR,
+
+ recordSpill,
+ recordFixupBlock
+)
+where
+
+import GhcPrelude
+
+import GHC.CmmToAsm.Reg.Linear.Stats
+import GHC.CmmToAsm.Reg.Linear.StackMap
+import GHC.CmmToAsm.Reg.Linear.Base
+import GHC.CmmToAsm.Reg.Liveness
+import GHC.CmmToAsm.Instr
+import GHC.Platform.Reg
+import GHC.Cmm.BlockId
+
+import GHC.Driver.Session
+import Unique
+import UniqSupply
+
+import Control.Monad (ap)
+
+-- Avoids using unboxed tuples when loading into GHCi
+#if !defined(GHC_LOADED_INTO_GHCI)
+
+type RA_Result freeRegs a = (# RA_State freeRegs, a #)
+
+pattern RA_Result :: a -> b -> (# a, b #)
+pattern RA_Result a b = (# a, b #)
+{-# COMPLETE RA_Result #-}
+#else
+
+data RA_Result freeRegs a = RA_Result {-# UNPACK #-} !(RA_State freeRegs) !a
+ deriving (Functor)
+
+#endif
+
+-- | The register allocator monad type.
+newtype RegM freeRegs a
+ = RegM { unReg :: RA_State freeRegs -> RA_Result freeRegs a }
+ deriving (Functor)
+
+instance Applicative (RegM freeRegs) where
+ pure a = RegM $ \s -> RA_Result s a
+ (<*>) = ap
+
+instance Monad (RegM freeRegs) where
+ m >>= k = RegM $ \s -> case unReg m s of { RA_Result s a -> unReg (k a) s }
+
+instance HasDynFlags (RegM a) where
+ getDynFlags = RegM $ \s -> RA_Result s (ra_DynFlags s)
+
+
+-- | Run a computation in the RegM register allocator monad.
+runR :: DynFlags
+ -> BlockAssignment freeRegs
+ -> freeRegs
+ -> RegMap Loc
+ -> StackMap
+ -> UniqSupply
+ -> RegM freeRegs a
+ -> (BlockAssignment freeRegs, StackMap, RegAllocStats, a)
+
+runR dflags block_assig freeregs assig stack us thing =
+ case unReg thing
+ (RA_State
+ { ra_blockassig = block_assig
+ , ra_freeregs = freeregs
+ , ra_assig = assig
+ , ra_delta = 0{-???-}
+ , ra_stack = stack
+ , ra_us = us
+ , ra_spills = []
+ , ra_DynFlags = dflags
+ , ra_fixups = [] })
+ of
+ RA_Result state returned_thing
+ -> (ra_blockassig state, ra_stack state, makeRAStats state, returned_thing)
+
+
+-- | Make register allocator stats from its final state.
+makeRAStats :: RA_State freeRegs -> RegAllocStats
+makeRAStats state
+ = RegAllocStats
+ { ra_spillInstrs = binSpillReasons (ra_spills state)
+ , ra_fixupList = ra_fixups state }
+
+
+spillR :: Instruction instr
+ => Reg -> Unique -> RegM freeRegs (instr, Int)
+
+spillR reg temp = RegM $ \ s@RA_State{ra_delta=delta, ra_stack=stack0} ->
+ let dflags = ra_DynFlags s
+ (stack1,slot) = getStackSlotFor stack0 temp
+ instr = mkSpillInstr dflags reg delta slot
+ in
+ RA_Result s{ra_stack=stack1} (instr,slot)
+
+
+loadR :: Instruction instr
+ => Reg -> Int -> RegM freeRegs instr
+
+loadR reg slot = RegM $ \ s@RA_State{ra_delta=delta} ->
+ let dflags = ra_DynFlags s
+ in RA_Result s (mkLoadInstr dflags reg delta slot)
+
+getFreeRegsR :: RegM freeRegs freeRegs
+getFreeRegsR = RegM $ \ s@RA_State{ra_freeregs = freeregs} ->
+ RA_Result s freeregs
+
+setFreeRegsR :: freeRegs -> RegM freeRegs ()
+setFreeRegsR regs = RegM $ \ s ->
+ RA_Result s{ra_freeregs = regs} ()
+
+getAssigR :: RegM freeRegs (RegMap Loc)
+getAssigR = RegM $ \ s@RA_State{ra_assig = assig} ->
+ RA_Result s assig
+
+setAssigR :: RegMap Loc -> RegM freeRegs ()
+setAssigR assig = RegM $ \ s ->
+ RA_Result s{ra_assig=assig} ()
+
+getBlockAssigR :: RegM freeRegs (BlockAssignment freeRegs)
+getBlockAssigR = RegM $ \ s@RA_State{ra_blockassig = assig} ->
+ RA_Result s assig
+
+setBlockAssigR :: BlockAssignment freeRegs -> RegM freeRegs ()
+setBlockAssigR assig = RegM $ \ s ->
+ RA_Result s{ra_blockassig = assig} ()
+
+setDeltaR :: Int -> RegM freeRegs ()
+setDeltaR n = RegM $ \ s ->
+ RA_Result s{ra_delta = n} ()
+
+getDeltaR :: RegM freeRegs Int
+getDeltaR = RegM $ \s -> RA_Result s (ra_delta s)
+
+getUniqueR :: RegM freeRegs Unique
+getUniqueR = RegM $ \s ->
+ case takeUniqFromSupply (ra_us s) of
+ (uniq, us) -> RA_Result s{ra_us = us} uniq
+
+
+-- | Record that a spill instruction was inserted, for profiling.
+recordSpill :: SpillReason -> RegM freeRegs ()
+recordSpill spill
+ = RegM $ \s -> RA_Result (s { ra_spills = spill : ra_spills s }) ()
+
+-- | Record a created fixup block
+recordFixupBlock :: BlockId -> BlockId -> BlockId -> RegM freeRegs ()
+recordFixupBlock from between to
+ = RegM $ \s -> RA_Result (s { ra_fixups = (from,between,to) : ra_fixups s }) ()
diff --git a/compiler/GHC/CmmToAsm/Reg/Linear/Stats.hs b/compiler/GHC/CmmToAsm/Reg/Linear/Stats.hs
new file mode 100644
index 0000000000..1176b220a3
--- /dev/null
+++ b/compiler/GHC/CmmToAsm/Reg/Linear/Stats.hs
@@ -0,0 +1,87 @@
+module GHC.CmmToAsm.Reg.Linear.Stats (
+ binSpillReasons,
+ countRegRegMovesNat,
+ pprStats
+)
+
+where
+
+import GhcPrelude
+
+import GHC.CmmToAsm.Reg.Linear.Base
+import GHC.CmmToAsm.Reg.Liveness
+import GHC.CmmToAsm.Instr
+
+import UniqFM
+import Outputable
+
+import State
+
+-- | Build a map of how many times each reg was alloced, clobbered, loaded etc.
+binSpillReasons
+ :: [SpillReason] -> UniqFM [Int]
+
+binSpillReasons reasons
+ = addListToUFM_C
+ (zipWith (+))
+ emptyUFM
+ (map (\reason -> case reason of
+ SpillAlloc r -> (r, [1, 0, 0, 0, 0])
+ SpillClobber r -> (r, [0, 1, 0, 0, 0])
+ SpillLoad r -> (r, [0, 0, 1, 0, 0])
+ SpillJoinRR r -> (r, [0, 0, 0, 1, 0])
+ SpillJoinRM r -> (r, [0, 0, 0, 0, 1])) reasons)
+
+
+-- | Count reg-reg moves remaining in this code.
+countRegRegMovesNat
+ :: Instruction instr
+ => NatCmmDecl statics instr -> Int
+
+countRegRegMovesNat cmm
+ = execState (mapGenBlockTopM countBlock cmm) 0
+ where
+ countBlock b@(BasicBlock _ instrs)
+ = do mapM_ countInstr instrs
+ return b
+
+ countInstr instr
+ | Just _ <- takeRegRegMoveInstr instr
+ = do modify (+ 1)
+ return instr
+
+ | otherwise
+ = return instr
+
+
+-- | Pretty print some RegAllocStats
+pprStats
+ :: Instruction instr
+ => [NatCmmDecl statics instr] -> [RegAllocStats] -> SDoc
+
+pprStats code statss
+ = let -- sum up all the instrs inserted by the spiller
+ spills = foldl' (plusUFM_C (zipWith (+)))
+ emptyUFM
+ $ map ra_spillInstrs statss
+
+ spillTotals = foldl' (zipWith (+))
+ [0, 0, 0, 0, 0]
+ $ nonDetEltsUFM spills
+ -- See Note [Unique Determinism and code generation]
+
+ -- count how many reg-reg-moves remain in the code
+ moves = sum $ map countRegRegMovesNat code
+
+ pprSpill (reg, spills)
+ = parens $ (hcat $ punctuate (text ", ") (doubleQuotes (ppr reg) : map ppr spills))
+
+ in ( text "-- spills-added-total"
+ $$ text "-- (allocs, clobbers, loads, joinRR, joinRM, reg_reg_moves_remaining)"
+ $$ (parens $ (hcat $ punctuate (text ", ") (map ppr spillTotals ++ [ppr moves])))
+ $$ text ""
+ $$ text "-- spills-added"
+ $$ text "-- (reg_name, allocs, clobbers, loads, joinRR, joinRM)"
+ $$ (pprUFMWithKeys spills (vcat . map pprSpill))
+ $$ text "")
+
diff --git a/compiler/GHC/CmmToAsm/Reg/Linear/X86.hs b/compiler/GHC/CmmToAsm/Reg/Linear/X86.hs
new file mode 100644
index 0000000000..ce103bd6b2
--- /dev/null
+++ b/compiler/GHC/CmmToAsm/Reg/Linear/X86.hs
@@ -0,0 +1,52 @@
+
+-- | Free regs map for i386
+module GHC.CmmToAsm.Reg.Linear.X86 where
+
+import GhcPrelude
+
+import GHC.CmmToAsm.X86.Regs
+import GHC.Platform.Reg.Class
+import GHC.Platform.Reg
+import Panic
+import GHC.Platform
+
+import Data.Word
+import Data.Bits
+
+newtype FreeRegs = FreeRegs Word32
+ deriving Show
+
+noFreeRegs :: FreeRegs
+noFreeRegs = FreeRegs 0
+
+releaseReg :: RealReg -> FreeRegs -> FreeRegs
+releaseReg (RealRegSingle n) (FreeRegs f)
+ = FreeRegs (f .|. (1 `shiftL` n))
+
+releaseReg _ _
+ = panic "RegAlloc.Linear.X86.FreeRegs.releaseReg: no reg"
+
+initFreeRegs :: Platform -> FreeRegs
+initFreeRegs platform
+ = foldl' (flip releaseReg) noFreeRegs (allocatableRegs platform)
+
+getFreeRegs :: Platform -> RegClass -> FreeRegs -> [RealReg] -- lazily
+getFreeRegs platform cls (FreeRegs f) = go f 0
+
+ where go 0 _ = []
+ go n m
+ | n .&. 1 /= 0 && classOfRealReg platform (RealRegSingle m) == cls
+ = RealRegSingle m : (go (n `shiftR` 1) $! (m+1))
+
+ | otherwise
+ = go (n `shiftR` 1) $! (m+1)
+ -- ToDo: there's no point looking through all the integer registers
+ -- in order to find a floating-point one.
+
+allocateReg :: RealReg -> FreeRegs -> FreeRegs
+allocateReg (RealRegSingle r) (FreeRegs f)
+ = FreeRegs (f .&. complement (1 `shiftL` r))
+
+allocateReg _ _
+ = panic "RegAlloc.Linear.X86.FreeRegs.allocateReg: no reg"
+
diff --git a/compiler/GHC/CmmToAsm/Reg/Linear/X86_64.hs b/compiler/GHC/CmmToAsm/Reg/Linear/X86_64.hs
new file mode 100644
index 0000000000..322ddd6bdd
--- /dev/null
+++ b/compiler/GHC/CmmToAsm/Reg/Linear/X86_64.hs
@@ -0,0 +1,53 @@
+
+-- | Free regs map for x86_64
+module GHC.CmmToAsm.Reg.Linear.X86_64 where
+
+import GhcPrelude
+
+import GHC.CmmToAsm.X86.Regs
+import GHC.Platform.Reg.Class
+import GHC.Platform.Reg
+import Panic
+import GHC.Platform
+
+import Data.Word
+import Data.Bits
+
+newtype FreeRegs = FreeRegs Word64
+ deriving Show
+
+noFreeRegs :: FreeRegs
+noFreeRegs = FreeRegs 0
+
+releaseReg :: RealReg -> FreeRegs -> FreeRegs
+releaseReg (RealRegSingle n) (FreeRegs f)
+ = FreeRegs (f .|. (1 `shiftL` n))
+
+releaseReg _ _
+ = panic "RegAlloc.Linear.X86_64.FreeRegs.releaseReg: no reg"
+
+initFreeRegs :: Platform -> FreeRegs
+initFreeRegs platform
+ = foldl' (flip releaseReg) noFreeRegs (allocatableRegs platform)
+
+getFreeRegs :: Platform -> RegClass -> FreeRegs -> [RealReg] -- lazily
+getFreeRegs platform cls (FreeRegs f) = go f 0
+
+ where go 0 _ = []
+ go n m
+ | n .&. 1 /= 0 && classOfRealReg platform (RealRegSingle m) == cls
+ = RealRegSingle m : (go (n `shiftR` 1) $! (m+1))
+
+ | otherwise
+ = go (n `shiftR` 1) $! (m+1)
+ -- ToDo: there's no point looking through all the integer registers
+ -- in order to find a floating-point one.
+
+allocateReg :: RealReg -> FreeRegs -> FreeRegs
+allocateReg (RealRegSingle r) (FreeRegs f)
+ = FreeRegs (f .&. complement (1 `shiftL` r))
+
+allocateReg _ _
+ = panic "RegAlloc.Linear.X86_64.FreeRegs.allocateReg: no reg"
+
+
diff --git a/compiler/GHC/CmmToAsm/Reg/Liveness.hs b/compiler/GHC/CmmToAsm/Reg/Liveness.hs
new file mode 100644
index 0000000000..03b8123f93
--- /dev/null
+++ b/compiler/GHC/CmmToAsm/Reg/Liveness.hs
@@ -0,0 +1,1025 @@
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE TypeFamilies #-}
+
+{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-}
+
+-----------------------------------------------------------------------------
+--
+-- The register liveness determinator
+--
+-- (c) The University of Glasgow 2004-2013
+--
+-----------------------------------------------------------------------------
+
+module GHC.CmmToAsm.Reg.Liveness (
+ RegSet,
+ RegMap, emptyRegMap,
+ BlockMap, mapEmpty,
+ LiveCmmDecl,
+ InstrSR (..),
+ LiveInstr (..),
+ Liveness (..),
+ LiveInfo (..),
+ LiveBasicBlock,
+
+ mapBlockTop, mapBlockTopM, mapSCCM,
+ mapGenBlockTop, mapGenBlockTopM,
+ stripLive,
+ stripLiveBlock,
+ slurpConflicts,
+ slurpReloadCoalesce,
+ eraseDeltasLive,
+ patchEraseLive,
+ patchRegsLiveInstr,
+ reverseBlocksInTops,
+ regLiveness,
+ cmmTopLiveness
+ ) where
+import GhcPrelude
+
+import GHC.Platform.Reg
+import GHC.CmmToAsm.Instr
+
+import GHC.Cmm.BlockId
+import GHC.CmmToAsm.CFG
+import GHC.Cmm.Dataflow.Collections
+import GHC.Cmm.Dataflow.Label
+import GHC.Cmm hiding (RegSet, emptyRegSet)
+
+import Digraph
+import GHC.Driver.Session
+import MonadUtils
+import Outputable
+import GHC.Platform
+import UniqSet
+import UniqFM
+import UniqSupply
+import Bag
+import State
+
+import Data.List
+import Data.Maybe
+import Data.IntSet (IntSet)
+
+-----------------------------------------------------------------------------
+-- | A set of registers (virtual or real).
+type RegSet = UniqSet Reg
+
+-- | A map keyed on register uniques.
+type RegMap a = UniqFM a
+
+-- | An empty register map.
+emptyRegMap :: UniqFM a
+emptyRegMap = emptyUFM
+
+-- | An empty register set.
+emptyRegSet :: RegSet
+emptyRegSet = emptyUniqSet
+
+-- | A map keyed on basic-block labels.
+type BlockMap a = LabelMap a
+
+
+-- | A top level thing which carries liveness information.
+-- The basic blocks are kept grouped by strongly connected component.
+type LiveCmmDecl statics instr
+ = GenCmmDecl
+ statics
+ LiveInfo
+ [SCC (LiveBasicBlock instr)]
+
+
+-- | The register allocator also wants to use SPILL/RELOAD meta instructions,
+-- so we'll keep those here.
+data InstrSR instr
+ -- | A real machine instruction
+ = Instr instr
+
+ -- | spill this reg to a stack slot
+ | SPILL Reg Int
+
+ -- | reload this reg from a stack slot
+ | RELOAD Int Reg
+
+-- Lift the 'Instruction' operations over the wrapper: a SPILL reads its
+-- register, a RELOAD writes its register, everything else defers to the
+-- wrapped real instruction.
+instance Instruction instr => Instruction (InstrSR instr) where
+ regUsageOfInstr platform i
+ = case i of
+ Instr instr -> regUsageOfInstr platform instr
+ SPILL reg _ -> RU [reg] []
+ RELOAD _ reg -> RU [] [reg]
+
+ patchRegsOfInstr i f
+ = case i of
+ Instr instr -> Instr (patchRegsOfInstr instr f)
+ SPILL reg slot -> SPILL (f reg) slot
+ RELOAD slot reg -> RELOAD slot (f reg)
+
+ isJumpishInstr i
+ = case i of
+ Instr instr -> isJumpishInstr instr
+ _ -> False
+
+ jumpDestsOfInstr i
+ = case i of
+ Instr instr -> jumpDestsOfInstr instr
+ _ -> []
+
+ patchJumpInstr i f
+ = case i of
+ Instr instr -> Instr (patchJumpInstr instr f)
+ _ -> i
+
+ -- SPILL/RELOAD are turned into real instructions later (see
+ -- stripLiveBlock), so the wrapper never makes them itself.
+ mkSpillInstr = error "mkSpillInstr[InstrSR]: Not making SPILL meta-instr"
+ mkLoadInstr = error "mkLoadInstr[InstrSR]: Not making LOAD meta-instr"
+
+ takeDeltaInstr i
+ = case i of
+ Instr instr -> takeDeltaInstr instr
+ _ -> Nothing
+
+ isMetaInstr i
+ = case i of
+ Instr instr -> isMetaInstr instr
+ _ -> False
+
+ mkRegRegMoveInstr platform r1 r2
+ = Instr (mkRegRegMoveInstr platform r1 r2)
+
+ takeRegRegMoveInstr i
+ = case i of
+ Instr instr -> takeRegRegMoveInstr instr
+ _ -> Nothing
+
+ mkJumpInstr target = map Instr (mkJumpInstr target)
+
+ mkStackAllocInstr platform amount =
+ Instr <$> mkStackAllocInstr platform amount
+
+ mkStackDeallocInstr platform amount =
+ Instr <$> mkStackDeallocInstr platform amount
+
+
+-- | An instruction with liveness information.
+-- 'Nothing' means no liveness annotation has been attached (e.g. for
+-- meta instructions, see 'liveness1').
+data LiveInstr instr
+ = LiveInstr (InstrSR instr) (Maybe Liveness)
+
+-- | Liveness information.
+-- The regs which die are ones which are no longer live in the *next* instruction
+-- in this sequence.
+-- (NB. if the instruction is a jump, these registers might still be live
+-- at the jump target(s) - you have to check the liveness at the destination
+-- block to find out).
+
+data Liveness
+ = Liveness
+ { liveBorn :: RegSet -- ^ registers born in this instruction (written to for first time).
+ , liveDieRead :: RegSet -- ^ registers that died because they were read for the last time.
+ , liveDieWrite :: RegSet } -- ^ registers that died because they were clobbered by something.
+
+
+-- | Stash regs live on entry to each basic block in the info part of the cmm code.
+data LiveInfo
+ = LiveInfo
+ (LabelMap RawCmmStatics) -- cmm info table static stuff
+ [BlockId] -- entry points (first one is the
+ -- entry point for the proc).
+ (BlockMap RegSet) -- argument locals live on entry to this block
+ (BlockMap IntSet) -- stack slots live on entry to this block
+
+
+-- | A basic block with liveness information.
+type LiveBasicBlock instr
+ = GenBasicBlock (LiveInstr instr)
+
+
+-- Render SPILL/RELOAD with explicit slot numbers, for allocator debugging.
+instance Outputable instr
+ => Outputable (InstrSR instr) where
+
+ ppr (Instr realInstr)
+ = ppr realInstr
+
+ ppr (SPILL reg slot)
+ = hcat [
+ text "\tSPILL",
+ char ' ',
+ ppr reg,
+ comma,
+ text "SLOT" <> parens (int slot)]
+
+ ppr (RELOAD slot reg)
+ = hcat [
+ text "\tRELOAD",
+ char ' ',
+ text "SLOT" <> parens (int slot),
+ comma,
+ ppr reg]
+
+-- Print the instruction followed by its born/dying reg sets, one per line;
+-- empty sets are suppressed.
+instance Outputable instr
+ => Outputable (LiveInstr instr) where
+
+ ppr (LiveInstr instr Nothing)
+ = ppr instr
+
+ ppr (LiveInstr instr (Just live))
+ = ppr instr
+ $$ (nest 8
+ $ vcat
+ [ pprRegs (text "# born: ") (liveBorn live)
+ , pprRegs (text "# r_dying: ") (liveDieRead live)
+ , pprRegs (text "# w_dying: ") (liveDieWrite live) ]
+ $+$ space)
+
+ where pprRegs :: SDoc -> RegSet -> SDoc
+ pprRegs name regs
+ | isEmptyUniqSet regs = empty
+ | otherwise = name <>
+ (pprUFM (getUniqSet regs) (hcat . punctuate space . map ppr))
+
+instance Outputable LiveInfo where
+ ppr (LiveInfo mb_static entryIds liveVRegsOnEntry liveSlotsOnEntry)
+ = (ppr mb_static)
+ $$ text "# entryIds = " <> ppr entryIds
+ $$ text "# liveVRegsOnEntry = " <> ppr liveVRegsOnEntry
+ $$ text "# liveSlotsOnEntry = " <> text (show liveSlotsOnEntry)
+
+
+
+-- | map a function across all the basic blocks in this code
+--
+-- Implemented via the monadic version run in the trivial () state monad.
+mapBlockTop
+ :: (LiveBasicBlock instr -> LiveBasicBlock instr)
+ -> LiveCmmDecl statics instr -> LiveCmmDecl statics instr
+
+mapBlockTop f cmm
+ = evalState (mapBlockTopM (\x -> return $ f x) cmm) ()
+
+
+-- | map a function across all the basic blocks in this code (monadic version)
+--
+-- Data declarations are returned untouched; only proc blocks are mapped.
+mapBlockTopM
+ :: Monad m
+ => (LiveBasicBlock instr -> m (LiveBasicBlock instr))
+ -> LiveCmmDecl statics instr -> m (LiveCmmDecl statics instr)
+
+mapBlockTopM _ cmm@(CmmData{})
+ = return cmm
+
+mapBlockTopM f (CmmProc header label live sccs)
+ = do sccs' <- mapM (mapSCCM f) sccs
+ return $ CmmProc header label live sccs'
+
+-- | Monadic map over the payload(s) of a strongly connected component.
+mapSCCM :: Monad m => (a -> m b) -> SCC a -> m (SCC b)
+mapSCCM f (AcyclicSCC x)
+ = do x' <- f x
+ return $ AcyclicSCC x'
+
+mapSCCM f (CyclicSCC xs)
+ = do xs' <- mapM f xs
+ return $ CyclicSCC xs'
+
+
+-- map a function across all the basic blocks in this code
+-- (ListGraph variant; implemented via the monadic version below)
+mapGenBlockTop
+ :: (GenBasicBlock i -> GenBasicBlock i)
+ -> (GenCmmDecl d h (ListGraph i) -> GenCmmDecl d h (ListGraph i))
+
+mapGenBlockTop f cmm
+ = evalState (mapGenBlockTopM (\x -> return $ f x) cmm) ()
+
+
+-- | map a function across all the basic blocks in this code (monadic version)
+--
+-- Data declarations are returned untouched.
+mapGenBlockTopM
+ :: Monad m
+ => (GenBasicBlock i -> m (GenBasicBlock i))
+ -> (GenCmmDecl d h (ListGraph i) -> m (GenCmmDecl d h (ListGraph i)))
+
+mapGenBlockTopM _ cmm@(CmmData{})
+ = return cmm
+
+mapGenBlockTopM f (CmmProc header label live (ListGraph blocks))
+ = do blocks' <- mapM f blocks
+ return $ CmmProc header label live (ListGraph blocks')
+
+
+-- | Slurp out the list of register conflicts and reg-reg moves from this top level thing.
+-- Slurping of conflicts and moves is wrapped up together so we don't have
+-- to make two passes over the same code when we want to build the graph.
+--
+slurpConflicts
+ :: Instruction instr
+ => LiveCmmDecl statics instr
+ -> (Bag (UniqSet Reg), Bag (Reg, Reg))
+
+slurpConflicts live
+ = slurpCmm (emptyBag, emptyBag) live
+
+ where slurpCmm rs CmmData{} = rs
+ slurpCmm rs (CmmProc info _ _ sccs)
+ = foldl' (slurpSCC info) rs sccs
+
+ slurpSCC info rs (AcyclicSCC b)
+ = slurpBlock info rs b
+
+ slurpSCC info rs (CyclicSCC bs)
+ = foldl' (slurpBlock info) rs bs
+
+ -- every block is expected to have its live-on-entry set recorded
+ -- in the LiveInfo; a missing entry means liveness wasn't computed.
+ slurpBlock info rs (BasicBlock blockId instrs)
+ | LiveInfo _ _ blockLive _ <- info
+ , Just rsLiveEntry <- mapLookup blockId blockLive
+ , (conflicts, moves) <- slurpLIs rsLiveEntry rs instrs
+ = (consBag rsLiveEntry conflicts, moves)
+
+ | otherwise
+ = panic "Liveness.slurpConflicts: bad block"
+
+ slurpLIs rsLive (conflicts, moves) []
+ = (consBag rsLive conflicts, moves)
+
+ slurpLIs rsLive rs (LiveInstr _ Nothing : lis)
+ = slurpLIs rsLive rs lis
+
+ slurpLIs rsLiveEntry (conflicts, moves) (LiveInstr instr (Just live) : lis)
+ = let
+ -- regs that die because they are read for the last time at the start of an instruction
+ -- are not live across it.
+ rsLiveAcross = rsLiveEntry `minusUniqSet` (liveDieRead live)
+
+ -- regs live on entry to the next instruction.
+ -- be careful of orphans, make sure to delete dying regs _after_ unioning
+ -- in the ones that are born here.
+ rsLiveNext = (rsLiveAcross `unionUniqSets` (liveBorn live))
+ `minusUniqSet` (liveDieWrite live)
+
+ -- orphan vregs are the ones that die in the same instruction they are born in.
+ -- these are likely to be results that are never used, but we still
+ -- need to assign a hreg to them..
+ rsOrphans = intersectUniqSets
+ (liveBorn live)
+ (unionUniqSets (liveDieWrite live) (liveDieRead live))
+
+ --
+ rsConflicts = unionUniqSets rsLiveNext rsOrphans
+
+ in case takeRegRegMoveInstr instr of
+ Just rr -> slurpLIs rsLiveNext
+ ( consBag rsConflicts conflicts
+ , consBag rr moves) lis
+
+ Nothing -> slurpLIs rsLiveNext
+ ( consBag rsConflicts conflicts
+ , moves) lis
+
+
+-- | For spill\/reloads
+--
+-- SPILL v1, slot1
+-- ...
+-- RELOAD slot1, v2
+--
+-- If we can arrange that v1 and v2 are allocated to the same hreg it's more likely
+-- the spill\/reload instrs can be cleaned and replaced by a nop reg-reg move.
+--
+--
+slurpReloadCoalesce
+ :: forall statics instr. Instruction instr
+ => LiveCmmDecl statics instr
+ -> Bag (Reg, Reg)
+
+slurpReloadCoalesce live
+ = slurpCmm emptyBag live
+
+ where
+ slurpCmm :: Bag (Reg, Reg)
+ -> GenCmmDecl t t1 [SCC (LiveBasicBlock instr)]
+ -> Bag (Reg, Reg)
+ slurpCmm cs CmmData{} = cs
+ slurpCmm cs (CmmProc _ _ _ sccs)
+ = slurpComp cs (flattenSCCs sccs)
+
+ slurpComp :: Bag (Reg, Reg)
+ -> [LiveBasicBlock instr]
+ -> Bag (Reg, Reg)
+ slurpComp cs blocks
+ = let (moveBags, _) = runState (slurpCompM blocks) emptyUFM
+ in unionManyBags (cs : moveBags)
+
+ slurpCompM :: [LiveBasicBlock instr]
+ -> State (UniqFM [UniqFM Reg]) [Bag (Reg, Reg)]
+ slurpCompM blocks
+ = do -- run the analysis once to record the mapping across jumps.
+ mapM_ (slurpBlock False) blocks
+
+ -- run it a second time while using the information from the last pass.
+ -- We /could/ run this many more times to deal with graphical control
+ -- flow and propagating info across multiple jumps, but it's probably
+ -- not worth the trouble.
+ mapM (slurpBlock True) blocks
+
+ slurpBlock :: Bool -> LiveBasicBlock instr
+ -> State (UniqFM [UniqFM Reg]) (Bag (Reg, Reg))
+ slurpBlock propagate (BasicBlock blockId instrs)
+ = do -- grab the slot map for entry to this block
+ slotMap <- if propagate
+ then getSlotMap blockId
+ else return emptyUFM
+
+ (_, mMoves) <- mapAccumLM slurpLI slotMap instrs
+ return $ listToBag $ catMaybes mMoves
+
+ slurpLI :: UniqFM Reg -- current slotMap
+ -> LiveInstr instr
+ -> State (UniqFM [UniqFM Reg]) -- blockId -> [slot -> reg]
+ -- for tracking slotMaps across jumps
+
+ ( UniqFM Reg -- new slotMap
+ , Maybe (Reg, Reg)) -- maybe a new coalesce edge
+
+ slurpLI slotMap li
+
+ -- remember what reg was stored into the slot
+ | LiveInstr (SPILL reg slot) _ <- li
+ , slotMap' <- addToUFM slotMap slot reg
+ = return (slotMap', Nothing)
+
+ -- add an edge between this reg and the last one stored into the slot
+ | LiveInstr (RELOAD slot reg) _ <- li
+ = case lookupUFM slotMap slot of
+ Just reg2
+ | reg /= reg2 -> return (slotMap, Just (reg, reg2))
+ | otherwise -> return (slotMap, Nothing)
+
+ Nothing -> return (slotMap, Nothing)
+
+ -- if we hit a jump, remember the current slotMap
+ | LiveInstr (Instr instr) _ <- li
+ , targets <- jumpDestsOfInstr instr
+ , not $ null targets
+ = do mapM_ (accSlotMap slotMap) targets
+ return (slotMap, Nothing)
+
+ | otherwise
+ = return (slotMap, Nothing)
+
+ -- record a slotmap for an in edge to this block
+ accSlotMap slotMap blockId
+ = modify (\s -> addToUFM_C (++) s blockId [slotMap])
+
+ -- work out the slot map on entry to this block
+ -- if we have slot maps for multiple in-edges then we need to merge them.
+ getSlotMap blockId
+ = do map <- get
+ let slotMaps = fromMaybe [] (lookupUFM map blockId)
+ return $ foldr mergeSlotMaps emptyUFM slotMaps
+
+ -- keep only slot entries on which all the in-edge maps agree.
+ mergeSlotMaps :: UniqFM Reg -> UniqFM Reg -> UniqFM Reg
+ mergeSlotMaps map1 map2
+ = listToUFM
+ $ [ (k, r1)
+ | (k, r1) <- nonDetUFMToList map1
+ -- This is non-deterministic but we do not
+ -- currently support deterministic code-generation.
+ -- See Note [Unique Determinism and code generation]
+ , case lookupUFM map2 k of
+ Nothing -> False
+ Just r2 -> r1 == r2 ]
+
+
+-- | Strip away liveness information, yielding NatCmmDecl
+stripLive
+ :: (Outputable statics, Outputable instr, Instruction instr)
+ => DynFlags
+ -> LiveCmmDecl statics instr
+ -> NatCmmDecl statics instr
+
+stripLive dflags live
+ = stripCmm live
+
+ where stripCmm :: (Outputable statics, Outputable instr, Instruction instr)
+ => LiveCmmDecl statics instr -> NatCmmDecl statics instr
+ stripCmm (CmmData sec ds) = CmmData sec ds
+ stripCmm (CmmProc (LiveInfo info (first_id:_) _ _) label live sccs)
+ = let final_blocks = flattenSCCs sccs
+
+ -- make sure the block that was first in the input list
+ -- stays at the front of the output. This is the entry point
+ -- of the proc, and it needs to come first.
+ ((first':_), rest')
+ = partition ((== first_id) . blockId) final_blocks
+
+ in CmmProc info label live
+ (ListGraph $ map (stripLiveBlock dflags) $ first' : rest')
+
+ -- If the proc has blocks but we don't know what the first one was, then we're dead.
+ stripCmm proc
+ = pprPanic "RegAlloc.Liveness.stripLive: no first_id on proc" (ppr proc)
+
+-- | Strip away liveness information from a basic block,
+-- and make real spill instructions out of SPILL, RELOAD pseudos along the way.
+
+stripLiveBlock
+ :: Instruction instr
+ => DynFlags
+ -> LiveBasicBlock instr
+ -> NatBasicBlock instr
+
+stripLiveBlock dflags (BasicBlock i lis)
+ = BasicBlock i instrs'
+
+ -- the State carries the current stack delta, which SPILL/RELOAD
+ -- lowering needs to address their stack slots.
+ where (instrs', _)
+ = runState (spillNat [] lis) 0
+
+ spillNat acc []
+ = return (reverse acc)
+
+ spillNat acc (LiveInstr (SPILL reg slot) _ : instrs)
+ = do delta <- get
+ spillNat (mkSpillInstr dflags reg delta slot : acc) instrs
+
+ spillNat acc (LiveInstr (RELOAD slot reg) _ : instrs)
+ = do delta <- get
+ spillNat (mkLoadInstr dflags reg delta slot : acc) instrs
+
+ -- delta instructions update the tracked stack delta and are dropped.
+ spillNat acc (LiveInstr (Instr instr) _ : instrs)
+ | Just i <- takeDeltaInstr instr
+ = do put i
+ spillNat acc instrs
+
+ spillNat acc (LiveInstr (Instr instr) _ : instrs)
+ = spillNat (instr : acc) instrs
+
+
+-- | Erase Delta instructions.
+-- Keeps every instruction for which 'takeDeltaInstr' yields Nothing.
+
+eraseDeltasLive
+ :: Instruction instr
+ => LiveCmmDecl statics instr
+ -> LiveCmmDecl statics instr
+
+eraseDeltasLive cmm
+ = mapBlockTop eraseBlock cmm
+ where
+ eraseBlock (BasicBlock id lis)
+ = BasicBlock id
+ $ filter (\(LiveInstr i _) -> not $ isJust $ takeDeltaInstr i)
+ $ lis
+
+
+-- | Patch the registers in this code according to this register mapping.
+-- also erase reg -> reg moves when the reg is the same.
+-- also erase reg -> reg moves when the destination dies in this instr.
+patchEraseLive
+ :: Instruction instr
+ => (Reg -> Reg)
+ -> LiveCmmDecl statics instr -> LiveCmmDecl statics instr
+
+patchEraseLive patchF cmm
+ = patchCmm cmm
+ where
+ patchCmm cmm@CmmData{} = cmm
+
+ patchCmm (CmmProc info label live sccs)
+ | LiveInfo static id blockMap mLiveSlots <- info
+ = let
+ patchRegSet set = mkUniqSet $ map patchF $ nonDetEltsUFM set
+ -- See Note [Unique Determinism and code generation]
+ blockMap' = mapMap (patchRegSet . getUniqSet) blockMap
+
+ info' = LiveInfo static id blockMap' mLiveSlots
+ in CmmProc info' label live $ map patchSCC sccs
+
+ patchSCC (AcyclicSCC b) = AcyclicSCC (patchBlock b)
+ patchSCC (CyclicSCC bs) = CyclicSCC (map patchBlock bs)
+
+ patchBlock (BasicBlock id lis)
+ = BasicBlock id $ patchInstrs lis
+
+ patchInstrs [] = []
+ patchInstrs (li : lis)
+
+ -- the move is redundant after patching: drop it entirely.
+ | LiveInstr i (Just live) <- li'
+ , Just (r1, r2) <- takeRegRegMoveInstr i
+ , eatMe r1 r2 live
+ = patchInstrs lis
+
+ | otherwise
+ = li' : patchInstrs lis
+
+ where li' = patchRegsLiveInstr patchF li
+
+ eatMe r1 r2 live
+ -- source and destination regs are the same
+ | r1 == r2 = True
+
+ -- destination reg is never used
+ | elementOfUniqSet r2 (liveBorn live)
+ , elementOfUniqSet r2 (liveDieRead live) || elementOfUniqSet r2 (liveDieWrite live)
+ = True
+
+ | otherwise = False
+
+
+-- | Patch registers in this LiveInstr, including the liveness information.
+-- Both the instruction and its born/dying reg sets are rewritten.
+--
+patchRegsLiveInstr
+ :: Instruction instr
+ => (Reg -> Reg)
+ -> LiveInstr instr -> LiveInstr instr
+
+patchRegsLiveInstr patchF li
+ = case li of
+ LiveInstr instr Nothing
+ -> LiveInstr (patchRegsOfInstr instr patchF) Nothing
+
+ LiveInstr instr (Just live)
+ -> LiveInstr
+ (patchRegsOfInstr instr patchF)
+ (Just live
+ { -- WARNING: have to go via lists here because patchF changes the uniq in the Reg
+ liveBorn = mapUniqSet patchF $ liveBorn live
+ , liveDieRead = mapUniqSet patchF $ liveDieRead live
+ , liveDieWrite = mapUniqSet patchF $ liveDieWrite live })
+ -- See Note [Unique Determinism and code generation]
+
+
+--------------------------------------------------------------------------------
+-- | Convert a NatCmmDecl to a LiveCmmDecl, with liveness information
+-- (wraps the instructions via natCmmTopToLive, then runs regLiveness).
+
+cmmTopLiveness
+ :: (Outputable instr, Instruction instr)
+ => Maybe CFG -> Platform
+ -> NatCmmDecl statics instr
+ -> UniqSM (LiveCmmDecl statics instr)
+cmmTopLiveness cfg platform cmm
+ = regLiveness platform $ natCmmTopToLive cfg cmm
+
+-- | Wrap a NatCmmDecl's instructions as LiveInstrs with no liveness
+-- computed yet, grouping the blocks into SCCs and (when a CFG is given)
+-- dropping entry ids and info tables for unreachable blocks.
+natCmmTopToLive
+ :: (Instruction instr, Outputable instr)
+ => Maybe CFG -> NatCmmDecl statics instr
+ -> LiveCmmDecl statics instr
+
+natCmmTopToLive _ (CmmData i d)
+ = CmmData i d
+
+natCmmTopToLive _ (CmmProc info lbl live (ListGraph []))
+ = CmmProc (LiveInfo info [] mapEmpty mapEmpty) lbl live []
+
+natCmmTopToLive mCfg proc@(CmmProc info lbl live (ListGraph blocks@(first : _)))
+ = CmmProc (LiveInfo info' (first_id : entry_ids) mapEmpty mapEmpty)
+ lbl live sccsLive
+ where
+ first_id = blockId first
+ all_entry_ids = entryBlocks proc
+ sccs = sccBlocks blocks all_entry_ids mCfg
+ sccsLive = map (fmap (\(BasicBlock l instrs) ->
+ BasicBlock l (map (\i -> LiveInstr (Instr i) Nothing) instrs)))
+ $ sccs
+
+ entry_ids = filter (reachable_node) .
+ filter (/= first_id) $ all_entry_ids
+ info' = mapFilterWithKey (\node _ -> reachable_node node) info
+ reachable_node
+ | Just cfg <- mCfg
+ = hasNode cfg
+ | otherwise
+ = const True
+
+--
+-- Compute the liveness graph of the set of basic blocks. Important:
+-- we also discard any unreachable code here, starting from the entry
+-- points (the first block in the list, and any blocks with info
+-- tables). Unreachable code arises when code blocks are orphaned in
+-- earlier optimisation passes, and may confuse the register allocator
+-- by referring to registers that are not initialised. It's easy to
+-- discard the unreachable code as part of the SCC pass, so that's
+-- exactly what we do. (#7574)
+--
+-- | Group the blocks into strongly connected components, keeping only
+-- those reachable from the given entry points.
+sccBlocks
+ :: forall instr . Instruction instr
+ => [NatBasicBlock instr]
+ -> [BlockId]
+ -> Maybe CFG
+ -> [SCC (NatBasicBlock instr)]
+
+sccBlocks blocks entries mcfg = map (fmap node_payload) sccs
+ where
+ nodes :: [ Node BlockId (NatBasicBlock instr) ]
+ nodes = [ DigraphNode block id (getOutEdges instrs)
+ | block@(BasicBlock id instrs) <- blocks ]
+
+ g1 = graphFromEdgedVerticesUniq nodes
+
+ reachable :: LabelSet
+ reachable
+ | Just cfg <- mcfg
+ -- Our CFG only contains reachable nodes by construction at this point.
+ = setFromList $ getCfgNodes cfg
+ | otherwise
+ = setFromList $ [ node_key node | node <- reachablesG g1 roots ]
+
+ g2 = graphFromEdgedVerticesUniq [ node | node <- nodes
+ , node_key node
+ `setMember` reachable ]
+
+ sccs = stronglyConnCompG g2
+
+ getOutEdges :: Instruction instr => [instr] -> [BlockId]
+ getOutEdges instrs = concatMap jumpDestsOfInstr instrs
+
+ -- This is truly ugly, but I don't see a good alternative.
+ -- Digraph just has the wrong API. We want to identify nodes
+ -- by their keys (BlockId), but Digraph requires the whole
+ -- node: (NatBasicBlock, BlockId, [BlockId]). This takes
+ -- advantage of the fact that Digraph only looks at the key,
+ -- even though it asks for the whole triple.
+ roots = [DigraphNode (panic "sccBlocks") b (panic "sccBlocks")
+ | b <- entries ]
+
+--------------------------------------------------------------------------------
+-- Annotate code with register liveness information
+--
+
+-- | Annotate a LiveCmmDecl with register liveness information,
+-- recording the live-on-entry reg set for each block in the LiveInfo.
+regLiveness
+ :: (Outputable instr, Instruction instr)
+ => Platform
+ -> LiveCmmDecl statics instr
+ -> UniqSM (LiveCmmDecl statics instr)
+
+regLiveness _ (CmmData i d)
+ = return $ CmmData i d
+
+regLiveness _ (CmmProc info lbl live [])
+ | LiveInfo static mFirst _ _ <- info
+ = return $ CmmProc
+ (LiveInfo static mFirst mapEmpty mapEmpty)
+ lbl live []
+
+regLiveness platform (CmmProc info lbl live sccs)
+ | LiveInfo static mFirst _ liveSlotsOnEntry <- info
+ = let (ann_sccs, block_live) = computeLiveness platform sccs
+
+ in return $ CmmProc (LiveInfo static mFirst block_live liveSlotsOnEntry)
+ lbl live ann_sccs
+
+
+-- -----------------------------------------------------------------------------
+-- | Check ordering of Blocks
+-- The computeLiveness function requires SCCs to be in reverse
+-- dependent order. If they're not the liveness information will be
+-- wrong, and we'll get a bad allocation. Better to check for this
+-- precondition explicitly or some other poor sucker will waste a
+-- day staring at bad assembly code..
+--
+-- | Returns the first jump destination that appears before the block
+-- defining it in the SCC ordering, or Nothing if the order is good.
+checkIsReverseDependent
+ :: Instruction instr
+ => [SCC (LiveBasicBlock instr)] -- ^ SCCs of blocks that we're about to run the liveness determinator on.
+ -> Maybe BlockId -- ^ BlockIds that fail the test (if any)
+
+checkIsReverseDependent sccs'
+ = go emptyUniqSet sccs'
+
+ where go _ []
+ = Nothing
+
+ go blocksSeen (AcyclicSCC block : sccs)
+ = let dests = slurpJumpDestsOfBlock block
+ blocksSeen' = unionUniqSets blocksSeen $ mkUniqSet [blockId block]
+ badDests = dests `minusUniqSet` blocksSeen'
+ in case nonDetEltsUniqSet badDests of
+ -- See Note [Unique Determinism and code generation]
+ [] -> go blocksSeen' sccs
+ bad : _ -> Just bad
+
+ go blocksSeen (CyclicSCC blocks : sccs)
+ = let dests = unionManyUniqSets $ map slurpJumpDestsOfBlock blocks
+ blocksSeen' = unionUniqSets blocksSeen $ mkUniqSet $ map blockId blocks
+ badDests = dests `minusUniqSet` blocksSeen'
+ in case nonDetEltsUniqSet badDests of
+ -- See Note [Unique Determinism and code generation]
+ [] -> go blocksSeen' sccs
+ bad : _ -> Just bad
+
+ slurpJumpDestsOfBlock (BasicBlock _ instrs)
+ = unionManyUniqSets
+ $ map (mkUniqSet . jumpDestsOfInstr)
+ [ i | LiveInstr i _ <- instrs]
+
+
+-- | If we've computed liveness info for this code already we have to reverse
+-- the SCCs in each top to get them back to the right order so we can do it again.
+reverseBlocksInTops :: LiveCmmDecl statics instr -> LiveCmmDecl statics instr
+reverseBlocksInTops top
+ = case top of
+ CmmData{} -> top
+ CmmProc info lbl live sccs -> CmmProc info lbl live (reverse sccs)
+
+
+-- | Computing liveness
+--
+-- On entry, the SCCs must be in "reverse" order: later blocks may transfer
+-- control to earlier ones only, else `panic`.
+--
+-- The SCCs returned are in the *opposite* order, which is exactly what we
+-- want for the next pass.
+--
+-- The ordering precondition is checked eagerly so we fail fast with a
+-- useful message instead of silently computing wrong liveness.
+computeLiveness
+ :: (Outputable instr, Instruction instr)
+ => Platform
+ -> [SCC (LiveBasicBlock instr)]
+ -> ([SCC (LiveBasicBlock instr)], -- instructions annotated with list of registers
+ -- which are "dead after this instruction".
+ BlockMap RegSet) -- blocks annotated with set of live registers
+ -- on entry to the block.
+
+computeLiveness platform sccs
+ = case checkIsReverseDependent sccs of
+ Nothing -> livenessSCCs platform mapEmpty [] sccs
+ Just bad -> pprPanic "RegAlloc.Liveness.computeLiveness"
+ (vcat [ text "SCCs aren't in reverse dependent order"
+ , text "bad blockId" <+> ppr bad
+ , ppr sccs])
+
+-- | Compute liveness for each SCC in turn; cyclic SCCs are iterated
+-- to a fixpoint of the block map.
+livenessSCCs
+ :: Instruction instr
+ => Platform
+ -> BlockMap RegSet
+ -> [SCC (LiveBasicBlock instr)] -- accum
+ -> [SCC (LiveBasicBlock instr)]
+ -> ( [SCC (LiveBasicBlock instr)]
+ , BlockMap RegSet)
+
+livenessSCCs _ blockmap done []
+ = (done, blockmap)
+
+livenessSCCs platform blockmap done (AcyclicSCC block : sccs)
+ = let (blockmap', block') = livenessBlock platform blockmap block
+ in livenessSCCs platform blockmap' (AcyclicSCC block' : done) sccs
+
+livenessSCCs platform blockmap done
+ (CyclicSCC blocks : sccs) =
+ livenessSCCs platform blockmap' (CyclicSCC blocks':done) sccs
+ where (blockmap', blocks')
+ = iterateUntilUnchanged linearLiveness equalBlockMaps
+ blockmap blocks
+
+ -- keep re-running the analysis until the block map stops changing.
+ iterateUntilUnchanged
+ :: (a -> b -> (a,c)) -> (a -> a -> Bool)
+ -> a -> b
+ -> (a,c)
+
+ iterateUntilUnchanged f eq a b
+ = head $
+ concatMap tail $
+ groupBy (\(a1, _) (a2, _) -> eq a1 a2) $
+ iterate (\(a, _) -> f a b) $
+ (a, panic "RegLiveness.livenessSCCs")
+
+
+ linearLiveness
+ :: Instruction instr
+ => BlockMap RegSet -> [LiveBasicBlock instr]
+ -> (BlockMap RegSet, [LiveBasicBlock instr])
+
+ linearLiveness = mapAccumL (livenessBlock platform)
+
+ -- probably the least efficient way to compare two
+ -- BlockMaps for equality.
+ equalBlockMaps a b
+ = a' == b'
+ where a' = map f $ mapToList a
+ b' = map f $ mapToList b
+ f (key,elt) = (key, nonDetEltsUniqSet elt)
+ -- See Note [Unique Determinism and code generation]
+
+
+
+-- | Annotate a basic block with register liveness information.
+--
+livenessBlock
+ :: Instruction instr
+ => Platform
+ -> BlockMap RegSet
+ -> LiveBasicBlock instr
+ -> (BlockMap RegSet, LiveBasicBlock instr)
+
+livenessBlock platform blockmap (BasicBlock block_id instrs)
+ = let
+ -- backwards pass computes deaths and the live-on-entry set;
+ -- the forwards pass then fills in where regs are born.
+ (regsLiveOnEntry, instrs1)
+ = livenessBack platform emptyUniqSet blockmap [] (reverse instrs)
+ blockmap' = mapInsert block_id regsLiveOnEntry blockmap
+
+ instrs2 = livenessForward platform regsLiveOnEntry instrs1
+
+ output = BasicBlock block_id instrs2
+
+ in ( blockmap', output)
+
+-- | Calculate liveness going forwards,
+-- filling in when regs are born
+
+livenessForward
+ :: Instruction instr
+ => Platform
+ -> RegSet -- regs live on this instr
+ -> [LiveInstr instr] -> [LiveInstr instr]
+
+livenessForward _ _ [] = []
+livenessForward platform rsLiveEntry (li@(LiveInstr instr mLive) : lis)
+ | Just live <- mLive
+ = let
+ RU _ written = regUsageOfInstr platform instr
+ -- Regs that are written to but weren't live on entry to this instruction
+ -- are recorded as being born here.
+ rsBorn = mkUniqSet
+ $ filter (\r -> not $ elementOfUniqSet r rsLiveEntry) written
+
+ rsLiveNext = (rsLiveEntry `unionUniqSets` rsBorn)
+ `minusUniqSet` (liveDieRead live)
+ `minusUniqSet` (liveDieWrite live)
+
+ in LiveInstr instr (Just live { liveBorn = rsBorn })
+ : livenessForward platform rsLiveNext lis
+
+ -- unannotated instrs (e.g. meta instrs) are passed through untouched.
+ | otherwise
+ = li : livenessForward platform rsLiveEntry lis
+
+
+-- | Calculate liveness going backwards,
+-- filling in when regs die, and what regs are live across each instruction
+--
+-- NB: the input instruction list is in reverse order (see livenessBlock);
+-- prepending onto the accumulator restores forward order.
+
+livenessBack
+ :: Instruction instr
+ => Platform
+ -> RegSet -- regs live on this instr
+ -> BlockMap RegSet -- regs live on entry to other BBs
+ -> [LiveInstr instr] -- instructions (accum)
+ -> [LiveInstr instr] -- instructions
+ -> (RegSet, [LiveInstr instr])
+
+livenessBack _ liveregs _ done [] = (liveregs, done)
+
+livenessBack platform liveregs blockmap acc (instr : instrs)
+ = let (liveregs', instr') = liveness1 platform liveregs blockmap instr
+ in livenessBack platform liveregs' blockmap (instr' : acc) instrs
+
+
+-- don't bother tagging comments or deltas with liveness
+-- | Annotate a single instruction with its dying regs, threading the
+-- backwards live set; branches also union in liveness from their targets.
+liveness1
+ :: Instruction instr
+ => Platform
+ -> RegSet
+ -> BlockMap RegSet
+ -> LiveInstr instr
+ -> (RegSet, LiveInstr instr)
+
+liveness1 _ liveregs _ (LiveInstr instr _)
+ | isMetaInstr instr
+ = (liveregs, LiveInstr instr Nothing)
+
+liveness1 platform liveregs blockmap (LiveInstr instr _)
+
+ | not_a_branch
+ = (liveregs1, LiveInstr instr
+ (Just $ Liveness
+ { liveBorn = emptyUniqSet
+ , liveDieRead = mkUniqSet r_dying
+ , liveDieWrite = mkUniqSet w_dying }))
+
+ | otherwise
+ = (liveregs_br, LiveInstr instr
+ (Just $ Liveness
+ { liveBorn = emptyUniqSet
+ , liveDieRead = mkUniqSet r_dying_br
+ , liveDieWrite = mkUniqSet w_dying }))
+
+ where
+ !(RU read written) = regUsageOfInstr platform instr
+
+ -- registers that were written here are dead going backwards.
+ -- registers that were read here are live going backwards.
+ liveregs1 = (liveregs `delListFromUniqSet` written)
+ `addListToUniqSet` read
+
+ -- registers that are not live beyond this point, are recorded
+ -- as dying here.
+ r_dying = [ reg | reg <- read, reg `notElem` written,
+ not (elementOfUniqSet reg liveregs) ]
+
+ w_dying = [ reg | reg <- written,
+ not (elementOfUniqSet reg liveregs) ]
+
+ -- union in the live regs from all the jump destinations of this
+ -- instruction.
+ targets = jumpDestsOfInstr instr -- where we go from here
+ not_a_branch = null targets
+
+ targetLiveRegs target
+ = case mapLookup target blockmap of
+ Just ra -> ra
+ Nothing -> emptyRegSet
+
+ live_from_branch = unionManyUniqSets (map targetLiveRegs targets)
+
+ liveregs_br = liveregs1 `unionUniqSets` live_from_branch
+
+ -- registers that are live only in the branch targets should
+ -- be listed as dying here.
+ live_branch_only = live_from_branch `minusUniqSet` liveregs
+ r_dying_br = nonDetEltsUniqSet (mkUniqSet r_dying `unionUniqSets`
+ live_branch_only)
+ -- See Note [Unique Determinism and code generation]
diff --git a/compiler/GHC/CmmToAsm/Reg/Target.hs b/compiler/GHC/CmmToAsm/Reg/Target.hs
new file mode 100644
index 0000000000..a45d70c826
--- /dev/null
+++ b/compiler/GHC/CmmToAsm/Reg/Target.hs
@@ -0,0 +1,135 @@
+{-# LANGUAGE CPP #-}
+-- | Hard wired things related to registers.
+-- This module prevents the native code generator from being able to
+-- emit code for non-host architectures.
+--
+-- TODO: Do a better job of the overloading, and eliminate this module.
+-- We'd probably do better with a Register type class, and hook this to
+-- Instruction somehow.
+--
+-- TODO: We should also make arch specific versions of GHC.CmmToAsm.Reg.Graph.TrivColorable
+module GHC.CmmToAsm.Reg.Target (
+ targetVirtualRegSqueeze,
+ targetRealRegSqueeze,
+ targetClassOfRealReg,
+ targetMkVirtualReg,
+ targetRegDotColor,
+ targetClassOfReg
+)
+
+where
+
+#include "HsVersions.h"
+
+import GhcPrelude
+
+import GHC.Platform.Reg
+import GHC.Platform.Reg.Class
+import GHC.CmmToAsm.Format
+
+import Outputable
+import Unique
+import GHC.Platform
+
+import qualified GHC.CmmToAsm.X86.Regs as X86
+import qualified GHC.CmmToAsm.X86.RegInfo as X86
+import qualified GHC.CmmToAsm.PPC.Regs as PPC
+import qualified GHC.CmmToAsm.SPARC.Regs as SPARC
+
+-- | Dispatch to the architecture specific @virtualRegSqueeze@ for the
+--   host platform.  Panics for architectures that have no native code
+--   generator implementation.
+targetVirtualRegSqueeze :: Platform -> RegClass -> VirtualReg -> Int
+targetVirtualRegSqueeze platform
+ = case platformArch platform of
+ ArchX86 -> X86.virtualRegSqueeze
+ ArchX86_64 -> X86.virtualRegSqueeze
+ ArchPPC -> PPC.virtualRegSqueeze
+ ArchS390X -> panic "targetVirtualRegSqueeze ArchS390X"
+ ArchSPARC -> SPARC.virtualRegSqueeze
+ ArchSPARC64 -> panic "targetVirtualRegSqueeze ArchSPARC64"
+ ArchPPC_64 _ -> PPC.virtualRegSqueeze
+ ArchARM _ _ _ -> panic "targetVirtualRegSqueeze ArchARM"
+ ArchARM64 -> panic "targetVirtualRegSqueeze ArchARM64"
+ ArchAlpha -> panic "targetVirtualRegSqueeze ArchAlpha"
+ ArchMipseb -> panic "targetVirtualRegSqueeze ArchMipseb"
+ ArchMipsel -> panic "targetVirtualRegSqueeze ArchMipsel"
+ ArchJavaScript-> panic "targetVirtualRegSqueeze ArchJavaScript"
+ ArchUnknown -> panic "targetVirtualRegSqueeze ArchUnknown"
+
+
+-- | Dispatch to the architecture specific @realRegSqueeze@ for the
+--   host platform.  Panics for architectures that have no native code
+--   generator implementation.
+targetRealRegSqueeze :: Platform -> RegClass -> RealReg -> Int
+targetRealRegSqueeze platform
+ = case platformArch platform of
+ ArchX86 -> X86.realRegSqueeze
+ ArchX86_64 -> X86.realRegSqueeze
+ ArchPPC -> PPC.realRegSqueeze
+ ArchS390X -> panic "targetRealRegSqueeze ArchS390X"
+ ArchSPARC -> SPARC.realRegSqueeze
+ ArchSPARC64 -> panic "targetRealRegSqueeze ArchSPARC64"
+ ArchPPC_64 _ -> PPC.realRegSqueeze
+ ArchARM _ _ _ -> panic "targetRealRegSqueeze ArchARM"
+ ArchARM64 -> panic "targetRealRegSqueeze ArchARM64"
+ ArchAlpha -> panic "targetRealRegSqueeze ArchAlpha"
+ ArchMipseb -> panic "targetRealRegSqueeze ArchMipseb"
+ ArchMipsel -> panic "targetRealRegSqueeze ArchMipsel"
+ ArchJavaScript-> panic "targetRealRegSqueeze ArchJavaScript"
+ ArchUnknown -> panic "targetRealRegSqueeze ArchUnknown"
+
+-- | The 'RegClass' of a 'RealReg', via the architecture specific
+--   @classOfRealReg@.  Panics for architectures that have no native
+--   code generator implementation.
+targetClassOfRealReg :: Platform -> RealReg -> RegClass
+targetClassOfRealReg platform
+ = case platformArch platform of
+ ArchX86 -> X86.classOfRealReg platform
+ ArchX86_64 -> X86.classOfRealReg platform
+ ArchPPC -> PPC.classOfRealReg
+ ArchS390X -> panic "targetClassOfRealReg ArchS390X"
+ ArchSPARC -> SPARC.classOfRealReg
+ ArchSPARC64 -> panic "targetClassOfRealReg ArchSPARC64"
+ ArchPPC_64 _ -> PPC.classOfRealReg
+ ArchARM _ _ _ -> panic "targetClassOfRealReg ArchARM"
+ ArchARM64 -> panic "targetClassOfRealReg ArchARM64"
+ ArchAlpha -> panic "targetClassOfRealReg ArchAlpha"
+ ArchMipseb -> panic "targetClassOfRealReg ArchMipseb"
+ ArchMipsel -> panic "targetClassOfRealReg ArchMipsel"
+ ArchJavaScript-> panic "targetClassOfRealReg ArchJavaScript"
+ ArchUnknown -> panic "targetClassOfRealReg ArchUnknown"
+
+-- | Construct a 'VirtualReg' from a 'Unique' and a 'Format' via the
+--   architecture specific @mkVirtualReg@.  Panics for architectures
+--   that have no native code generator implementation.
+targetMkVirtualReg :: Platform -> Unique -> Format -> VirtualReg
+targetMkVirtualReg platform
+ = case platformArch platform of
+ ArchX86 -> X86.mkVirtualReg
+ ArchX86_64 -> X86.mkVirtualReg
+ ArchPPC -> PPC.mkVirtualReg
+ ArchS390X -> panic "targetMkVirtualReg ArchS390X"
+ ArchSPARC -> SPARC.mkVirtualReg
+ ArchSPARC64 -> panic "targetMkVirtualReg ArchSPARC64"
+ ArchPPC_64 _ -> PPC.mkVirtualReg
+ ArchARM _ _ _ -> panic "targetMkVirtualReg ArchARM"
+ ArchARM64 -> panic "targetMkVirtualReg ArchARM64"
+ ArchAlpha -> panic "targetMkVirtualReg ArchAlpha"
+ ArchMipseb -> panic "targetMkVirtualReg ArchMipseb"
+ ArchMipsel -> panic "targetMkVirtualReg ArchMipsel"
+ ArchJavaScript-> panic "targetMkVirtualReg ArchJavaScript"
+ ArchUnknown -> panic "targetMkVirtualReg ArchUnknown"
+
+-- | The colour (as an 'SDoc') to use for a real register, via the
+--   architecture specific @regDotColor@ -- presumably for rendering
+--   register graphs in dot format.  Panics for architectures that
+--   have no native code generator implementation.
+targetRegDotColor :: Platform -> RealReg -> SDoc
+targetRegDotColor platform
+ = case platformArch platform of
+ ArchX86 -> X86.regDotColor platform
+ ArchX86_64 -> X86.regDotColor platform
+ ArchPPC -> PPC.regDotColor
+ ArchS390X -> panic "targetRegDotColor ArchS390X"
+ ArchSPARC -> SPARC.regDotColor
+ ArchSPARC64 -> panic "targetRegDotColor ArchSPARC64"
+ ArchPPC_64 _ -> PPC.regDotColor
+ ArchARM _ _ _ -> panic "targetRegDotColor ArchARM"
+ ArchARM64 -> panic "targetRegDotColor ArchARM64"
+ ArchAlpha -> panic "targetRegDotColor ArchAlpha"
+ ArchMipseb -> panic "targetRegDotColor ArchMipseb"
+ ArchMipsel -> panic "targetRegDotColor ArchMipsel"
+ ArchJavaScript-> panic "targetRegDotColor ArchJavaScript"
+ ArchUnknown -> panic "targetRegDotColor ArchUnknown"
+
+
+-- | The register class of a register: virtual registers carry their
+--   class directly, real registers are classified per architecture.
+targetClassOfReg :: Platform -> Reg -> RegClass
+targetClassOfReg _platform (RegVirtual vr) = classOfVirtualReg vr
+targetClassOfReg platform (RegReal rr) = targetClassOfRealReg platform rr